001: #ifndef __INCLUDE_HTML2TXT_H__
002: #define __INCLUDE_HTML2TXT_H__
003:
004: #include <string.h>
005: #include <memory.h>
006: #include "common.h"
007:
/* Program name and version string. */
static char *h2tprog = "html2txt v1.0";
009:
010:
011:
/* Values for the link option (CHtmlOption::link defaults to SW_NOLINK). */
enum SHOW_LINK {
    SW_NOLINK = 0,
    SW_LINK          /* == 1 */
};
/* Values for the url option (CHtmlOption::url defaults to BASE_NONE). */
enum URL_OPTION {
    BASE_NONE = 0,
    BASE_URL         /* == 1 */
};
020:
021:
022:
/* Number of entries in skip_tag. */
#define SKIPTAG_N 3

/*
 * Tag names in the skip list.
 * NOTE(review): presumably indexed by the `tagi` argument of
 * ch_region()/region_skip() -- confirm against the implementation.
 */
static char *skip_tag[SKIPTAG_N] = { "script", "style", "xml" };
029:
030:
031:
032:
033:
034:
035: class CHtmlOption {
036: public:
037: char cmd[256];
038: char fIn[MAX_PATH],fOut[MAX_PATH];
039: int Hdst;
040: int link;
041: int url;
042: public:
043: CHtmlOption()
044: {
045: memset(cmd,0,sizeof(char)*256);
046: strcpy(fIn,std_in);
047: strcpy(fOut,std_out);
048: Hdst = DST_FILE;
049: link = SW_NOLINK;
050: url = BASE_NONE;
051: }
052: CHtmlOption& operator=(CHtmlOption &opt)
053: {
054: strcpy(cmd,opt.cmd);
055: strcpy(fIn,opt.fIn);
056: strcpy(fOut,opt.fOut);
057: Hdst = opt.Hdst;
058: link = opt.link;
059: url = opt.url;
060: return *this;
061: }
062: };
063:
064:
065:
066:
067:
068:
069: class CHtml2Txt
070: {
071: FILE *OFP;
072: CHtmlOption cmdopt;
073: int Run,tci,fcLen,lineSum,tabSum;
074: int swQuote;
075: char DocTitle[256],BaseURL[MAX_PATH],BaseFile[MAX_PATH];
076:
077: char *Hcode,*Tcode;
078: public:
079:
080: void usage();
081:
082: int Read_arg(int argc,char* argv[],CHtmlOption& cmdopt);
083:
084: CHtml2Txt(int argc,char* argv[]);
085: CHtml2Txt(CHtmlOption opt);
086: CHtml2Txt();
087:
088: ~CHtml2Txt();
089:
090: void InitClass();
091: void FreeCode();
092:
093:
094:
095: void SetURL(char* baseurl);
096:
097:
098:
099: void SetCode(char* src);
100: int ReadCode();
101: int ReadFile(char* fname);
102:
103:
104:
105: int Convert();
106:
107:
108:
109: int CodeLen();
110: void GetCode(char* dst);
111:
112:
113:
114: void WriteCode();
115: void TextOut();
116:
117:
118:
119:
120: int Initbuf(int buflen);
121:
122: void strpush(char* buf);
123: void charpush(char buf);
124:
125:
126:
127: int rep_key(char* h_code,int iPos,char* sym,char* repstr);
128:
129:
130:
131: int rep_tag(char* h_code,int iPos,char* tag,char* repstr);
132: int ch_title(char* h_code,int iPos);
133: int get_title(char* h_code,int iPos);
134: int ch_ruby(char* h_code,int iPos);
135: int get_ruby(char* h_code,int iPos);
136: int ch_line(char* h_code,int iPos);
137: int rep_line(char* h_code,int iPos);
138: int ch_link(char* h_code,int iPos);
139: int rep_link(char* h_code,int iPos);
140: int ch_image(char* h_code,int iPos);
141: int rep_image(char* h_code,int iPos);
142: int ch_region(char* h_code,int iPos,int tagi);
143: int region_skip(char* h_code,int iPos,int tagi);
144:
145:
146:
147: void html2txt();
148: };
149:
150: #endif