001: //-------------------------------------------------------------------------
002: // HTML document to TEXT converter
003: // html2txt.cpp: Last Update: <2004/11/25 23:25:27 A.Murakami>
004: // USAGE: html2txt htmlfile [-disp] [-link] [-out textfile] [-url baseurl]
005: //  option:
006: //    -disp output to display (default .txt file).
007: //    -link output link[ref url and image]
008: //    -out  textfile output filename
009: //    -url  set base URL
010: //-------------------------------------------------------------------------
011: #include <stdio.h>
012: #include <stdlib.h>
013: #include <string.h>
014: #include <memory.h>
015: #include <ctype.h>
016: #include "html2txt.h"
017: 
018: //-----------------------------------------------------------------
019: // main 関数として使用時
020: //-----------------------------------------------------------------
021: //#define USE_LIB
022: #ifndef USE_LIB
023: int main(int argc,char* argv[])
024: {
025:     CHtml2Txt* h2t = new CHtml2Txt(argc,argv);
026:     if(h2t->ReadCode()){
027:         if(h2t->Convert())
028:             h2t->WriteCode();
029:     }
030:     delete h2t;
031:     return 0;
032: }
033: #endif
034: 
035: //-----------------------------------------------------------------
036: //
037: // CHtml2Txt Class:
//   convert an HTML document to plain text
039: //
040: //-----------------------------------------------------------------
041: //-----------------------------------------------------------------
042: // コマンドヘルプ
043: //-----------------------------------------------------------------
044: void CHtml2Txt::usage()
045: {
046:     fprintf(stderr,"USAGE: %s htmlfile [-disp] [-link] [-out textfile] [-url baseurl]\n",
047:             cmdopt.cmd);
048:     fprintf(stderr,"option:\n");
049:     fprintf(stderr,"\t-disp \toutput to display (default .txt file).\n");
050:     fprintf(stderr,"\t-link \toutput link[ref url and image]\n");
051:     fprintf(stderr,"\t-out htmlfile\toutput filename\n");
052:     fprintf(stderr,"\t-url  \tset base URL\n");
053: }
054: //-----------------------------------------------------------------
055: // プログラムの引数の読取[FALSE:失敗,TRUE:正常]
056: //-----------------------------------------------------------------
057: int CHtml2Txt::Read_arg(int argc,char* argv[],CHtmlOption& cmdopt)
058: {
059:     // 引数の解釈
060:     strcpy(cmdopt.cmd,argv[0]);
061:     if(argc==1) { return FALSE; }
062:     for(int i=1;i<argc;i++) {
063:         if(argv[i][0] == '-'){
064:             if(!strcmp(argv[i], "-out")) {
065:                 if(++i>=argc) return FALSE;
066:                 strcpy(cmdopt.fOut,argv[i]);
067:             } else if(!strcmp(argv[i], "-url")) {
068:                 if(++i>=argc) return FALSE;
069:                 SetURL(argv[i]);
070:             } else if(!strcmp(argv[i], "-disp")) {
071:                 cmdopt.Hdst = DST_DISP;
072:             } else if(!strcmp(argv[i], "-link")) {
073:                 cmdopt.link = SW_LINK;
074:             } else {
075:                 fprintf(stderr,"unregonized option '%s'\n",argv[i]);
076:                 return FALSE;
077:             }
078:         } else {
079:             strcpy(cmdopt.fIn,argv[i]);
080:         }
081:     }
082:     return TRUE;
083: }
084: //-----------------------------------------------------------------
085: // コンストラクタ:
086: //   コマンド引数より変換のオプションの読込み
087: //-----------------------------------------------------------------
088: CHtml2Txt::CHtml2Txt(int argc,char* argv[])
089: {
090:     Run = Read_arg(argc,argv,cmdopt);
091:     if(Run == FALSE){
092:         usage(); return;
093:     }
094:     InitClass();
095: }
096: //-----------------------------------------------------------------
097: // コンストラクタ:
098: //   オプションの受け渡し
099: //-----------------------------------------------------------------
100: CHtml2Txt::CHtml2Txt(CHtmlOption opt)
101: {
102:     Run = TRUE;
103:     cmdopt = opt;
104:     InitClass();
105: }
106: //-----------------------------------------------------------------
107: // コンストラクタ:
108: //   全引数の指定
109: //-----------------------------------------------------------------
110: CHtml2Txt::CHtml2Txt()
111: {
112:     Run = TRUE;
113:     InitClass();
114: }
115: // デストラクタ
116: CHtml2Txt::~CHtml2Txt()
117: {
118:     if(Run){
119:         if(Hcode!=NULL) free(Hcode);
120:         if(Tcode!=NULL) free(Tcode);
121:     }
122: }
123: //-----------------------------------------------------------------
124: // クラスデータの初期化
125: //-----------------------------------------------------------------
126: void CHtml2Txt::InitClass()
127: {
128:     // 出力ファイル名の設定
129:     char dir[128],file[128],ext[10];
130:     if(strcmp(cmdopt.fIn,std_in)){
131:         split_path(cmdopt.fIn,dir,file,ext);
132:         sprintf(DocTitle,"%s%s",file,ext);
133:         if(!strcmp(cmdopt.fOut,std_out)){
134:             if(cmdopt.Hdst == DST_FILE){
135:                 sprintf(cmdopt.fOut,"%s.txt",file);
136:             }
137:         }
138:     } else {
139:         sprintf(DocTitle,"html2txt");
140:     }
141:     // 変数初期化
142:     Hcode = Tcode = NULL;
143:     OFP = NULL;
144: }
145: //-----------------------------------------------------------------
146: // ソース,HTML コードの解放
147: //-----------------------------------------------------------------
148: void CHtml2Txt::FreeCode()
149: {
150:     if(Run){
151:         if(Hcode!=NULL) free(Hcode), Hcode = NULL;
152:         if(Tcode!=NULL) free(Tcode), Tcode = NULL;
153:     }
154: }
155: //-----------------------------------------------------------------
156: // URL の指定
157: //-----------------------------------------------------------------
158: void CHtml2Txt::SetURL(char* baseurl)
159: {
160:     int ui,len=strlen(baseurl);
161:     memset(BaseURL,0,sizeof(char)*MAX_PATH);
162:     memset(BaseFile,0,sizeof(char)*MAX_PATH);
163:     for(ui=len;ui>=0;ui--){
164:         if(baseurl[ui]=='/') break;
165:     }
166:     if(ui>=7){// [http://]以上の場合
167:         strncpy(BaseURL,baseurl,sizeof(char)*ui);
168:         if(ui!=len)
169:             strncpy(BaseFile,baseurl+ui+1,len-ui-1);
170:     } else {
171:         strcpy(BaseURL,baseurl);
172:     }
173:     cmdopt.url = BASE_URL;
174: }
175: //-----------------------------------------------------------------
176: // HTMLコードの読込み[配列]
177: //-----------------------------------------------------------------
178: void CHtml2Txt::SetCode(char* src)
179: {
180:     // 入力ファイルの文字数取得
181:     lineSum = tabSum = fcLen = 0;
182:     str_count(src,fcLen,lineSum,tabSum);
183:     // 文字配列の確保
184:     Hcode = (char*)calloc(fcLen+1,sizeof(char));
185:     strcpy(Hcode,src);
186: }
187: //-----------------------------------------------------------------
188: // HTMLコードの読込み[ファイル]
189: //-----------------------------------------------------------------
190: int CHtml2Txt::ReadCode()
191: {
192:     if(Run==FALSE) return FALSE;
193:     // 入力ファイルの文字数取得
194:     lineSum = tabSum = fcLen = 0;
195:     Fstr_count(cmdopt.fIn,fcLen,lineSum,tabSum);
196:     if(fcLen == 0){
197:         return FALSE;
198:     }
199:     // 文字配列の確保
200:     Hcode = (char*)calloc(fcLen+1,sizeof(char));
201:     // 入力ファイルから文字列の取得
202:     if(!read_string(cmdopt.fIn,fcLen,Hcode)){
203:         fprintf(stderr,"input file reading error.\n");
204:         return FALSE;
205:     }
206:     return TRUE;
207: }
208: //-----------------------------------------------------------------
209: // 指定されたファイルからの読込み
210: //-----------------------------------------------------------------
211: int CHtml2Txt::ReadFile(char* fname)
212: {
213:     if(Run==FALSE) return FALSE;
214:     // 入力ファイルの設定
215:     strcpy(cmdopt.fIn,fname);
216:     // 出力ファイルの設定
217:     char dir[128],file[128],ext[10];
218:     split_path(cmdopt.fIn,dir,file,ext);
219:     sprintf(DocTitle,"%s%s",file,ext);
220:     sprintf(cmdopt.fOut,"%s.txt",file);
221:     // 入力ファイルの文字数取得
222:     lineSum = tabSum = fcLen = 0;
223:     Fstr_count(cmdopt.fIn,fcLen,lineSum,tabSum);
224:     if(fcLen == 0){
225:         return FALSE;
226:     }
227:     // 文字配列の確保
228:     Hcode = (char*)calloc(fcLen+1,sizeof(char));
229:     // 入力ファイルから文字列の取得
230:     if(!read_string(cmdopt.fIn,fcLen,Hcode)){
231:         fprintf(stderr,"input file reading error.\n");
232:         return FALSE;
233:     }
234:     return TRUE;
235: }
236: //-----------------------------------------------------------------
237: // 変換実行
238: //-----------------------------------------------------------------
239: int CHtml2Txt::Convert()
240: {
241:     if(Run==FALSE) return FALSE;
242:     //--------------------------------------------------
243:     // 書き出し用TEXT文字列の確保
244:     //--------------------------------------------------
245:     int buflen = FMINSIZ+fcLen;
246:     if(!Initbuf(buflen)){
247:         fprintf(stderr,"cannot alloc output buffer.\n");
248:         return FALSE;
249:     }
250:     // convert: c/c++ source file to HTML source
251:     html2txt();
252:     return TRUE;
253: }
254: //-----------------------------------------------------------------
255: // HTMLコードの長さ
256: //-----------------------------------------------------------------
257: int CHtml2Txt::CodeLen()
258: {
259:     return strlen(Tcode);
260: }
261: //-----------------------------------------------------------------
262: // HTMLコードのコピー
263: //-----------------------------------------------------------------
264: void CHtml2Txt::GetCode(char* dst)
265: {
266:     strcat(dst,Tcode);
267: }
268: //-----------------------------------------------------------------
269: // ファイル出力
270: //-----------------------------------------------------------------
271: void CHtml2Txt::WriteCode()
272: {
273:     if(cmdopt.Hdst == DST_FILE){
274:         if(!strcmp(cmdopt.fOut,std_out)){
275:             fprintf(stderr,"**%s\n\toutput file is not detected.\n",h2tprog);
276:             return;
277:         }
278:         fprintf(stderr,"**%s\n\t%s -> %s\n",h2tprog,cmdopt.fIn,cmdopt.fOut);
279:     }
280:     //--------------------------------------------------
281:     // 出力設定
282:     //--------------------------------------------------
283:     if(!cmdopt.Hdst){
284:         if((OFP=fopen(cmdopt.fOut,"w"))==NULL){
285:             fprintf(stderr,"output file[%s]: cannot writable.\n",cmdopt.fOut);
286:             return;
287:         }
288:     } else OFP = stdout;
289:     //--------------------------------------------------
290:     // コードの出力
291:     //--------------------------------------------------
292:     TextOut();
293:     // 後片付け
294:     fclose(OFP);
295: }
296: //-----------------------------------------------------------------
297: // Text output
298: //-----------------------------------------------------------------
299: void CHtml2Txt::TextOut()
300: {
301:     fwrite(Tcode,sizeof(char),strlen(Tcode),OFP);
302: }
303: //-----------------------------------------------------------------
304: // HTML 出力バッファの初期化
305: //-----------------------------------------------------------------
306: int CHtml2Txt::Initbuf(int buflen)
307: {
308:     tci = 0; swQuote = 0;
309:     Tcode = (char*)calloc(buflen,sizeof(char));
310:     memset(Tcode,0,buflen);
311:     if(Tcode == NULL) return 0;
312:     return 1;
313: }
314: //-----------------------------------------------------------------
315: // 出力バッファへ文字列の追加
316: //-----------------------------------------------------------------
317: void CHtml2Txt::strpush(char* buf)
318: {
319:     for(int i=0;i<(signed)strlen(buf);i++){
320:         Tcode[tci++] = buf[i];
321:         if(swQuote) if(buf[i]=='\n')
322:             Tcode[tci++] = '\t';
323:     }
324: }
325: // 出力バッファへ文字の追加
326: void CHtml2Txt::charpush(char buf)
327: {
328:     Tcode[tci++] = buf;
329:     if(swQuote) if(buf=='\n')
330:         Tcode[tci++] = '\t';
331: }
332: //--------------------------------------------------
333: // 記号の変換
334: //--------------------------------------------------
335: int CHtml2Txt::rep_key(char* h_code,int iPos,char* sym,char* repstr)
336: {
337:     if(Kwd_sch(h_code,fcLen,iPos,sym)){
338:         strpush(repstr);
339:         return 1;
340:     }
341:     return 0;
342: }
343: //-----------------------------------------------------------------
344: // タグの変換
345: //-----------------------------------------------------------------
346: int CHtml2Txt::rep_tag(char* h_code,int iPos,char* tag,char* repstr)
347: {
348:     int endc = h_code[iPos+1+strlen(tag)];
349:     if(Kwd_sch(h_code,fcLen,iPos+1,tag) &&
350:        (h_code[iPos] == '<' ) &&
351:        (endc == ' ' || endc == '>') ){
352:         strpush(repstr);
353:         return strlen(tag);
354:     }
355:     return 0;
356: }
357: //-----------------------------------------------------------------
358: // タイトル
359: //-----------------------------------------------------------------
360: int CHtml2Txt::ch_title(char* h_code,int iPos)
361: {
362:     if(Kwd_sch(h_code,fcLen,iPos,"<title>")) return 1;
363:     return 0;
364: }
365: int CHtml2Txt::get_title(char* h_code,int iPos)
366: {
367:     int i=iPos+6,iStart=1,tcnt=0;
368:     memset(DocTitle,0,sizeof(char)*256);
369:     while(iStart && i++<fcLen){
370:         if(Kwd_sch(h_code,fcLen,i,"</title>")){
371:             iStart = 0; i+=7;
372:             DocTitle[tcnt] = 0;
373:             strpush("Title: ");
374:             strpush(DocTitle);
375:             charpush('\n');
376:             fprintf(stderr,"%s: [%s]\n",cmdopt.fIn,DocTitle);
377:         } else {
378:             DocTitle[tcnt++] = h_code[i];
379:             //charpush(h_code[i]);
380:         }
381:     }
382:     return (i-iPos);
383: }
384: //-----------------------------------------------------------------
385: // 水平ライン
386: //-----------------------------------------------------------------
387: int CHtml2Txt::ch_line(char* h_code,int iPos)
388: {
389:     if(Kwd_sch(h_code,fcLen,iPos,"<hr ") || 
390:        Kwd_sch(h_code,fcLen,iPos,"<hr>") ||
391:        Kwd_sch(h_code,fcLen,iPos,"<hr\n")) {
392:         return 1;
393:     }
394:     return 0;
395: }
396: int CHtml2Txt::rep_line(char* h_code,int iPos)
397: {
398:     int i=iPos,iStart=1;
399:     unsigned char c;
400:     strpush("<hr");
401:     i+=2;
402:     while(iStart && i++<fcLen){
403:         c = h_code[i];
404:         if(c == '>')  { iStart=0; }
405:         if(c == '\n') { charpush(' '); continue; }
406:         charpush(c);
407:     }
408:     strpush("<p>\n");
409:     return (i-iPos);
410: }
411: //-----------------------------------------------------------------
412: // リンク先
413: //-----------------------------------------------------------------
414: int CHtml2Txt::ch_link(char* h_code,int iPos)
415: {
416:     if(Kwd_sch(h_code,fcLen,iPos,"<a "))  return 1;
417:     if(Kwd_sch(h_code,fcLen,iPos,"<a\n")) return 1;
418:     return 0;
419: }
420: int CHtml2Txt::rep_link(char* h_code,int iPos)
421: {
422:     int i=iPos,iStart=1,urlcnt=0,ref_start=0;
423:     unsigned char c;
424:     char url[MAX_PATH];
425:     strpush("<a ");
426:     i+=2;
427:     while(iStart && i++<fcLen){
428:         c = h_code[i];
429:         if(c == '>')  { iStart=0; }
430:         if(c == '\n') { charpush(' '); continue; }
431:         if(Kwd_sch(h_code,fcLen,i,"href=")){
432:             urlcnt = 0; ref_start=1;
433:             memset(url,0,sizeof(char)*MAX_PATH);
434:             strpush("href=");
435:             i+=4;
436:             continue;
437:         }
438:         if(ref_start){
439:             if(!urlcnt && c=='"') { continue; }
440:             if(c != '"' && c != ' ' && c != '>') {
441:                 url[urlcnt++]=c;
442:                 continue;
443:             } else {
444:                 ref_start = 0;
445:                 charpush('"');
446:                 if(cmdopt.url == BASE_URL){
447:                     if(!Kwd_sch(url,strlen(url),0,"http://") &&
448:                        !Kwd_sch(url,strlen(url),0,"mailto:") &&
449:                        !Kwd_sch(url,strlen(url),0,"javascript:") ) {
450:                         strpush(BaseURL);
451:                         charpush('/');
452:                         if(url[0] == '#'){
453:                             strpush(BaseFile);
454:                         }
455:                         strpush(url);
456:                     } else strpush(url);
457:                 } else strpush(url);
458:                 charpush('"');
459:                 if(c=='>') charpush('>');
460:                 continue;
461:             }
462:         }
463:         charpush(c);
464:     }
465:     return (i-iPos);
466: }
467: //-----------------------------------------------------------------
468: // 画像の表示
469: //-----------------------------------------------------------------
470: int CHtml2Txt::ch_image(char* h_code,int iPos)
471: {
472:     if(Kwd_sch(h_code,fcLen,iPos,"<img "))  return 1;
473:     if(Kwd_sch(h_code,fcLen,iPos,"<img\n")) return 1;
474:     return 0;
475: }
476: int CHtml2Txt::rep_image(char* h_code,int iPos)
477: {
478:     int i=iPos,iStart=1,imgcnt=0,src_start=0;
479:     unsigned char c;
480:     char img[MAX_PATH];
481:     strpush("<img ");
482:     i+=4;
483:     while(iStart && i++<fcLen){
484:         c = h_code[i];
485:         if(c == '>')  { iStart=0; }
486:         if(c == '\n') { charpush(' '); continue; }
487:         if(Kwd_sch(h_code,fcLen,i,"src=")){
488:             imgcnt = 0; src_start=1;
489:             memset(img,0,sizeof(char)*MAX_PATH);
490:             strpush("src=");
491:             i+=3;
492:             continue;
493:         }
494:         if(src_start){
495:             if(!imgcnt && c=='"') continue;
496:             if(c != '\"' && c != ' ' && c != '>') {
497:                 img[imgcnt++]=c;
498:                 continue;
499:             } else {
500:                 src_start = 0;
501:                 charpush('"');
502:                 if(cmdopt.url == BASE_URL){
503:                     if(!strstr(img,"http://")){
504:                         strpush(BaseURL);
505:                         charpush('/');
506:                         strpush(img);
507:                     } else strpush(img);
508:                 } else strpush(img);
509:                 charpush('"');
510:                 if(c=='>') charpush('>');
511:                 continue;
512:             }
513:         }
514:         charpush(c);
515:     }
516:     return (i-iPos);
517: }
518: //-----------------------------------------------------------------
519: // 指定タグの区間を無視
520: //-----------------------------------------------------------------
521: int CHtml2Txt::ch_region(char* h_code,int iPos,int tagi)
522: {
523:     unsigned char c = h_code[iPos],ec;
524:     if(c!='<') return 0;
525:     ec = h_code[iPos+1+strlen(skip_tag[tagi])];
526:     if(Kwd_sch(h_code,fcLen,iPos+1,skip_tag[tagi]) &&
527:        (ec == ' ' || ec == '>')){
528:         return 1;
529:     }
530:     return 0;
531: }
532: int CHtml2Txt::region_skip(char* h_code,int iPos,int tagi)
533: {
534:     int i=iPos,iStart=1;
535:     char endstr[128];
536:     sprintf(endstr,"</%s>",skip_tag[tagi]);
537:     i+=strlen(skip_tag[tagi])+1;
538:     while(iStart && i++<fcLen){
539:         if(Kwd_sch(h_code,fcLen,i,endstr)){
540:             iStart = 0;
541:         }
542:     }
543:     i += strlen(endstr)-1;
544:     return (i-iPos);
545: }
546: //-----------------------------------------------------------------
547: // convert: HTML code to text
548: //-----------------------------------------------------------------
549: void CHtml2Txt::html2txt()
550: {
551:     int i,iPos,ltop,iStart=0;
552:     int swPre=0,swComment=0;
553:     unsigned char c,nxt_c,pre_c=0;
554:     // 変換開始
555:     for(iPos=0;iPos<fcLen;pre_c=Hcode[iPos++]){
556:         c = Hcode[iPos];
557:         nxt_c = (iPos<fcLen-1)?Hcode[iPos+1]:0;
558:         if(c == '\n' && !swPre) continue;
559:         //--------------------------------------------------
560:         // 先頭の余計なスペース/タブの除去
561:         //--------------------------------------------------
562:         if(pre_c == '\n'){
563:             ltop = line_schtop(Hcode,fcLen,iPos);
564:             if(ltop && Hcode[iPos+ltop] == '<'){
565:                 iPos += (ltop-1);
566:                 continue;
567:             }
568:         }
569:         //--------------------------------------------------
570:         // タイトルの取得
571:         //--------------------------------------------------
572:         if(ch_title(Hcode,iPos)){
573:             iPos += get_title(Hcode,iPos);
574:             continue;
575:         }
576:         //--------------------------------------------------
577:         // タグの置き換え
578:         //--------------------------------------------------
579:         iPos += rep_tag(Hcode,iPos,"h1","\n");
580:         iPos += rep_tag(Hcode,iPos,"/h1","\n");
581:         iPos += rep_tag(Hcode,iPos,"h2","\n");
582:         iPos += rep_tag(Hcode,iPos,"/h2","\n");
583:         iPos += rep_tag(Hcode,iPos,"h3","\n");
584:         iPos += rep_tag(Hcode,iPos,"/h3","\n");
585:         iPos += rep_tag(Hcode,iPos,"h4","\n");
586:         iPos += rep_tag(Hcode,iPos,"/h4","\n");
587:         // comment
588:         iPos += rep_tag(Hcode,iPos,"cite","\"");
589:         iPos += rep_tag(Hcode,iPos,"/cite","\"");
590:         // list
591:         iPos += rep_tag(Hcode,iPos,"ul","\n");
592:         iPos += rep_tag(Hcode,iPos,"ol","\n");
593:         iPos += rep_tag(Hcode,iPos,"dl","\n");
594:         iPos += rep_tag(Hcode,iPos,"li","\n\t・");
595:         iPos += rep_tag(Hcode,iPos,"dt","\n\t・");
596:         // line break
597:         iPos += rep_tag(Hcode,iPos,"/tr","\n");
598:         iPos += rep_tag(Hcode,iPos,"div","\n");
599:         iPos += rep_tag(Hcode,iPos,"/div","\n");
600:         iPos += rep_tag(Hcode,iPos,"p","\n\n");
601:         iPos += rep_tag(Hcode,iPos,"/p","\n");
602:         iPos += rep_tag(Hcode,iPos,"br","\n");
603:         iPos += rep_tag(Hcode,iPos,"/br","\n");
604:         // other
605:         //iPos += rep_tag(Hcode,iPos,"td","\t");
606:         //--------------------------------------------------
607:         // BlockQuote
608:         // →改行後タブ
609:         //--------------------------------------------------
610:         if(Kwd_sch(Hcode,fcLen,iPos,"<blockquote>"))  swQuote = 1;
611:         if(Kwd_sch(Hcode,fcLen,iPos,"</blockquote>")) swQuote = 0;
612:         //--------------------------------------------------
613:         // Preview
614:         //--------------------------------------------------
615:         if(Kwd_sch(Hcode,fcLen,iPos,"<pre>"))         swPre = 1;
616:         if(Kwd_sch(Hcode,fcLen,iPos,"</pre>"))        swPre = 0;
617:         //--------------------------------------------------
618:         // 無視するタグの区間
619:         //--------------------------------------------------
620:         for(i=0;i<SKIPTAG_N;i++){
621:             if(ch_region(Hcode,iPos,i)){
622:                 iPos += region_skip(Hcode,iPos,i);
623:                 continue;
624:             }
625:         }
626:         //--------------------------------------------------
627:         // 水平ライン
628:         //--------------------------------------------------
629:         if(cmdopt.link==SW_LINK){
630:             if(ch_line(Hcode,iPos)){
631:                 iPos += rep_line(Hcode,iPos);
632:                 continue;
633:             }
634:         }
635:         //--------------------------------------------------
636:         // リンク[画像]先のチェック
637:         //   base URL を解析し実際にリンク先へ移動できるように
638:         //--------------------------------------------------
639:         // 画像
640:         if(cmdopt.link==SW_LINK){
641:             if(ch_image(Hcode,iPos)){
642:                 iPos += rep_image(Hcode,iPos);
643:                 continue;
644:             }
645:         }
646:         // リンク
647:         if(cmdopt.link==SW_LINK){
648:             if(ch_link(Hcode,iPos)){
649:                 iPos += rep_link(Hcode,iPos);
650:                 continue;
651:             }
652:             if(Kwd_sch(Hcode,fcLen,iPos,"</a>")){
653:                 strpush("</a>");
654:                 iPos += 3;
655:                 continue;
656:             }
657:         }
658:         //--------------------------------------------------
659:         // HTML Command Skipper
660:         //--------------------------------------------------
661:         if(c == '<'){
662:             if(nxt_c != '\0'){
663:                 if(nxt_c>='a' && nxt_c<='z' ) iStart=1;
664:                 if(nxt_c>='A' && nxt_c<='Z' ) iStart=1;
665:                 if(nxt_c=='/'               ) iStart=1;
666:                 if(nxt_c=='?'               ) iStart=1;
667:                 if(nxt_c=='!'){
668:                     if(Kwd_sch(Hcode,fcLen,iPos,"<!--")){
669:                         swComment = 1; strpush("\nComment:\n\t");
670:                         iPos += 3; continue;
671:                     } else {
672:                         iStart = 1;
673:                     }
674:                 }
675:             }
676:         }
677:         // Comment Region
678:         if(swComment && Kwd_sch(Hcode,fcLen,iPos,"-->")){
679:             swComment = 0; strpush("\n");
680:             iPos += 2; continue;
681:         }
682:         // end command.
683:         if(c=='>' && iStart==1) {
684:             iStart=0; continue;
685:         }
686:         //--------------------------------------------------
687:         // 記号
688:         //--------------------------------------------------
689:         if(iStart==0){
690:             if(c=='&'){
691:                 if(rep_key(Hcode,iPos,"&lt;","<"))    { iPos+=3; continue; }
692:                 if(rep_key(Hcode,iPos,"&gt;",">"))    { iPos+=3; continue; }
693:                 if(rep_key(Hcode,iPos,"&amp;","&"))   { iPos+=4; continue; }
694:                 if(rep_key(Hcode,iPos,"&quot;","\"")) { iPos+=5; continue; }
695:                 if(rep_key(Hcode,iPos,"&Aacute;","")){ iPos+=7; continue; }
696:                 if(rep_key(Hcode,iPos,"&Iacute;","。")){ iPos+=7; continue; }
697:                 if(rep_key(Hcode,iPos,"&ETH;","ム"))   { iPos+=4; continue; }
698:                 if(rep_key(Hcode,iPos,"&Ntilde;","、")){ iPos+=7; continue; }
699:                 if(rep_key(Hcode,iPos,"&Oacute;","「")){ iPos+=7; continue; }
700:                 if(rep_key(Hcode,iPos,"&Oslash;","0")){ iPos+=7; continue; }
701:                 if(rep_key(Hcode,iPos,"&Uacute;","」")){ iPos+=7; continue; }
702:                 if(rep_key(Hcode,iPos,"&Yacute;","Y")){ iPos+=7; continue; }
703:                 if(rep_key(Hcode,iPos,"&aacute;","")){ iPos+=7; continue; }
704:                 if(rep_key(Hcode,iPos,"&iacute;","。")){ iPos+=7; continue; }
705:                 if(rep_key(Hcode,iPos,"&eth;","ム"))   { iPos+=4; continue; }
706:                 if(rep_key(Hcode,iPos,"&ntilde;","、")){ iPos+=7; continue; }
707:                 if(rep_key(Hcode,iPos,"&oacute;","「")){ iPos+=7; continue; }
708:                 if(rep_key(Hcode,iPos,"&oslash;","0")){ iPos+=7; continue; }
709:                 if(rep_key(Hcode,iPos,"&uacute;","」")){ iPos+=7; continue; }
710:                 if(rep_key(Hcode,iPos,"&yacute;","Y")){ iPos+=7; continue; }
711:                 if(rep_key(Hcode,iPos,"&reg;","(r)")) { iPos+=4; continue; }
712:                 if(rep_key(Hcode,iPos,"&copy;","(c)")){ iPos+=5; continue; }
713:                 if(rep_key(Hcode,iPos,"&trade;","tm")){ iPos+=6; continue; }
714:                 if(rep_key(Hcode,iPos,"&nbsp;"," "))  { iPos+=5; continue; }
715:             }
716:             // [\r\n] を [\n] へ
717:             if(c=='\r' && nxt_c=='\n'){
718:                 c = '\n'; iPos++;
719:             }
720:             charpush(c);
721:         }
722:     }
723:     charpush('\0');
724: }
// (web-hosting footer "inserted by FC2 system" - not part of the program)