equal
deleted
inserted
replaced
20 #include "HtmlParse.h" |
20 #include "HtmlParse.h" |
21 #include <iostream> |
21 #include <iostream> |
22 #include <sstream> |
22 #include <sstream> |
23 #include <set> |
23 #include <set> |
24 #include <algorithm> |
24 #include <algorithm> |
|
25 #include <memory> |
25 using namespace std; |
26 using namespace std; |
26 |
27 |
27 |
28 |
28 const string WhiteSpace(" \t\r\n"); |
29 const string WhiteSpace(" \t\r\n"); |
29 const string gValidText("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_<>()-+. "); |
30 const string gValidText("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_<>()-+. "); |
31 const string KEllipsis("\205"); |
32 const string KEllipsis("\205"); |
32 const string KUnknown("unknown"); |
33 const string KUnknown("unknown"); |
33 |
34 |
34 string TrimWhiteSpace(const string& aString) |
35 string TrimWhiteSpace(const string& aString) |
35 { |
36 { |
36 int start = aString.find_first_not_of(WhiteSpace); |
37 string::size_type start = aString.find_first_not_of(WhiteSpace); |
37 if (start == string::npos) |
38 if (start == string::npos) |
38 return ""; |
39 return ""; |
39 else |
40 else |
40 return aString.substr(start, 1+aString.find_last_not_of(WhiteSpace)-start); |
41 return aString.substr(start, 1+aString.find_last_not_of(WhiteSpace)-start); |
41 } |
42 } |
93 set<string> gKnownTitles; |
94 set<string> gKnownTitles; |
94 |
95 |
95 void THtmlParseLayoutTable::ExtractTitle(const string& aText) |
96 void THtmlParseLayoutTable::ExtractTitle(const string& aText) |
96 { |
97 { |
97 iName = UnHtml(aText); |
98 iName = UnHtml(aText); |
98 int pos = iName.find_first_not_of("1234567890.\t\r\n "); |
99 string::size_type pos = iName.find_first_not_of("1234567890.\t\r\n "); |
99 if (pos == string::npos) |
100 if (pos == string::npos) |
100 return; |
101 return; |
101 iName = iName.substr(pos); |
102 iName = iName.substr(pos); |
102 iName = StripTo(iName, gValidText); |
103 iName = StripTo(iName, gValidText); |
103 iName = TrimWhiteSpace(iName); |
104 iName = TrimWhiteSpace(iName); |
207 PutBack(*pC); |
208 PutBack(*pC); |
208 } |
209 } |
209 |
210 |
210 int THtmlParseLayoutTable::GetVal(const string& aText, const string& aField) |
211 int THtmlParseLayoutTable::GetVal(const string& aText, const string& aField) |
211 { |
212 { |
212 int pos = aText.find(aField+"="); |
213 string::size_type pos = aText.find(aField+"="); |
213 if (pos == string::npos) |
214 if (pos == string::npos) |
214 return 0; |
215 return 0; |
215 string val = aText.substr(pos + aField.length() + 1); |
216 string val = aText.substr(pos + aField.length() + 1); |
216 val = val.substr(0, val.find_first_of(WhiteSpace + ">")); |
217 val = val.substr(0, val.find_first_of(WhiteSpace + ">")); |
217 stringstream s(val); |
218 stringstream s(val); |
250 // comma in the middle of the cell. |
251 // comma in the middle of the cell. |
251 // needed because of odd formatting found. |
252 // needed because of odd formatting found. |
252 if (aText.length()==0) |
253 if (aText.length()==0) |
253 return aText; |
254 return aText; |
254 |
255 |
255 int pos = aText.find_last_not_of(" ,"); |
256 string::size_type pos = aText.find_last_not_of(" ,"); |
256 if (pos == string::npos) |
257 if (pos == string::npos) |
257 return ""; |
258 return ""; |
258 |
259 |
259 string text; |
260 string text; |
260 if (aText.substr(0,pos+1).find(",") == string::npos) |
261 if (aText.substr(0,pos+1).find(",") == string::npos) |
514 |
515 |
515 int lastComma = -1; |
516 int lastComma = -1; |
516 int lastSpace = -1; |
517 int lastSpace = -1; |
517 int lastNum = -1; |
518 int lastNum = -1; |
518 |
519 |
519 for (int i=0; i<cell.length(); i++) |
520 for (string::size_type i=0; i<cell.length(); i++) |
520 { |
521 { |
521 char c = cell[i]; |
522 char c = cell[i]; |
522 if (c == ',') |
523 if (c == ',') |
523 lastComma = i; |
524 lastComma = i; |
524 else if (c == ' ' || c == '/') |
525 else if (c == ' ' || c == '/') |
553 } |
554 } |
554 |
555 |
555 string THtmlParseLayoutTable::UnHtml(const string& aText) |
556 string THtmlParseLayoutTable::UnHtml(const string& aText) |
556 { |
557 { |
557 string str(""); |
558 string str(""); |
558 for (int i=0; i<aText.size(); i++) |
559 for (string::size_type i=0; i<aText.size(); i++) |
559 { |
560 { |
560 char c = aText[i]; |
561 char c = aText[i]; |
561 if (c == '&') |
562 if (c == '&') |
562 { |
563 { |
563 string s = aText.substr(i); |
564 string s = aText.substr(i); |
564 int pos = s.find(";"); |
565 string::size_type pos = s.find(";"); |
565 if (pos != string::npos) |
566 if (pos != string::npos) |
566 { |
567 { |
567 i+=pos; |
568 i+=pos; |
568 c = HtmlChar(s.substr(1, pos-1)); |
569 c = HtmlChar(s.substr(1, pos-1)); |
569 } |
570 } |
586 {"#9", '\t'} |
587 {"#9", '\t'} |
587 }; |
588 }; |
588 |
589 |
589 char THtmlParseLayoutTable::HtmlChar(const string& aText) |
590 char THtmlParseLayoutTable::HtmlChar(const string& aText) |
590 { |
591 { |
591 for (int i=0; i<sizeof(gHtmlChars)/sizeof(THtmlChar); i++) |
592 for (unsigned int i=0; i<sizeof(gHtmlChars)/sizeof(THtmlChar); i++) |
592 { |
593 { |
593 if (aText == gHtmlChars[i].iString) |
594 if (aText == gHtmlChars[i].iString) |
594 return gHtmlChars[i].iChar; |
595 return gHtmlChars[i].iChar; |
595 } |
596 } |
596 return '_'; |
597 return '_'; |
632 }; |
633 }; |
633 |
634 |
634 string THtmlParseLayoutTable::ConvertToAknName(const string& aText) |
635 string THtmlParseLayoutTable::ConvertToAknName(const string& aText) |
635 { |
636 { |
636 string ret = aText; |
637 string ret = aText; |
637 for (int i=0; i<sizeof(gAknNameConversionTable)/sizeof(SConvertAknName); i++) |
638 for (unsigned int i=0; i<sizeof(gAknNameConversionTable)/sizeof(SConvertAknName); i++) |
638 { |
639 { |
639 string laf = gAknNameConversionTable[i].iLaf; |
640 string laf = gAknNameConversionTable[i].iLaf; |
640 string akn = gAknNameConversionTable[i].iAkn; |
641 string akn = gAknNameConversionTable[i].iAkn; |
641 int pos; |
642 string::size_type pos; |
642 while ((pos = ret.find(laf)) != string::npos) |
643 while ((pos = ret.find(laf)) != string::npos) |
643 { |
644 { |
644 ret.erase(pos, laf.length()); |
645 ret.erase(pos, laf.length()); |
645 ret.insert(pos, akn); |
646 ret.insert(pos, akn); |
646 } |
647 } |