|
/*
* Copyright (c) 2002 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*
*/
|
18 |
|
19 |
|
#include "HtmlParse.h"

#include <algorithm>
#include <cctype>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
using namespace std;
|
26 |
|
27 |
|
// Characters treated as whitespace when skipping and trimming text.
const string WhiteSpace = " \t\r\n";
// Characters retained by StripTo() for free-text cells and titles.
const string gValidText = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_<>()-+. ";
// Characters retained by StripTo() for numeric value cells.
const string gValidNum = "0123456789+-p ,";
// Single byte 0x85 (octal 205); presumably the Windows-1252 ellipsis -- confirm source encoding.
const string KEllipsis = "\205";
// Column name given to header cells that match no known column.
const string KUnknown = "unknown";
|
33 |
|
34 string TrimWhiteSpace(const string& aString) |
|
35 { |
|
36 int start = aString.find_first_not_of(WhiteSpace); |
|
37 if (start == string::npos) |
|
38 return ""; |
|
39 else |
|
40 return aString.substr(start, 1+aString.find_last_not_of(WhiteSpace)-start); |
|
41 } |
|
42 |
|
43 |
|
44 THtmlParseLayoutTable::THtmlParseLayoutTable(TLayout* aTables) |
|
45 : TLayoutTable(aTables) |
|
46 { |
|
47 } |
|
48 |
|
49 THtmlParseLayoutTable::~THtmlParseLayoutTable() |
|
50 { |
|
51 } |
|
52 |
|
53 bool THtmlParseLayoutTable::Read(istream& in) |
|
54 { |
|
55 iIn = ∈ |
|
56 |
|
57 if (!SearchForTableStart()) |
|
58 return false; |
|
59 |
|
60 ExtractTable(); |
|
61 |
|
62 iIn = 0; |
|
63 |
|
64 return true; |
|
65 } |
|
66 |
|
67 bool THtmlParseLayoutTable::SearchForTableStart() |
|
68 { |
|
69 string text; |
|
70 while (!Eof()) |
|
71 { |
|
72 string next = Next(); |
|
73 if (IsToken(next)) |
|
74 { |
|
75 if (next.substr(0,6) == "<TABLE") |
|
76 { |
|
77 ExtractTitle(text); |
|
78 return true; |
|
79 } |
|
80 } |
|
81 else |
|
82 { |
|
83 if (next.substr(0,5) == "Table") |
|
84 text = next.substr(5); |
|
85 else |
|
86 text += next; |
|
87 } |
|
88 } |
|
89 |
|
90 return false; |
|
91 } |
|
92 |
|
93 set<string> gKnownTitles; |
|
94 |
|
95 void THtmlParseLayoutTable::ExtractTitle(const string& aText) |
|
96 { |
|
97 iName = UnHtml(aText); |
|
98 int pos = iName.find_first_not_of("1234567890.\t\r\n "); |
|
99 if (pos == string::npos) |
|
100 return; |
|
101 iName = iName.substr(pos); |
|
102 iName = StripTo(iName, gValidText); |
|
103 iName = TrimWhiteSpace(iName); |
|
104 |
|
105 while (gKnownTitles.find(iName) != gKnownTitles.end()) |
|
106 iName += "_dup"; |
|
107 |
|
108 gKnownTitles.insert(iName); |
|
109 } |
|
110 |
|
111 void THtmlParseLayoutTable::ExtractTable() |
|
112 { |
|
113 string cell; |
|
114 string td; |
|
115 Row row; |
|
116 while (!Eof()) |
|
117 { |
|
118 string tok = Next(); |
|
119 if (IsToken(tok)) |
|
120 { |
|
121 if (tok.substr(0,4) == "<TD ") |
|
122 { |
|
123 td = tok; |
|
124 } |
|
125 else if (tok == "</TD>") |
|
126 { |
|
127 row.push_back(CleanCell(cell, row)); |
|
128 cell = ""; |
|
129 int colspan = GetVal(td, "COLSPAN"); |
|
130 for (int i=1; i<colspan; i++) |
|
131 row.push_back(""); |
|
132 } |
|
133 else if (tok == "</TR>") |
|
134 { |
|
135 if (!row[1].length()) |
|
136 row[1] = "untitled"; |
|
137 AddRow(row); |
|
138 row.erase(row.begin(), row.end()); |
|
139 } |
|
140 else if (tok == "</TABLE>") |
|
141 { |
|
142 break; |
|
143 } |
|
144 else if (cell.length() && (tok == "<BR>" || tok == "<P>")) |
|
145 { |
|
146 cell += ", "; |
|
147 } |
|
148 } |
|
149 else |
|
150 { |
|
151 cell += tok; |
|
152 } |
|
153 } |
|
154 } |
|
155 |
|
156 string THtmlParseLayoutTable::CleanCell(const string& cell, const Row& row) |
|
157 { |
|
158 if (iColumnNames.size() < 6) // table header |
|
159 return cell; |
|
160 else if (row.size() < 2 || row.size() == iColumnNames.size()) // free text |
|
161 return StripTo(UnHtml(cell),gValidText+KEllipsis); |
|
162 else |
|
163 return CleanMultiCell(StripTo(UnHtml(cell),gValidText+"/,*")); // value cell |
|
164 } |
|
165 |
|
166 string THtmlParseLayoutTable::Next() |
|
167 { |
|
168 string s; |
|
169 char c; |
|
170 |
|
171 do { |
|
172 c = Get(); |
|
173 } while (!Eof() && WhiteSpace.find(c) != string::npos); |
|
174 s += c; |
|
175 |
|
176 if (c == '<') |
|
177 { |
|
178 do { |
|
179 c = Get(); |
|
180 s += toupper(c); |
|
181 } while (!Eof() && c != '>'); |
|
182 } |
|
183 else |
|
184 { |
|
185 do { |
|
186 c = Get(); |
|
187 s += c; |
|
188 } while (!Eof() && c != '<'); |
|
189 |
|
190 if (c == '<') |
|
191 PutBack(c); |
|
192 |
|
193 s = s.substr(0, s.find_last_not_of(WhiteSpace)); |
|
194 } |
|
195 |
|
196 return s; |
|
197 } |
|
198 |
|
199 bool THtmlParseLayoutTable::IsToken(const string& aText) |
|
200 { |
|
201 return aText.length()>0 && aText[0] == '<'; |
|
202 } |
|
203 |
|
204 void THtmlParseLayoutTable::PutBack(const string& aText) |
|
205 { |
|
206 for (string::const_reverse_iterator pC = aText.rbegin(); pC != aText.rend(); ++pC) |
|
207 PutBack(*pC); |
|
208 } |
|
209 |
|
210 int THtmlParseLayoutTable::GetVal(const string& aText, const string& aField) |
|
211 { |
|
212 int pos = aText.find(aField+"="); |
|
213 if (pos == string::npos) |
|
214 return 0; |
|
215 string val = aText.substr(pos + aField.length() + 1); |
|
216 val = val.substr(0, val.find_first_of(WhiteSpace + ">")); |
|
217 stringstream s(val); |
|
218 int ret; |
|
219 s >> ret; |
|
220 return ret; |
|
221 } |
|
222 |
|
223 char THtmlParseLayoutTable::Get() |
|
224 { |
|
225 if (iPutBack.length()) |
|
226 { |
|
227 char c = iPutBack[iPutBack.length()-1]; |
|
228 iPutBack.erase(iPutBack.length()-1, 1); |
|
229 return c; |
|
230 } |
|
231 else |
|
232 { |
|
233 return iIn->get(); |
|
234 } |
|
235 } |
|
236 |
|
237 void THtmlParseLayoutTable::PutBack(char aChar) |
|
238 { |
|
239 iPutBack += aChar; |
|
240 } |
|
241 |
|
242 bool THtmlParseLayoutTable::Eof() |
|
243 { |
|
244 return iPutBack.length()==0 && iIn->eof(); |
|
245 } |
|
246 |
|
247 string THtmlParseLayoutTable::CleanMultiCell(const string& aText) |
|
248 { |
|
249 // This function removes trailing commas from the end of cells where there is no |
|
250 // comma in the middle of the cell. |
|
251 // needed because of odd formatting found. |
|
252 if (aText.length()==0) |
|
253 return aText; |
|
254 |
|
255 int pos = aText.find_last_not_of(" ,"); |
|
256 if (pos == string::npos) |
|
257 return ""; |
|
258 |
|
259 string text; |
|
260 if (aText.substr(0,pos+1).find(",") == string::npos) |
|
261 text = aText.substr(0,pos+1); |
|
262 else |
|
263 text = aText; |
|
264 |
|
265 return text; |
|
266 } |
|
267 |
|
// Longest run of '*' that is tracked individually in SCellParamDesc.
const int KMaxStars = 16;

// Summary of one value cell, used to decide which cells share a parameter:
// the number of comma-separated values and which star-run lengths occur.
struct SCellParamDesc
{
    SCellParamDesc();
    // Two descriptions are equal when the value count and every star flag match.
    bool operator==(const SCellParamDesc& a) const;
    int iParams;             // number of comma-separated values in the cell
    bool iStars[KMaxStars];  // iStars[n-1] is set when a run of n '*' was seen
};

SCellParamDesc::SCellParamDesc()
    : iParams(0)
{
    std::fill(iStars, iStars + KMaxStars, false);
}

// const so that it can be used on const objects and by standard algorithms.
bool SCellParamDesc::operator==(const SCellParamDesc& a) const
{
    return iParams == a.iParams && std::equal(iStars, iStars + KMaxStars, a.iStars);
}
|
297 |
|
298 THtmlParseLayoutTable::Row THtmlParseLayoutTable::MakeParamTable(const Row& row, int start, int num) |
|
299 { |
|
300 Row params; |
|
301 vector<SCellParamDesc> paramDescs; |
|
302 |
|
303 // initialise params |
|
304 for (Row::const_iterator pR = row.begin(); pR != row.end(); ++pR) |
|
305 { |
|
306 params.push_back(""); |
|
307 paramDescs.push_back(SCellParamDesc()); |
|
308 } |
|
309 |
|
310 // count params and stars |
|
311 int i; |
|
312 for (i=0; i<num; i++) |
|
313 { |
|
314 SCellParamDesc& desc = paramDescs[start+i]; |
|
315 const string& cell = row[start+i]; |
|
316 desc.iParams = 1; |
|
317 |
|
318 int starCount=0; |
|
319 for (string::const_iterator pC = cell.begin(); pC != cell.end(); ++pC) |
|
320 { |
|
321 if (*pC == '*') |
|
322 { |
|
323 starCount++; |
|
324 } |
|
325 else |
|
326 { |
|
327 if (starCount) |
|
328 { |
|
329 desc.iStars[starCount-1] = true; |
|
330 starCount = 0; |
|
331 } |
|
332 if (*pC == ',') |
|
333 { |
|
334 desc.iParams++; |
|
335 } |
|
336 } |
|
337 } |
|
338 |
|
339 if (starCount) |
|
340 desc.iStars[starCount-1] = true; |
|
341 } |
|
342 |
|
343 // assign parameter names |
|
344 string name("aCommon1"); |
|
345 for (i=0; i<num; i++) |
|
346 { |
|
347 SCellParamDesc& desc = paramDescs[start+i]; |
|
348 if (desc.iParams == 1) |
|
349 continue; |
|
350 |
|
351 string& param = params[start+i]; |
|
352 |
|
353 int count = 0; |
|
354 bool first = true; |
|
355 |
|
356 // look for a similar cell |
|
357 for (int j=0; j<num; j++) |
|
358 { |
|
359 if (paramDescs[start+j] == desc) |
|
360 { |
|
361 count++; |
|
362 if (count == 1 && j<i) |
|
363 { |
|
364 first = false; |
|
365 param = params[start+j]; |
|
366 } |
|
367 } |
|
368 } |
|
369 |
|
370 // assign a new name if there is no similar cell already named |
|
371 if (count>1 && first) |
|
372 { |
|
373 param = name; |
|
374 name[7]++; |
|
375 } |
|
376 } |
|
377 |
|
378 return params; |
|
379 } |
|
380 |
|
381 void THtmlParseLayoutTable::AddRow(Row& row) |
|
382 { |
|
383 if (row.size()==0) |
|
384 return; // empty row |
|
385 |
|
386 const string& first = row[0]; |
|
387 |
|
388 if (first.length()==0 || first == "P") |
|
389 AddTitleRow(row); |
|
390 else if (first == "No.") |
|
391 AddColumnNameRow(row); |
|
392 else if (first == KEllipsis) |
|
393 iAppend = true; |
|
394 else |
|
395 AddLineRow(row); |
|
396 } |
|
397 |
|
398 void THtmlParseLayoutTable::AddColumnNameRow(Row& row) |
|
399 { |
|
400 if (row.size()<4) |
|
401 return; // unknown row type |
|
402 |
|
403 if (row[1] == "Value") |
|
404 row[1] = "Item"; |
|
405 |
|
406 if (row[1] == "Font") |
|
407 iType = ETextTable; |
|
408 else if (row[1] == "Item") |
|
409 iType = EWindowTable; |
|
410 else |
|
411 return; // unknown row type |
|
412 |
|
413 SetDefaultColumnNames(); |
|
414 iColumnNames.insert(iColumnNames.begin()+3, "b/r"); // for skins LAF |
|
415 |
|
416 int foundCount = 0; |
|
417 vector<string> foundColNames; |
|
418 for (Row::iterator pCol = row.begin()+1; pCol != row.end(); pCol++) |
|
419 { |
|
420 string cellTitle = *pCol; |
|
421 if (cellTitle == "Remarks.") |
|
422 cellTitle = "Remarks"; |
|
423 if (iType == ETextTable && cellTitle == "Margins") |
|
424 { |
|
425 foundColNames.push_back("l"); |
|
426 foundColNames.push_back("r"); |
|
427 pCol++; |
|
428 } |
|
429 else if (find(iColumnNames.begin(),iColumnNames.end(),cellTitle) != iColumnNames.end()) |
|
430 { |
|
431 foundColNames.push_back(cellTitle); |
|
432 foundCount++; |
|
433 } |
|
434 else |
|
435 { |
|
436 foundColNames.push_back(KUnknown); |
|
437 } |
|
438 } |
|
439 iColumnNames = foundColNames; |
|
440 |
|
441 if (foundCount < 4) |
|
442 { |
|
443 iColumnNames.clear(); |
|
444 iType = EUnknownTable; |
|
445 } |
|
446 } |
|
447 |
|
448 void THtmlParseLayoutTable::AddLineRow(const Row& row) |
|
449 { |
|
450 if (iColumnNames.size() == 0 || row.size() != iColumnNames.size()+1) |
|
451 return; // unknown row type; |
|
452 |
|
453 Row params = MakeParamTable(row, 2, iColumnNames.size()-2); |
|
454 |
|
455 Row::const_iterator pCell = row.begin(); |
|
456 Row::const_iterator pParam = params.begin(); |
|
457 |
|
458 int id; |
|
459 stringstream stream(*pCell); |
|
460 stream >> id; |
|
461 if (id == 0) |
|
462 return; // bad id; |
|
463 |
|
464 TLayoutLine* line = new TLayoutLine(this, id); |
|
465 push_back(line); |
|
466 |
|
467 bool found_l = false; |
|
468 for (vector<string>::iterator pCol = iColumnNames.begin(); pCol != iColumnNames.end(); ++pCol) |
|
469 { |
|
470 string col = *pCol; |
|
471 if (col == "b/r") |
|
472 col = found_l ? "b" : "r"; |
|
473 |
|
474 ++pCell; |
|
475 ++pParam; |
|
476 TValues values(line, col); |
|
477 |
|
478 if (pParam->size()) |
|
479 values.iParam = *pParam; |
|
480 |
|
481 string cell = *pCell; |
|
482 if (IsValueColumn(col)) |
|
483 cell = ConvertToAknName(SplitMultiCell(cell, IsNumericColumn(col) ? gValidNum : gValidText)); |
|
484 |
|
485 int pos=0; |
|
486 do { |
|
487 int newPos = cell.find(',', pos); |
|
488 values.push_back(TrimWhiteSpace(cell.substr(pos, newPos-pos))); |
|
489 pos = newPos+1; |
|
490 } while (pos); |
|
491 |
|
492 if ((values.size()>1 || values[0].size()>0) && col == "l") |
|
493 found_l = true; |
|
494 |
|
495 if (col != KUnknown) |
|
496 line->insert(make_pair(col, values)); |
|
497 } |
|
498 } |
|
499 |
|
500 void THtmlParseLayoutTable::AddTitleRow(const Row& row) |
|
501 { |
|
502 if (row.size() < 2) |
|
503 return; |
|
504 if (row[0] == "P") |
|
505 iParentName = TrimWhiteSpace(row[1]); |
|
506 } |
|
507 |
|
508 string THtmlParseLayoutTable::SplitMultiCell(const string& aCell, const string& aValid) |
|
509 { |
|
510 string cell = aCell; |
|
511 |
|
512 // Make sure commas are in correctly! |
|
513 cell = StripTo(UnHtml(cell), aValid+"/,"); |
|
514 |
|
515 int lastComma = -1; |
|
516 int lastSpace = -1; |
|
517 int lastNum = -1; |
|
518 |
|
519 for (int i=0; i<cell.length(); i++) |
|
520 { |
|
521 char c = cell[i]; |
|
522 if (c == ',') |
|
523 lastComma = i; |
|
524 else if (c == ' ' || c == '/') |
|
525 lastSpace = i; |
|
526 else |
|
527 { |
|
528 if (lastSpace > lastNum && lastNum > lastComma) |
|
529 { |
|
530 cell[lastSpace] = ','; |
|
531 lastComma = lastSpace; |
|
532 } |
|
533 lastNum = i; |
|
534 } |
|
535 } |
|
536 |
|
537 cell = StripTo(UnHtml(cell), aValid+","); |
|
538 if (cell == "-") |
|
539 cell = ""; |
|
540 |
|
541 return cell; |
|
542 } |
|
543 |
|
544 string THtmlParseLayoutTable::StripTo(const string& aText, const string& aValid) |
|
545 { |
|
546 string cell(""); |
|
547 for (string::const_iterator pC = aText.begin(); pC != aText.end(); ++pC) |
|
548 { |
|
549 if (aValid.find(*pC) != string::npos) |
|
550 cell += *pC; |
|
551 } |
|
552 return cell; |
|
553 } |
|
554 |
|
555 string THtmlParseLayoutTable::UnHtml(const string& aText) |
|
556 { |
|
557 string str(""); |
|
558 for (int i=0; i<aText.size(); i++) |
|
559 { |
|
560 char c = aText[i]; |
|
561 if (c == '&') |
|
562 { |
|
563 string s = aText.substr(i); |
|
564 int pos = s.find(";"); |
|
565 if (pos != string::npos) |
|
566 { |
|
567 i+=pos; |
|
568 c = HtmlChar(s.substr(1, pos-1)); |
|
569 } |
|
570 } |
|
571 else if (c == char(0x96)) |
|
572 { |
|
573 c = '-'; |
|
574 } |
|
575 str += c; |
|
576 } |
|
577 return str; |
|
578 } |
|
579 |
|
// One HTML entity: its name (the text between '&' and ';') and the character
// it decodes to. The name is const char* because it points at a string
// literal, which must not be bound to a non-const char*.
struct THtmlChar {const char* iString; char iChar;};

// Entities recognised by HtmlChar().
const THtmlChar gHtmlChars[] =
{
    {"gt", '>'},
    {"lt", '<'},
    {"nbsp", ' '},
    {"#9", '\t'}
};
|
588 |
|
589 char THtmlParseLayoutTable::HtmlChar(const string& aText) |
|
590 { |
|
591 for (int i=0; i<sizeof(gHtmlChars)/sizeof(THtmlChar); i++) |
|
592 { |
|
593 if (aText == gHtmlChars[i].iString) |
|
594 return gHtmlChars[i].iChar; |
|
595 } |
|
596 return '_'; |
|
597 } |
|
598 |
|
// One text substitution: the name as written in the LAF document and the
// Avkon identifier that replaces it. Both point at string literals and are
// therefore const char*.
struct SConvertAknName
{
    const char* iLaf;
    const char* iAkn;
};

// Substitutions applied, in this order, by ConvertToAknName().
SConvertAknName gAknNameConversionTable[] =
{
    { "left", "ELayoutAlignLeft" },
    { "right", "ELayoutAlignRight" },
    { "centre", "ELayoutAlignCenter" },
    { "center", "ELayoutAlignCenter" },
    { "bidi", "ELayoutAlignBidi" },
    { "qfn_latin_bold_19", "ELatinBold19" },
    { "qfn_latin_bold_17", "ELatinBold17" },
    { "qfn_latin_bold_13", "ELatinBold13" },
    { "qfn_latin_bold_12", "ELatinBold12" },
    { "qfn_latin_plain_12", "ELatinPlain12" },
    { "qfn_latin_plain_13", "ELatinPlain12" }, // made up for elaf spec, only needed by navi pane?
    { "qfn_latin_clock_14", "ELatinClock14" },
    { "qfn_<ref>_plain_12", "EApacPlain12" },
    { "qfn_<ref>_plain_16", "EApacPlain16" },
    { "qfn_china_plain_12", "EApacPlain12" },
    { "qfn_number_plain_5", "ENumberPlain5" },
    { "qfn_china_plain_16", "EApacPlain16" },
    { "qfn_clock_bold_30", "EClockBold30" },
    { "qfn_number_bold_14", "ELatinClock14" },
    { "gfn_<ref>_plain_12", "EApacPlain12" },
    { "gfn_<ref>_plain_16", "EApacPlain16" },
    { "gfn_latin_bold_16", "ELatinBold16" },
    { "qfn_calc_21", "ECalcBold21" },
    { "qfn_calc_oper_21", "ECalcOperBold21" },
    { "qfn_calc_oper_13", "ECalcOperBold13" }
};
|
633 |
|
634 string THtmlParseLayoutTable::ConvertToAknName(const string& aText) |
|
635 { |
|
636 string ret = aText; |
|
637 for (int i=0; i<sizeof(gAknNameConversionTable)/sizeof(SConvertAknName); i++) |
|
638 { |
|
639 string laf = gAknNameConversionTable[i].iLaf; |
|
640 string akn = gAknNameConversionTable[i].iAkn; |
|
641 int pos; |
|
642 while ((pos = ret.find(laf)) != string::npos) |
|
643 { |
|
644 ret.erase(pos, laf.length()); |
|
645 ret.insert(pos, akn); |
|
646 } |
|
647 } |
|
648 return ret; |
|
649 } |
|
650 |
|
651 |
|
652 void THtmlParseLayout::Parse(istream &aIn) |
|
653 { |
|
654 while (!aIn.eof()) |
|
655 { |
|
656 auto_ptr<THtmlParseLayoutTable> table(new THtmlParseLayoutTable(this)); |
|
657 if (table->Read(aIn)) |
|
658 { |
|
659 if (table->size() > 0) |
|
660 push_back(table.release()); |
|
661 } |
|
662 else |
|
663 break; |
|
664 } |
|
665 Compile(); |
|
666 } |
|
667 |
|
668 // End of File |