|
1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 //======================================================================================== |
|
17 //the html to be parsed looks like: |
|
18 //<tag attribute=attribute_value attribute = "attribute value">text ... etc |
|
19 // |
|
20 //(1)'tag' is written to iTagBuffer |
|
21 //(2)'attribute' is written to iAttributeBuffer and 'attribute value' is written |
|
22 // to iAttributeValueBuffer, and each pair are appended to the array iAttributes |
|
23 //(3)iStartOfTextPosition is set to the start of 'text' |
|
24 // |
|
25 //when < is seen, 'text' is written to the richtext object |
|
26 //when > is seen, 'iTagBuffer' is recognised, and any required action is performed; similarly for iAttributes |
|
27 //======================================================================================== |
|
28 |
|
29 #include "CHtmlToCrtConvParser.h" |
|
30 #include "CHtmlToCrtConvBuffer.h" |
|
31 #include "CHtmlToCrtConvHashTable.h" |
|
32 #include "CHtmlToCrtConvHash.h" |
|
33 #include "CHtmlToCrtConvActionProcessor.h" |
|
34 #include "MHtmlToCrtConvResourceFile.h" |
|
35 |
|
36 const TInt KAttributeValueBufferGranularity = 10; |
|
37 const TInt KDefaultTextPosition = -1; |
|
38 |
|
39 CHtmlToCrtConvParser* CHtmlToCrtConvParser::NewL(CRichText& aRichText, CHtmlToCrtConvBuffer& aBuffer, MHtmlToCrtConvResourceFile& aResourceFile) |
|
40 { |
|
41 CHtmlToCrtConvParser* self=new(ELeave) CHtmlToCrtConvParser(aBuffer); |
|
42 CleanupStack::PushL(self); |
|
43 self->ConstructL(aRichText, aResourceFile); |
|
44 CleanupStack::Pop(self); |
|
45 return self; |
|
46 } |
|
47 |
|
48 void CHtmlToCrtConvParser::ConstructL(CRichText& aRichText, MHtmlToCrtConvResourceFile& aResourceFile) |
|
49 { |
|
50 iAttributeValueBuffer=CBufFlat::NewL(KAttributeValueBufferGranularity); |
|
51 iHashTable=CHtmlToCrtConvHashTable::NewL(); |
|
52 iActionProcessor=CHtmlToCrtConvActionProcessor::NewL(aRichText, aResourceFile); |
|
53 } |
|
54 |
|
55 CHtmlToCrtConvParser::CHtmlToCrtConvParser(CHtmlToCrtConvBuffer& aBuffer) |
|
56 :iStartOfTextPosition(KDefaultTextPosition) |
|
57 ,iEndOfTextPosition(KDefaultTextPosition) |
|
58 ,iBuffer(aBuffer) |
|
59 { |
|
60 } |
|
61 |
|
62 CHtmlToCrtConvParser::~CHtmlToCrtConvParser() |
|
63 { |
|
64 delete iHashTable; |
|
65 delete iActionProcessor; |
|
66 iAttributes.ResetAndDestroy(); |
|
67 iAttributes.Close(); |
|
68 delete iAttributeValueBuffer; |
|
69 } |
|
70 |
|
71 //============================================================= |
|
72 //DoOneStepL |
|
73 //============================================================= |
|
74 TBool CHtmlToCrtConvParser::DoOneStepL() |
|
75 { |
|
76 TChar currentCharacter; |
|
77 TBool moreProcessingReqd=ETrue; |
|
78 |
|
79 for (TInt ii=0; ii<KCharsProcessedInOneStep; ii++) |
|
80 { |
|
81 moreProcessingReqd=iBuffer.ReadCharacterL(currentCharacter, iBufferPosition, iEndOfBufferReached); |
|
82 |
|
83 if (!moreProcessingReqd) |
|
84 { |
|
85 if (iStartOfTextPosition!=KDefaultTextPosition) |
|
86 { |
|
87 iBuffer.GetToEndOfBufferL(iTextBuffer, iStartOfTextPosition); |
|
88 WriteToRichTextL(); |
|
89 } |
|
90 break; |
|
91 } |
|
92 |
|
93 if (InspectCurrentCharacter(currentCharacter)) |
|
94 { |
|
95 DoActionL(currentCharacter); |
|
96 } |
|
97 |
|
98 if (iEndOfBufferReached && (iStartOfTextPosition != KDefaultTextPosition) && moreProcessingReqd) |
|
99 { |
|
100 iEndOfTextPosition=iBufferPosition; |
|
101 iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition); |
|
102 WriteToRichTextL(); |
|
103 iParserState=EInitialState; |
|
104 iStartOfTextPosition=KDefaultTextPosition; |
|
105 iEndOfTextPosition=KDefaultTextPosition; |
|
106 } |
|
107 } |
|
108 return moreProcessingReqd; |
|
109 } |
|
110 //====================================================================== |
|
//InspectCurrentCharacter - updates iParserState, iTagState, iInTag and iInCharacterEntity
|
112 //====================================================================== |
|
113 TBool CHtmlToCrtConvParser::InspectCurrentCharacter(TChar aChar) |
|
114 { |
|
115 TBool processCharacter=ETrue; |
|
116 |
|
117 if (iParserState==EReadingJavascript) |
|
118 { |
|
119 if (aChar=='>') |
|
120 iParserState=ESeeEndOfTagWhileReadingJavascript; |
|
121 } |
|
122 |
|
123 else if (aChar=='<') |
|
124 { |
|
125 iTagState =EOpeningTag; |
|
126 iParserState=ESeeStartOfTag; |
|
127 iInTag =ETrue; |
|
128 } |
|
129 else if (iInTag) |
|
130 { |
|
131 if (aChar=='/' || aChar=='=' || aChar=='"' || aChar=='>' || aChar=='!') |
|
132 { |
|
133 SeeSpecialCharactersInTag(aChar, processCharacter); |
|
134 } |
|
135 else if (aChar.IsSpace()) |
|
136 { |
|
137 SeeWhiteSpaceCharacterInTag(processCharacter); |
|
138 } |
|
139 else if (iParserState==ESeeExclamationMark && aChar=='-') |
|
140 { |
|
141 iParserState=EReadingJavascript; |
|
142 processCharacter=EFalse; |
|
143 } |
|
144 else |
|
145 { |
|
146 SeeOtherCharactersInTag(); |
|
147 } |
|
148 } |
|
149 else if (aChar == '&') |
|
150 { |
|
151 iParserState = EStartOfCharacterEntity; |
|
152 iInCharacterEntity = ETrue; |
|
153 processCharacter = EFalse; |
|
154 iStartOfTextPosition = KDefaultTextPosition; |
|
155 } |
|
156 else if (iInCharacterEntity) |
|
157 { |
|
158 if (aChar == ';') |
|
159 { |
|
160 iParserState = EEndOfCharacterEntity; |
|
161 iInCharacterEntity = EFalse; |
|
162 } |
|
163 else |
|
164 { |
|
165 iParserState = EReadingCharacterEntity; |
|
166 } |
|
167 } |
|
168 else |
|
169 { |
|
170 SeeOtherCharactersNotInTag(processCharacter); |
|
171 } |
|
172 |
|
173 return processCharacter; |
|
174 } |
|
175 //============================================================= |
|
//functions used by InspectCurrentCharacter
|
177 //============================================================= |
|
178 void CHtmlToCrtConvParser::SeeWhiteSpaceCharacterInTag(TBool& aBool) |
|
179 { |
|
180 switch (iParserState) |
|
181 { |
|
182 case EReadingOpeningTag: |
|
183 iParserState=EFinishedReadingTag; |
|
184 aBool=EFalse; |
|
185 break; |
|
186 case EReadingClosingTag: |
|
187 iParserState=EFinishedReadingTag; |
|
188 aBool=EFalse; |
|
189 break; |
|
190 case EReadingAttribute: |
|
191 iParserState=EFinishedReadingAttribute; |
|
192 aBool=EFalse; |
|
193 break; |
|
194 case EReadingAttributeValue: |
|
195 iParserState=EFinishedReadingAttributeValue; |
|
196 break; |
|
197 case EReadingAttributeValueWithinInvCommas: |
|
198 break; |
|
199 default: |
|
200 break; |
|
201 } |
|
202 }; |
|
203 |
|
204 void CHtmlToCrtConvParser::SeeSpecialCharactersInTag(TChar aChar, TBool& aBool) |
|
205 { |
|
206 switch (aChar) |
|
207 { |
|
208 case '/': |
|
209 if (iParserState==EReadingAttributeValue || iParserState==EReadingAttributeValueWithinInvCommas) |
|
210 aBool=ETrue; |
|
211 else if (iParserState==ESeeStartOfTag) |
|
212 { |
|
213 iTagState=EClosingTag; |
|
214 iParserState=ESeeClosingTagIndicator; |
|
215 aBool=EFalse; |
|
216 } |
|
217 break; |
|
218 case '=': |
|
219 if(iParserState!=EReadingAttributeValue && iParserState!=EReadingAttributeValueWithinInvCommas) |
|
220 { |
|
221 iParserState=ESeeEquals; |
|
222 aBool=EFalse; |
|
223 } |
|
224 else |
|
225 { |
|
226 aBool=ETrue; |
|
227 } |
|
228 break; |
|
229 case '"': |
|
230 if (iParserState==ESeeEquals) |
|
231 { |
|
232 iParserState=EReadingAttributeValueWithinInvCommas; |
|
233 aBool=EFalse; |
|
234 } |
|
235 else |
|
236 { |
|
237 iParserState=EFinishedReadingAttributeValue; |
|
238 } |
|
239 break; |
|
240 case '>': |
|
241 iParserState=ESeeEndOfTag; |
|
242 iInTag=EFalse; |
|
243 break; |
|
244 case '!': |
|
245 if(iParserState==ESeeStartOfTag) |
|
246 { |
|
247 iParserState=ESeeExclamationMark; |
|
248 aBool=EFalse; |
|
249 } |
|
250 break; |
|
251 default: |
|
252 break; |
|
253 } |
|
254 }; |
|
255 |
|
256 void CHtmlToCrtConvParser::SeeOtherCharactersInTag() |
|
257 { |
|
258 switch (iParserState) |
|
259 { |
|
260 case ESeeStartOfTag: |
|
261 iParserState=EReadingOpeningTag; |
|
262 break; |
|
263 case ESeeClosingTagIndicator: |
|
264 iParserState=EReadingClosingTag; |
|
265 break; |
|
266 case ESeeEquals: |
|
267 iParserState=EReadingAttributeValue; |
|
268 break; |
|
269 case EFinishedReadingTag: |
|
270 iParserState=EReadingAttribute; |
|
271 break; |
|
272 case EFinishedReadingAttribute: |
|
273 iAttributeBuffer.Zero(); |
|
274 iParserState=EReadingAttribute; |
|
275 break; |
|
276 case EFinishedReadingAttributeValue: |
|
277 iParserState=EReadingAttribute; |
|
278 break; |
|
279 default: |
|
280 break; |
|
281 } |
|
282 }; |
|
283 |
|
284 void CHtmlToCrtConvParser::SeeOtherCharactersNotInTag(TBool& aBool) |
|
285 { |
|
286 switch (iParserState) |
|
287 { |
|
288 case EReadingText: |
|
289 aBool=EFalse; |
|
290 break; |
|
291 case ESeeEndOfTag: |
|
292 case EEndOfCharacterEntity: |
|
293 iParserState=EReadingText; |
|
294 break; |
|
295 case EInitialState: |
|
296 iParserState=EReadingText; |
|
297 break; |
|
298 default: |
|
299 break; |
|
300 } |
|
301 }; |
|
302 //============================================================= |
|
303 //DoActionL |
|
304 //============================================================= |
|
305 void CHtmlToCrtConvParser::DoActionL(TChar aChar) |
|
306 { |
|
307 switch(iParserState) |
|
308 { |
|
309 case EReadingText: |
|
310 iStartOfTextPosition=iBufferPosition; |
|
311 break; |
|
312 case EReadingOpeningTag: |
|
313 WriteToTagBufferL(aChar); |
|
314 break; |
|
315 case EReadingClosingTag: |
|
316 WriteToTagBufferL(aChar); |
|
317 break; |
|
318 case EEndOfCharacterEntity: |
|
319 DoEntityOperationL(); |
|
320 break; |
|
321 case EReadingCharacterEntity: |
|
322 WriteToEntityBufferL(aChar); |
|
323 break; |
|
324 case EReadingAttribute: |
|
325 WriteToAttributeBufferL(aChar); |
|
326 break; |
|
327 case EReadingAttributeValue: |
|
328 WriteToAttributeValueBufferL(aChar); |
|
329 break; |
|
330 case EReadingAttributeValueWithinInvCommas: |
|
331 WriteToAttributeValueBufferL(aChar); |
|
332 break; |
|
333 case EFinishedReadingAttributeValue: |
|
334 WriteToAttributeArrayL(); |
|
335 iAttributeBuffer.Zero(); |
|
336 iAttributeValueBuffer->Reset(); |
|
337 break; |
|
338 case ESeeStartOfTag: |
|
339 if (iStartOfTextPosition!=KDefaultTextPosition) |
|
340 { |
|
341 iEndOfTextPosition=iBufferPosition - 1; |
|
342 iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition); |
|
343 WriteToRichTextL(); |
|
344 iStartOfTextPosition=KDefaultTextPosition; |
|
345 iEndOfTextPosition=KDefaultTextPosition; |
|
346 } |
|
347 break; |
|
348 case ESeeEndOfTag: |
|
349 if(iAttributeValueBuffer->Size()) |
|
350 { |
|
351 WriteToAttributeArrayL(); |
|
352 iAttributeBuffer.Zero(); |
|
353 iAttributeValueBuffer->Reset(); |
|
354 } |
|
355 DoTagOperationL(); |
|
356 break; |
|
357 case ESeeEndOfTagWhileReadingJavascript: |
|
358 { |
|
359 _LIT(KJavascriptEnd, "--"); |
|
360 if(!(iLastTwoCharacters.Compare(KJavascriptEnd))) |
|
361 { |
|
362 iParserState=EInitialState;//finished reading javascript |
|
363 iInTag=EFalse; |
|
364 } |
|
365 else |
|
366 iParserState=EReadingJavascript; |
|
367 break; |
|
368 } |
|
369 case EReadingJavascript: |
|
370 //keep record of last two characters encountered, for comparing with -- when > seen |
|
371 if(iLastTwoCharacters.Length()==2) |
|
372 iLastTwoCharacters.Copy(iLastTwoCharacters.Right(1)); |
|
373 |
|
374 iLastTwoCharacters.Append(aChar); |
|
375 break; |
|
376 default: |
|
377 break; |
|
378 }; |
|
379 } |
|
380 //============================================================= |
|
381 //DoTagOperationL |
|
382 //============================================================= |
|
383 void CHtmlToCrtConvParser::DoTagOperationL() |
|
384 { |
|
385 const TInt count=iAttributes.Count(); |
|
386 THtmlToCrtConvTagType tagType=iHashTable->LookupTag(iTagBuffer); |
|
387 |
|
388 if (iTagBuffer.Length()) |
|
389 { |
|
390 iActionProcessor->DoTagActionL(tagType, iTagState); |
|
391 iTagBuffer.Zero(); |
|
392 |
|
393 TBool imgTagResourceReqd=EFalse; |
|
394 for(TInt ii=0; ii<count; ii++) |
|
395 { |
|
396 THtmlToCrtConvAttributeType attributeType=(iAttributes)[ii]->Type(); |
|
397 const TDesC8& attributeValue=(iAttributes)[ii]->Value(); |
|
398 iActionProcessor->DoAttributeActionL(tagType, iTagState, attributeType, attributeValue, imgTagResourceReqd); |
|
399 } |
|
400 if(imgTagResourceReqd) |
|
401 { |
|
402 iActionProcessor->InsertImgTagResourceL(); |
|
403 } |
|
404 iAttributes.ResetAndDestroy(); |
|
405 } |
|
406 } |
|
407 |
|
408 void CHtmlToCrtConvParser::DoEntityOperationL() |
|
409 { |
|
410 const TDesC16& entity = iHashTable->LookupEntity(iEntityBuffer); |
|
411 if(entity != KHtmlEntityUnknown) |
|
412 { |
|
413 iTextBuffer.Set(entity); |
|
414 WriteToRichTextL(); |
|
415 } |
|
416 iEntityBuffer.Zero(); |
|
417 } |
|
418 |
|
419 void CHtmlToCrtConvParser::WriteToRichTextL() |
|
420 { |
|
421 ASSERT(iTextBuffer.Length() > 0); |
|
422 if (iTextBuffer.Length()) |
|
423 { |
|
424 iActionProcessor->DoWriteTextL(iTextBuffer); |
|
425 } |
|
426 } |
|
427 |
|
428 void CHtmlToCrtConvParser::WriteToTagBufferL(TChar aChar) |
|
429 { |
|
430 if (iTagBuffer.Length()==KTagBufferLength) |
|
431 { |
|
432 iTagBuffer.Zero(); |
|
433 } |
|
434 iTagBuffer.Append(aChar); |
|
435 } |
|
436 |
|
437 void CHtmlToCrtConvParser::WriteToEntityBufferL(TChar aChar) |
|
438 { |
|
439 if (iEntityBuffer.Length()==KEntityBufferLength) |
|
440 iEntityBuffer.Zero(); |
|
441 |
|
442 iEntityBuffer.Append(aChar); |
|
443 } |
|
444 |
|
445 void CHtmlToCrtConvParser::WriteToAttributeBufferL(TChar aChar) |
|
446 { |
|
447 if (iAttributeBuffer.Length()==KAttributeBufferLength) |
|
448 { |
|
449 iAttributeBuffer.Zero(); |
|
450 } |
|
451 iAttributeBuffer.Append(aChar); |
|
452 } |
|
453 |
|
454 void CHtmlToCrtConvParser::WriteToAttributeValueBufferL(TChar aChar) |
|
455 { |
|
456 TInt size=iAttributeValueBuffer->Size(); |
|
457 TBuf8<1> temp; |
|
458 temp.Append(aChar); |
|
459 iAttributeValueBuffer->InsertL(size, temp); |
|
460 } |
|
461 |
|
462 void CHtmlToCrtConvParser::WriteToAttributeArrayL() |
|
463 { |
|
464 iAttributeValueBuffer->Compress(); |
|
465 if (iAttributeBuffer.Length() && iAttributeValueBuffer->Size()) |
|
466 { |
|
467 THtmlToCrtConvAttributeType attType=iHashTable->LookupAttribute(iAttributeBuffer); |
|
468 TPtr8 pAttributeTag(iAttributeValueBuffer->Ptr(0)); |
|
469 CHtmlToCrtConvAttribute* attribute=CHtmlToCrtConvAttribute::NewLC(attType, pAttributeTag); |
|
470 |
|
471 User::LeaveIfError(iAttributes.Append(attribute)); |
|
472 CleanupStack::Pop(attribute); |
|
473 } |
|
474 } |
|
475 |
|
476 //============================================================= |
|
477 //CHtmlToCrtConvAttribute class |
|
478 //============================================================= |
|
479 CHtmlToCrtConvAttribute* CHtmlToCrtConvAttribute::NewLC(THtmlToCrtConvAttributeType aType, TDesC8& aValue) |
|
480 { |
|
481 CHtmlToCrtConvAttribute* self=new(ELeave) CHtmlToCrtConvAttribute(aType); |
|
482 CleanupStack::PushL(self); |
|
483 self -> ConstructL(aValue); |
|
484 return self; |
|
485 } |
|
486 |
|
487 CHtmlToCrtConvAttribute::CHtmlToCrtConvAttribute(THtmlToCrtConvAttributeType aType) |
|
488 :iType(aType) |
|
489 { |
|
490 } |
|
491 |
|
492 void CHtmlToCrtConvAttribute::ConstructL(TDesC8& aValue) |
|
493 { |
|
494 iValue=aValue.AllocL(); |
|
495 } |
|
496 |
|
497 CHtmlToCrtConvAttribute::~CHtmlToCrtConvAttribute() |
|
498 { |
|
499 delete iValue; |
|
500 } |