|
1 /* |
|
2 * Copyright (c) 2002 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * An HTML stripper |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 // INCLUDE FILES |
|
21 #include "MsgMailViewerHtmlConv.h" |
|
22 |
|
23 #include <barsread.h> |
|
24 #include <eikenv.h> |
|
25 #include <f32file.h> |
|
26 #include <aknnotewrappers.h> |
|
27 #include <charconv.h> |
|
28 #include <MailPlainView.rsg> |
|
29 #include <MsgMailViewer.rsg> |
|
30 #include "MailLog.h" |
|
31 |
|
32 |
|
33 |
|
34 // LOCAL CONSTANTS AND MACROS |
|
35 _LIT (KLineBreak, "<br>"); |
|
36 _LIT (KUnnumberedList, "<ul>"); |
|
37 _LIT (KUnnumberedListEnd, "</ul>"); |
|
38 _LIT (KOrderedList, "<ol>"); |
|
39 _LIT (KOrderedListEnd, "</ol>"); |
|
40 _LIT (KListItem, "<li>"); |
|
41 _LIT (KLessThan, "<"); |
|
42 _LIT (KGreaterThan, ">"); |
|
43 _LIT (KAmpersand, "&"); |
|
44 _LIT (KDefinitionList, "<dl>"); |
|
45 _LIT (KDefinitionListEnd, "</dl>"); |
|
46 _LIT (KDefinitionListTerm, "<dt>"); |
|
47 _LIT (KDefinitionListDef, "<dd>"); |
|
48 //_LIT (KDefinitionListDefEnd, "</dd>"); |
|
49 _LIT (KBodyTag, "<body>"); |
|
50 _LIT (KScript, "<script"); |
|
51 _LIT (KScriptEnd, "</script>"); |
|
52 _LIT (KNumberedItemSeparator, ". "); |
|
53 _LIT (KNewParagraph, "<p>"); |
|
54 _LIT (KHeadStart, "<head>"); |
|
55 _LIT (KHeadEnd, "</head>"); |
|
56 _LIT (KHeading, "<h"); |
|
57 _LIT (KHeadingEnd, "</h"); |
|
58 _LIT (KStartHtml, "<html>"); |
|
59 _LIT (KEndHtml, "</html>"); |
|
60 _LIT (KHorizontalRuler, "<hr>"); |
|
61 _LIT (KImage, "<img"); |
|
62 _LIT (KImageAlt, "alt"); |
|
63 _LIT (KImageSrc, "src"); |
|
64 _LIT (KSpace, " "); |
|
65 _LIT (KNewline, "\n"); |
|
66 _LIT (KListItemSymbol, "- "); |
|
67 _LIT (KSemicolon, ";"); |
|
68 //_LIT (KQuotationMark, "\""); |
|
69 _LIT (KBlock, "<div>"); |
|
70 _LIT (KBlockEnd, "</div>"); |
|
71 _LIT(KCharSet, "charset="); |
|
72 _LIT( KCommentTagEnd, "-->"); |
|
73 _LIT( KOlId, "olid" ); |
|
74 |
|
75 const TInt KMaxOrdinalLength(5); |
|
76 // "10000 is a reasonable size for a converted block, it has been |
|
77 // tested that bigger size doesn't provide any faster conversion" |
|
78 // Blocksize has been lowered to 1000 to make the UI more |
|
79 // responsive during processing. Based on tests, the performance |
|
80 // is not noticeably changed by the smaller block size. |
|
81 // |
|
82 const TInt KMaxConvBlockLength( 1000 ); |
|
83 |
|
84 const TInt KSampleBufferSize = 256; |
|
85 const TInt KMimimumConfidenceLevel = 50; |
|
86 const TInt KMaxAmountOfSamples = KSampleBufferSize / 2; |
|
87 |
|
88 enum TConversionState { |
|
89 EDeleteHeader = 1, |
|
90 EInitCharConverter, |
|
91 EConvert, |
|
92 ERemoveTags}; |
|
93 |
|
94 enum TStatusFlags { |
|
95 EForceUseCharacterSet = KBit0, |
|
96 EOwnsFileNameArray = KBit1 |
|
97 }; |
|
98 |
|
99 // MODULE DATA STRUCTURES |
|
100 |
|
101 |
|
102 // ================= MEMBER FUNCTIONS ======================= |
|
103 |
|
104 CStringPair::CStringPair() |
|
105 { |
|
106 } |
|
107 |
|
108 CStringPair::~CStringPair() |
|
109 { |
|
110 delete iName; |
|
111 delete iValue; |
|
112 } |
|
113 |
|
114 CStringPair* CStringPair::NewLC(const TDesC& aName,const TDesC& aValue) |
|
115 { |
|
116 CStringPair* temp = new(ELeave) CStringPair(); |
|
117 CleanupStack::PushL(temp); |
|
118 temp->ConstructL(aName, aValue); |
|
119 return temp; |
|
120 } |
|
121 |
|
122 void CStringPair::ConstructL(const TDesC& aName,const TDesC& aValue) |
|
123 { |
|
124 iName = aName.AllocL(); |
|
125 iValue = aValue.AllocL(); |
|
126 } |
|
127 |
|
128 // C++ default constructor cannot contain any code that might leave |
|
129 CMsgMailViewerHtmlConv::CMsgMailViewerHtmlConv() |
|
130 : CActive( CActive::EPriorityLow ), |
|
131 iPos(0), |
|
132 iReturnValue(KErrNone), |
|
133 iErrorPos(KMaxTInt), |
|
134 iConversionState(EDeleteHeader), |
|
135 iCharacterSetId(0), |
|
136 iStatusFlags(EOwnsFileNameArray) |
|
137 { |
|
138 CActiveScheduler::Add( this ); |
|
139 } |
|
140 |
|
141 // Symbian OS default constructor can leave. |
|
142 void CMsgMailViewerHtmlConv::ConstructL(RFile& aHandle, |
|
143 TUint aCharSet) |
|
144 { |
|
145 iFileIdArray = new(ELeave) RPointerArray<HBufC>; |
|
146 LOG1("CMsgMailViewerHtmlConv::ConstructL:%08x", aCharSet); |
|
147 iCharacterSetId = aCharSet; |
|
148 |
|
149 // read HTML content |
|
150 TInt size; |
|
151 User::LeaveIfError(aHandle.Size(size)); |
|
152 HBufC8 *buf = HBufC8::NewLC(size); |
|
153 TPtr8 ptr(buf->Des()); |
|
154 User::LeaveIfError(aHandle.Read(ptr)); |
|
155 |
|
156 iOrigText = HBufC::NewL(buf->Length()); |
|
157 TPtr ptr16(iOrigText->Des()); |
|
158 ptr16.Copy(ptr); |
|
159 CleanupStack::PopAndDestroy(); // buf |
|
160 } |
|
161 |
|
162 |
|
163 // Two-phased constructor. |
|
164 CMsgMailViewerHtmlConv* CMsgMailViewerHtmlConv::NewLC( |
|
165 RFile& aHandle, TUint aCharset, TBool aForced) |
|
166 { |
|
167 CMsgMailViewerHtmlConv* self = new (ELeave) CMsgMailViewerHtmlConv(); |
|
168 |
|
169 CleanupStack::PushL( self ); |
|
170 if (aForced) |
|
171 { |
|
172 self->iStatusFlags |= EForceUseCharacterSet; |
|
173 } |
|
174 self->ConstructL(aHandle, aCharset); |
|
175 |
|
176 return self; |
|
177 } |
|
178 |
|
179 CMsgMailViewerHtmlConv* CMsgMailViewerHtmlConv::NewL( |
|
180 RFile& aHandle, TUint aCharset, TBool aForced) |
|
181 { |
|
182 CMsgMailViewerHtmlConv* self = |
|
183 CMsgMailViewerHtmlConv::NewLC( aHandle, aCharset, aForced ); |
|
184 CleanupStack::Pop( self ); |
|
185 return self; |
|
186 } |
|
187 |
|
188 // Destructor |
|
189 CMsgMailViewerHtmlConv::~CMsgMailViewerHtmlConv() |
|
190 { |
|
191 Cancel(); |
|
192 delete iOrigText; |
|
193 delete iCharSetName; |
|
194 if (iMacros) |
|
195 { |
|
196 iMacros->ResetAndDestroy(); |
|
197 } |
|
198 delete iMacros; |
|
199 |
|
200 if (iFileIdArray && ( iStatusFlags & EOwnsFileNameArray ) ) |
|
201 { |
|
202 iFileIdArray->ResetAndDestroy(); |
|
203 delete iFileIdArray; |
|
204 } |
|
205 delete iCharConv; |
|
206 } |
|
207 |
|
208 TInt CMsgMailViewerHtmlConv::Convert() |
|
209 { |
|
210 TRAP( iReturnValue, DoConversionL() ); |
|
211 LOG1( "CMsgMailViewerHtmlConv::Convert return:%d", iReturnValue ); |
|
212 return iReturnValue; |
|
213 } |
|
214 |
|
215 void CMsgMailViewerHtmlConv::Convert( TRequestStatus* aStatus ) |
|
216 { |
|
217 LOG( "CMsgMailViewerHtmlConv::Convert (Async)"); |
|
218 ASSERT( aStatus ); |
|
219 Cancel(); |
|
220 iReqStatus = aStatus; |
|
221 *aStatus = KRequestPending; |
|
222 ContinueAsyncConvert(); |
|
223 } |
|
224 |
|
225 void CMsgMailViewerHtmlConv::DoConvertL() |
|
226 { |
|
227 if ( !iCharConv ) |
|
228 { |
|
229 //missing charconv is considered to be a |
|
230 //corrupt message situtation, the html part |
|
231 //will be available in attachment view |
|
232 ShowErrorL(); |
|
233 User::Leave( KErrCorrupt ); |
|
234 } |
|
235 |
|
236 // Conversion "cursor" location, initially start of descriptor |
|
237 TInt toConvertStart( 0 ); |
|
238 // Length of text to still convert |
|
239 TInt toConvertLength( iOrigText->Length() ); |
|
240 // Conversion is done in blocks to control heap consumption, |
|
241 // size of a block is always <= KMaxConvBlockLength |
|
242 TInt blockLength( Min( toConvertLength, KMaxConvBlockLength ) ); |
|
243 // Initially no block is partially converted -> remainder == 0 |
|
244 TInt blockRemainder( 0 ); |
|
245 // Converter's state variable, mustn't be tampered with during conversion |
|
246 TInt state( CCnvCharacterSetConverter::KStateDefault ); |
|
247 |
|
248 // 8bit source buffer and 16bit target buffer for a block. We use |
|
249 // first block's size as buffer size, since the following blocks |
|
250 // are either of same size or smaller |
|
251 HBufC8* buf8 = HBufC8::NewLC( blockLength ); |
|
252 HBufC* buf = HBufC::NewLC( blockLength ); |
|
253 |
|
254 // Pointers to the source and target texts. |
|
255 TPtr origPtr( iOrigText->Des() ); |
|
256 TPtr8 srcPtr( buf8->Des() ); |
|
257 TPtr dstPtr( buf->Des() ); |
|
258 |
|
259 while ( toConvertLength > 0 ) // conversion loop |
|
260 { |
|
261 // Create a block for this conversion round |
|
262 blockLength = Min( toConvertLength, KMaxConvBlockLength ); |
|
263 srcPtr.Copy( iOrigText->Mid( toConvertStart, blockLength ) ); |
|
264 |
|
265 // Convert a block, return number of characters that weren't converted |
|
266 blockRemainder = iCharConv->ConvertToUnicode( dstPtr, srcPtr, state ); |
|
267 // Handle possible errors |
|
268 if ( blockRemainder < 0 ) |
|
269 { |
|
270 iPos = toConvertStart; |
|
271 ShowErrorL(); |
|
272 break; |
|
273 } |
|
274 |
|
275 // Store the contents of the output buffer |
|
276 // Here unicodeLength may be less than convertedLength, e.g. in |
|
277 // case of "ä" ->"ä", and that must be taken into account |
|
278 TInt convertedLength = srcPtr.Length() - blockRemainder; |
|
279 TInt unicodeLength = dstPtr.Length(); |
|
280 // Replace convertedLength characters with unicodeLength characters |
|
281 origPtr.Replace( toConvertStart, convertedLength, dstPtr ); |
|
282 // Update progress status, move starting point to next |
|
283 // unconverted character |
|
284 toConvertStart += unicodeLength; |
|
285 toConvertLength = iOrigText->Length() - toConvertStart; |
|
286 } |
|
287 |
|
288 CleanupStack::PopAndDestroy( 2 ); // buf8, buf |
|
289 } |
|
290 |
|
291 void CMsgMailViewerHtmlConv::DoConversionL() |
|
292 { |
|
293 switch (iConversionState) |
|
294 { |
|
295 case EDeleteHeader: |
|
296 { |
|
297 LOG("EDeleteHeader"); |
|
298 |
|
299 // Try to find charset name if not set already |
|
300 if ( !(iStatusFlags & EForceUseCharacterSet) ) |
|
301 { |
|
302 TInt charSetPos(iOrigText->FindF(KCharSet())); |
|
303 if ( charSetPos != KErrNotFound) |
|
304 { |
|
305 // Begin of charset name |
|
306 charSetPos += KCharSet().Length(); |
|
307 TInt maxIdLength(charSetPos+20); // Give up if id end not found at this point. |
|
308 for (TInt i = charSetPos; i<maxIdLength; ++i) |
|
309 { |
|
310 if((*iOrigText)[i] == '"') |
|
311 { |
|
312 iCharSetName = iOrigText->Mid( |
|
313 charSetPos, i-charSetPos).AllocL(); |
|
314 break; |
|
315 } |
|
316 } |
|
317 } |
|
318 } |
|
319 |
|
320 // 5 for <body |
|
321 TInt bodyPos(iOrigText->FindF(KBodyTag().Left(5))); |
|
322 if ( bodyPos != KErrNotFound) |
|
323 { |
|
324 // Delete from start to body tag |
|
325 Delete(bodyPos); |
|
326 // Delete body tag |
|
327 RemoveTagL(); |
|
328 } |
|
329 else // no body tag, invalid HTML, search for <html> then |
|
330 { |
|
331 bodyPos = iOrigText->FindF(KStartHtml().Left(5)); |
|
332 if (bodyPos != KErrNotFound) |
|
333 { |
|
334 Delete(bodyPos); |
|
335 RemoveTagL(); |
|
336 } |
|
337 } |
|
338 |
|
339 iConversionState++; |
|
340 break; |
|
341 } |
|
342 |
|
343 case EInitCharConverter: |
|
344 { |
|
345 RFs fs = CEikonEnv::Static()->FsSession(); |
|
346 iCharConv = CCnvCharacterSetConverter::NewL(); |
|
347 // Try to find character set id from the HTML header |
|
348 if ( iCharSetName && iCharSetName->Length() ) |
|
349 { |
|
350 HBufC8* buf8 = HBufC8::NewLC( iCharSetName->Length() ); |
|
351 buf8->Des().Copy( *iCharSetName ); |
|
352 |
|
353 iCharacterSetId = iCharConv-> |
|
354 ConvertStandardNameOfCharacterSetToIdentifierL( |
|
355 *buf8, fs); |
|
356 |
|
357 CleanupStack::PopAndDestroy(); // buf8 |
|
358 } |
|
359 |
|
360 if (iCharacterSetId > 0) |
|
361 { |
|
362 LOG1("CMsgMailViewerHtmlConv::EInitCharConverter:%08x", |
|
363 iCharacterSetId); |
|
364 const TInt ret(iCharConv->PrepareToConvertToOrFromL( |
|
365 iCharacterSetId, |
|
366 fs)); |
|
367 |
|
368 if (ret == CCnvCharacterSetConverter::ENotAvailable) |
|
369 { |
|
370 LOG( "CMsgMailViewerHtmlConv::EInitCharConverter: ENotAvailable" ); |
|
371 iCharacterSetId = 0; |
|
372 delete iCharConv; |
|
373 iCharConv = NULL; |
|
374 } |
|
375 |
|
376 } |
|
377 else |
|
378 { |
|
379 //regular method of getting charset id failed. |
|
380 //Try autodetect instead. |
|
381 |
|
382 TInt confidence=0; |
|
383 const CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* availableSets = |
|
384 CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC( fs ); |
|
385 |
|
386 HBufC8* sample = HBufC8::NewLC( KSampleBufferSize ); |
|
387 TPtr8 sampleDes = sample->Des(); |
|
388 |
|
389 sampleDes.Copy(iOrigText->Left( iOrigText->Length() > KMaxAmountOfSamples ? |
|
390 KMaxAmountOfSamples : iOrigText->Length())); |
|
391 |
|
392 iCharConv->AutoDetectCharSetL(confidence, iCharacterSetId, *availableSets, sampleDes); |
|
393 |
|
394 if(confidence > KMimimumConfidenceLevel) |
|
395 { |
|
396 const TInt ret(iCharConv->PrepareToConvertToOrFromL( |
|
397 iCharacterSetId, |
|
398 fs)); |
|
399 } |
|
400 else |
|
401 { |
|
402 //autodetect not reliable enough. |
|
403 delete iCharConv; |
|
404 iCharConv = NULL; |
|
405 } |
|
406 |
|
407 CleanupStack::PopAndDestroy(2); // availableSets, sample |
|
408 } |
|
409 |
|
410 iConversionState++; |
|
411 break; |
|
412 } |
|
413 |
|
414 case EConvert: |
|
415 { |
|
416 LOG("EConvert"); |
|
417 DoConvertL(); |
|
418 |
|
419 // reset position to beginning before step to next state. |
|
420 iPos = 0; |
|
421 iConversionState++; |
|
422 break; |
|
423 } |
|
424 |
|
425 case ERemoveTags: |
|
426 { |
|
427 LOG("ERemoveTags"); |
|
428 /** |
|
429 * This operation is done inside the CAknWaitnoteWrapper StepL |
|
430 * (CActive::RunL) and thus it should not take too much time. |
|
431 * We use maxloops to break out from time consuming operation, |
|
432 * so that wait note can be updated. |
|
433 */ |
|
434 TInt maxloops(10000); |
|
435 while (iReturnValue == KErrNone && iPos < iOrigText->Length()) |
|
436 { |
|
437 if (maxloops < 0) |
|
438 { |
|
439 LOG("maxloops"); |
|
440 // return to update wait note. |
|
441 return; |
|
442 } |
|
443 TPtrC character = iOrigText->Mid(iPos, 1); |
|
444 if (character == KAmpersand) |
|
445 { |
|
446 // Replace |
|
447 ReplaceMacroL(); |
|
448 // takes more time, so decrease by ten |
|
449 maxloops -= 10; |
|
450 } |
|
451 else if (character == KLessThan) |
|
452 { |
|
453 // Remove possible tag |
|
454 HandleTagL(0); |
|
455 // takes more time, so decrease by ten |
|
456 maxloops -= 10; |
|
457 } |
|
458 // replace linefeeds with one space |
|
459 else if (character[0] == 0x0A || character[0] == 0x0D) |
|
460 { |
|
461 // +1 peek next character |
|
462 if (iPos+1 < iOrigText->Length() && |
|
463 (iOrigText->Mid(iPos+1, 1)[0] == 0x0A || |
|
464 iOrigText->Mid(iPos+1, 1)[0] == 0x0D)) |
|
465 { |
|
466 Delete(1); // delete character |
|
467 } |
|
468 else |
|
469 { |
|
470 // insert space |
|
471 TPtr ptr(iOrigText->Des()); |
|
472 ptr.Replace(iPos, 1, KSpace); |
|
473 // move to next char |
|
474 iPos++; |
|
475 } |
|
476 } |
|
477 else |
|
478 { |
|
479 // +1 peek next char |
|
480 if (character == KSpace && |
|
481 iPos+1 < iOrigText->Length() && |
|
482 iOrigText->Mid(iPos+1, 1) == KSpace) |
|
483 { |
|
484 // delete multible spaces |
|
485 Delete(1); |
|
486 } |
|
487 else |
|
488 { |
|
489 // move to next char |
|
490 iPos++; |
|
491 } |
|
492 maxloops--; |
|
493 } |
|
494 } |
|
495 iConversionState++; |
|
496 // conversion ended |
|
497 if (iReturnValue == KErrNone) |
|
498 { |
|
499 iReturnValue = KErrEof; |
|
500 } |
|
501 break; |
|
502 } |
|
503 |
|
504 default: |
|
505 LOG("default"); |
|
506 // conversion ended |
|
507 iReturnValue = KErrEof; |
|
508 break; |
|
509 } |
|
510 } |
|
511 |
|
512 |
|
513 TInt CMsgMailViewerHtmlConv::HandleListL(const TInt aIndent, |
|
514 const TBool aNumbered) |
|
515 { |
|
516 TInt item(1); |
|
517 TInt len(RemoveTagL()); // remove <ol> |
|
518 TInt remaining(len - iPos); |
|
519 // length of the buf is 4, so we can find and distinguish <li> and </ol |
|
520 TBuf<4> buf(iOrigText->Mid(iPos, Min(4, remaining))); |
|
521 buf.LowerCase(); |
|
522 const TInt currPos(iPos); |
|
523 |
|
524 while (iPos < len && buf != (aNumbered ? KOrderedListEnd().Left(4) : |
|
525 KUnnumberedListEnd().Left(4))) |
|
526 { |
|
527 if (buf == KListItem) |
|
528 { |
|
529 RemoveTagL(); |
|
530 |
|
531 HBufC *fill = HBufC::NewLC(aIndent + KMaxOrdinalLength); |
|
532 TPtr fillPtr = fill->Des(); |
|
533 |
|
534 fillPtr.AppendFill(' ', aIndent); |
|
535 if (aNumbered) |
|
536 { |
|
537 fillPtr.AppendNum(item); |
|
538 fillPtr.Append(KNumberedItemSeparator); |
|
539 } |
|
540 else |
|
541 { |
|
542 fillPtr.Append(KListItemSymbol); |
|
543 } |
|
544 |
|
545 InsertTextL(fillPtr); |
|
546 CleanupStack::PopAndDestroy(); // fill |
|
547 |
|
548 InsertLinefeedL(); |
|
549 |
|
550 len = iOrigText->Length(); |
|
551 iPos += aIndent + (aNumbered ? 3 : 2); // iPos is always moved at least by 1 |
|
552 item++; |
|
553 } |
|
554 else if ( buf.Left(1)[0] == 0x0A || buf.Left(1)[0] == 0x0D ) |
|
555 { |
|
556 Delete(1); // delete linefeeds |
|
557 } |
|
558 else if (buf.Left(1) == KLessThan) |
|
559 { |
|
560 len = HandleTagL(aIndent + 2); |
|
561 } |
|
562 else if (buf.Left(1) == KAmpersand) |
|
563 { |
|
564 len = ReplaceMacroL(); |
|
565 } |
|
566 else |
|
567 { |
|
568 iPos++; |
|
569 } |
|
570 remaining = len - iPos; |
|
571 buf = iOrigText->Mid(iPos, Min(4, remaining)); |
|
572 buf.LowerCase(); |
|
573 } |
|
574 |
|
575 if ( iPos != len) |
|
576 { |
|
577 InsertLinefeedL(); |
|
578 len = RemoveTagL(); // delete </ol> |
|
579 } |
|
580 else |
|
581 { |
|
582 iPos = currPos; |
|
583 ShowErrorL(); |
|
584 len = iOrigText->Length(); |
|
585 } |
|
586 |
|
587 return len; |
|
588 } |
|
589 |
|
590 TInt CMsgMailViewerHtmlConv::HandleDListL(TInt aIndent) |
|
591 { |
|
592 TInt len(Delete(4)); // remove <dl> |
|
593 TInt remaining(len - iPos); |
|
594 // length of the buf is 4, so we can find and distinguish <li> and </dl |
|
595 TBuf<4> buf(iOrigText->Mid(iPos, Min(4, remaining))); |
|
596 buf.LowerCase(); |
|
597 const TInt currPos(iPos); |
|
598 |
|
599 while (iPos < len && buf != KDefinitionListEnd().Left(4)) |
|
600 { |
|
601 if (buf == KDefinitionListTerm) |
|
602 { |
|
603 len = RemoveTagL(); |
|
604 HBufC *fill = HBufC::NewLC(aIndent); |
|
605 TPtr fillPtr = fill->Des(); |
|
606 |
|
607 fillPtr.AppendFill(' ', aIndent); |
|
608 InsertTextL(fillPtr); |
|
609 CleanupStack::PopAndDestroy(); // fill |
|
610 |
|
611 InsertLinefeedL(); |
|
612 iPos += aIndent; // skip spaces |
|
613 len = iOrigText->Length(); |
|
614 } |
|
615 else if (buf == KDefinitionListDef) |
|
616 { |
|
617 len = RemoveTagL(); |
|
618 InsertTextL(KSpace); |
|
619 } |
|
620 else if (buf.Left(1) == KLessThan) |
|
621 { |
|
622 len = HandleTagL(aIndent + 2); |
|
623 } |
|
624 else if (buf.Left(1) == KAmpersand) |
|
625 { |
|
626 len = ReplaceMacroL(); |
|
627 } |
|
628 else |
|
629 { |
|
630 iPos++; |
|
631 } |
|
632 remaining = len - iPos; |
|
633 buf = iOrigText->Mid(iPos, Min(4, remaining)); |
|
634 buf.LowerCase(); |
|
635 } |
|
636 |
|
637 if ( iPos != len) |
|
638 { |
|
639 len = RemoveTagL(); // delete </dl> |
|
640 } |
|
641 else // </dl> missing |
|
642 { |
|
643 iPos = currPos; |
|
644 ShowErrorL(); |
|
645 len = iOrigText->Length(); |
|
646 } |
|
647 |
|
648 return len; |
|
649 } |
|
650 |
|
651 TInt CMsgMailViewerHtmlConv::HandleTagL(TInt aIndent) |
|
652 { |
|
653 TInt length(iOrigText->Length()); |
|
654 const TInt remaining(length - iPos); |
|
655 |
|
656 if (!remaining) |
|
657 { |
|
658 // Nothing to remove |
|
659 iReturnValue = KErrEof; |
|
660 return iOrigText->Length(); |
|
661 } |
|
662 |
|
663 // Longest HTML tag is 6 characters long, thus six char buffer |
|
664 TBuf<6> buf(iOrigText->Mid(iPos+1, Min(6, remaining - 1))); |
|
665 buf.LowerCase(); |
|
666 buf.TrimAll(); |
|
667 |
|
668 if (buf.Left(2) == KLineBreak().Mid(1,2) || |
|
669 buf.Left(1) == KNewParagraph().Mid(1,1) || |
|
670 buf.Left(2) == KHorizontalRuler().Mid(1,2) || |
|
671 buf.Left(3) == KBlock().Mid(1,3)) |
|
672 { |
|
673 length = RemoveTagL(); // Delete <br>, <p>, <hr> or <div> |
|
674 InsertLinefeedL(); |
|
675 return length; |
|
676 } |
|
677 else if (buf.Left(4) == KBlockEnd().Mid(1,4)) |
|
678 { |
|
679 length = RemoveTagL(); // Delete </div> |
|
680 if ( (length - iPos) > 6 ) // still data to peek to? |
|
681 { |
|
682 // peek the next tag and if it's <div>, don't add newline |
|
683 TPtrC peekPtr(iOrigText->Mid(iPos)); |
|
684 TInt found = peekPtr.Find(KLessThan); |
|
685 // found + 3 there needs to be at least 3 characters for "div" check |
|
686 if ( found != KErrNotFound && iPos+found+3 < length ) |
|
687 { |
|
688 // +1 step over "<" |
|
689 TBuf<6> peekbuf( iOrigText->Mid( |
|
690 iPos+found+1, Min(6, length - iPos - 1)) ); |
|
691 peekbuf.LowerCase(); |
|
692 peekbuf.TrimAll(); |
|
693 // compare "div" part |
|
694 if ( peekbuf.Length() > 3 && |
|
695 peekbuf.Left(3) != KBlock().Mid(1,3) ) |
|
696 { |
|
697 // next tag is not <div>, add newline |
|
698 InsertLinefeedL(); |
|
699 } |
|
700 } |
|
701 } |
|
702 return length; |
|
703 } |
|
704 else if (buf.Left(2) == KUnnumberedList().Mid(1,2)) |
|
705 { |
|
706 return HandleUListL(aIndent); |
|
707 } |
|
708 else if (buf.Left(2) == KOrderedList().Mid(1,2)) |
|
709 { |
|
710 // check for Outlook's olid tag, not to be confused |
|
711 // with ordered list tag <ol> |
|
712 if( buf.Left( 4 ).Match( KOlId ) == 0 ) |
|
713 { |
|
714 return RemoveTagL(); // Delete <olid ....> |
|
715 } |
|
716 return HandleOListL(aIndent); |
|
717 } |
|
718 else if (buf.Left(2) == KDefinitionList().Mid(1,2)) |
|
719 { |
|
720 return HandleDListL(aIndent); |
|
721 } |
|
722 else if (buf.Left(4) == KHeadStart().Mid(1,4)) |
|
723 { |
|
724 const TInt currPos(iPos); |
|
725 length = RemoveTagL(); // Remove <head ...> |
|
726 TPtrC tagPos(iOrigText->Mid(iPos)); |
|
727 TBool found(EFalse); |
|
728 TInt endTag(tagPos.Find(KLessThan)); |
|
729 while (!found && endTag != KErrNotFound |
|
730 && iOrigText->Length() >= endTag+5 ) |
|
731 { |
|
732 buf = iOrigText->Mid(endTag+1, 4); |
|
733 buf.TrimAll(); |
|
734 buf.LowerCase(); |
|
735 if (buf == KHeadEnd().Mid(1, 4)) |
|
736 { |
|
737 found = ETrue; |
|
738 Delete(endTag - iPos); |
|
739 return RemoveTagL(); |
|
740 } |
|
741 else |
|
742 { |
|
743 TPtrC tmpPos(iOrigText->Mid(endTag+1)); |
|
744 const TInt p(tmpPos.Find(KLessThan)); |
|
745 endTag = p == KErrNotFound ? KErrNotFound : endTag + p + 1; |
|
746 } |
|
747 } |
|
748 if (endTag == KErrNotFound) |
|
749 { |
|
750 iPos = currPos; |
|
751 ShowErrorL(); |
|
752 return iOrigText->Length(); |
|
753 } |
|
754 } |
|
755 else if (buf.Left(1) == KHeading().Mid(1,1)) |
|
756 { |
|
757 CDesCArrayFlat *headArray = new(ELeave) CDesCArrayFlat(6); |
|
758 CleanupStack::PushL(headArray); |
|
759 for (TInt i = 1; i < 7; i++) |
|
760 { |
|
761 TBuf<3> tag = KHeading().Mid(1); |
|
762 tag.AppendNum(i); |
|
763 tag.Append(KGreaterThan); |
|
764 headArray->AppendL(tag); |
|
765 } |
|
766 TInt foundInPos; |
|
767 if (headArray->Find(buf.Left(3), foundInPos) == 0) |
|
768 { |
|
769 length = RemoveTagL(); // remove <h?> |
|
770 InsertLinefeedL(); |
|
771 CleanupStack::PopAndDestroy(); // headArray |
|
772 return length; |
|
773 } |
|
774 CleanupStack::PopAndDestroy(); // headArray |
|
775 } |
|
776 else if (buf.Left(2) == KHeadingEnd().Mid(1,2) && |
|
777 buf != KEndHtml().Mid(1, 6)) |
|
778 { |
|
779 CDesCArrayFlat *headArray = new(ELeave) CDesCArrayFlat(6); |
|
780 CleanupStack::PushL(headArray); |
|
781 for (TInt i = 1; i < 7; i++) |
|
782 { |
|
783 TBuf<4> tag = KHeadingEnd().Mid(1); |
|
784 tag.AppendNum(i); |
|
785 tag.Append(KGreaterThan); |
|
786 headArray->AppendL(tag); |
|
787 } |
|
788 TInt foundInPos; |
|
789 if (headArray->Find(buf.Left(4), foundInPos) == 0) |
|
790 { |
|
791 length = RemoveTagL(); // remove </h?> |
|
792 InsertLinefeedL(); |
|
793 CleanupStack::PopAndDestroy(); // headArray |
|
794 return length; |
|
795 } |
|
796 CleanupStack::PopAndDestroy(); // headArray |
|
797 } |
|
798 else if (buf == KScript().Mid(1)) |
|
799 { |
|
800 const TInt currPos(iPos); |
|
801 length = RemoveTagL(); // Remove <script ...> |
|
802 TPtrC tagPos(iOrigText->Mid(iPos)); |
|
803 TBool found(EFalse); |
|
804 TInt endTag(tagPos.Find(KLessThan)); |
|
805 while (!found && endTag != KErrNotFound |
|
806 && iOrigText->Length() >= endTag+7 ) |
|
807 { |
|
808 buf = iOrigText->Mid(endTag+1, 6); |
|
809 buf.TrimAll(); |
|
810 buf.LowerCase(); |
|
811 if (buf == KScriptEnd().Mid(1, 6)) |
|
812 { |
|
813 found = ETrue; |
|
814 Delete(endTag - iPos); |
|
815 return RemoveTagL(); |
|
816 } |
|
817 else |
|
818 { |
|
819 TPtrC tmpPos(iOrigText->Mid(endTag+1)); |
|
820 const TInt p(tmpPos.Find(KLessThan)); |
|
821 endTag = p == KErrNotFound ? KErrNotFound : endTag + p + 1; |
|
822 } |
|
823 } |
|
824 if (endTag == KErrNotFound) |
|
825 { |
|
826 iPos = currPos; |
|
827 ShowErrorL(); |
|
828 return iOrigText->Length(); |
|
829 } |
|
830 } |
|
831 else if (buf.Left(3) == KImage().Mid(1,3)) |
|
832 { |
|
833 TBool inQuoted(EFalse); // in quoted string |
|
834 TBool inAltPart(EFalse); |
|
835 TBool inAltQuotedPart(EFalse); |
|
836 TBool inSrcPart(EFalse); |
|
837 const TInt KInitialSize(50); |
|
838 HBufC* inlineImage = HBufC::NewLC(KInitialSize); |
|
839 |
|
840 while ((*iOrigText)[iPos] != '>' && iPos < iOrigText->Length()) |
|
841 { |
|
842 TChar ch((*iOrigText)[iPos]); |
|
843 const TInt remaining(iOrigText->Length() - iPos); |
|
844 HBufC* lowerCaseCopy = iOrigText->Mid(iPos, |
|
845 remaining > 3 ? 3 : remaining).AllocLC(); |
|
846 TPtr lowerCasePtr(lowerCaseCopy->Des()); |
|
847 lowerCasePtr.LowerCase(); |
|
848 |
|
849 if (inQuoted && ch != '"') |
|
850 { |
|
851 if ( inlineImage->Length() == inlineImage->Des().MaxLength() ) |
|
852 { |
|
853 HBufC* temp = inlineImage->ReAllocL( |
|
854 inlineImage->Length() + KInitialSize); |
|
855 CleanupStack::Pop(2); //lowerCaseCopy, inlineImage |
|
856 CleanupStack::PushL( inlineImage = temp ); |
|
857 CleanupStack::PushL( lowerCaseCopy ); |
|
858 } |
|
859 |
|
860 // Leave alt quoted to the body |
|
861 if ( inAltQuotedPart ) |
|
862 { |
|
863 iPos++; |
|
864 } |
|
865 else if ( inSrcPart ) |
|
866 { |
|
867 inlineImage->Des().Append(ch); |
|
868 Delete(1); |
|
869 } |
|
870 else |
|
871 { |
|
872 // other queted parts are deleted |
|
873 Delete(1); |
|
874 } |
|
875 |
|
876 } |
|
877 else if (ch == '"') |
|
878 { |
|
879 Delete(1); |
|
880 inQuoted = !inQuoted; |
|
881 if (inAltPart) |
|
882 { |
|
883 inAltQuotedPart = ETrue; |
|
884 inAltPart = EFalse; |
|
885 } |
|
886 else if (inAltQuotedPart) |
|
887 { |
|
888 inAltQuotedPart = EFalse; |
|
889 } |
|
890 else if (inSrcPart && !inQuoted) |
|
891 { |
|
892 // end of Src Quoted |
|
893 inSrcPart = EFalse; |
|
894 } |
|
895 } |
|
896 else if (!lowerCaseCopy->Compare(KImageAlt)) |
|
897 { |
|
898 Delete(KImageAlt().Length()); // delete alt |
|
899 inAltPart = ETrue; |
|
900 inSrcPart = EFalse; |
|
901 } |
|
902 else if (!lowerCaseCopy->Compare(KImageSrc)) |
|
903 { |
|
904 Delete(KImageSrc().Length()); // delete src |
|
905 inAltPart = EFalse; |
|
906 inSrcPart = ETrue; |
|
907 } |
|
908 else |
|
909 { |
|
910 Delete(1); |
|
911 } |
|
912 CleanupStack::PopAndDestroy(); // lowerCaseCopy |
|
913 } |
|
914 if ( inlineImage->Length() ) |
|
915 { |
|
916 User::LeaveIfError( iFileIdArray->Append(inlineImage) ); |
|
917 CleanupStack::Pop(); // inlineImage |
|
918 } |
|
919 else |
|
920 { |
|
921 CleanupStack::PopAndDestroy(); // inlineImage |
|
922 } |
|
923 |
|
924 if (iPos < iOrigText->Length()) |
|
925 { |
|
926 Delete(1); // delete > |
|
927 } |
|
928 return iOrigText->Length(); |
|
929 } |
|
930 |
|
931 return RemoveTagL(); |
|
932 } |
|
933 |
|
934 TInt CMsgMailViewerHtmlConv::RemoveTagL() |
|
935 { |
|
936 const TInt currPos(iPos); |
|
937 TPtrC tagPos(iOrigText->Mid(iPos)); |
|
938 const TInt tagLength(tagPos.Find(KGreaterThan)); |
|
939 if (tagLength == KErrNotFound) |
|
940 { |
|
941 iReturnValue = KErrEof; // ">" not found at the end |
|
942 iPos = 0; |
|
943 return 0; |
|
944 } |
|
945 |
|
946 if (tagPos[1] == '!') // start of the comment |
|
947 { |
|
948 const TInt commentLength( tagPos.Find( KCommentTagEnd ) ); |
|
949 if (commentLength != KErrNotFound) |
|
950 return Delete(commentLength + 3); // 3 for --> |
|
951 |
|
952 return Delete(tagLength+1);// not comment delete whole tag; |
|
953 } |
|
954 // Check that > really belongs to this tag |
|
955 TPtrC tmpPos(iOrigText->Mid(iPos+1)); |
|
956 const TInt firstTagStart(tmpPos.Find(KLessThan)); |
|
957 if (tagLength != KErrNotFound && |
|
958 ( firstTagStart == KErrNotFound || |
|
959 tagLength <= firstTagStart ) ) |
|
960 { |
|
961 return Delete(tagLength+1); |
|
962 } |
|
963 else |
|
964 { |
|
965 // check that found < really belongs to tag which end was found |
|
966 // and it's not only wrongly encoded |
|
967 if (firstTagStart < tagLength) |
|
968 { |
|
969 iPos++; |
|
970 return iOrigText->Length(); |
|
971 } |
|
972 else |
|
973 { |
|
974 iPos = currPos; |
|
975 ShowErrorL(); |
|
976 return iOrigText->Length(); |
|
977 } |
|
978 } |
|
979 } |
|
980 |
|
981 RPointerArray<HBufC>* CMsgMailViewerHtmlConv::FileIdArray() |
|
982 { |
|
983 iStatusFlags &= ~EOwnsFileNameArray; |
|
984 return iFileIdArray; |
|
985 } |
|
986 |
|
987 TBool CMsgMailViewerHtmlConv::AutoParsedCharSetNameAndIdentifier() const |
|
988 { |
|
989 TBool autoParsed = EFalse; |
|
990 // |
|
991 if ( !(iStatusFlags & EForceUseCharacterSet) ) |
|
992 { |
|
993 autoParsed = ( iCharSetName != NULL ) && ( iCharacterSetId != 0); |
|
994 } |
|
995 // |
|
996 return autoParsed; |
|
997 } |
|
998 |
|
999 TUint CMsgMailViewerHtmlConv::AutoParsedCharSetIdentifier() const |
|
1000 { |
|
1001 return iCharacterSetId; |
|
1002 } |
|
1003 |
|
1004 |
|
1005 |
|
1006 TInt CMsgMailViewerHtmlConv::Delete(const TInt aLen) |
|
1007 { |
|
1008 TPtr ptr(iOrigText->Des()); |
|
1009 ptr.Delete(iPos, aLen); |
|
1010 return iOrigText->Length(); |
|
1011 } |
|
1012 |
|
1013 // InsertTextL() |
|
1014 // It is checked that text to be inserted fits into a descriptor and |
|
1015 // a descriptor's size is doubled if not. In practice this should never |
|
1016 // happen because insertions are usually one character to the place where |
|
1017 // there used to be i.e. a 4-char macro. |
|
1018 // |
|
1019 TInt CMsgMailViewerHtmlConv::InsertTextL(const TDesC& aChars) |
|
1020 { |
|
1021 if (iOrigText->Des().MaxLength() < iOrigText->Length() + aChars.Length()) |
|
1022 { |
|
1023 iOrigText = iOrigText->ReAllocL(( |
|
1024 iOrigText->Length() + aChars.Length()) * 2); |
|
1025 } |
|
1026 |
|
1027 TPtr ptr(iOrigText->Des()); |
|
1028 ptr.Insert(iPos, aChars); |
|
1029 return iOrigText->Length(); |
|
1030 } |
|
1031 |
|
1032 TInt CMsgMailViewerHtmlConv::ReplaceMacroL() |
|
1033 { |
|
1034 if (!iMacros) |
|
1035 { |
|
1036 InitMacroArrayL(); |
|
1037 } |
|
1038 |
|
1039 TInt len(iOrigText->Length()); |
|
1040 const TPtrC macroPos(iOrigText->Mid(iPos+1)); |
|
1041 const TInt macroLength(macroPos.Find(KSemicolon)); |
|
1042 if (macroLength > 0) |
|
1043 { |
|
1044 const TPtrC macroPtr(iOrigText->Mid(iPos+1, macroLength)); |
|
1045 |
|
1046 TInt pos; |
|
1047 // A dummy string pair for Find-method |
|
1048 TDesC tmpDesC(KSpace); |
|
1049 CStringPair* tmpPair = CStringPair::NewLC(macroPtr, tmpDesC); |
|
1050 if (iMacros->FindInOrder(tmpPair, pos, |
|
1051 TLinearOrder<CStringPair>(CStringPair::Compare)) != KErrNotFound) |
|
1052 { |
|
1053 // + 2 for '&' + ';' |
|
1054 Delete( ((*iMacros)[pos])->GetName()->Length() + 2); |
|
1055 len = InsertTextL( *((*iMacros)[pos]->GetValue()) ); |
|
1056 iPos++; |
|
1057 CleanupStack::PopAndDestroy(); // tmpPair |
|
1058 return len; |
|
1059 } |
|
1060 CleanupStack::PopAndDestroy(); // tmpPair |
|
1061 if (macroPtr[0] == '#') // numeric form |
|
1062 { |
|
1063 const TPtrC ptr(iOrigText->Mid(iPos+2, macroLength - 1)); // skip # |
|
1064 TLex lex(ptr); |
|
1065 TUint value(0); |
|
1066 TInt err(KErrNone); |
|
1067 if (User::LowerCase(lex.Peek()) == 'x') |
|
1068 { // character in hex |
|
1069 lex.Inc(1); // skip x in € |
|
1070 err = lex.Val(value, EHex); |
|
1071 } |
|
1072 else |
|
1073 { // character in decimal |
|
1074 err = lex.Val(value); |
|
1075 } |
|
1076 if (err == KErrNone) |
|
1077 { |
|
1078 // Check how many chars we have actually consumed. |
|
1079 TInt offset = lex.Offset(); |
|
1080 // NCR could be missing the semicolon. |
|
1081 if( lex.Peek() == ';' ) |
|
1082 { |
|
1083 offset++; |
|
1084 } |
|
1085 |
|
1086 // + 2 for # & ; |
|
1087 Delete( offset + 2 ); |
|
1088 TBuf<1> buf; |
|
1089 buf.Append(value); |
|
1090 len = InsertTextL(buf); |
|
1091 iPos++; |
|
1092 return len; |
|
1093 } |
|
1094 } |
|
1095 } |
|
1096 |
|
1097 // this isn't correctly encoded macro, but let's skip it instead of |
|
1098 // showing an error, so user can see as much of the message as possible |
|
1099 iPos++; |
|
1100 return iOrigText->Length(); |
|
1101 } |
|
1102 |
|
1103 void CMsgMailViewerHtmlConv::InitMacroArrayL() |
|
1104 { |
|
1105 TResourceReader reader; |
|
1106 CEikonEnv::Static()->CreateResourceReaderLC(reader, R_MAIL_HTML_MACRO); |
|
1107 const TInt count(reader.ReadInt16()); |
|
1108 iMacros = new(ELeave) RPointerArray<CStringPair>(count); |
|
1109 for (TInt i=0; i<count; i++) |
|
1110 { |
|
1111 HBufC* macro = reader.ReadHBufCL(); |
|
1112 CleanupStack::PushL(macro); |
|
1113 HBufC* realText = reader.ReadHBufCL(); |
|
1114 CleanupStack::PushL(realText); |
|
1115 CStringPair* tmp = CStringPair::NewLC(*macro, *realText); |
|
1116 // this should never fail, because we allocated enough memory in |
|
1117 // construction |
|
1118 User::LeaveIfError(iMacros->InsertInOrder( |
|
1119 tmp, TLinearOrder<CStringPair>(CStringPair::Compare))); |
|
1120 CleanupStack::Pop(); // tmp |
|
1121 CleanupStack::PopAndDestroy(2); // macro, realText |
|
1122 } |
|
1123 CleanupStack::PopAndDestroy(); // reader |
|
1124 iMacros->Compress(); |
|
1125 } |
|
1126 |
|
1127 void CMsgMailViewerHtmlConv::ShowErrorL() |
|
1128 { |
|
1129 iErrorPos = iPos; |
|
1130 TInt deleteRest(iOrigText->Length() - iPos); |
|
1131 if (deleteRest > 0) |
|
1132 { |
|
1133 Delete(deleteRest); |
|
1134 } |
|
1135 InsertLinefeedL(); |
|
1136 HBufC* text = |
|
1137 CEikonEnv::Static()->AllocReadResourceLC(R_ERROR_IN_HTML_TEXT); |
|
1138 InsertTextL(*text); |
|
1139 CleanupStack::PopAndDestroy(); // text |
|
1140 iReturnValue = KErrCorrupt; |
|
1141 iPos = iOrigText->Length(); |
|
1142 } |
|
1143 |
|
1144 void CMsgMailViewerHtmlConv::InsertLinefeedL() |
|
1145 { |
|
1146 if (iPos > 0 && iPos < iOrigText->Length() ) |
|
1147 { |
|
1148 TPtr ptr(iOrigText->Des()); |
|
1149 ptr.Insert(iPos, KNewline); |
|
1150 iPos++; |
|
1151 } |
|
1152 } |
|
1153 |
|
1154 void CMsgMailViewerHtmlConv::RunL() |
|
1155 { |
|
1156 TInt err = Convert(); |
|
1157 if( !err ) // KErrNone |
|
1158 { |
|
1159 // still data to convert. Continue work on next loop. |
|
1160 ContinueAsyncConvert(); |
|
1161 } |
|
1162 else if( err == KErrEof ) |
|
1163 { |
|
1164 // success |
|
1165 User::RequestComplete( iReqStatus, KErrNone ); |
|
1166 } |
|
1167 else |
|
1168 { |
|
1169 // error occured |
|
1170 User::RequestComplete( iReqStatus, err ); |
|
1171 } |
|
1172 } |
|
1173 |
|
1174 void CMsgMailViewerHtmlConv::DoCancel() |
|
1175 { |
|
1176 User::RequestComplete( iReqStatus, KErrCancel ); |
|
1177 } |
|
1178 |
|
1179 void CMsgMailViewerHtmlConv::ContinueAsyncConvert() |
|
1180 { |
|
1181 iStatus = KRequestPending; |
|
1182 TRequestStatus* status = &iStatus; |
|
1183 User::RequestComplete( status, KErrNone ); |
|
1184 SetActive(); |
|
1185 } |
|
1186 |
|
1187 // ================= OTHER EXPORTED FUNCTIONS ============== |
|
1188 |
|
1189 |
|
1190 // End of File |