# HG changeset patch # User teknolog # Date 1267281644 0 # Node ID cda6e6257514f49e8bef2ce9820fbe8eb97c209b # Parent 4bcc91e704838e6445e2d2eb56c626f76a521d85 Improved HTML cleaning diff -r 4bcc91e70483 -r cda6e6257514 application/src/PodcastFeedView.cpp --- a/application/src/PodcastFeedView.cpp Thu Feb 25 22:05:50 2010 +0000 +++ b/application/src/PodcastFeedView.cpp Sat Feb 27 14:40:44 2010 +0000 @@ -119,6 +119,11 @@ iUpdater = CPodcastFeedViewUpdater::NewL(*this); DP("CPodcastFeedView::ConstructL END"); + + TBuf<1024> test; + test.Copy(_L("Yeah, we’re doing an episode. "hej" © fl ong Should you get used to it? Ummm… maybe not. But let’s enjoy the ride while it lasts! Today we just yammer on about whatever, mostly just about what we’ve been up to. Until next time!")); + + PodcastUtils::CleanHtmlL(test); } CPodcastFeedView::~CPodcastFeedView() diff -r 4bcc91e70483 -r cda6e6257514 engine/src/PodcastUtils.cpp --- a/engine/src/PodcastUtils.cpp Thu Feb 25 22:05:50 2010 +0000 +++ b/engine/src/PodcastUtils.cpp Sat Feb 27 14:40:44 2010 +0000 @@ -68,18 +68,18 @@ EXPORT_C void PodcastUtils::CleanHtmlL(TDes &str) { -#ifdef UIQ - _LIT(KLineBreak, "\r\n"); -#else +// miscellaneous cleanup const TChar KLineBreak(CEditableText::ELineBreak); -#endif _LIT(KNewLine, "\n"); + // ReplaceChar(str, '"', '\''); ReplaceString(str, KNewLine, KNullDesC); + str.Trim(); -// DP2("CleanHtml %d, %S", str.Length(), &str); + +// strip out HTML tags + TInt startPos = str.Locate('<'); TInt endPos = str.Locate('>'); - //DP3("length: %d, startPos: %d, endPos: %d", str.Length(), startPos, endPos); HBufC* tmpBuf = HBufC::NewLC(KMaxDescriptionLength); TPtr tmp(tmpBuf->Des()); while (startPos != KErrNotFound && endPos != KErrNotFound && endPos > startPos) { @@ -108,21 +108,95 @@ startPos = str.Locate('<'); endPos = str.Locate('>'); } - - str.Trim(); - _LIT(KAmp, "&"); - _LIT(KQuot, """); - _LIT(KNbsp, " "); - _LIT(KCopy, "©"); - _LIT(KCopyReplacement, "(c)"); - if(str.Locate('&') != KErrNotFound) { - ReplaceString(str, KAmp, KNullDesC); - ReplaceString(str, KQuot, KNullDesC); - ReplaceString(str, KNbsp, KNullDesC); - ReplaceString(str, KCopy, KCopyReplacement); - } - ReplaceChar(str, '"', '\''); - + +// change HTML encoded chars to unicode + startPos = str.Locate('&'); + endPos = str.Locate(';'); + while (startPos != KErrNotFound && endPos != KErrNotFound && endPos > startPos) + { + TPtrC ptr(str.Mid(startPos+1, endPos-startPos)); + // check for whitespace + if (ptr.Locate(' ') == KErrNotFound) + { + // numerical constant + if (ptr[0] == '#') + { + TUint length = endPos - startPos; + if (length > 2) + { + tmp.Copy(str.Left(startPos)); + ptr.Set(str.Mid(startPos+2, length-2)); + + TUint charCode = 0; + + if (ptr[0] == 'x') + { + // hexadecimal + ptr.Set(ptr.Mid(1)); + TLex16 lex(ptr); + lex.Val(charCode, EHex); + } + else + { + //decimal + TLex16 lex(ptr); + lex.Val(charCode, EDecimal); + } + + TChar charChar(charCode); + tmp.Append(charChar); + tmp.Append(str.Mid(endPos+1)); + str.Copy(tmp); + } + } + // literal constant + else + { + _LIT(KAmp, "amp;"); + _LIT(KQuot, "quot;"); + _LIT(KNbsp, "nbsp;"); + _LIT(KCopy, "copy;"); + + // copy start of string + tmp.Copy(str.Left(startPos)); + + if (ptr.CompareF(KAmp) == 0) + { + tmp.Append('&'); + } + else if (ptr.CompareF(KQuot) == 0) + { + tmp.Append('"'); + } + else if (ptr.CompareF(KNbsp) == 0) + { + tmp.Append(' '); + } + else if (ptr.CompareF(KCopy) == 0) + { + tmp.Append('\xA9'); + } + + // copy end of string + tmp.Append(str.Mid(endPos+1)); + str.Copy(tmp); + } + } + + TInt newPos = str.Mid(startPos+1).Locate('&'); + + if (newPos != KErrNotFound) + { + startPos = startPos+1 + newPos; + endPos = str.Locate(';'); + } + else + { + startPos = KErrNotFound; + endPos = KErrNotFound; + } + } + CleanupStack::PopAndDestroy(tmpBuf); }