engine/src/FeedParser.cpp
changeset 2 29cda98b007e
child 60 4d230e702aa3
equal deleted inserted replaced
1:5f8e5adbbed9 2:29cda98b007e
       
     1 /*
       
     2 * Copyright (c) 2007-2010 Sebastian Brannstrom, Lars Persson, EmbedDev AB
       
     3 *
       
     4 * All rights reserved.
       
     5 * This component and the accompanying materials are made available
       
     6 * under the terms of the License "Eclipse Public License v1.0"
       
     7 * which accompanies this distribution, and is available
       
     8 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     9 *
       
    10 * Initial Contributors:
       
    11 * EmbedDev AB - initial contribution.
       
    12 *
       
    13 * Contributors:
       
    14 *
       
    15 * Description:
       
    16 *
       
    17 */
       
    18 
       
    19 #include "FeedParser.h"
       
    20 #include <f32file.h>
       
    21 #include <bautils.h>
       
    22 #include <s32file.h>
       
    23 #include <charconv.h>
       
    24 #include <xml/stringdictionarycollection.h>
       
    25 #include <utf.h>
       
    26 #include <tinternetdate.h>
       
    27 #include "debug.h"
       
    28 
       
    29 using namespace Xml;
       
    30 const TInt KMaxParseBuffer = 1024;
       
    31 const TInt KMaxStringBuffer = 100;
       
    32 
       
    33 CFeedParser::CFeedParser(MFeedParserObserver& aCallbacks, RFs& aFs) : 	iCallbacks(aCallbacks), iRfs(aFs)
       
    34 {
       
    35 }
       
    36 
       
    37 CFeedParser::~CFeedParser()
       
    38 {	
       
    39 }
       
    40 
       
    41 void CFeedParser::ParseFeedL(const TFileName &feedFileName, CFeedInfo *info, TUint aMaxItems)
       
    42 	{
       
    43 	//DP1("ParseFeedL BEGIN: %S", &feedFileName);		
       
    44 	
       
    45 	_LIT8(KXmlMimeType, "text/xml");
       
    46 	// Contruct the parser object
       
    47 	CParser* parser = CParser::NewLC(KXmlMimeType, *this);
       
    48 	iActiveFeed = info;
       
    49 	iFeedState = EStateRoot;
       
    50 	iActiveShow = NULL;
       
    51 	iItemsParsed = 0;
       
    52 	iMaxItems = aMaxItems;
       
    53 	iStoppedParsing = EFalse;
       
    54 	iEncoding = ELatin1;
       
    55 
       
    56 	ParseL(*parser, iRfs, feedFileName);
       
    57 
       
    58 	CleanupStack::PopAndDestroy(parser);	
       
    59 	
       
    60 	//DP("ParseFeedL END");
       
    61 	}
       
    62 
       
    63 // from MContentHandler
       
    64 void CFeedParser::OnStartDocumentL(const RDocumentParameters& aDocParam, TInt /*aErrorCode*/)
       
    65 	{
       
    66 	DP("OnStartDocumentL()");
       
    67 	HBufC* charset = HBufC::NewLC(KMaxParseBuffer);
       
    68 	charset->Des().Copy(aDocParam.CharacterSetName().DesC());
       
    69 	iEncoding = EUtf8;
       
    70 	if (charset->CompareF(_L("utf-8")) == 0) {
       
    71 		DP("setting UTF8");
       
    72 		iEncoding = EUtf8;
       
    73 	} else if (charset->CompareF(_L("ISO-8859-1")) == 0) {
       
    74 		iEncoding = EUtf8; //Latin1;
       
    75 	} else {
       
    76 		DP1("unknown charset: %S", &charset);
       
    77 	}
       
    78 	CleanupStack::PopAndDestroy(charset);//buffer
       
    79 	}
       
    80 
       
    81 void CFeedParser::OnEndDocumentL(TInt /*aErrorCode*/)
       
    82 	{
       
    83 	//DP("OnEndDocumentL()");
       
    84 	iCallbacks.ParsingCompleteL(iActiveFeed);
       
    85 	}
       
    86 
       
    87 void CFeedParser::OnStartElementL(const RTagInfo& aElement, const RAttributeArray& aAttributes, TInt /*aErrorCode*/)
       
    88 	{
       
    89 	if (iStoppedParsing) {
       
    90 		iActiveShow = NULL;
       
    91 		return;
       
    92 	}
       
    93 	
       
    94 	TBuf<KMaxStringBuffer> str;
       
    95 	str.Copy(aElement.LocalName().DesC());
       
    96 	//DP2("OnStartElementL START state=%d, element=%S", iFeedState, &str);
       
    97 	iBuffer.Zero();
       
    98 	switch (iFeedState) {
       
    99 	case EStateRoot:
       
   100 		// <channel>
       
   101 		if (str.CompareF(KTagChannel) == 0) {
       
   102 			iFeedState = EStateChannel;
       
   103 		}
       
   104 		break;
       
   105 	case EStateChannel:
       
   106 		// <channel> <item>
       
   107 		if(str.CompareF(KTagItem) == 0) {
       
   108 			//DP("New item");
       
   109 			iFeedState=EStateItem;
       
   110 
       
   111 			iActiveShow = NULL;
       
   112 			iActiveShow = CShowInfo::NewL();
       
   113 			if (iActiveShow == NULL) {
       
   114 				DP("Out of memory!");
       
   115 				iStoppedParsing = ETrue;
       
   116 				return;
       
   117 			}
       
   118 			iActiveShow->SetFeedUid(iActiveFeed->Uid());
       
   119 
       
   120 		// <channel> <lastBuildDate>
       
   121 		} else if (str.CompareF(KTagLastBuildDate) == 0) {
       
   122 			DP("LastBuildDate BEGIN");
       
   123 			iFeedState=EStateChannelLastBuildDate;
       
   124 		// <channel> <link>
       
   125 		}else if (str.CompareF(KTagTitle) == 0) {
       
   126 			iFeedState=EStateChannelTitle;
       
   127 		// <channel> <link>
       
   128 		} else if (str.CompareF(KTagLink) == 0) {
       
   129 			iFeedState = EStateChannelLink;
       
   130 		// <channel> <description>
       
   131 		} else if (str.CompareF(KTagDescription) == 0) {
       
   132 			iFeedState=EStateChannelDescription;
       
   133 		// <channel> <image>
       
   134 		} else if (str.CompareF(KTagImage) == 0) {
       
   135 			for (int i=0;i<aAttributes.Count();i++) {
       
   136 				RAttribute attr = aAttributes[i];
       
   137 				TBuf<KMaxStringBuffer> attr16;
       
   138 				attr16.Copy(attr.Attribute().LocalName().DesC().Left(KMaxStringBuffer));
       
   139 				HBufC* val16 = CnvUtfConverter::ConvertToUnicodeFromUtf8L(attr.Value().DesC().Left(KMaxParseBuffer));
       
   140 				CleanupStack::PushL(val16);
       
   141 						
       
   142 				// href=...
       
   143 				if (attr16.Compare(KTagHref) == 0) {
       
   144 					iActiveFeed->SetImageUrlL(*val16);
       
   145 				}
       
   146 				CleanupStack::PopAndDestroy(val16);
       
   147 			}
       
   148 					
       
   149 			iFeedState=EStateChannelImage;
       
   150 		}
       
   151 		break;
       
   152 	case EStateChannelImage:
       
   153 		// <channel> <image> <url>
       
   154 		if (str.CompareF(KTagUrl) == 0) {
       
   155 			iFeedState=EStateChannelImageUrl;
       
   156 		} else {
       
   157 			iFeedState=EStateChannelImage;
       
   158 		}
       
   159 		break;
       
   160 	case EStateItem:
       
   161 		// <channel> <item> <title>
       
   162 		if (str.CompareF(KTagTitle) == 0) {
       
   163 			iFeedState=EStateItemTitle;
       
   164 		// <channel> <item> <link>
       
   165 		} else if (str.CompareF(KTagLink) == 0) {
       
   166 			iFeedState=EStateItemLink;
       
   167 		// <channel> <item> <enclosure ...>
       
   168 		} else if (str.CompareF(KTagEnclosure) == 0) {
       
   169 			//DP("Enclosure START");
       
   170 			for (int i=0;i<aAttributes.Count();i++) {
       
   171 				RAttribute attr = aAttributes[i];
       
   172 				TBuf<KMaxStringBuffer> attr16;
       
   173 				attr16.Copy(attr.Attribute().LocalName().DesC());
       
   174 				// url=...
       
   175 				if (attr16.Compare(KTagUrl) == 0) {
       
   176 					HBufC* val16 = HBufC::NewLC(KMaxParseBuffer);
       
   177 					val16->Des().Copy(attr.Value().DesC());
       
   178 					iActiveShow->SetUrlL(*val16);
       
   179 					CleanupStack::PopAndDestroy(val16);
       
   180 				// length=...
       
   181 				} else if (attr16.Compare(KTagLength) == 0) {
       
   182 					TLex8 lex(attr.Value().DesC());
       
   183 					TUint size = 0;
       
   184 					lex.Val(size, EDecimal);
       
   185 					iActiveShow->SetShowSize(size);
       
   186 				}
       
   187 			}
       
   188 		// <channel> <item> <description>
       
   189 		} else if (str.CompareF(KTagDescription) == 0) {
       
   190 			iFeedState=EStateItemDescription;
       
   191 		// <channel> <item> <pubdate>
       
   192 		} else if (str.CompareF(KTagPubDate) == 0) {
       
   193 			//DP("LastBuildDate BEGIN");
       
   194 			iFeedState = EStateItemPubDate;
       
   195 		}
       
   196 		break;
       
   197 	default:
       
   198 		//DP2("Ignoring tag %S when in state %d", &str, iFeedState);
       
   199 		break;
       
   200 	}
       
   201 //	DP1("OnStartElementL END state=%d", iFeedState);
       
   202 	}
       
   203 
       
   204 void CFeedParser::OnEndElementL(const RTagInfo& aElement, TInt /*aErrorCode*/)
       
   205 	{
       
   206 	
       
   207 	if (iStoppedParsing) {
       
   208 		return;
       
   209 	}
       
   210 	
       
   211 	iBuffer.Trim();
       
   212 	
       
   213 	TDesC8 lName = aElement.LocalName().DesC();
       
   214 	TBuf<KMaxStringBuffer> str;
       
   215 	str.Copy(aElement.LocalName().DesC());
       
   216 
       
   217 	//DP2("OnEndElementL START state=%d, element=%S", iFeedState, &str);
       
   218 
       
   219 	switch (iFeedState) {
       
   220 		case EStateChannelTitle:
       
   221 			if(str.CompareF(KTagTitle) == 0) {
       
   222 				if (iActiveFeed->CustomTitle() == EFalse) {
       
   223 					iActiveFeed->SetTitleL(iBuffer);
       
   224 				}
       
   225 				iFeedState = EStateChannel;
       
   226 			}
       
   227 			break;
       
   228 		case EStateChannelLink:
       
   229 			iActiveFeed->SetLinkL(iBuffer);
       
   230 			iFeedState = EStateChannel;
       
   231 			break;
       
   232 		case EStateChannelDescription:
       
   233 			iActiveFeed->SetDescriptionL(iBuffer);
       
   234 			iFeedState = EStateChannel;
       
   235 			break;
       
   236 		case EStateChannelLastBuildDate:
       
   237 			{
       
   238 			//DP("LastBuildDate END");
       
   239 			TInternetDate internetDate;
       
   240 			TBuf8<128> temp;
       
   241 			temp.Copy(iBuffer);
       
   242 					
       
   243 			TRAPD(parseError, internetDate.SetDateL(temp));
       
   244 			if(parseError == KErrNone) {				
       
   245 				if (TTime(internetDate.DateTime()) > iActiveFeed->BuildDate()) {
       
   246 					DP("Successfully parsed build date");
       
   247 					iActiveFeed->SetBuildDate(TTime(internetDate.DateTime()));
       
   248 				} else {
       
   249 					DP("*** Nothing new, aborting parsing");
       
   250 					iStoppedParsing = ETrue;
       
   251 				}
       
   252 			} else {
       
   253 				DP("Failed to parse last build date");
       
   254 			}
       
   255 			iFeedState = EStateChannel;
       
   256 			}
       
   257 			break;
       
   258 		case EStateChannelImageUrl:
       
   259 			//DP1("Image url: %S", &iBuffer);
       
   260 			iActiveFeed->SetImageUrlL(iBuffer);
       
   261 			iFeedState = EStateChannelImage;
       
   262 			break;
       
   263 		case EStateChannelImage:
       
   264 			if(str.CompareF(KTagImage) == 0) {
       
   265 				iFeedState = EStateChannel;
       
   266 			}
       
   267 			break;
       
   268 		case EStateItem:
       
   269 			if (str.CompareF(KTagItem) == 0) 
       
   270 				{				
       
   271 				iCallbacks.NewShowL(*iActiveShow);
       
   272 				
       
   273 				delete iActiveShow;				
       
   274 				
       
   275 				// We should now be finished with the show.
       
   276 				iActiveShow = NULL;
       
   277 				
       
   278 				iItemsParsed++;
       
   279 				//DP2("iItemsParsed: %d, iMaxItems: %d", iItemsParsed, iMaxItems);
       
   280 				if (iItemsParsed > iMaxItems) 
       
   281 					{
       
   282 					iStoppedParsing = ETrue;
       
   283 					DP("*** Too many items, aborting parsing");
       
   284 					}
       
   285 				
       
   286 				iFeedState=EStateChannel;
       
   287 				}
       
   288 			break;
       
   289 		case EStateItemPubDate:
       
   290 			DP1("PubDate END: iBuffer='%S'", &iBuffer);
       
   291 			if (str.CompareF(KTagPubDate) == 0) {
       
   292 				// hack for feeds that don't always write day as two digits
       
   293 				TChar five(iBuffer[5]);
       
   294 				TChar six(iBuffer[6]);
       
   295 				
       
   296 				if (five.IsDigit() && !six.IsDigit()) {
       
   297 					TBuf<KMaxStringBuffer> fix;
       
   298 					fix.Copy(iBuffer.Left(4));
       
   299 					fix.Append(_L(" 0"));
       
   300 					fix.Append(iBuffer.Mid(5));
       
   301 					iBuffer.Copy(fix);
       
   302 				}
       
   303 				// end hack
       
   304 				
       
   305 				// hack for feeds that write out months in full
       
   306 				
       
   307 				if (iBuffer[11] != ' ') {
       
   308 					TPtrC midPtr = iBuffer.Mid(8);
       
   309 					
       
   310 					int spacePos = midPtr.Find(_L(" "));
       
   311 					
       
   312 					if (spacePos != KErrNotFound) {
       
   313 						//DP1("Month: %S", &midPtr.Left(spacePos));
       
   314 						
       
   315 						TBuf16<KBufferLength> newBuffer;
       
   316 						newBuffer.Copy(iBuffer.Left(11));
       
   317 						newBuffer.Append(_L(" "));
       
   318 						newBuffer.Append(iBuffer.Mid(11+spacePos));
       
   319 						//DP1("newBuffer: %S", &newBuffer);
       
   320 						iBuffer.Copy(newBuffer);
       
   321 					}
       
   322 				}
       
   323 				
       
   324 				// hack for feeds that write days and months as UPPERCASE
       
   325 				TChar one(iBuffer[1]);
       
   326 				TChar two(iBuffer[2]);
       
   327 				TChar nine(iBuffer[9]);
       
   328 				TChar ten(iBuffer[10]);
       
   329 
       
   330 				one.LowerCase();
       
   331 				two.LowerCase();
       
   332 				nine.LowerCase();
       
   333 				ten.LowerCase();
       
   334 				
       
   335 				iBuffer[1] = one;
       
   336 				iBuffer[2] = two;
       
   337 				iBuffer[9] = nine;
       
   338 				iBuffer[10] = ten;
       
   339 				
       
   340 				TBuf8<128> temp;
       
   341 				temp.Copy(iBuffer);
       
   342 
       
   343 				TInternetDate internetDate;
       
   344 				TRAPD(parseError, internetDate.SetDateL(temp));
       
   345 				if(parseError == KErrNone) {				
       
   346 					//DP1("PubDate parse success: '%S'", &iBuffer);
       
   347 					iActiveShow->SetPubDate(TTime(internetDate.DateTime()));
       
   348 			
       
   349 					
       
   350 					DP6("Successfully parsed pubdate %d/%d/%d %d:%d:%d",
       
   351 							iActiveShow->PubDate().DateTime().Year(),
       
   352 							iActiveShow->PubDate().DateTime().Month(),
       
   353 							iActiveShow->PubDate().DateTime().Day(),
       
   354 							iActiveShow->PubDate().DateTime().Hour(),
       
   355 							iActiveShow->PubDate().DateTime().Minute(),
       
   356 							iActiveShow->PubDate().DateTime().Second());
       
   357 							
       
   358 				} else {
       
   359 					DP2("Pubdate parse error: '%S', error=%d", &iBuffer, parseError);
       
   360 				}
       
   361 			}
       
   362 			iFeedState=EStateItem;
       
   363 			break;
       
   364 		case EStateItemTitle:
       
   365 			//DP1("title: %S", &iBuffer);
       
   366 			iActiveShow->SetTitleL(iBuffer);
       
   367 			iFeedState = EStateItem;
       
   368 			break;
       
   369 		case EStateItemLink:
       
   370 			if (iActiveShow->Url().Length() == 0) {
       
   371 				iActiveShow->SetUrlL(iBuffer);
       
   372 			}
       
   373 			iFeedState = EStateItem;
       
   374 			break;
       
   375 		case EStateItemDescription:
       
   376 			iActiveShow->SetDescriptionL(iBuffer);
       
   377 			iFeedState = EStateItem;
       
   378 			break;
       
   379 		default:
       
   380 			// fall back to channel level when in doubt
       
   381 			iFeedState = EStateChannel;
       
   382 			//DP2("Don't know how to handle end tag %S when in state %d", &str, iFeedState);
       
   383 			break;
       
   384 	}
       
   385 
       
   386 	//DP1("OnEndElementL END state=%d", iFeedState);	
       
   387 	}
       
   388 
       
   389 void CFeedParser::OnContentL(const TDesC8& aBytes, TInt /*aErrorCode*/)
       
   390 	{
       
   391 	TBuf<KBufferLength> temp;
       
   392 	if (iEncoding == EUtf8) {
       
   393 		CnvUtfConverter::ConvertToUnicodeFromUtf8(temp, aBytes);
       
   394 	} else {
       
   395 		temp.Copy(aBytes);
       
   396 	}
       
   397 	
       
   398 	if(temp.Length() + iBuffer.Length() < KBufferLength) {
       
   399 		iBuffer.Append(temp);
       
   400 	}
       
   401 	}
       
   402 
       
   403 void CFeedParser::OnStartPrefixMappingL(const RString& /*aPrefix*/, const RString& /*aUri*/, TInt /*aErrorCode*/)
       
   404 	{
       
   405 	DP("OnStartPrefixMappingL()");
       
   406 	}
       
   407 
       
   408 void CFeedParser::OnEndPrefixMappingL(const RString& /*aPrefix*/, TInt /*aErrorCode*/)
       
   409 	{
       
   410 	DP("OnEndPrefixMappingL()");
       
   411 	}
       
   412 
       
   413 void CFeedParser::OnIgnorableWhiteSpaceL(const TDesC8& /*aBytes*/, TInt /*aErrorCode*/)
       
   414 	{
       
   415 	DP("OnIgnorableWhiteSpaceL()");
       
   416 	}
       
   417 
       
   418 void CFeedParser::OnSkippedEntityL(const RString& /*aName*/, TInt /*aErrorCode*/)
       
   419 	{
       
   420 	DP("OnSkippedEntityL()");
       
   421 	}
       
   422 
       
   423 void CFeedParser::OnProcessingInstructionL(const TDesC8& /*aTarget*/, const TDesC8& /*aData*/, TInt /*aErrorCode*/)
       
   424 	{
       
   425 	DP("OnProcessingInstructionL()");
       
   426 	}
       
   427 
       
   428 void CFeedParser::OnError(TInt aErrorCode)
       
   429 	{
       
   430 	DP1("CFeedParser::OnError %d", aErrorCode);
       
   431 	}
       
   432 
       
   433 TAny* CFeedParser::GetExtendedInterface(const TInt32 /*aUid*/)
       
   434 	{
       
   435 	DP("GetExtendedInterface()");
       
   436 	return NULL;
       
   437 	}
       
   438 
       
   439 CFeedInfo& CFeedParser::ActiveFeed()
       
   440 	{
       
   441 		return *iActiveFeed;
       
   442 	}