Re-enabled the limit on the maximum number of items parsed, because disabling it causes shows to turn up as new multiple times. This once again breaks feeds that add new shows at the bottom, so we still need to solve this properly.
/*
* Copyright (c) 2007-2010 Sebastian Brannstrom, Lars Persson, EmbedDev AB
*
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of the License "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* EmbedDev AB - initial contribution.
*
* Contributors:
*
* Description:
*
*/
#include "FeedParser.h"
#include <f32file.h>
#include <bautils.h>
#include <s32file.h>
#include <charconv.h>
#include <xml/stringdictionarycollection.h>
#include <utf.h>
#include <tinternetdate.h>
#include <e32hashtab.h>
#include "debug.h"
#include "podcastutils.h"
using namespace Xml;
// Size, in characters, used for the heap buffers that hold the document
// charset name and attribute values, and as the truncation limit for
// attribute values.
const TInt KMaxParseBuffer = 1024;
// Capacity, in characters, of the stack buffers that hold element and
// attribute names.
const TInt KMaxStringBuffer = 100;
// Binds the parser to the observer that receives parse results and to the
// file server session used when reading feed files.
CFeedParser::CFeedParser(MFeedParserObserver& aCallbacks, RFs& aFs)
	: iCallbacks(aCallbacks),
	  iRfs(aFs)
	{
	// No further construction work is done here.
	}
// Destructor; it performs no cleanup of its own.
CFeedParser::~CFeedParser()
	{
	}
void CFeedParser::ParseFeedL(const TFileName &feedFileName, CFeedInfo *info, TUint aMaxItems)
{
//DP1("ParseFeedL BEGIN: %S", &feedFileName);
_LIT8(KXmlMimeType, "text/xml");
// Contruct the parser object
CParser* parser = CParser::NewLC(KXmlMimeType, *this);
iActiveFeed = info;
iFeedState = EStateRoot;
iActiveShow = NULL;
iItemsParsed = 0;
iMaxItems = aMaxItems;
iStoppedParsing = EFalse;
iEncoding = ELatin1;
ParseL(*parser, iRfs, feedFileName);
CleanupStack::PopAndDestroy(parser);
//DP("ParseFeedL END");
}
// from MContentHandler
void CFeedParser::OnStartDocumentL(const RDocumentParameters& aDocParam, TInt /*aErrorCode*/)
{
DP("OnStartDocumentL()");
HBufC* charset = HBufC::NewLC(KMaxParseBuffer);
charset->Des().Copy(aDocParam.CharacterSetName().DesC());
iEncoding = EUtf8;
if (charset->CompareF(_L("utf-8")) == 0) {
DP("setting UTF8");
iEncoding = EUtf8;
} else if (charset->CompareF(_L("ISO-8859-1")) == 0) {
iEncoding = EUtf8; //Latin1;
} else {
DP1("unknown charset: %S", &charset);
}
CleanupStack::PopAndDestroy(charset);//buffer
}
// Called when the XML document ends; tells the observer that parsing of the
// active feed is complete.
void CFeedParser::OnEndDocumentL(TInt /*aErrorCode*/)
	{
	//DP("OnEndDocumentL()");
	CFeedInfo* finishedFeed = iActiveFeed;
	iCallbacks.ParsingCompleteL(finishedFeed);
	}
void CFeedParser::OnStartElementL(const RTagInfo& aElement, const RAttributeArray& aAttributes, TInt /*aErrorCode*/)
{
if (iStoppedParsing) {
iActiveShow = NULL;
return;
}
TBuf<KMaxStringBuffer> str;
str.Copy(aElement.LocalName().DesC());
//DP2("OnStartElementL START state=%d, element=%S", iFeedState, &str);
iBuffer.Zero();
switch (iFeedState) {
case EStateRoot:
// <channel>
if (str.CompareF(KTagChannel) == 0) {
iFeedState = EStateChannel;
}
break;
case EStateChannel:
// <channel> <item>
if(str.CompareF(KTagItem) == 0) {
//DP("New item");
iFeedState=EStateItem;
iActiveShow = NULL;
iActiveShow = CShowInfo::NewL();
if (iActiveShow == NULL) {
DP("Out of memory!");
iStoppedParsing = ETrue;
return;
}
iActiveShow->SetFeedUid(iActiveFeed->Uid());
// <channel> <lastBuildDate>
} else if (str.CompareF(KTagLastBuildDate) == 0) {
DP("LastBuildDate BEGIN");
iFeedState=EStateChannelLastBuildDate;
// <channel> <link>
}else if (str.CompareF(KTagTitle) == 0) {
iFeedState=EStateChannelTitle;
// <channel> <link>
} else if (str.CompareF(KTagLink) == 0) {
iFeedState = EStateChannelLink;
// <channel> <description>
} else if (str.CompareF(KTagDescription) == 0) {
iFeedState=EStateChannelDescription;
// <channel> <image>
} else if (str.CompareF(KTagImage) == 0) {
for (int i=0;i<aAttributes.Count();i++) {
RAttribute attr = aAttributes[i];
TBuf<KMaxStringBuffer> attr16;
attr16.Copy(attr.Attribute().LocalName().DesC().Left(KMaxStringBuffer));
HBufC* val16 = CnvUtfConverter::ConvertToUnicodeFromUtf8L(attr.Value().DesC().Left(KMaxParseBuffer));
CleanupStack::PushL(val16);
// href=...
if (attr16.Compare(KTagHref) == 0) {
iActiveFeed->SetImageUrlL(*val16);
}
CleanupStack::PopAndDestroy(val16);
}
iFeedState=EStateChannelImage;
}
break;
case EStateChannelImage:
// <channel> <image> <url>
if (str.CompareF(KTagUrl) == 0) {
iFeedState=EStateChannelImageUrl;
} else {
iFeedState=EStateChannelImage;
}
break;
case EStateItem:
iUid = 0;
// <channel> <item> <title>
if (str.CompareF(KTagTitle) == 0) {
iFeedState=EStateItemTitle;
// <channel> <item> <link>
} else if (str.CompareF(KTagLink) == 0) {
iFeedState=EStateItemLink;
// <channel> <item> <enclosure ...>
} else if (str.CompareF(KTagEnclosure) == 0) {
//DP("Enclosure START");
for (int i=0;i<aAttributes.Count();i++) {
RAttribute attr = aAttributes[i];
TBuf<KMaxStringBuffer> attr16;
attr16.Copy(attr.Attribute().LocalName().DesC());
// url=...
if (attr16.Compare(KTagUrl) == 0) {
HBufC* val16 = HBufC::NewLC(KMaxParseBuffer);
val16->Des().Copy(attr.Value().DesC());
iActiveShow->SetUrlL(*val16);
if (PodcastUtils::IsVideoShow(*val16)) {
iActiveShow->SetShowType(EVideoPodcast);
}
CleanupStack::PopAndDestroy(val16);
// length=...
} else if (attr16.Compare(KTagLength) == 0) {
TLex8 lex(attr.Value().DesC());
TUint size = 0;
lex.Val(size, EDecimal);
iActiveShow->SetShowSize(size);
}
}
// <channel> <item> <description>
} else if (str.CompareF(KTagDescription) == 0) {
iFeedState=EStateItemDescription;
// <channel> <item> <pubdate>
} else if (str.CompareF(KTagPubDate) == 0) {
//DP("LastBuildDate BEGIN");
iFeedState = EStateItemPubDate;
// <channel> <item> <guid>
} else if (str.CompareF(KTagGuid) == 0) {
iFeedState = EStateItemGuid;
}
break;
default:
//DP2("Ignoring tag %S when in state %d", &str, iFeedState);
break;
}
// DP1("OnStartElementL END state=%d", iFeedState);
}
void CFeedParser::OnEndElementL(const RTagInfo& aElement, TInt /*aErrorCode*/)
{
if (iStoppedParsing) {
return;
}
iBuffer.Trim();
TDesC8 lName = aElement.LocalName().DesC();
TBuf<KMaxStringBuffer> str;
str.Copy(aElement.LocalName().DesC());
//DP2("OnEndElementL START state=%d, element=%S", iFeedState, &str);
switch (iFeedState) {
case EStateChannelTitle:
if(str.CompareF(KTagTitle) == 0) {
if (iActiveFeed->CustomTitle() == EFalse) {
iActiveFeed->SetTitleL(iBuffer);
}
iFeedState = EStateChannel;
}
break;
case EStateChannelLink:
iActiveFeed->SetLinkL(iBuffer);
iFeedState = EStateChannel;
break;
case EStateChannelDescription:
iActiveFeed->SetDescriptionL(iBuffer);
iFeedState = EStateChannel;
break;
case EStateChannelLastBuildDate:
{
//DP("LastBuildDate END");
TInternetDate internetDate;
TBuf8<128> temp;
temp.Copy(iBuffer);
TRAPD(parseError, internetDate.SetDateL(temp));
if(parseError == KErrNone) {
if (TTime(internetDate.DateTime()) > iActiveFeed->BuildDate()) {
DP("Successfully parsed build date");
iActiveFeed->SetBuildDate(TTime(internetDate.DateTime()));
} else {
DP("*** Nothing new, aborting parsing");
iStoppedParsing = ETrue;
}
} else {
DP("Failed to parse last build date");
}
iFeedState = EStateChannel;
}
break;
case EStateChannelImageUrl:
//DP1("Image url: %S", &iBuffer);
iActiveFeed->SetImageUrlL(iBuffer);
iFeedState = EStateChannelImage;
break;
case EStateChannelImage:
if(str.CompareF(KTagImage) == 0) {
iFeedState = EStateChannel;
}
break;
case EStateItem:
if (str.CompareF(KTagItem) == 0)
{
// check if we have a valid pubdate
if (iActiveShow->PubDate().Int64() == 0)
{
// set pubDate to present time
TTime now;
now.UniversalTime();
// but we want reverse sorting, so let's do a little trick...
TTimeIntervalHours delta;
delta = iItemsParsed;
// ... remove an hour per show we've parsed so far
now -= delta;
iActiveShow->SetPubDate(now);
}
if (iUid)
{
iActiveShow->SetUid(iUid);
}
iCallbacks.NewShowL(*iActiveShow);
delete iActiveShow;
// We should now be finished with the show.
iActiveShow = NULL;
iItemsParsed++;
DP2("iItemsParsed: %d, iMaxItems: %d", iItemsParsed, iMaxItems);
if (iItemsParsed >= iMaxItems)
{
iStoppedParsing = ETrue;
DP("*** Too many items, aborting parsing");
}
iFeedState=EStateChannel;
}
break;
case EStateItemPubDate:
DP1("PubDate END: iBuffer='%S'", &iBuffer);
if (str.CompareF(KTagPubDate) == 0) {
// hack for feeds that don't always write day as two digits
TChar five(iBuffer[5]);
TChar six(iBuffer[6]);
if (five.IsDigit() && !six.IsDigit()) {
TBuf<KMaxStringBuffer> fix;
fix.Copy(iBuffer.Left(4));
fix.Append(_L(" 0"));
fix.Append(iBuffer.Mid(5));
iBuffer.Copy(fix);
}
// end hack
// hack for feeds that write out months in full
if (iBuffer[11] != ' ') {
TPtrC midPtr = iBuffer.Mid(8);
int spacePos = midPtr.Find(_L(" "));
if (spacePos != KErrNotFound) {
//DP1("Month: %S", &midPtr.Left(spacePos));
TBuf16<KBufferLength> newBuffer;
newBuffer.Copy(iBuffer.Left(11));
newBuffer.Append(_L(" "));
newBuffer.Append(iBuffer.Mid(11+spacePos));
//DP1("newBuffer: %S", &newBuffer);
iBuffer.Copy(newBuffer);
}
}
// hack for feeds that write days and months as UPPERCASE
TChar one(iBuffer[1]);
TChar two(iBuffer[2]);
TChar nine(iBuffer[9]);
TChar ten(iBuffer[10]);
one.LowerCase();
two.LowerCase();
nine.LowerCase();
ten.LowerCase();
iBuffer[1] = one;
iBuffer[2] = two;
iBuffer[9] = nine;
iBuffer[10] = ten;
TBuf8<128> temp;
temp.Copy(iBuffer);
TInternetDate internetDate;
TRAPD(parseError, internetDate.SetDateL(temp));
if(parseError == KErrNone) {
//DP1("PubDate parse success: '%S'", &iBuffer);
iActiveShow->SetPubDate(TTime(internetDate.DateTime()));
DP6("Successfully parsed pubdate %d/%d/%d %d:%d:%d",
iActiveShow->PubDate().DateTime().Year(),
iActiveShow->PubDate().DateTime().Month(),
iActiveShow->PubDate().DateTime().Day(),
iActiveShow->PubDate().DateTime().Hour(),
iActiveShow->PubDate().DateTime().Minute(),
iActiveShow->PubDate().DateTime().Second());
} else {
DP2("Pubdate parse error: '%S', error=%d", &iBuffer, parseError);
}
}
iFeedState=EStateItem;
break;
case EStateItemGuid:
iUid = DefaultHash::Des16(iBuffer);
iFeedState=EStateItem;
break;
case EStateItemTitle:
//DP1("title: %S", &iBuffer);
iActiveShow->SetTitleL(iBuffer);
iFeedState = EStateItem;
break;
case EStateItemLink:
if (iActiveShow->Url().Length() == 0) {
iActiveShow->SetUrlL(iBuffer);
if (PodcastUtils::IsVideoShow(iBuffer)) {
iActiveShow->SetShowType(EVideoPodcast);
}
}
iFeedState = EStateItem;
break;
case EStateItemDescription:
iActiveShow->SetDescriptionL(iBuffer);
iFeedState = EStateItem;
break;
default:
// fall back to channel level when in doubt
iFeedState = EStateChannel;
//DP2("Don't know how to handle end tag %S when in state %d", &str, iFeedState);
break;
}
//DP1("OnEndElementL END state=%d", iFeedState);
}
void CFeedParser::OnContentL(const TDesC8& aBytes, TInt /*aErrorCode*/)
{
TBuf<KBufferLength> temp;
if (iEncoding == EUtf8) {
CnvUtfConverter::ConvertToUnicodeFromUtf8(temp, aBytes);
} else {
temp.Copy(aBytes);
}
if(temp.Length() + iBuffer.Length() < KBufferLength) {
iBuffer.Append(temp);
}
}
// Namespace prefix mappings are not used by this parser; the call is only
// logged.
void CFeedParser::OnStartPrefixMappingL(const RString& /*aPrefix*/,
		const RString& /*aUri*/,
		TInt /*aErrorCode*/)
	{
	DP("OnStartPrefixMappingL()");
	}
// End of a namespace prefix mapping; the call is only logged.
void CFeedParser::OnEndPrefixMappingL(const RString& /*aPrefix*/,
		TInt /*aErrorCode*/)
	{
	DP("OnEndPrefixMappingL()");
	}
// Ignorable white space is discarded; the call is only logged.
void CFeedParser::OnIgnorableWhiteSpaceL(const TDesC8& /*aBytes*/,
		TInt /*aErrorCode*/)
	{
	DP("OnIgnorableWhiteSpaceL()");
	}
// Skipped entities are not processed; the call is only logged.
void CFeedParser::OnSkippedEntityL(const RString& /*aName*/,
		TInt /*aErrorCode*/)
	{
	DP("OnSkippedEntityL()");
	}
// Processing instructions are not acted upon; the call is only logged.
void CFeedParser::OnProcessingInstructionL(const TDesC8& /*aTarget*/,
		const TDesC8& /*aData*/,
		TInt /*aErrorCode*/)
	{
	DP("OnProcessingInstructionL()");
	}
// Error notification for the content handler. The error code is logged and
// nothing else is done with it.
void CFeedParser::OnError(TInt aErrorCode)
	{
	DP1("CFeedParser::OnError %d", aErrorCode);
	}
// No extended interfaces are offered: the request is logged and NULL is
// returned for every UID.
TAny* CFeedParser::GetExtendedInterface(const TInt32 /*aUid*/)
	{
	DP("GetExtendedInterface()");
	TAny* noInterface = NULL;
	return noInterface;
	}
// Returns a reference to the feed most recently passed to ParseFeedL.
// The pointer is dereferenced without a null check.
CFeedInfo& CFeedParser::ActiveFeed()
	{
	CFeedInfo& activeFeed = *iActiveFeed;
	return activeFeed;
	}