diff -r 000000000000 -r dd21522fd290 browserutilities/feedsengine/FeedsServer/FeedHandler/src/FeedParser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/browserutilities/feedsengine/FeedsServer/FeedHandler/src/FeedParser.cpp Mon Mar 30 12:54:55 2009 +0300 @@ -0,0 +1,711 @@ +/* +* Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of the License "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: Parser "base" class. +* +*/ + + +#include + +#include "FeedParser.h" +#include "FeedParserObserver.h" +#include "LeakTracker.h" +#include "XmlUtils.h" + + +_LIT8(KUrlStr, "url"); + +// ----------------------------------------------------------------------------- +// CFeedParser::CFeedParser +// C++ default constructor can NOT contain any code, that +// might leave. +// ----------------------------------------------------------------------------- +// +CFeedParser::CFeedParser(CXmlUtils& aXmlUtils): + iFeedMappings(3), iItemMappings(3), iXmlUtils(aXmlUtils) + { + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::~CFeedParser +// Deconstructor. +// ----------------------------------------------------------------------------- +// +CFeedParser::~CFeedParser() + { + iFeedMappings.Close(); + iItemMappings.Close(); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::AddFeedMappingL +// +// Add a feed ElementHandler mapping. See AddMappingL. +// ----------------------------------------------------------------------------- +// +void CFeedParser::AddFeedMappingL(const TDesC8& aNamespace, const TDesC8& aElementName, + TInt aValueId, ElementHandlerFunctionL aHandler) + { + AddMappingL(iFeedMappings, aNamespace, aElementName, aValueId, aHandler); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::HandleFeedChildL +// +// Process a child of a feed element. See HandleChildL +// ----------------------------------------------------------------------------- +// +void CFeedParser::HandleFeedChildL(TXmlEngElement aNode, + MFeedParserObserver& aObserver) const + { + HandleChildL(iFeedMappings, aNode, aObserver); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::AddItemMappingL +// +// Add an item ElementHandler mapping. See AddMappingL. +// ----------------------------------------------------------------------------- +// +void CFeedParser::AddItemMappingL(const TDesC8& aNamespace, const TDesC8& aElementName, + TInt aValueId, ElementHandlerFunctionL aHandler) + { + AddMappingL(iItemMappings, aNamespace, aElementName, aValueId, aHandler); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::HandleItemChildL +// +// Process a child of a item element. See HandleChildL +// ----------------------------------------------------------------------------- +// +void CFeedParser::HandleItemChildL(TXmlEngElement aNode, + MFeedParserObserver& aObserver) const + { + HandleChildL(iItemMappings, aNode, aObserver); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::ElementHandlerCDataL +// +// An ElementHandler function that extracts the value from the +// child text nodes. +// ----------------------------------------------------------------------------- +// +void CFeedParser::ElementHandlerCDataL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& aObserver) + { + TDesC* ucs2Des = NULL; + + // Get the text. + ucs2Des = aXmlUtils.ExtractTextL(aNode); + if ((ucs2Des == NULL) || (ucs2Des->Length() == 0)) + { + delete ucs2Des; + return; + } + + // Set the value + CleanupStack::PushL(ucs2Des); + aObserver.AddAttributeL(aValueId, *ucs2Des); + CleanupStack::PopAndDestroy(ucs2Des); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::ElementHandlerTextL +// +// An ElementHandler function that extracts the value from the +// child text nodes then resolves any html entities and removes any markup. +// ----------------------------------------------------------------------------- +// +void CFeedParser::ElementHandlerTextL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& aObserver) + { + HBufC* ucs2Des = NULL; + + // Get the text. + ucs2Des = aXmlUtils.ExtractTextL(aNode); + if ((ucs2Des == NULL) || (ucs2Des->Length() == 0)) + { + delete ucs2Des; + return; + } + + // Clean it up. + TPtr ptr(ucs2Des->Des()); + + CleanupStack::PushL(ucs2Des); + (void) aXmlUtils.CleanupMarkupL(ptr, 0); + + // Set the value + if (ucs2Des->Length() > 0) + { + aObserver.AddAttributeL(aValueId, *ucs2Des); + } + + CleanupStack::PopAndDestroy(ucs2Des); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::ElementHandlerUrlChildL +// +// An ElementHandler function that extracts the value from a child +// url element. If the element doesn't contain any elements and it contains +// text it is extracted instead. +// ----------------------------------------------------------------------------- +// +void CFeedParser::ElementHandlerUrlChildL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& aObserver) + { + TXmlEngElement urlNode = NULL; + + // Get the url element. + urlNode = aXmlUtils.GetFirstNamedChild(aNode, KUrlStr()); + + // If it doesn't have a url child and it has no children at all just + // extract the url from the node's text. + if ((urlNode.IsNull()) && (!urlNode.HasChildNodes())) + { + urlNode = aNode; + } + + // Extracts the value url. + ElementHandlerUrlL(aParser, aXmlUtils, urlNode, aValueId, aObserver); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::ElementHandlerUrlL +// +// An ElementHandler function that extracts the value from the +// child text nodes. It further performs url related clean up. +// ----------------------------------------------------------------------------- +// +void CFeedParser::ElementHandlerUrlL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, + TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& aObserver) + { + TDesC* ucs2Des = NULL; + + // Get the text. + ucs2Des = aXmlUtils.ExtractTextL(aNode); + if ((ucs2Des == NULL) || (ucs2Des->Length() == 0)) + { + delete ucs2Des; + return; + } + + CleanupStack::PushL(ucs2Des); + + // Clean up the url. + TPtr ptr(const_cast(ucs2Des->Ptr()), ucs2Des->Length()); + aXmlUtils.CleanupUrlL(ptr); + + // TODO: Resolve the href using feed url (the feed's url can be stored in CFeedParser). + + // Set the value + aObserver.AddAttributeL(aValueId, *ucs2Des); + CleanupStack::PopAndDestroy(ucs2Des); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::ElementHandlerDateL +// +// An ElementHandler function that extracts the date from the +// child text nodes. It can handle date formats defined in RFC 3339, RFC 822, +// RFC 1123, RFC 850, and RFC 1036 +// ----------------------------------------------------------------------------- +// +void CFeedParser::ElementHandlerDateL(const CFeedParser& /*aParser*/, + CXmlUtils& /*aXmlUtils*/, TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& aObserver) + { + TTime date; + TBool dateSet = EFalse; + RBuf8 rbuf; + + // Get the text. + aNode.WholeTextContentsCopyL( rbuf ); + if ( rbuf.Length() == 0 ) + { + rbuf.Close(); + return; + } + rbuf.CleanupClosePushL(); + + // Try to handle the date with TInternetDate first. + TInternetDate internetData; + + TRAPD(err, internetData.SetDateL( rbuf )); + if (err == KErrNone) + { + date = internetData.DateTime(); + dateSet = ETrue; + } + + // Otherwise if that didn't work then try to interpret it as a RFC 3339 date. + else if (err == KErrCorrupt) + { + TRAP(err, ParseRFC3339L( rbuf, date )); + if (err == KErrNone) + { + dateSet = ETrue; + } + } + + // Set the value + if (dateSet) + { + TBuf16<25> str; + + str.Format(_L("%Ld"), date.Int64()); + aObserver.AddAttributeL(aValueId, str); + } + + CleanupStack::PopAndDestroy( /*rbuf*/ ); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::AddMappingL +// +// Makes a mapping between an element's namespance and name with the normalized +// value-id (i.e. title or description) and a function which extracts out +// relevant data and applies it to a provided ValueHolder using the value-id +// (see HandleChildL). +// ----------------------------------------------------------------------------- +// +void CFeedParser::AddMappingL(RArray& aMappings, + const TDesC8& aNamespace, const TDesC8& aElementName, TInt aValueId, + ElementHandlerFunctionL aHandler) + { + ElementHandlerMapEntry entry; + + // Init the entry. + entry.iElementNamespace.Set(aNamespace); + entry.iElementName.Set(aElementName); + entry.iValueId = aValueId; + entry.iElementHandlerL = aHandler; + + // Append the entry. + User::LeaveIfError(aMappings.Append(entry)); + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::HandleChildL +// +// Using the mapping defined by calls to AddMappingL it passes aNode, +// aValueHolder and associated value-id to the associated function. The method +// does what is needed to extract the relevant values from the node and applies +// them on aValueHolder (using the normalized value-id). +// ----------------------------------------------------------------------------- +// +void CFeedParser::HandleChildL(const RArray& aMappings, + TXmlEngElement aNode, MFeedParserObserver& aObserver) const + { + // Find the corresponding entry in the map. + for (TInt i = 0; i < aMappings.Count(); i++) + { + // Call the ElementHandler. + if (iXmlUtils.IsNamed(aNode, aMappings[i].iElementNamespace, + aMappings[i].iElementName)) + { + (aMappings[i].iElementHandlerL)(*this, iXmlUtils, aNode, + aMappings[i].iValueId, aObserver); + break; + } + } + } + + +// ----------------------------------------------------------------------------- +// CFeedParser::RFC3339DateL +// +// Converts the given RFC 3339 date into a TTime. +// RFC 3339 format (examples): YYYY-MM-DDTHH:MM:SS.FFZ, +// YYYY-MM-DDTHH:MM:SS.FF+HH:MM or YYYY-MM-DDTHH:MM:SS.FF-HH:MM +// ----------------------------------------------------------------------------- +// +void CFeedParser::ParseRFC3339L(const TDesC8& aDateStr, TTime& aDate) + { + enum TPart {EYear, EMonth, EDay, EHour, EMin, ESec, EMicro, EZoneHour, EZoneMin, ENone}; + + TDateTime dateTime; + TInt num; + TBuf<30> timeStr; + TBuf<30> zoneHour; + TBuf<30> zoneMin; + TPart part = EYear; + TInt zone = 0; + + timeStr.Zero(); + + if (aDateStr.Length() > 30) + { + User::Leave(KErrCorrupt); + } + + for (TInt i = 0; i < aDateStr.Length(); i++) + { + TChar c(aDateStr[i]); + + // Extract the year. + if (part == EYear) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if (c == '-') + { + // If timeStr.Length() equals 2 then get the current + // year and use the first two digits to set the four digit year. + if (timeStr.Length() == 2) + { + TTime now; + TInt year; + TBuf<4> yearStr; + + now.UniversalTime(); + year = now.DateTime().Year(); + yearStr.AppendNum(year); + + timeStr.Insert(0, yearStr.Left(1)); + timeStr.Insert(1, yearStr.Mid(1, 1)); + } + + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetYear(num)); + timeStr.Zero(); + + part = EMonth; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the month. + else if (part == EMonth) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if (c == '-') + { + TLex16 lex(timeStr); + TMonth month = EJanuary; + + lex.Val(num); + + // Convert num to the month enum. + switch (num) + { + case 1: + month = EJanuary; + break; + case 2: + month = EFebruary; + break; + case 3: + month = EMarch; + break; + case 4: + month = EApril; + break; + case 5: + month = EMay; + break; + case 6: + month = EJune; + break; + case 7: + month = EJuly; + break; + case 8: + month = EAugust; + break; + case 9: + month = ESeptember; + break; + case 10: + month = EOctober; + break; + case 11: + month = ENovember; + break; + case 12: + month = EDecember; + break; + default: + User::Leave(KErrCorrupt); + break; + } + + User::LeaveIfError(dateTime.SetMonth(month)); + timeStr.Zero(); + + part = EDay; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the day. + else if (part == EDay) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if ((c == 'T') || (c == 't')) + { + TLex16 lex(timeStr); + + lex.Val(num); + // Day is zero based for some odd reason... + User::LeaveIfError(dateTime.SetDay(num - 1)); + timeStr.Zero(); + + part = EHour; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the hour. + else if (part == EHour) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if (c == ':') + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetHour(num)); + timeStr.Zero(); + + part = EMin; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the min. + else if (part == EMin) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if (c == ':') + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetMinute(num)); + timeStr.Zero(); + + part = ESec; + } + else + { + User::Leave(KErrCorrupt); + } + } + + + // Extract the sec. + else if (part == ESec) + { + if (c.IsDigit()) + { + timeStr.Append(c); + } + else if (c == '.') + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetSecond(num)); + timeStr.Zero(); + + part = EMicro; + } + else if ((c == 'Z') || (c == 'z')) + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetSecond(num)); + timeStr.Zero(); + + zone = 0; + part = ENone; + } + else if (c == '+') + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetSecond(num)); + timeStr.Zero(); + + zone = 1; + part = EZoneHour; + } + else if (c == '-') + { + TLex16 lex(timeStr); + + lex.Val(num); + User::LeaveIfError(dateTime.SetSecond(num)); + timeStr.Zero(); + + zone = -1; + part = EZoneHour; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Ignore the micro-sec. + else if (part == EMicro) + { + if (c.IsDigit()) + { + // Ignore microseconds. + } + else if ((c == 'Z') || (c == 'z')) + { + zone = 0; + part = ENone; + } + else if (c == '+') + { + zone = 1; + part = EZoneHour; + } + else if (c == '-') + { + zone = -1; + part = EZoneHour; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the hour. + else if (part == EZoneHour) + { + if (c.IsDigit()) + { + zoneHour.Append(c); + } + else if (c == ':') + { + part = EZoneMin; + } + else + { + User::Leave(KErrCorrupt); + } + } + + // Extract the min. + else if (part == EZoneMin) + { + if (c.IsDigit()) + { + zoneMin.Append(c); + } + else + { + User::Leave(KErrCorrupt); + } + } + + else + { + User::Leave(KErrCorrupt); + } + } + + // Handle the common date only format (i.e. YYYY-MM-DD). + if ((part == EDay) && ((aDateStr.Length() == 10) || (aDateStr.Length() == 8))) + { + TLex16 lex(timeStr); + + lex.Val(num); + + // Day is zero based for some odd reason... + User::LeaveIfError(dateTime.SetDay(num - 1)); + } + + // Convert timeStr into a TTime. + aDate = dateTime; + + // Adjust the aDate to UTC. + if (zone != 0) + { + TLex16 zHoursLex(zoneHour); + TLex16 zMinLex(zoneMin); + TInt zHours; + TInt zMins; + + // Get the offset. + zHoursLex.Val(zHours); + zMinLex.Val(zMins); + + // Adjust it back to UTC. + if (zone > 0) + { + aDate -= TTimeIntervalHours(zHours); + aDate -= TTimeIntervalMinutes(zMins); + } + + // Otherwise adjust it forward to UTC. + else + { + aDate += TTimeIntervalHours(zHours); + aDate += TTimeIntervalMinutes(zMins); + } + } + } +