browserutilities/feedsengine/FeedsServer/FeedHandler/src/RssFeedParser.cpp
changeset 0 dd21522fd290
child 36 0ed94ceaa377
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 * Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:  RSS parser
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include "FeedAttributes.h"
       
    20 #include "FeedParserObserver.h"
       
    21 #include "LeakTracker.h"
       
    22 #include "RssFeedParser.h"
       
    23 #include "XmlUtils.h"
       
    24 
       
    25 
       
    26 // Element and attribute names used by this parser.
       
    27 _LIT8(KRssHead, "rss");
       
    28 _LIT8(KRdfHead, "RDF");
       
    29 
       
    30 _LIT8(KDc, "dc");
       
    31 _LIT8(KEnc, "enc");
       
    32 
       
    33 _LIT8(KChannel, "channel");
       
    34 _LIT8(KTitle, "title");
       
    35 _LIT8(KLink, "link");
       
    36 _LIT8(KDescription, "description");
       
    37 _LIT8(KItem, "item");
       
    38 _LIT8(KAbout, "about");
       
    39 _LIT8(KGuid, "guid");
       
    40 _LIT8(KEnclosure, "enclosure");
       
    41 _LIT8(KUrl, "url");
       
    42 _LIT8(KResource, "resource");
       
    43 _LIT8(KType, "type");
       
    44 _LIT8(KLength, "length");
       
    45 _LIT8(KDate, "date");
       
    46 _LIT8(KPubDate, "pubdate");
       
    47 _LIT8(KLastBuildDate, "lastbuilddate");
       
    48 
       
    49 //#ifdef _DEBUG                    
       
    50 _LIT8(KImage, "image");
       
    51 _LIT8(KTextInput, "textinput");
       
    52 //#endif
       
    53 
       
    54 // -----------------------------------------------------------------------------
       
    55 // RssFeedParser::NewL
       
    56 //
       
    57 // Two-phased constructor.
       
    58 // -----------------------------------------------------------------------------
       
    59 //
       
    60 CRssFeedParser* CRssFeedParser::NewL(CXmlUtils& aXmlUtils)
       
    61     {
       
    62     CRssFeedParser* self = new (ELeave) CRssFeedParser(aXmlUtils);
       
    63     
       
    64     CleanupStack::PushL(self);
       
    65     self->ConstructL();
       
    66     CleanupStack::Pop();
       
    67 
       
    68     return self;
       
    69     }
       
    70 
       
    71         
       
    72 // -----------------------------------------------------------------------------
       
    73 // CRssFeedParser::CRssFeedParser
       
    74 // C++ default constructor can NOT contain any code, that
       
    75 // might leave.
       
    76 // -----------------------------------------------------------------------------
       
    77 //
       
    78 CRssFeedParser::CRssFeedParser(CXmlUtils& aXmlUtils):
       
    79         CFeedParser(aXmlUtils), iLeakTracker(CLeakTracker::ERssFeedParser)
       
    80     {
       
    81     }
       
    82         
       
    83 
       
    84 // -----------------------------------------------------------------------------
       
    85 // CRssFeedParser::ConstructL
       
    86 // Symbian 2nd phase constructor can leave.
       
    87 // -----------------------------------------------------------------------------
       
    88 //
       
    89 void CRssFeedParser::ConstructL()
       
    90     {
       
    91     // The mappings are used to map elements to handler methods.  For example when
       
    92     // a <title> element is found its contents are extracted as CDATA and passed
       
    93     // on to the observer as a EFeedAttributeTitle.
       
    94     
       
    95     // Add mappings to process the children of a channel.
       
    96     AddFeedMappingL(KNullDesC8(), KItem(), EFeedAttributeUnused, ElementHandlerItemL);
       
    97 
       
    98     AddFeedMappingL(KNullDesC8(), KTitle(), EFeedAttributeTitle, ElementHandlerTextL);
       
    99     AddFeedMappingL(KNullDesC8(), KLink(), EFeedAttributeLink, ElementHandlerUrlL);
       
   100     AddFeedMappingL(KNullDesC8(), KDescription(), EFeedAttributeDescription, ElementHandlerCDataL);
       
   101     AddFeedMappingL(KNullDesC8(), KPubDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); 
       
   102     AddFeedMappingL(KNullDesC8(), KLastBuildDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); 
       
   103 
       
   104     AddFeedMappingL(KDc(), KTitle(), EFeedAttributeTitle, ElementHandlerTextL);
       
   105     AddFeedMappingL(KDc(), KDescription(), EFeedAttributeDescription, ElementHandlerCDataL);    
       
   106     AddFeedMappingL(KDc(), KDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL); 
       
   107 
       
   108 //#ifdef _DEBUG
       
   109     AddFeedMappingL(KNullDesC8(), KImage(), EFeedAttributeUnused, ElementHandlerOtherL);
       
   110     AddFeedMappingL(KNullDesC8(), KTextInput(), EFeedAttributeUnused, ElementHandlerOtherL);
       
   111 //#endif
       
   112 
       
   113     // Add mappings to process the children of an item.
       
   114     AddItemMappingL(KNullDesC8(), KTitle(), EItemAttributeTitle, ElementHandlerTextL);
       
   115     AddItemMappingL(KNullDesC8(), KLink(), EItemAttributeLink, ElementHandlerUrlL);
       
   116     AddItemMappingL(KNullDesC8(), KDescription(), EItemAttributeDescription, ElementHandlerCDataL);    
       
   117     AddItemMappingL(KNullDesC8(), KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL);    
       
   118     AddItemMappingL(KNullDesC8(), KPubDate(), EItemAttributeTimestamp, ElementHandlerTimestampL); 
       
   119 
       
   120     AddItemMappingL(KDc(), KTitle(), EItemAttributeTitle, ElementHandlerTextL);
       
   121     AddItemMappingL(KDc(), KDescription(), EItemAttributeDescription, ElementHandlerCDataL);
       
   122     AddItemMappingL(KDc(), KDate(), EItemAttributeTimestamp, ElementHandlerTimestampL); 
       
   123     AddItemMappingL(KEnc(), KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL);    
       
   124     }
       
   125 
       
   126 
       
   127 // -----------------------------------------------------------------------------
       
   128 // CRssFeedParser::~CRssFeedParser
       
   129 // Deconstructor.
       
   130 // -----------------------------------------------------------------------------
       
   131 //
       
   132 CRssFeedParser::~CRssFeedParser()
       
   133     {
       
   134     }
       
   135 
       
   136         
       
   137 // -----------------------------------------------------------------------------
       
   138 // CRssFeedParser::ParseFeedL
       
   139 //
       
   140 // Creates a Feed instance from the given document. 
       
   141 // -----------------------------------------------------------------------------
       
   142 //
       
   143 void CRssFeedParser::ParseFeedL(RXmlEngDocument aDocument, 
       
   144         MFeedParserObserver& aObserver)
       
   145     {
       
   146 	TXmlEngElement  rootNode;
       
   147 	TXmlEngElement  channelNode;
       
   148 
       
   149     // Get the root node.
       
   150     rootNode = iXmlUtils.GetDocumentFirstElement(aDocument);
       
   151 
       
   152     // Find the channel element.
       
   153     channelNode = iXmlUtils.GetFirstNamedChild(rootNode, KChannel());    
       
   154     if (channelNode.IsNull())
       
   155         {
       
   156         User::Leave(KErrCorrupt);
       
   157         }
       
   158         
       
   159     iFeedTimestampNode = NULL;
       
   160     
       
   161     // Process the channel's children.
       
   162     aObserver.FeedBeginsL();
       
   163     
       
   164     // Process the elements in the channel node.
       
   165     ProcessElementsL(channelNode, aObserver);
       
   166 
       
   167     // Process all of the other elements except the channel element as it is
       
   168     // processed above.  This second call is needed in order to support RSS 1.0
       
   169     // as well as malformed RSS 2.0.
       
   170     ProcessElementsL(rootNode, aObserver);
       
   171     
       
   172     // Set the timestamp attribute.
       
   173     if (iFeedTimestampNode.NotNull())
       
   174         {
       
   175         ElementHandlerDateL(*this, iXmlUtils, iFeedTimestampNode, 
       
   176                 EFeedAttributeTimestamp, aObserver);
       
   177         }
       
   178         
       
   179     aObserver.FeedEndsL();
       
   180     }
       
   181 
       
   182 
       
   183 // -----------------------------------------------------------------------------
       
   184 // CRssFeedParser::IsFeedSupported
       
   185 //
       
   186 // Returns true if this feed parser can process the given document. 
       
   187 // -----------------------------------------------------------------------------
       
   188 //
       
   189 TBool CRssFeedParser::IsFeedSupported(CXmlUtils& aXmlUtils, 
       
   190         RXmlEngDocument aDocument, const TDesC& /*aContentType*/)
       
   191     {
       
   192     TXmlEngElement  node;
       
   193     
       
   194     // Get the root element.
       
   195     node = aXmlUtils.GetDocumentFirstElement(aDocument);
       
   196 
       
   197     // If the root node is missing the feed isn't supported.    
       
   198     if (node.IsNull())
       
   199         {
       
   200         return EFalse;
       
   201         }
       
   202 
       
   203     // Check the content-type.
       
   204     // TODO:
       
   205 
       
   206     // If the root node is not HEAD_STR the feed isn't supported.
       
   207     if (!aXmlUtils.IsNamed(node, KRssHead) && !aXmlUtils.IsNamed(node, KRdfHead))
       
   208         {
       
   209         return EFalse;
       
   210         }
       
   211 
       
   212     return ETrue;
       
   213     }
       
   214 
       
   215 
       
   216 // -----------------------------------------------------------------------------
       
   217 // CRssFeedParser::ProcessElementsL
       
   218 //
       
   219 // Process all elements except the channel elements.
       
   220 // -----------------------------------------------------------------------------
       
   221 //
       
   222 void CRssFeedParser::ProcessElementsL(TXmlEngElement aNode,
       
   223         MFeedParserObserver& aObserver) const
       
   224     {
       
   225     TXmlEngElement  node = NULL;
       
   226     
       
   227     node = iXmlUtils.GetFirstElementChild(aNode);
       
   228     
       
   229     while (node.NotNull())
       
   230         {
       
   231         // Process the element.
       
   232         HandleFeedChildL(node, aObserver);
       
   233 
       
   234         // Get the next element.
       
   235         node = iXmlUtils.GetNextSiblingElement(node);
       
   236         }
       
   237     }
       
   238 
       
   239         
       
   240 // -----------------------------------------------------------------------------
       
   241 // CRssFeedParser::ElementHandlerItemL
       
   242 //
       
   243 // A ElementHandler function that populates the Item instance with the 
       
   244 // values from the given item node.
       
   245 // -----------------------------------------------------------------------------
       
   246 //
       
   247 void CRssFeedParser::ElementHandlerItemL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, 
       
   248         TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
       
   249     {
       
   250     CRssFeedParser&  parser(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser)));
       
   251     TXmlEngElement         node;
       
   252     
       
   253     // Create an empty Item.
       
   254     aObserver.ItemBeginsL();
       
   255     
       
   256     // Process the item's children.
       
   257     parser.iItemTimestampNode = NULL;    
       
   258     node = aXmlUtils.GetFirstElementChild(aNode);
       
   259 
       
   260     while (node.NotNull())
       
   261         {
       
   262         // Process the element.
       
   263         parser.HandleItemChildL(node, aObserver);
       
   264 
       
   265         // Get the next element.
       
   266         node = aXmlUtils.GetNextSiblingElement(node);
       
   267         }
       
   268             
       
   269     // Postprocess the EItemAttributeIdStr attribute.
       
   270     parser.SetItemIdStrAttributeL(aNode, aObserver);
       
   271         
       
   272     // Postprocess the EItemAttributeTimestamp attribute.
       
   273     if (parser.iItemTimestampNode.NotNull())
       
   274         {
       
   275         ElementHandlerDateL(aParser, aXmlUtils, parser.iItemTimestampNode, 
       
   276                 EItemAttributeTimestamp, aObserver);
       
   277         }
       
   278         
       
   279     // Add the Item to the Feed.
       
   280     aObserver.ItemEndsL();    
       
   281     }
       
   282 
       
   283 
       
   284 // -----------------------------------------------------------------------------
       
   285 // CRssFeedParser::ElementHandlerEnclosureL
       
   286 //
       
   287 // An ElementHandler function that extracts the an enclosure.
       
   288 // -----------------------------------------------------------------------------
       
   289 //
       
   290 void CRssFeedParser::ElementHandlerEnclosureL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, 
       
   291         TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
       
   292     {
       
   293     HBufC*  url = NULL;
       
   294     HBufC*  resource = NULL;
       
   295     HBufC*  type = NULL;
       
   296     HBufC*  length = NULL;
       
   297     
       
   298     // Create an empty enclosure.
       
   299     aObserver.EnclosureBeginsL();
       
   300     
       
   301     // Extract and the enclosure attributes
       
   302     url = aXmlUtils.AttributeL(aNode, KUrl);
       
   303     CleanupStack::PushL(url);    
       
   304         
       
   305     resource = aXmlUtils.AttributeL(aNode, KResource);
       
   306     CleanupStack::PushL(resource);    
       
   307         
       
   308     type = aXmlUtils.AttributeL(aNode, KType);
       
   309     CleanupStack::PushL(type);    
       
   310         
       
   311     length = aXmlUtils.AttributeL(aNode, KLength);
       
   312     CleanupStack::PushL(length);    
       
   313         
       
   314     // Add either the resource or url attribute.
       
   315     if (resource != NULL)
       
   316         {        
       
   317         aObserver.AddAttributeL(EEnclosureAttributeLink, *resource);
       
   318         }
       
   319     else if (url != NULL)
       
   320         {        
       
   321         aObserver.AddAttributeL(EEnclosureAttributeLink, *url);
       
   322         }
       
   323 
       
   324     // Add the other attributes.
       
   325     if (type != NULL)
       
   326         {        
       
   327         aObserver.AddAttributeL(EEnclosureAttributeContentType, *type);
       
   328         }
       
   329 
       
   330     if (length != NULL)
       
   331         {        
       
   332         aObserver.AddAttributeL(EEnclosureAttributeSize, *length);
       
   333         }
       
   334     
       
   335     // Signal the end of the enclosure.
       
   336     aObserver.EnclosureEndsL();
       
   337     
       
   338     CleanupStack::PopAndDestroy(length);
       
   339     CleanupStack::PopAndDestroy(type);
       
   340     CleanupStack::PopAndDestroy(resource);
       
   341     CleanupStack::PopAndDestroy(url);
       
   342     }
       
   343 
       
   344 
       
   345 // -----------------------------------------------------------------------------
       
   346 // CRssFeedParser::ElementHandlerTimestampL
       
   347 //
       
   348 // A ElementHandler function that determines the timestamp to use.  The
       
   349 // point is to track the most relevant timestamp.
       
   350 // -----------------------------------------------------------------------------
       
   351 //
       
   352 void CRssFeedParser::ElementHandlerTimestampL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, 
       
   353         TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& /*aObserver*/)
       
   354     {
       
   355     CRssFeedParser&  parser(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser)));
       
   356     
       
   357     // Handle feed related timestamps
       
   358     if (aValueId == EFeedAttributeTimestamp)
       
   359         {
       
   360         if (aXmlUtils.IsNamed(aNode, KLastBuildDate))
       
   361             {
       
   362             parser.iFeedTimestampNode = aNode;
       
   363             }
       
   364 
       
   365         // As LastBuildDate is more relevant only update iTimestamp if it wasn't 
       
   366         // already set.  PubDate and dc:date mean the same thing, so once it's set, it's set.
       
   367         else if (parser.iFeedTimestampNode.IsNull())
       
   368             {            
       
   369             if (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate))
       
   370                 {
       
   371                 parser.iFeedTimestampNode = aNode;
       
   372                 }
       
   373             }
       
   374         }
       
   375         
       
   376     // Otherwise handle item related timestamps
       
   377     else
       
   378         {
       
   379         // PubDate and dc:date mean the same thing, so once it's set, it's set.
       
   380         if (parser.iItemTimestampNode.IsNull())
       
   381             {
       
   382             if (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate))
       
   383                 {
       
   384                 parser.iItemTimestampNode = aNode;
       
   385                 }
       
   386             }
       
   387         }
       
   388     }
       
   389 
       
   390 
       
   391 // -----------------------------------------------------------------------------
       
   392 // CRssFeedParser::ElementHandlerOtherL
       
   393 //
       
   394 // A ElementHandler function that used for feed validation.
       
   395 // -----------------------------------------------------------------------------
       
   396 //
       
   397 void CRssFeedParser::ElementHandlerOtherL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, 
       
   398         TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
       
   399     {
       
   400     const CRssFeedParser&  self = static_cast<const CRssFeedParser&>(aParser);
       
   401     TXmlEngElement               node = NULL;
       
   402     TDesC*                 text;
       
   403     
       
   404     if ((node = aXmlUtils.GetFirstNamedChild(aNode, KTitle)).NotNull())
       
   405         {
       
   406         text = self.iXmlUtils.ExtractTextL(node);
       
   407         CleanupStack::PushL(text);
       
   408         
       
   409         if ((text != NULL) && (text->Length() > 0))
       
   410             {
       
   411             aObserver.OtherTitleL();
       
   412             }
       
   413             
       
   414         CleanupStack::PopAndDestroy(text);
       
   415         }
       
   416     
       
   417     if ((node = aXmlUtils.GetFirstNamedChild(aNode, KDescription)).NotNull())
       
   418         {
       
   419         text = self.iXmlUtils.ExtractTextL(node);
       
   420         CleanupStack::PushL(text);
       
   421         
       
   422         if ((text != NULL) && (text->Length() > 0))
       
   423             {
       
   424             aObserver.OtherDescriptionL();
       
   425             }
       
   426             
       
   427         CleanupStack::PopAndDestroy(text);
       
   428         }
       
   429     
       
   430     if ((node = aXmlUtils.GetFirstNamedChild(aNode, KLink)).NotNull())
       
   431         {
       
   432         text = self.iXmlUtils.ExtractTextL(node);
       
   433         CleanupStack::PushL(text);
       
   434         
       
   435         if ((text != NULL) && (text->Length() > 0))
       
   436             {
       
   437             aObserver.OtherLinkL();
       
   438             }
       
   439             
       
   440         CleanupStack::PopAndDestroy(text);
       
   441         }
       
   442     }
       
   443 
       
   444 
       
   445 // -----------------------------------------------------------------------------
       
   446 // CRssFeedParser::SetItemIdStrAttributeL
       
   447 //
       
   448 // Determine and set the unique IdStr attribute (unique to the feed that is).
       
   449 // -----------------------------------------------------------------------------
       
   450 //
       
   451 void CRssFeedParser::SetItemIdStrAttributeL(TXmlEngElement aItemNode, 
       
   452         MFeedParserObserver& aObserver)
       
   453     {    
       
   454     const TInt  KStrChunk = 30;
       
   455     
       
   456     TDesC*    aboutAttribute = NULL;
       
   457     TDesC*    description = NULL;
       
   458     TDesC*    title = NULL;
       
   459     HBufC*    idStr = NULL;
       
   460     HBufC*    url = NULL;
       
   461     TXmlEngElement  node;
       
   462 
       
   463     // If the about attribute is present then use it.
       
   464     aboutAttribute = iXmlUtils.AttributeL(aItemNode, KAbout);
       
   465     if (aboutAttribute != NULL)
       
   466         {        
       
   467         CleanupStack::PushL(aboutAttribute);
       
   468         aObserver.AddAttributeL(EItemAttributeIdStr, *aboutAttribute);
       
   469         CleanupStack::PopAndDestroy(aboutAttribute);
       
   470         
       
   471         return;
       
   472         }
       
   473         
       
   474     // Otherwise if the GUid node is present use it.
       
   475     node = iXmlUtils.GetFirstNamedChild(aItemNode, KGuid);
       
   476     if (node.NotNull())
       
   477         {
       
   478         ElementHandlerCDataL(*this, iXmlUtils, node, EItemAttributeIdStr, aObserver);        
       
   479         return;
       
   480         }
       
   481 
       
   482     // Otherwise create a idStr from the first 30 chars of the description and title
       
   483     // and the last 30 chars of the url.  This doesn't guarantee a unique id, but 
       
   484     // it very likely.
       
   485     node = iXmlUtils.GetFirstNamedChild(aItemNode, KDescription);
       
   486     description = iXmlUtils.ExtractSimpleTextL(node, KStrChunk);
       
   487     CleanupStack::PushL(description);
       
   488     
       
   489     node = iXmlUtils.GetFirstNamedChild(aItemNode, KTitle);
       
   490     title = iXmlUtils.ExtractSimpleTextL(node, KStrChunk);
       
   491     CleanupStack::PushL(title);
       
   492     
       
   493     node = iXmlUtils.GetFirstNamedChild(aItemNode, KLink);
       
   494     url = iXmlUtils.ExtractSimpleTextL(node, KStrChunk, ETrue);
       
   495     CleanupStack::PushL(url);
       
   496     
       
   497     // Construct the idStr from the parts.
       
   498     TInt  len = 0;
       
   499     
       
   500     if (description != NULL)
       
   501         {
       
   502         len += description->Length();
       
   503         }
       
   504     if (title != NULL)
       
   505         {
       
   506         len += title->Length();
       
   507         }
       
   508     if (url != NULL)
       
   509         {
       
   510         len += url->Length();
       
   511         }
       
   512     
       
   513     idStr = HBufC::NewL(len);
       
   514     CleanupStack::PushL(idStr);
       
   515 
       
   516     TPtr  ptr(idStr->Des());
       
   517     
       
   518     if (description != NULL)
       
   519         {
       
   520         ptr.Append(*description);
       
   521         }
       
   522     if (title != NULL)
       
   523         {
       
   524         ptr.Append(*title);
       
   525         }
       
   526     if (url != NULL)
       
   527         {
       
   528         ptr.Append(*url);
       
   529         }
       
   530         
       
   531     // Replace any chars that may interfere with the database.
       
   532     _LIT(KSpace, " ");
       
   533     
       
   534     for (TInt i = 0; i < ptr.Length(); i++)
       
   535         {
       
   536         if (ptr[i] == '\'')
       
   537             {
       
   538             ptr.Replace(i, 1, KSpace());
       
   539             }
       
   540         }
       
   541     
       
   542     // Set the idStr attribute.
       
   543     aObserver.AddAttributeL(EItemAttributeIdStr, *idStr);
       
   544     
       
   545     CleanupStack::PopAndDestroy(idStr);    
       
   546     CleanupStack::PopAndDestroy(url);    
       
   547     CleanupStack::PopAndDestroy(title);    
       
   548     CleanupStack::PopAndDestroy(description);    
       
   549     }