browserutilities/feedsengine/FeedsServer/FeedHandler/src/RssFeedParser.cpp
changeset 0 dd21522fd290
child 36 0ed94ceaa377
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/browserutilities/feedsengine/FeedsServer/FeedHandler/src/RssFeedParser.cpp	Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,549 @@
+/*
+* Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of the License "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:  RSS parser
+*
+*/
+
+
+#include "FeedAttributes.h"
+#include "FeedParserObserver.h"
+#include "LeakTracker.h"
+#include "RssFeedParser.h"
+#include "XmlUtils.h"
+
+
+// Names of the XML elements, attributes and namespace qualifiers that this
+// parser looks for.
+// Root elements accepted by IsFeedSupported.
+_LIT8(KRdfHead, "RDF");
+_LIT8(KRssHead, "rss");
+// Namespace qualifiers (as in dc:date).
+_LIT8(KDc, "dc");
+_LIT8(KEnc, "enc");
+// Element names.
+_LIT8(KChannel, "channel");
+_LIT8(KItem, "item");
+_LIT8(KTitle, "title");
+_LIT8(KLink, "link");
+_LIT8(KDescription, "description");
+_LIT8(KGuid, "guid");
+_LIT8(KEnclosure, "enclosure");
+_LIT8(KDate, "date");
+_LIT8(KPubDate, "pubdate");
+_LIT8(KLastBuildDate, "lastbuilddate");
+_LIT8(KImage, "image");          // The _DEBUG guard once placed around these
+_LIT8(KTextInput, "textinput");  // two is disabled; they are always compiled.
+// Attribute names.
+_LIT8(KAbout, "about");
+_LIT8(KUrl, "url");
+_LIT8(KResource, "resource");
+_LIT8(KType, "type");
+_LIT8(KLength, "length");
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::NewL
+//
+// Two-phased constructor.  Allocates the parser, runs ConstructL on it and
+// returns the instance.  Leaves if the allocation or ConstructL leaves; the
+// half-built object is then released through the cleanup stack.
+// -----------------------------------------------------------------------------
+//
+CRssFeedParser* CRssFeedParser::NewL(CXmlUtils& aXmlUtils)
+    {
+    CRssFeedParser* self = new (ELeave) CRssFeedParser(aXmlUtils);
+    CleanupStack::PushL(self);
+    self->ConstructL();
+    CleanupStack::Pop(self); // Name the item so a stack imbalance is detected.
+    return self;
+    }
+
+        
+// -----------------------------------------------------------------------------
+// CRssFeedParser::CRssFeedParser
+// First-phase constructor (must not leave): sets up the base and leak tracker.
+// -----------------------------------------------------------------------------
+//
+CRssFeedParser::CRssFeedParser(CXmlUtils& aXmlUtils)
+    : CFeedParser(aXmlUtils),
+      iLeakTracker(CLeakTracker::ERssFeedParser)
+    {
+    }
+        
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ConstructL
+// Symbian 2nd phase constructor; may leave.  Registers the mappings that tie
+// feed-level and item-level child elements to their handler functions.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ConstructL()
+    {
+    // Each mapping names a namespace qualifier, an element, the attribute id to
+    // report and the handler to run; e.g. a <title> under the channel goes to
+    // ElementHandlerTextL and is reported as EFeedAttributeTitle.
+    const TDesC8&  noNs = KNullDesC8();
+    const TDesC8&  dcNs = KDc();
+
+    // Children of a channel.
+    AddFeedMappingL(noNs, KItem(), EFeedAttributeUnused, ElementHandlerItemL);
+    AddFeedMappingL(noNs, KTitle(), EFeedAttributeTitle, ElementHandlerTextL);
+    AddFeedMappingL(noNs, KLink(), EFeedAttributeLink, ElementHandlerUrlL);
+    AddFeedMappingL(noNs, KDescription(), EFeedAttributeDescription, ElementHandlerCDataL);
+    AddFeedMappingL(noNs, KPubDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL);
+    AddFeedMappingL(noNs, KLastBuildDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL);
+    AddFeedMappingL(dcNs, KTitle(), EFeedAttributeTitle, ElementHandlerTextL);
+    AddFeedMappingL(dcNs, KDescription(), EFeedAttributeDescription, ElementHandlerCDataL);
+    AddFeedMappingL(dcNs, KDate(), EFeedAttributeTimestamp, ElementHandlerTimestampL);
+
+    // <image> and <textinput> are routed to the validation handler.  The _DEBUG
+    // guard that used to surround these two calls is disabled.
+    AddFeedMappingL(noNs, KImage(), EFeedAttributeUnused, ElementHandlerOtherL);
+    AddFeedMappingL(noNs, KTextInput(), EFeedAttributeUnused, ElementHandlerOtherL);
+
+    // Children of an item.
+    AddItemMappingL(noNs, KTitle(), EItemAttributeTitle, ElementHandlerTextL);
+    AddItemMappingL(noNs, KLink(), EItemAttributeLink, ElementHandlerUrlL);
+    AddItemMappingL(noNs, KDescription(), EItemAttributeDescription, ElementHandlerCDataL);
+    AddItemMappingL(noNs, KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL);
+    AddItemMappingL(noNs, KPubDate(), EItemAttributeTimestamp, ElementHandlerTimestampL);
+    AddItemMappingL(dcNs, KTitle(), EItemAttributeTitle, ElementHandlerTextL);
+    AddItemMappingL(dcNs, KDescription(), EItemAttributeDescription, ElementHandlerCDataL);
+    AddItemMappingL(dcNs, KDate(), EItemAttributeTimestamp, ElementHandlerTimestampL);
+    AddItemMappingL(KEnc(), KEnclosure(), EItemAttributeEnclosure, ElementHandlerEnclosureL);
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::~CRssFeedParser
+// Destructor.  The body is empty: no explicit cleanup is performed here.
+// -----------------------------------------------------------------------------
+//
+CRssFeedParser::~CRssFeedParser()
+    {
+    }
+
+        
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ParseFeedL
+//
+// Walks the given document and reports the feed it describes to aObserver,
+// bracketed by FeedBeginsL and FeedEndsL.  Leaves with KErrCorrupt if the
+// document has no root element or the root has no channel child.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ParseFeedL(RXmlEngDocument aDocument,
+        MFeedParserObserver& aObserver)
+    {
+    // Guard against a document without a root element before using the root;
+    // this mirrors the null-root check in IsFeedSupported.
+    TXmlEngElement  rootNode = iXmlUtils.GetDocumentFirstElement(aDocument);
+    if (rootNode.IsNull())
+        {
+        User::Leave(KErrCorrupt);
+        }
+
+    // Find the channel element.
+    TXmlEngElement  channelNode = iXmlUtils.GetFirstNamedChild(rootNode, KChannel());
+    if (channelNode.IsNull())
+        {
+        User::Leave(KErrCorrupt);
+        }
+
+    // Start with no feed timestamp node remembered.
+    iFeedTimestampNode = NULL;
+    aObserver.FeedBeginsL();
+
+    // Handle the channel's children first, then the root's; the second pass is
+    // needed to support RSS 1.0 as well as malformed RSS 2.0.
+    // NOTE(review): the root pass also visits <channel> itself; presumably
+    // HandleFeedChildL ignores it as no mapping is registered for it - confirm.
+    ProcessElementsL(channelNode, aObserver);
+    ProcessElementsL(rootNode, aObserver);
+    // Report the timestamp node chosen by ElementHandlerTimestampL, if any.
+    if (iFeedTimestampNode.NotNull())
+        {
+        ElementHandlerDateL(*this, iXmlUtils, iFeedTimestampNode,
+                EFeedAttributeTimestamp, aObserver);
+        }
+    aObserver.FeedEndsL();
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::IsFeedSupported
+//
+// Returns ETrue if this feed parser can process the given document, which is
+// the case when CXmlUtils::IsNamed matches the document's root element against
+// KRssHead ("rss") or KRdfHead ("RDF").  Returns EFalse otherwise, including
+// when the document has no root element.  aContentType is not examined.
+// -----------------------------------------------------------------------------
+//
+TBool CRssFeedParser::IsFeedSupported(CXmlUtils& aXmlUtils,
+        RXmlEngDocument aDocument, const TDesC& /*aContentType*/)
+    {
+    // TODO: Check the content-type.
+
+    TXmlEngElement  root = aXmlUtils.GetDocumentFirstElement(aDocument);
+
+    // If the root element is missing the feed isn't supported.
+    if (root.IsNull())
+        {
+        return EFalse;
+        }
+
+    // Supported when the root is one of the two recognised head elements.
+    if (aXmlUtils.IsNamed(root, KRssHead) || aXmlUtils.IsNamed(root, KRdfHead))
+        {
+        return ETrue;
+        }
+
+    // Any other root element means the feed isn't supported.
+    return EFalse;
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ProcessElementsL
+//
+// Hands each element child of aNode, from the first child through its
+// following siblings, to HandleFeedChildL.  ParseFeedL calls this once for
+// the channel element and once for the root element.  No child is skipped
+// here; what happens to an element is up to HandleFeedChildL.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ProcessElementsL(TXmlEngElement aNode,
+        MFeedParserObserver& aObserver) const
+    {
+    // Begin with the first element child and advance along the sibling chain
+    // until there is no further element.
+    for (TXmlEngElement child = iXmlUtils.GetFirstElementChild(aNode);
+            child.NotNull();
+            child = iXmlUtils.GetNextSiblingElement(child))
+        {
+        // Process the element.
+        HandleFeedChildL(child, aObserver);
+        }
+    }
+
+        
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ElementHandlerItemL
+//
+// An ElementHandler function that reports one item to the observer.  Between
+// ItemBeginsL and ItemEndsL it dispatches each child element of aNode through
+// HandleItemChildL, then adds the id string and, if a timestamp node was
+// remembered along the way, the timestamp.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ElementHandlerItemL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, 
+        TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
+    {
+    // Handlers get the parser as const; the cast gives access to the item
+    // timestamp node member that is updated below.
+    CRssFeedParser&  self(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser)));
+
+    aObserver.ItemBeginsL();
+
+    // No timestamp node has been remembered for this item yet.
+    self.iItemTimestampNode = NULL;
+
+    // Dispatch every child element of the item.
+    for (TXmlEngElement child = aXmlUtils.GetFirstElementChild(aNode);
+            child.NotNull();
+            child = aXmlUtils.GetNextSiblingElement(child))
+        {
+        self.HandleItemChildL(child, aObserver);
+        }
+
+    // Postprocess the EItemAttributeIdStr attribute.
+    self.SetItemIdStrAttributeL(aNode, aObserver);
+
+    // Postprocess the EItemAttributeTimestamp attribute.
+    if (self.iItemTimestampNode.NotNull())
+        {
+        ElementHandlerDateL(aParser, aXmlUtils, self.iItemTimestampNode,
+                EItemAttributeTimestamp, aObserver);
+        }
+
+    // Close the item.
+    aObserver.ItemEndsL();
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ElementHandlerEnclosureL
+//
+// An ElementHandler function that reports an enclosure to the observer.
+// Between EnclosureBeginsL and EnclosureEndsL it adds up to three attributes:
+// the link (from the "resource" attribute, else from "url"), the content
+// type (from "type") and the size (from "length").  Attributes for which
+// AttributeL returns NULL are not reported.  All four attribute values are
+// released again before returning.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ElementHandlerEnclosureL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, 
+        TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
+    {
+    // Open the enclosure; its attributes are added while it is open.
+    aObserver.EnclosureBeginsL();
+
+    // Fetch the four attributes.  Each result is pushed straight away, NULL
+    // or not, so that a leave further down releases whatever was fetched.
+    HBufC*  url = aXmlUtils.AttributeL(aNode, KUrl);
+    CleanupStack::PushL(url);
+
+    HBufC*  resource = aXmlUtils.AttributeL(aNode, KResource);
+    CleanupStack::PushL(resource);
+
+    HBufC*  type = aXmlUtils.AttributeL(aNode, KType);
+    CleanupStack::PushL(type);
+
+    HBufC*  length = aXmlUtils.AttributeL(aNode, KLength);
+    CleanupStack::PushL(length);
+
+    // The link: "resource" is preferred, "url" is the fallback.
+    HBufC*  link = (resource != NULL) ? resource : url;
+    if (link != NULL)
+        {
+        aObserver.AddAttributeL(EEnclosureAttributeLink, *link);
+        }
+
+    // The content type.
+    if (type != NULL)
+        {
+        aObserver.AddAttributeL(EEnclosureAttributeContentType, *type);
+        }
+
+    // The size.
+    if (length != NULL)
+        {
+        aObserver.AddAttributeL(EEnclosureAttributeSize, *length);
+        }
+
+    // Signal the end of the enclosure.
+    aObserver.EnclosureEndsL();
+
+    // Release the attribute values in reverse push order.
+    CleanupStack::PopAndDestroy(length);
+    CleanupStack::PopAndDestroy(type);
+    CleanupStack::PopAndDestroy(resource);
+    CleanupStack::PopAndDestroy(url);
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ElementHandlerTimestampL
+//
+// An ElementHandler function that only remembers which date element should
+// supply the timestamp; nothing is sent to the observer from here.
+//   Feed:  a lastbuilddate element always replaces the remembered node, while
+//          pubdate and dc:date are remembered only if nothing is remembered yet.
+//   Item:  the first pubdate or dc:date element is remembered.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ElementHandlerTimestampL(const CFeedParser& aParser, CXmlUtils& aXmlUtils, 
+        TXmlEngElement aNode, TInt aValueId, MFeedParserObserver& /*aObserver*/)
+    {
+    // Handlers get the parser as const; the cast gives access to the two
+    // timestamp node members that are updated below.
+    CRssFeedParser&  self(const_cast<CRssFeedParser&>(static_cast<const CRssFeedParser&>(aParser)));
+
+    // Item related timestamps: every value id other than the feed one.
+    if (aValueId != EFeedAttributeTimestamp)
+        {
+        // PubDate and dc:date mean the same thing, so once it's set, it's set.
+        if (self.iItemTimestampNode.IsNull()
+                && (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate)))
+            {
+            self.iItemTimestampNode = aNode;
+            }
+        return;
+        }
+
+    // Feed related timestamps: LastBuildDate is the more relevant one and is
+    // remembered unconditionally.
+    if (aXmlUtils.IsNamed(aNode, KLastBuildDate))
+        {
+        self.iFeedTimestampNode = aNode;
+        return;
+        }
+
+    // PubDate and dc:date are only remembered while the slot is still empty.
+    if (self.iFeedTimestampNode.IsNull()
+            && (aXmlUtils.IsNamed(aNode, KPubDate) || aXmlUtils.IsNamed(aNode, KDc, KDate)))
+        {
+        self.iFeedTimestampNode = aNode;
+        }
+    }
+
+
+// -----------------------------------------------------------------------------
+// HasChildTextL
+//
+// File-local helper for ElementHandlerOtherL.  Returns ETrue if aParent has a
+// child element named aChildName whose extracted text is non-empty, EFalse
+// otherwise.  Leaves if extracting the text or the cleanup-stack push leaves.
+// -----------------------------------------------------------------------------
+//
+static TBool HasChildTextL(CXmlUtils& aXmlUtils, TXmlEngElement aParent,
+        const TDesC8& aChildName)
+    {
+    TXmlEngElement  child = aXmlUtils.GetFirstNamedChild(aParent, aChildName);
+    if (child.IsNull())
+        {
+        return EFalse;
+        }
+
+    // The text is only inspected, so it is released again straight away.
+    TDesC*  text = aXmlUtils.ExtractTextL(child);
+    CleanupStack::PushL(text);
+    const TBool  hasText = (text != NULL) && (text->Length() > 0);
+    CleanupStack::PopAndDestroy(text);
+
+    return hasText;
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::ElementHandlerOtherL
+//
+// An ElementHandler function used for feed validation.  Notifies the observer
+// of each title, description and link child of aNode with non-empty text.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::ElementHandlerOtherL(const CFeedParser& /*aParser*/, CXmlUtils& aXmlUtils, 
+        TXmlEngElement aNode, TInt /*aValueId*/, MFeedParserObserver& aObserver)
+    {
+    if (HasChildTextL(aXmlUtils, aNode, KTitle()))
+        {
+        aObserver.OtherTitleL();
+        }
+
+    if (HasChildTextL(aXmlUtils, aNode, KDescription()))
+        {
+        aObserver.OtherDescriptionL();
+        }
+
+    if (HasChildTextL(aXmlUtils, aNode, KLink()))
+        {
+        aObserver.OtherLinkL();
+        }
+    }
+
+
+// -----------------------------------------------------------------------------
+// CRssFeedParser::SetItemIdStrAttributeL
+//
+// Determines the item's IdStr attribute (meant to be unique within the feed)
+// and reports it to the observer.  The first source that applies is used:
+//   1. the "about" attribute of the item element;
+//   2. the item's guid child element;
+//   3. a string assembled from the item's description, title and link.
+// Leaves if any of the calls made here leaves.
+// -----------------------------------------------------------------------------
+//
+void CRssFeedParser::SetItemIdStrAttributeL(TXmlEngElement aItemNode, 
+        MFeedParserObserver& aObserver)
+    {
+    // Character limit requested for each part of an assembled id.
+    const TInt  KStrChunk = 30;
+
+    // 1. If the about attribute is present then use it.
+    TDesC*  about = iXmlUtils.AttributeL(aItemNode, KAbout);
+    if (about != NULL)
+        {
+        // Keep it on the cleanup stack while the observer call may leave.
+        CleanupStack::PushL(about);
+        aObserver.AddAttributeL(EItemAttributeIdStr, *about);
+        CleanupStack::PopAndDestroy(about);
+        return;
+        }
+
+    // 2. Otherwise, if the guid element is present, let the CDATA handler
+    // report its content as the id.
+    TXmlEngElement  guidNode = iXmlUtils.GetFirstNamedChild(aItemNode, KGuid);
+    if (guidNode.NotNull())
+        {
+        ElementHandlerCDataL(*this, iXmlUtils, guidNode, EItemAttributeIdStr, aObserver);
+        return;
+        }
+
+    // 3. Otherwise assemble an id from three parts, each requested with a limit
+    // of KStrChunk characters.  This doesn't guarantee a unique id, but it is
+    // very likely to be one.
+
+    // Description part.
+    TXmlEngElement  descriptionNode = iXmlUtils.GetFirstNamedChild(aItemNode, KDescription);
+    TDesC*  description = iXmlUtils.ExtractSimpleTextL(descriptionNode, KStrChunk);
+    CleanupStack::PushL(description);
+
+    // Title part.
+    TXmlEngElement  titleNode = iXmlUtils.GetFirstNamedChild(aItemNode, KTitle);
+    TDesC*  title = iXmlUtils.ExtractSimpleTextL(titleNode, KStrChunk);
+    CleanupStack::PushL(title);
+
+    // Link part.  Only this request passes the extra ETrue argument
+    // (presumably it selects the tail of the text - confirm in CXmlUtils).
+    TXmlEngElement  linkNode = iXmlUtils.GetFirstNamedChild(aItemNode, KLink);
+    HBufC*  url = iXmlUtils.ExtractSimpleTextL(linkNode, KStrChunk, ETrue);
+    CleanupStack::PushL(url);
+
+    // Work out the total length; a part that came back as NULL adds nothing.
+    const TInt  descriptionLen = (description != NULL) ? description->Length() : 0;
+    const TInt  titleLen = (title != NULL) ? title->Length() : 0;
+    const TInt  urlLen = (url != NULL) ? url->Length() : 0;
+
+    // Allocate the id buffer.  NewLC leaves it on the cleanup stack, on top of
+    // the three parts pushed above.
+    HBufC*  idStr = HBufC::NewLC(descriptionLen + titleLen + urlLen);
+    TPtr  ptr(idStr->Des());
+
+    // Concatenate the parts in the order description, title, url.
+    if (description != NULL)
+        {
+        ptr.Append(*description);
+        }
+
+    if (title != NULL)
+        {
+        ptr.Append(*title);
+        }
+
+    if (url != NULL)
+        {
+        ptr.Append(*url);
+        }
+
+    // Replace any chars that may interfere with the database: every apostrophe
+    // becomes a space.  One char replaces one char, so the length is stable.
+    _LIT(KSpace, " ");
+    const TInt  idLength = ptr.Length();
+
+    for (TInt pos = 0; pos < idLength; ++pos)
+        {
+        if (ptr[pos] == '\'')
+            {
+            ptr.Replace(pos, 1, KSpace());
+            }
+        }
+
+    // Set the idStr attribute.
+    aObserver.AddAttributeL(EItemAttributeIdStr, *idStr);
+
+    // Release everything in reverse push order.
+    CleanupStack::PopAndDestroy(idStr);
+    CleanupStack::PopAndDestroy(url);
+    CleanupStack::PopAndDestroy(title);
+    CleanupStack::PopAndDestroy(description);
+    }