browserutilities/feedsengine/FeedsServer/XmlUtils/src/XmlUtils.cpp
changeset 0 dd21522fd290
child 25 0ed94ceaa377
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 * Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:  Misc. libxml2 related utilities.
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 
       
    20 #include <charconv.h>
       
    21 #include <EscapeUtils.h>
       
    22 #include <utf.h>
       
    23 
       
    24 #include "CleanupLibXml2.h"  
       
    25 #include "LeakTracker.h"
       
    26 #include "Logger.h"
       
    27 #include "XmlEncoding.h"
       
    28 #include "XmlEntity.h"
       
    29 #include "XmlUtils.h"
       
    30 
       
    31 // Local data-types
       
    32 struct ErrorContext
       
    33     {
       
    34     TInt   error;
       
    35     TBool  retry;
       
    36     };
       
    37 
       
    38 // Static variables
       
    39 static CXmlEntity*  sXmlEntity = NULL;
       
    40 
       
    41 
       
    42 // -----------------------------------------------------------------------------
       
    43 // EntityResolverL
       
    44 //
       
    45 // A local function to resolve entities found in the buffer being parsed. 
       
    46 // -----------------------------------------------------------------------------
       
    47 //
       
    48 static xmlEntityPtr EntityResolverL(void* /*aContext*/, const xmlChar *aName) 
       
    49     {
       
    50     xmlEntityPtr  entity = NULL;
       
    51 	
       
    52     // Use the standard resolver.
       
    53     entity = xmlGetPredefinedEntity(aName);
       
    54     
       
    55     // Resolve other entities too.
       
    56     if ((sXmlEntity != NULL) && (entity == NULL))
       
    57         {
       
    58         entity = const_cast<xmlEntityPtr>(sXmlEntity->ResolveL(aName));
       
    59         }
       
    60 
       
    61     return entity;
       
    62     }
       
    63 
       
    64 
       
    65 // -----------------------------------------------------------------------------
       
    66 // StructuredErrorReporter
       
    67 //
       
    68 // A local function to collect errors found while parsing the buffer.
       
    69 // See http://www.xmlsoft.org/html/libxml-xmlerror.html.
       
    70 // -----------------------------------------------------------------------------
       
    71 //
       
    72 static void StructuredErrorReporter(void* aContext, xmlErrorPtr error)
       
    73     {
       
    74     _LIT(KErr, "error");
       
    75     
       
    76     TBool          ignore = EFalse;
       
    77     TPtrC          type(KErr);
       
    78     ErrorContext*  context = static_cast<ErrorContext*>(aContext);
       
    79 
       
    80     if (context != NULL)
       
    81         {        
       
    82         context->error = error->code; 
       
    83 
       
    84         switch (error->code)
       
    85             {
       
    86             case XML_ERR_UNSUPPORTED_ENCODING:          // 32
       
    87                 // Try to reparse the buffer with another char-encoding.
       
    88                 context->retry = ETrue;
       
    89                 ignore = ETrue;
       
    90                 break;
       
    91 
       
    92             case XML_ERR_UNDECLARED_ENTITY:             // 26
       
    93             case XML_WAR_UNDECLARED_ENTITY:             // 27
       
    94                 type.Set(_L("warning"));
       
    95                 break;
       
    96 
       
    97             case XML_ERR_INTERNAL_ERROR:                // 1
       
    98             case XML_ERR_DOCUMENT_EMPTY:                // 4
       
    99             case XML_ERR_DOCUMENT_END:                  // 5
       
   100             case XML_ERR_INVALID_CHAR:                  // 9
       
   101             case XML_ERR_INVALID_DEC_CHARREF:           // 7
       
   102             case XML_ERR_ENTITYREF_SEMICOL_MISSING:     // 23
       
   103             case XML_NS_ERR_QNAME:                      // 38
       
   104             case XML_ERR_LT_IN_ATTRIBUTE:               // 39
       
   105             case XML_ERR_ATTRIBUTE_NOT_STARTED:         // 41
       
   106             case XML_ERR_ATTRIBUTE_WITHOUT_VALUE:       // 42
       
   107             case XML_ERR_ATTRIBUTE_REDEFINED:           // 43
       
   108             case XML_ERR_LITERAL_NOT_STARTED:           // 46
       
   109             case XML_ERR_PI_NOT_STARTED:                // 47
       
   110             case XML_ERR_PI_NOT_FINISHED:               // 61
       
   111             case XML_ERR_DOCTYPE_NOT_FINISHED:          // 62
       
   112             case XML_ERR_MISPLACED_CDATA_END:           // 63
       
   113             case XML_ERR_RESERVED_XML_NAME:             // 64
       
   114             case XML_ERR_SPACE_REQUIRED:                // 65
       
   115             case XML_ERR_NAME_REQUIRED:                 // 68
       
   116             case XML_ERR_URI_REQUIRED:                  // 70
       
   117             case XML_ERR_GT_REQUIRED:                   // 73
       
   118             case XML_ERR_LTSLASH_REQUIRED:              // 74
       
   119             case XML_ERR_HYPHEN_IN_COMMENT:             // 80
       
   120             case XML_ERR_TAG_NAME_MISMATCH:             // 76
       
   121             case XML_ERR_TAG_NOT_FINISHED:              // 77
       
   122             case XML_NS_ERR_UNDEFINED_NAMESPACE:        // 201
       
   123                 // Just log these as errors and continue if possible.
       
   124                 break;
       
   125 
       
   126             default:
       
   127                 break;
       
   128             }
       
   129         }
       
   130         
       
   131     if (ignore == EFalse)
       
   132         {
       
   133         FEED_LOG2(_L("Feeds"), _L("Feeds.log"), 
       
   134                 EFileLoggingModeAppend, _L("FEEDS: parser %S!!! Code:%d"), &type, error->code);
       
   135         }
       
   136     }
       
   137 
       
   138 
       
   139 // -----------------------------------------------------------------------------
       
   140 // CXmlUtils::NewL
       
   141 //
       
   142 // Two-phased constructor.
       
   143 // -----------------------------------------------------------------------------
       
   144 //
       
   145 CXmlUtils* CXmlUtils::NewL()
       
   146     {
       
   147     CXmlUtils* self = new (ELeave) CXmlUtils();
       
   148     
       
   149     CleanupStack::PushL(self);
       
   150     self->ConstructL();
       
   151     CleanupStack::Pop();
       
   152 
       
   153     return self;
       
   154     }
       
   155 
       
   156         
       
   157 // -----------------------------------------------------------------------------
       
   158 // CXmlUtils::CXmlUtils
       
   159 //
       
   160 // C++ default constructor.
       
   161 // -----------------------------------------------------------------------------
       
   162 //
       
   163 CXmlUtils::CXmlUtils():
       
   164         iLeakTracker(CLeakTracker::ELibXml2)
       
   165     {
       
   166     }
       
   167         
       
   168 
       
   169 // -----------------------------------------------------------------------------
       
   170 // CXmlEncoding::ConstructL
       
   171 //
       
   172 // Symbian 2nd phase constructor can leave.
       
   173 // -----------------------------------------------------------------------------
       
   174 //
       
   175 void CXmlUtils::ConstructL()
       
   176     {
       
   177     // will call XmlEngine::XmlEngineAttachL()
       
   178     iImpl.OpenL();
       
   179 
       
   180     // Create the XmlEncoding singleton.
       
   181     iXmlEncoding = CXmlEncoding::NewL();
       
   182     
       
   183     // Create the XmlEntity singleton.
       
   184     iXmlEntity = CXmlEntity::NewL();
       
   185     }
       
   186         
       
   187 
       
   188 // -----------------------------------------------------------------------------
       
   189 // CXmlUtils::~CXmlUtils
       
   190 //
       
   191 // Deconstructor.
       
   192 // -----------------------------------------------------------------------------
       
   193 //
       
   194 CXmlUtils::~CXmlUtils()
       
   195     {
       
   196     delete iXmlEncoding;
       
   197     delete iXmlEntity;
       
   198 
       
   199     // will call XmlEngine::XmlEngineCleanup()
       
   200     iImpl.Close();
       
   201     }
       
   202 
       
   203 
       
   204 // -----------------------------------------------------------------------------
       
   205 // CXmlUtils::ParseBufferL
       
   206 //
       
   207 // Returns a libxml2 document from the provided buffer.
       
   208 // -----------------------------------------------------------------------------
       
   209 //
       
   210 RXmlEngDocument CXmlUtils::ParseBufferL(const TDesC8& aBuffer, const TDesC& aCharSet) const
       
   211     {
       
   212     xmlDocPtr         document = NULL;
       
   213     xmlParserCtxtPtr  contxt = NULL;
       
   214     ErrorContext      errorContext;
       
   215     HBufC8*           utf8Buffer = NULL;
       
   216     const xmlChar*    parseBuffer = NULL;
       
   217     TInt              parseLength = 0;
       
   218     TUint             encoding;
       
   219     RXmlEngDocument   doc;
       
   220     
       
   221     if (aBuffer.Length() == 0)
       
   222         {
       
   223         User::Leave(KErrArgument);
       
   224         }
       
   225     
       
   226     // Prepare the parser's context.
       
   227   	User::LeaveIfNull(contxt = xmlNewParserCtxt());
       
   228     
       
   229     sXmlEntity = iXmlEntity;
       
   230     contxt->sax->getEntity = EntityResolverL;
       
   231     CleanupStack::PushL(TCleanupItem(&CleanupParseBuffer, contxt));
       
   232     
       
   233     // Set the error callback
       
   234     xmlSetStructuredErrorFunc(&errorContext, StructuredErrorReporter);
       
   235 
       
   236     // If need be convert the buffer to utf8.
       
   237     if (iXmlEncoding->DetermineCharEncodingL(aBuffer, aCharSet, encoding))
       
   238         {
       
   239         if (encoding != KCharacterSetIdentifierUtf8)
       
   240             {
       
   241             utf8Buffer = iXmlEncoding->ConvertToUtf8L(encoding, aBuffer);
       
   242             CleanupStack::PushL(utf8Buffer);
       
   243 
       
   244             // Skip anything before the xml prolog.
       
   245             parseBuffer = utf8Buffer->Ptr();
       
   246             parseLength = utf8Buffer->Length();
       
   247 
       
   248             SkipCharsBeforeXmlProlog(&parseBuffer, parseLength);
       
   249             }
       
   250         }
       
   251 
       
   252     // Otherwise its already utf8 so use the provided buffer.
       
   253     if (utf8Buffer == NULL)
       
   254         {
       
   255         // Skip anything before the xml prolog.
       
   256         parseBuffer = aBuffer.Ptr();
       
   257         parseLength = aBuffer.Length();
       
   258 
       
   259         SkipCharsBeforeXmlProlog(&parseBuffer, parseLength);
       
   260         }
       
   261 
       
   262     // Parse the document.
       
   263     errorContext.error = XML_ERR_OK;
       
   264     errorContext.retry = EFalse;        
       
   265 
       
   266     document = xmlSAXParseMemory(contxt->sax, (char*) parseBuffer, parseLength, FALSE);
       
   267     
       
   268     if (document == NULL)
       
   269         {
       
   270         User::Leave(KErrCorrupt);
       
   271         }
       
   272 
       
   273     if (utf8Buffer != NULL)
       
   274         {
       
   275         CleanupStack::PopAndDestroy(utf8Buffer);
       
   276         }
       
   277     
       
   278     CleanupStack::PopAndDestroy(/*contxt*/);    
       
   279 
       
   280     // Wrap and return the document.
       
   281     CleanupStack::PushL(document);
       
   282     doc.OpenL( (RXmlEngDOMImplementation&)iImpl, document );
       
   283     CleanupStack::Pop(/*document*/);
       
   284     
       
   285     return doc;
       
   286     }
       
   287 
       
   288 
       
   289 // -----------------------------------------------------------------------------
       
   290 // CXmlUtils::CleanupParseBuffer
       
   291 // 
       
   292 // The cleanup method for ParseBufferL
       
   293 // -----------------------------------------------------------------------------
       
   294 //
       
   295 void CXmlUtils::CleanupParseBuffer(TAny *aPtr)
       
   296     {
       
   297     xmlResetLastError();
       
   298     xmlSetGenericErrorFunc(NULL, NULL);
       
   299     xmlFreeParserCtxt(static_cast<xmlParserCtxtPtr>(aPtr));
       
   300     }
       
   301 
       
   302 
       
   303 // -----------------------------------------------------------------------------
       
   304 // CXmlUtils::SkipCharsBeforeXmlProlog
       
   305 // 
       
   306 // Skip any chars before the XML-prolog.
       
   307 // -----------------------------------------------------------------------------
       
   308 //
       
   309 void CXmlUtils::SkipCharsBeforeXmlProlog(const TUint8** aString, TInt& aLen) const
       
   310     {
       
   311     _LIT8(KStart, "<?xml");
       
   312     
       
   313     const xmlChar*  origStr = *aString;
       
   314     TInt            origLen = aLen;
       
   315 
       
   316     while (aLen > 0)
       
   317         {
       
   318         if ((aLen >= 5) && (xmlStrncasecmp(*aString, KStart().Ptr(), 5) == 0))
       
   319             {
       
   320             break;
       
   321             }
       
   322         else
       
   323             {
       
   324             (*aString)++;
       
   325             aLen--;
       
   326             }
       
   327         }
       
   328 
       
   329     // If the xml prolog is missing then do nothing.
       
   330     if (aLen == 0)
       
   331         {
       
   332         *aString = origStr;
       
   333         aLen = origLen;
       
   334         }
       
   335     }
       
   336 
       
   337 
       
   338 // -----------------------------------------------------------------------------
       
   339 // CXmlUtils::CleanupUrlL
       
   340 // 
       
   341 // Resolves any entities and escaped chars in the given url.  
       
   342 // -----------------------------------------------------------------------------
       
   343 //
       
   344 void CXmlUtils::CleanupUrlL(TDes& aUrl) const
       
   345     {
       
   346     HBufC*  temp = NULL;
       
   347 
       
   348     // Resolve entities.
       
   349     (void) ResolveEntitiesL(aUrl);
       
   350 
       
   351     // Unescape the url.
       
   352     temp = EscapeUtils::EscapeDecodeL(aUrl);
       
   353     CleanupStack::PushL(temp);
       
   354 
       
   355     // Trim leading and trailing whitespace.
       
   356     temp->Des().Trim();
       
   357 
       
   358     // Copy temp back to aUrl.  This is always safe because temp->Des().Length()
       
   359     // is always <= aUrl.Length().
       
   360     aUrl.Copy(*temp);
       
   361 
       
   362     CleanupStack::PopAndDestroy(temp);
       
   363     }    
       
   364 
       
   365 
       
   366 // -----------------------------------------------------------------------------
       
   367 // CXmlUtils::CleanupMarkupL
       
   368 // 
       
   369 // Resolves any html entities and removes any markup found in the given descriptor.
       
   370 // -----------------------------------------------------------------------------
       
   371 //
       
   372 TBool CXmlUtils::CleanupMarkupL(TDes& aBuffer, TInt aNewLineChar) const
       
   373     {
       
   374     TBool   modified = EFalse;
       
   375 
       
   376     // Strip all markup.
       
   377     modified |= StripMarkupL(aBuffer, aNewLineChar);
       
   378     
       
   379     // Resolve entities.
       
   380     modified |= ResolveEntitiesL(aBuffer);
       
   381 
       
   382     // Strip the CDATA markers.
       
   383     modified |= StripCDataMarkers(aBuffer);
       
   384 
       
   385     // Strip all markup created after entities were resolved.
       
   386     modified |= StripMarkupL(aBuffer, aNewLineChar);
       
   387     
       
   388     // Strip leading and trailing whitespace.
       
   389     aBuffer.Trim();
       
   390 
       
   391     return modified;
       
   392     }
       
   393 
       
   394 
       
   395 // -----------------------------------------------------------------------------
       
   396 // CXmlUtils::ResolveEntitiesL
       
   397 // 
       
   398 // Resolves any entities found in aOrig.
       
   399 // -----------------------------------------------------------------------------
       
   400 //
       
   401 TBool CXmlUtils::ResolveEntitiesL(TDes& aBuffer) const
       
   402 	{
       
   403     const TUint  KEntityStart =	'&';
       
   404     const TUint  KEntityEnd = ';';
       
   405 
       
   406 	TInt         entityStart = -1;
       
   407     TInt         currentPos =  0;    
       
   408     TBool        entityFound = EFalse;
       
   409 	TBool        textHasChanged = EFalse;
       
   410 
       
   411     while (currentPos < aBuffer.Length())
       
   412         {
       
   413         // If this is the beginning of an entity...
       
   414         if (aBuffer[currentPos] == KEntityStart) 
       
   415             {
       
   416             entityStart = currentPos;
       
   417             entityFound = ETrue;
       
   418             currentPos++;
       
   419             }
       
   420 
       
   421         // If this is the end of an entity...
       
   422         else if ((aBuffer[currentPos] == KEntityEnd) && entityFound)
       
   423             {
       
   424             TUint16    ucs2Value;
       
   425             TInt       entityLength = currentPos - entityStart + 1;
       
   426             TPtrC      entityStr(aBuffer.Mid(entityStart + 1, entityLength - 2));
       
   427             TBuf16<1>  entityCode;
       
   428             
       
   429             // Resolve the entity inline.
       
   430             iXmlEntity->ResolveL(entityStr, ucs2Value);            
       
   431             entityCode.SetLength(1);
       
   432             entityCode[0] = ucs2Value;
       
   433 
       
   434             aBuffer.Replace(entityStart, entityLength, entityCode);
       
   435 
       
   436             currentPos = entityStart;
       
   437 			textHasChanged = ETrue;
       
   438             entityStart = -1;
       
   439             entityFound = EFalse;
       
   440             }
       
   441 
       
   442         // Otherwise just move on.
       
   443         else
       
   444             {
       
   445             currentPos++;
       
   446             }
       
   447         }
       
   448 
       
   449 	return textHasChanged;
       
   450 	}
       
   451 
       
   452 
       
   453 // -----------------------------------------------------------------------------
       
   454 // CXmlUtils::StripMarkup
       
   455 // 
       
   456 // Remove any markup found in the given descriptor.
       
   457 // -----------------------------------------------------------------------------
       
   458 //
       
   459 TBool CXmlUtils::StripMarkupL(TDes& aBuffer, TInt aNewLineChar) const
       
   460     {
       
   461 	TInt    tagStart = -1;
       
   462     TInt    currentPos = 0;    
       
   463     TBool   tagStartFound = EFalse;
       
   464 	TBool   textHasChanged = EFalse;
       
   465 
       
   466     // TODO: 1) Remove xml comments as well.
       
   467 
       
   468     while (currentPos < aBuffer.Length())
       
   469         {
       
   470         // It's at the beginning of a tag.
       
   471         if (aBuffer.Mid(currentPos, 1).Compare(_L("<")) == 0)
       
   472             {
       
   473             tagStartFound = ETrue;
       
   474             tagStart = currentPos;
       
   475             currentPos++;
       
   476             }
       
   477 
       
   478         // It's at the end of a tag.
       
   479         else if ((aBuffer.Mid(currentPos,1).Compare(_L(">")) == 0) && 
       
   480                 tagStartFound)
       
   481             {
       
   482 			TBuf<6>  tag;
       
   483 			TInt     tagLength;
       
   484 
       
   485             // Extract enough of the tag to determine if it needs to insert a
       
   486             // newline.
       
   487             tagLength = Min(6, currentPos - tagStart + 1);
       
   488 			tag.Copy(aBuffer.Mid(tagStart, tagLength));
       
   489 			tag.LowerCase();
       
   490 
       
   491             // If it found a <p> or <br> tag insert a newline.
       
   492             // TODO: This fails if the tag includes attributes (i.e. style).
       
   493 			if ((aNewLineChar != 0) && 
       
   494 			        ((tag.Compare(_L("<br>")) == 0) ||
       
   495                     (tag.Compare(_L("<br/>")) == 0) ||
       
   496                     (tag.Compare(_L("<br />")) == 0) ||
       
   497                     (tag.Compare(_L("<p>")) == 0)))
       
   498 				{
       
   499                 // Delete the tag.
       
   500 				aBuffer.Delete(tagStart, currentPos - tagStart + 1);
       
   501 
       
   502                 // Insert the newline.
       
   503 				TBuf<1> chr;
       
   504 				_LIT(KFormat,"%c");
       
   505 
       
   506 				chr.Format(KFormat, aNewLineChar);
       
   507 				aBuffer.Insert(tagStart, chr);
       
   508 
       
   509 				currentPos = tagStart + 1;
       
   510 				textHasChanged = ETrue;
       
   511 				}
       
   512 
       
   513             // Otherwise just delete the tag.
       
   514 			else
       
   515 				{
       
   516 				aBuffer.Delete(tagStart, currentPos - tagStart + 1);
       
   517 				currentPos = tagStart;
       
   518 				textHasChanged = ETrue;
       
   519 				}
       
   520 
       
   521             tagStart = -1;
       
   522 			tagStartFound = EFalse;
       
   523             }
       
   524 
       
   525         // Otherwise just skip to the next char.
       
   526         else
       
   527             {
       
   528             currentPos++;
       
   529             }
       
   530         }
       
   531 
       
   532     if (textHasChanged)
       
   533         {
       
   534         aBuffer.TrimAll();
       
   535         }
       
   536 
       
   537 	return textHasChanged;
       
   538     }
       
   539 
       
   540 
       
   541 // -----------------------------------------------------------------------------
       
   542 // CXmlUtils::StripCDataMarkers
       
   543 // 
       
   544 // Remove any CDATA markers in the given descriptor.
       
   545 // -----------------------------------------------------------------------------
       
   546 //
       
   547 TBool CXmlUtils::StripCDataMarkers(TDes& aBuffer) const
       
   548     {
       
   549     _LIT(KMarkerBegin, "<![CDATA[");
       
   550     _LIT(KMarkerEnd, "]]>");
       
   551 
       
   552     const TInt KMarkerBeginLen = KMarkerBegin().Length();
       
   553     const TInt KMarkerEndLen = KMarkerEnd().Length();
       
   554     
       
   555     TInt    currentPos = 0;
       
   556     TBool   markerStartFound = EFalse;
       
   557 	TBool   textHasChanged = EFalse;
       
   558 
       
   559     while (currentPos < aBuffer.Length())
       
   560         {
       
   561         // Handle the the beginning of a marker.                
       
   562         if ((currentPos <= (aBuffer.Length() - KMarkerBeginLen)) && 
       
   563                 aBuffer.Mid(currentPos, KMarkerBeginLen).Compare(KMarkerBegin) == 0)
       
   564             {
       
   565             markerStartFound = ETrue;
       
   566             aBuffer.Delete(currentPos, KMarkerBeginLen);
       
   567             textHasChanged = ETrue;
       
   568             }
       
   569 
       
   570         // Handle the the end of a marker.                
       
   571         else if (markerStartFound && (currentPos <= (aBuffer.Length() - KMarkerEndLen)) && 
       
   572                 aBuffer.Mid(currentPos, KMarkerEndLen).Compare(KMarkerEnd) == 0)
       
   573             {
       
   574             markerStartFound = EFalse;
       
   575             aBuffer.Delete(currentPos, KMarkerEndLen);
       
   576             textHasChanged = ETrue;
       
   577             }
       
   578 
       
   579         // Otherwise just skip to the next char.
       
   580         else
       
   581             {
       
   582             currentPos++;
       
   583             }
       
   584         }
       
   585 
       
   586     if (textHasChanged)
       
   587         {
       
   588         aBuffer.TrimAll();
       
   589         }
       
   590 
       
   591 	return textHasChanged;
       
   592     }
       
   593 
       
   594 
       
   595 // -----------------------------------------------------------------------------
       
   596 // CXmlUtils::ExtractTextL
       
   597 //
       
   598 // Performs a deep extraction of the text children of the given node.  The result
       
   599 // is returned as a 16-bit descriptor.    The char encoding is always ucs2.
       
   600 // -----------------------------------------------------------------------------
       
   601 //
       
   602 HBufC* CXmlUtils::ExtractTextL(TXmlEngElement aElement) const
       
   603     {
       
   604     TInt     size;
       
   605     HBufC*   ucs2Des = NULL;
       
   606     RBuf8    rbuf;
       
   607     
       
   608     if (aElement.IsNull())
       
   609         {
       
   610         return NULL;
       
   611         }
       
   612         
       
   613     // Perform the deep extraction.
       
   614     size = aElement.InnerXmlL( rbuf );
       
   615     rbuf.CleanupClosePushL();
       
   616 
       
   617     // Resolve any remaining entities and strip any CDATA markers.
       
   618     if (size > 0)
       
   619         {
       
   620         ucs2Des = HBufC::NewLC( rbuf.Length() );
       
   621         TPtr  ptr(ucs2Des->Des());
       
   622         //ptr.Copy( rbuf );
       
   623         TInt unconverted = CnvUtfConverter::ConvertToUnicodeFromUtf8(ptr,rbuf);
       
   624         while( unconverted>0)
       
   625         {
       
   626         	
       
   627         	ucs2Des->ReAllocL(rbuf.Length()+unconverted);
       
   628         	ptr.Set(ucs2Des->Des());
       
   629         	unconverted = CnvUtfConverter::ConvertToUnicodeFromUtf8(ptr,rbuf);
       
   630         }
       
   631         
       
   632         ResolveEntitiesL(ptr);
       
   633         StripCDataMarkers(ptr);
       
   634         ptr.TrimAll();
       
   635 
       
   636         CleanupStack::Pop(ucs2Des);
       
   637         }
       
   638 
       
   639     CleanupStack::PopAndDestroy( /*rbuf*/ );
       
   640 
       
   641     return ucs2Des;
       
   642     }
       
   643 
       
   644 
       
   645 // -----------------------------------------------------------------------------
       
   646 // CXmlUtils::ExtractSimpleTextL
       
   647 //
       
   648 // Performs a deep extraction of the text children of the given node.  The result
       
   649 // is returned as a 16-bit descriptor.    The char encoding is always ucs2.  This
       
   650 // method is different from ExtractTextL in that it doesn't resolve entities or
       
   651 // do any other clean up.  It also has a length param.
       
   652 // -----------------------------------------------------------------------------
       
   653 //
       
   654 HBufC* CXmlUtils::ExtractSimpleTextL(TXmlEngElement aElement, TInt aMaxLength, 
       
   655         TBool aFromEnd) const
       
   656     {
       
   657     xmlChar*  xmlStr;
       
   658     HBufC*    ucs2Des = NULL;
       
   659     RBuf8   rbuf;
       
   660 
       
   661     if (aElement.IsNull())
       
   662         {
       
   663         return NULL;
       
   664         }
       
   665         
       
   666     // Extract the text from the element.
       
   667     aElement.WholeTextContentsCopyL( rbuf );
       
   668     rbuf.CleanupClosePushL();
       
   669     xmlStr = (xmlChar*) rbuf.Ptr();
       
   670         
       
   671     // If need be, trim the string.
       
   672     if (aMaxLength > 0)
       
   673         {
       
   674         if (aFromEnd)
       
   675             {
       
   676             if (rbuf.Length() < aMaxLength)
       
   677                 {
       
   678                 xmlStr = xmlUTF8Strndup(xmlStr, aMaxLength);
       
   679                 }
       
   680             else
       
   681                 {
       
   682                 xmlStr = xmlUTF8Strndup(xmlStr + rbuf.Length() - aMaxLength, aMaxLength);
       
   683                 }
       
   684             }
       
   685         else
       
   686             {            
       
   687             xmlStr = xmlUTF8Strndup(xmlStr, aMaxLength);
       
   688             }
       
   689         CleanupLibXml2::PushL(xmlStr);
       
   690         }
       
   691 
       
   692     // Convert the value into a TDesC
       
   693     TXmlEngString  temp((char*) xmlStr);
       
   694     
       
   695     ucs2Des = temp.AllocL();
       
   696     ucs2Des->Des().Trim();
       
   697 
       
   698     if (aMaxLength > 0)
       
   699         {
       
   700         CleanupStack::PopAndDestroy(/*xmlStr*/);
       
   701         }
       
   702         
       
   703     CleanupStack::PopAndDestroy( /*rbuf*/ );
       
   704 
       
   705     return ucs2Des;
       
   706     }
       
   707 
       
   708 
       
   709 // -----------------------------------------------------------------------------
       
   710 // CXmlUtils::GetDocumentFirstElement
       
   711 // 
       
   712 // Returns the first element in the given document.
       
   713 // -----------------------------------------------------------------------------
       
   714 //
       
   715 TXmlEngElement CXmlUtils::GetDocumentFirstElement(RXmlEngDocument aDocument) const
       
   716     {
       
   717     const TXmlEngElement  KNullElement;
       
   718     TXmlEngElement        element;    
       
   719 
       
   720     // Get the first child.
       
   721     element = aDocument.DocumentElement();
       
   722         
       
   723     // Loop through the children looking for the first element.
       
   724     while (element.NotNull())
       
   725         {
       
   726         if ((element.NodeType() == TXmlEngNode::EElement))
       
   727             {
       
   728             return element;
       
   729             }
       
   730             
       
   731         element = element.NextSibling().AsElement();
       
   732         }
       
   733         
       
   734     // Element not found, return a "null" node.
       
   735     return KNullElement;
       
   736     }
       
   737 
       
   738 
       
   739 // -----------------------------------------------------------------------------
       
   740 // CXmlUtils::GetFirstElementChild
       
   741 // 
       
   742 // Returns the first child of the given node.
       
   743 // -----------------------------------------------------------------------------
       
   744 //
       
   745 TXmlEngElement CXmlUtils::GetFirstElementChild(TXmlEngElement aElement) const
       
   746     {
       
   747     const TXmlEngElement      KNullElement;
       
   748     RXmlEngNodeList<TXmlEngElement> children;
       
   749     TXmlEngElement            child;
       
   750         
       
   751     // Get the first element -- children only contains TElements...
       
   752     aElement.GetChildElements(children);
       
   753     if (children.HasNext())
       
   754         {
       
   755         return children.Next();
       
   756         }
       
   757             
       
   758     return KNullElement;
       
   759     }
       
   760 
       
   761 
       
   762 // -----------------------------------------------------------------------------
       
   763 // CXmlUtils::GetFirstNamedChild
       
   764 // 
       
   765 // Returns the first child of the given node with the provided name.  aUtf8Name
       
   766 // MUST be null terminated.
       
   767 // -----------------------------------------------------------------------------
       
   768 //
       
   769 TXmlEngElement CXmlUtils::GetFirstNamedChild(TXmlEngElement aElement, const TDesC8& aUtf8Name) const
       
   770     {
       
   771     const TXmlEngElement      KNullElement;
       
   772     RXmlEngNodeList<TXmlEngElement> children;
       
   773     TXmlEngElement            child;
       
   774     
       
   775     // Get the first element -- children only contains TElements...
       
   776     aElement.GetChildElements(children);
       
   777     while (children.HasNext())
       
   778         {
       
   779         child = children.Next();
       
   780         
       
   781         if (IsNamed(child, aUtf8Name))
       
   782             {
       
   783             return child;
       
   784             }
       
   785         }
       
   786             
       
   787     return KNullElement;
       
   788     }
       
   789 
       
   790 
       
   791 // -----------------------------------------------------------------------------
       
   792 // CXmlUtils::GetNextSiblingElement
       
   793 // 
       
   794 // Returns the next sibling of the given node.
       
   795 // -----------------------------------------------------------------------------
       
   796 //
       
   797 TXmlEngElement CXmlUtils::GetNextSiblingElement(TXmlEngElement aElement)
       
   798     {
       
   799     const TXmlEngElement  KNullElement;
       
   800     TXmlEngElement        node;
       
   801     
       
   802     if (aElement.NotNull())
       
   803         {
       
   804         node = aElement.NextSibling().AsElement();
       
   805 
       
   806         // Loop through the siblings looking for the next element.
       
   807         while (node.NotNull())
       
   808             {
       
   809             if ((node.NodeType() == TXmlEngNode::EElement))
       
   810                 {
       
   811                 return node;
       
   812                 }
       
   813             
       
   814             node = node.NextSibling().AsElement();
       
   815             }
       
   816         }
       
   817         
       
   818     return KNullElement;
       
   819     }
       
   820 
       
   821 
       
   822 // -----------------------------------------------------------------------------
       
   823 // CXmlUtils::IsNamed
       
   824 // 
       
   825 // Returns true if the given node has a name of aName.  This method doesn't 
       
   826 // compare the namespace.
       
   827 // -----------------------------------------------------------------------------
       
   828 //
       
   829 TBool CXmlUtils::IsNamed(TXmlEngElement aElement, const TDesC8& aUtf8Name) const
       
   830     {
       
   831     return (xmlStrcasecmp((const xmlChar *) aElement.Name().Ptr(), 
       
   832             (const xmlChar *) aUtf8Name.Ptr()) == 0);
       
   833     }
       
   834 
       
   835 
       
   836 // -----------------------------------------------------------------------------
       
   837 // CXmlUtils::IsNamed
       
   838 // 
       
   839 // Returns true if the given node has match the given namespace and name.
       
   840 // -----------------------------------------------------------------------------
       
   841 //
       
   842 TBool CXmlUtils::IsNamed(TXmlEngElement aElement, const TDesC8& aUtf8NameSpace, 
       
   843         const TDesC8& aUtf8Name) const
       
   844     {
       
   845     TPtrC8  ns = aElement.Prefix();
       
   846     
       
   847     // First ensure the names match.
       
   848     if (!IsNamed(aElement, aUtf8Name))
       
   849         {
       
   850         return EFalse;
       
   851         }
       
   852 
       
   853     // Return false if one of the namespaces is NULL and the other isn't.
       
   854     if ((ns.Length() != 0 && (aUtf8NameSpace.Length() == 0)) ||
       
   855             (ns.Length() == 0 && (aUtf8NameSpace.Length() > 0)))
       
   856         {
       
   857         return EFalse;
       
   858         }
       
   859         
       
   860     // Return false if both are non-null, but don't match.
       
   861     if (ns.Length() != 0 && (aUtf8NameSpace.Length() >= 0) &&
       
   862             (xmlStrcasecmp((const xmlChar *) ns.Ptr(), 
       
   863             (const xmlChar *) aUtf8NameSpace.Ptr()) != 0))
       
   864         {
       
   865         return EFalse;
       
   866         }
       
   867 
       
   868     // Its a match.
       
   869     return ETrue;
       
   870     }
       
   871 
       
   872 
       
   873 // -----------------------------------------------------------------------------
       
   874 // CXmlUtils::AttributeL
       
   875 // 
       
   876 // Returns the attribute's value or NULL if the attribute wasn't present.
       
   877 // -----------------------------------------------------------------------------
       
   878 //
       
   879 HBufC* CXmlUtils::AttributeL(TXmlEngElement aElement, const TDesC8& aAttribute) const
       
   880     {    
       
   881     TXmlEngAttr     attr;
       
   882     HBufC*          value = NULL;
       
   883     RBuf8           rbuf;
       
   884     
       
   885     // Get the value.
       
   886     attr = aElement.AttributeNodeL( aAttribute );    
       
   887     if (attr.NotNull())
       
   888         {
       
   889         attr.WholeValueCopyL( rbuf );
       
   890         rbuf.CleanupClosePushL();
       
   891         value = HBufC::NewL( rbuf.Length() + 32 );
       
   892         TPtr ptr(value->Des());
       
   893         TInt unconverted = CnvUtfConverter::ConvertToUnicodeFromUtf8(ptr,rbuf);
       
   894         CleanupStack::PopAndDestroy( /*rbuf*/ );
       
   895         }
       
   896     return value;
       
   897     }
       
   898 
       
   899