diff -r 417699dc19c9 -r c7e9f1c97567 xml/legacyminidomparser/xmlparser/src/gmxmlparser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/legacyminidomparser/xmlparser/src/gmxmlparser.cpp Mon Sep 13 13:16:40 2010 +0530 @@ -0,0 +1,1962 @@ +// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). +// All rights reserved. +// This component and the accompanying materials are made available +// under the terms of "Eclipse Public License v1.0" +// which accompanies this distribution, and is available +// at the URL "http://www.eclipse.org/legal/epl-v10.html". +// +// Initial Contributors: +// Nokia Corporation - initial contribution. +// +// Contributors: +// +// Description: +// @file +// This file contains the definition of the generic CMDXMLParser class +// which is responsible for creating a DOM structure +// from a given XML file. +// +// + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "GMXMLFileDataSource.h" +#ifdef SYMBIAN_ENABLE_SPLIT_HEADERS +#include "gmxmldummydtd.h" +#endif + +const TInt KGMXMLDefaultTextBufferSize = 1024; +const TInt KUTF8EdgeBufferLen = 6; + +// +// Global functions // +// + + +//================================================================================== + +// +//CMDXParser // +// +//================================================================================== + +void CMDXMLParser::Panic(TPanicCode aReason) const + { + _LIT(KClassName, "CMDXMLParser"); + User::Panic(KClassName, aReason); + } + +EXPORT_C CMDXMLParser* CMDXMLParser::NewL(MMDXMLParserObserver* aParserObserver) +// +// Two phase static factory function constructor +// @return Created CMDXMLParser +// @leave can Leave due to OOM +// + { + CMDXMLParser* self = NewLC(aParserObserver); + CleanupStack::Pop(); + return self; + } + +EXPORT_C CMDXMLParser* CMDXMLParser::NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation) +// +// Two phase static factory function constructor +// @return Created CMDXMLParser +// @param aDtdRepresentation specid DTD represention class to be used for validation +// @leave can Leave due to OOM +// + { + CMDXMLParser* self = NewLC(aParserObserver, aDtdRepresentation); + CleanupStack::Pop(); + return self; + } + + +//================================================================================== + +EXPORT_C CMDXMLParser* CMDXMLParser::NewLC(MMDXMLParserObserver* aParserObserver) +// +// Two phase static factory function constructor +// @return Created CMDXMLParser +// @leave can Leave due to OOM +// + { + CMDXMLParser* self = new (ELeave) CMDXMLParser(aParserObserver); + CleanupStack::PushL(self); + // This overload of NewLC doesn't take a MXMLDtd*, but we need to provide one to + // ConstructL where ownership is taken if we do have one. Just pass NULL. + self->ConstructL(NULL); + return self; + } + +EXPORT_C CMDXMLParser* CMDXMLParser::NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation) +// +// Two phase static factory function constructor +// @return Created CMDXMLParser +// @param aDtdRepresentation specid DTD represention class to be used for validation +// @leave can Leave due to OOM +// + { + CMDXMLParser* self = new (ELeave) CMDXMLParser(aParserObserver); + CleanupStack::PushL(self); + self->ConstructL(aDtdRepresentation); + return self; + } + + +//================================================================================== + +void CMDXMLParser::ConstructL(MXMLDtd* aDtdRepresentation) +// +// Second stage constructor +// @param aDtdRepresentation The DTD to be used for validation +// @leave can Leave due to OOM +// + { + CMDXMLEntityConverter* entityConverter = new(ELeave) CMDXMLEntityConverter(); + SetEntityConverter(entityConverter); + + // This doesn't leave, but if CMDXMLParser::NewLC() Leaves after taking ownership + // of this we'll get a double deletion as the caller will have pushed + // aDtdRepresentation onto the CleanupStack. As such we can only take ownership + // once we are sure we aren't going to leave. + iDtdRepresentation = aDtdRepresentation; + } + +CMDXMLParser::CMDXMLParser(MMDXMLParserObserver* aParserObserver) + : CActive(EPriorityNormal) +// +// Constructor +// + { + iParserObserver = aParserObserver; + iStoreInvalid = ETrue; + CActiveScheduler::Add(this); + } + +//================================================================================== + +EXPORT_C CMDXMLParser::~CMDXMLParser() + { + Cancel(); + delete iBomBuffer; + if( iFileSource == NULL ) + { + // iFileSource has not been allocated yet, so the file path or file + // handle are still owned + if( iFileToParse!=NULL ) + delete iFileToParse; + else + iFileHandleToParse.Close(); + } + else + { + delete iFileSource; + } + delete iUTF8EdgeBuffer; + delete iXMLDoc; + delete iEntityConverter; + delete iElementTag; + delete iDtdRepresentation; + delete iText; + } + +//================================================================================== +//Defect fix for INC036136- Enable the use of custom entity converters in GMXML +EXPORT_C void CMDXMLParser::SetEntityConverter(CMDXMLEntityConverter* aEntityConverter) +/* + * Sets the entity converter to be used + * and take ownership of the passed entity converter + * @param aEntityConverter The entity converter to be used + */ + { + delete iEntityConverter; + iEntityConverter = aEntityConverter; + } +//End Defect fix for INC036136 +//================================================================================== + +EXPORT_C void CMDXMLParser::SetStoreInvalid(TBool aStoreInvalid) + { + iStoreInvalid = aStoreInvalid; + } + +//================================================================================== +// Defect fix for INC105134 - GmXML consumes whitespace characters +//================================================================================== +EXPORT_C void CMDXMLParser::SetWhiteSpaceHandlingMode(TBool aPreserve) + { + iPreserve = aPreserve; + } + +// End Defect fix for INC105134 +//================================================================================== +EXPORT_C CMDXMLDocument* CMDXMLParser::DetachXMLDoc() +// +// @return CMDXMLDocument* to the created DOM, should be called after the +// conclusion of the parser process. Note that internal variable pointing to +// the document is set to NULL so this function can only be called once per file +// parse. Client application must take ownership of document for cleanup purposes. +// + { + CMDXMLDocument* returnDoc = iXMLDoc; + iXMLDoc = NULL; + return returnDoc; + } + +//================================================================================== + +CMDXMLEntityConverter* CMDXMLParser::EntityConverter() +// +// @return the CMDXMLEntityConverter for use in converting built in entity +// and character entities back to their original format +// + { + return iEntityConverter; + } + +//================================================================================== + +EXPORT_C TInt CMDXMLParser::ParseFile(RFs aRFs, const TDesC& aFileToParse) +// +// ParseFile opens a file ready for parsing +// @param aRFs a resource file session used for file I/O +// @param aFileToParse the file name to parse +// @return KErrNone if all OK or file read error code +// + { + //Find out whether the file exists. If not dont start the active object + if(!BaflUtils::FileExists(aRFs,aFileToParse)) + { + return KErrNotFound; + } + else + { + //Check whether the file is locked by any other process + RFile tempFile; + TInt err=tempFile.Open(aRFs, aFileToParse, EFileRead | EFileShareReadersOnly); + if(err!=KErrNone) + { + return err; + } + tempFile.Close(); + } + Cancel(); + iSuspiciousCharacter = KErrNotFound; + iError = KErrNone; + iSeverity = EXMLNone; + iDocTypeSet = EFalse; + iVersionSet = EFalse; + + /* We need to open our file in a leave-safe place as it involves + a heap alloc, and so we'll set up the AO to do that when it runs next. + */ + delete iFileToParse; + iFileToParse = aFileToParse.Alloc(); + if(iFileToParse == NULL) + { + return KErrNoMemory; + } + + iRFs = aRFs; + iState = KInitFromFile; + + SetActive(); + TRequestStatus* s=&iStatus; + User::RequestComplete(s, KErrNone); + + return KErrNone; + } + +/** +Parses a specified XML file into a DOM object tree. + +Parses a specified XML file into a DOM object tree using an open file handle. The +parser takes ownership of the open file handle and will close handle when completed. + +@param aFileHandleToParse An open file handle for the file to parse. Ownership of the + file handle is passed. +@return KErrNone if successful. +*/ +EXPORT_C TInt CMDXMLParser::ParseFile(RFile& aFileHandleToParse) + { + iFileHandleToParse = aFileHandleToParse; + + Cancel(); + iSuspiciousCharacter = KErrNotFound; + iError = KErrNone; + iSeverity = EXMLNone; + iDocTypeSet = EFalse; + iVersionSet = EFalse; + + iState = KInitFromFile; + + iStatus = KRequestPending; + SetActive(); + TRequestStatus* s=&iStatus; + User::RequestComplete(s, KErrNone); + + return KErrNone; + } +//================================================================================== + +EXPORT_C void CMDXMLParser::ParseSourceL(MMDXMLParserDataProvider *aSource) + { + iSuspiciousCharacter = KErrNotFound; + iError = KErrNone; + iSeverity = EXMLNone; + iDocTypeSet = EFalse; + iVersionSet = EFalse; + + iDataSource = aSource; + PrepareForReuseL(); + GetMoreData(); + } + +//================================================================================== + +void CMDXMLParser::PrepareForReuseL() + { + Cancel(); + + iError = KErrNone; + iSeverity = EXMLNone; + iDocTypeSet = EFalse; + iVersionSet = EFalse; + iBytesPerChar = 0; + iNextChar = 0; + iInputBytesRemaining = 0; + iOpened = EFalse; + iClosed = EFalse; + iNewElement = NULL; + iParentElement = NULL; + delete iUTF8EdgeBuffer; + iUTF8EdgeBuffer = NULL; + delete iBomBuffer; + iBomBuffer = NULL; + + CreateDocumentL(); + iState = KDetermineCharset; + } + +//================================================================================== +TBool CMDXMLParser::DetectFileType() +// +// Detects the type of a data source - can be Unicode, UTF-8 or ASCII (because ASCII is +// a subset of UTF-8). +// If the file is empty it is assumed to be Utf-8. + { + TBuf8<3> bom; + + // Read the first 3 bytes of the file. These contain any BOM present. If it turns out + // there's not a bom we leave the pointer untouched so we can parse these bytes + // as usual. + if(iInputBufferPtr.Length() < 3) + return EFalse; + else + bom.Copy(iInputBufferPtr.Left(3)); + + TInt hichar = (CEditableText::EByteOrderMark & 0xFF00)>>8; + TInt lochar = CEditableText::EByteOrderMark & 0xFF; + + if((bom[0] == 0xEF) && (bom[1] == 0xBB) && (bom[2] == 0xBF)) + { + // Utf-8 with a bom. We don't want to parse the bom, so add 3 bytes offset to the read pos. + iBytesPerChar = 1; + iNextChar = 3; + } + else + { + if((bom[0] == lochar) && (bom[1] == hichar)) + { + // Little Endian Unicode. Move the read position on 2 bytes to ignore the bom. + iBytesPerChar = 2; + // would normally skip first 2 characters + iNextChar = 2; + } + else if((bom[0] == hichar) && (bom[1] == lochar)) + { + // We have a bom, but it indicates endianess opposite to that of the platform. We + // don't currently support this so set an error. + SetError(KErrNotSupported, EXMLFatal); + } + else + { + // Default to Utf-8 + iBytesPerChar = 1; + } + } + return ETrue; + } + +//================================================================================== + +EXPORT_C void CMDXMLParser::DoCancel() +// +// DoCancel function inherited from CActive base class +// + { + if (iDataSource) + { + iDataSource->Disconnect(); + iDataSource = NULL; + } + } + +//================================================================================== + +EXPORT_C TInt CMDXMLParser::RunError(TInt aError) +// +// RunError function inherited from CActive base class - intercepts any Leave from +// the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL +// + { + if(iDataSource) + { + iDataSource->Disconnect(); + iDataSource = NULL; + } + + iSeverity = EXMLFatal; + iError = aError; + if (iFileToParse) + { + delete iFileToParse; + iFileToParse = NULL; + } + else if( iFileSource==NULL ) + { + // iFileSource is not set so the ownership of the file handle has not been passed + iFileHandleToParse.Close(); + } + + __ASSERT_DEBUG(iParserObserver != NULL, Panic(ENullMemVarParserObserver)); + TRAPD(err, iParserObserver->ParseFileCompleteL()); + return err; + } + +//================================================================================== + +TBool CMDXMLParser::DoParseLoopL() +// +// RunL function inherited from CActive base class - carries out the actual parsing. +// @leave can Leave due to OOM +// + { + TBuf<1> singleChar; + TGetCharReturn getCharReturn = KError; + TInt error; + + while(iSeverity != EXMLFatal && (getCharReturn = GetChar(singleChar), getCharReturn == KCharReturned) ) + { + +#if 0 // THIS NEEDS TO BE REMOVED - IT'S A VERY HANDY DEBUGGING TOOL WHEN YOU'RE MANUALLY DOING + // BYTE ORDERING ON MISALIGNED STREAMS THOUGH... + { +#define DES_AS_8_BIT(str) (TPtrC8((TText8*)((str).Ptr()), (str).Size())) + + RFs aRFs; + RFile aFile; + _LIT(KFileName, "c:\\documents\\SMIL_Test_Files\\echoOutput.txt"); + TPtrC file; + TInt err; + + file.Set(KFileName); + + aRFs.Connect(); + + err = aFile.Open(aRFs, file, EFileWrite); + if(err != KErrNone) + err = aFile.Create(aRFs, file, EFileWrite); + + err = 0; + aFile.Seek(ESeekEnd, err); + aFile.Write(DES_AS_8_BIT(singleChar)); + aFile.Close(); + aRFs.Close(); + } +#endif + if((!iOpened) && singleChar != KXMLStartTag) + { + HandleTextL(singleChar); + } + + if((iOpened) || (singleChar == KXMLStartTag)) + { + if(singleChar == KXMLStartTag) + { + if(iOpened) + { + if((iSuspiciousCharacter == KErrNotFound)) + { + iSuspiciousCharacter = iElementTag->Length(); + } + } + else + { + AddTextL(iParentElement); + iOpened = ETrue; + } + } + else if(singleChar == KXMLEndTag) + { + if(iSuspiciousCharacter != KErrNotFound) + { + TPtrC suspiciousSection = iElementTag->Mid(iSuspiciousCharacter); + if( CheckForStartCData(suspiciousSection) == 0 ) + { + TInt endCDataLen = TPtrC(KXMLEndCDataSection).Length(); + // The suspicious character begins a CDataSection. Check if + // this End Tag is closing it. + if( suspiciousSection.Right(endCDataLen - 1) + == TPtrC(KXMLEndCDataSection).Left(endCDataLen - 1) ) + { + // Any dodgy characters began the CDataSection or were in it + iSuspiciousCharacter = KErrNotFound; + } + } + else if( suspiciousSection.Find(KXMLStartComment) == 0 ) + { + // The suspicious character begins a comment. Check if + // this End Tag is closing it. + TInt endCommentLen = TPtrC(KXMLEndComment).Length(); + if( suspiciousSection.Right(endCommentLen - 1) + == TPtrC(KXMLEndComment).Left(endCommentLen - 1) ) + { + // Any dodgy characters began the comment or were in it + iSuspiciousCharacter = KErrNotFound; + } + } + else if((CheckForStartCData(*iElementTag) == 0) || (iElementTag->Find(KXMLStartComment) == 0)) + { + // this tag is a CDataSection or comment, we're allowed < + iSuspiciousCharacter = KErrNotFound; + iClosed = ETrue; + } + else + { + // The < was spurious, set an error and close the tag as normal + SetError(KErrXMLIllegalCharacter, EXMLWorkable); + iClosed = ETrue; + iSuspiciousCharacter = KErrNotFound; + } + } + else + { + iClosed = ETrue; + } + } + + // ensure descriptor doesn't overflow end and panics + if(iElementTag->Length() == iElementTag->Des().MaxLength()) + { + iElementTag = iElementTag->ReAllocL(iElementTag->Length() + KNominalTagLength); + } + + iElementTag->Des().Append(singleChar); + + // if tag is complete and needs adding to the DOM? + if(iClosed) + { + if( !CommentL(iParentElement) + && !CDataSectionL(iParentElement) + && !VersionIDL() + && !DocTypeL() + && !ProcessingInstructionL(iParentElement) ) + { + // is this a regular closing tag + if + (iElementTag->Left(2) == KXMLStartEndTag) + { + error = ParseElementEndTag(*iParentElement, iElementTag->Des()); + if(error == KErrNone) + { + if(iParentElement->ParentNode() == NULL) + { + SetError(KErrXMLBadNesting, EXMLIndeterminate); + } + else + { + iParentElement = (CMDXMLElement*) iParentElement->ParentNode(); + } + } + else if(error == KErrNotFound) + { + CMDXMLElement* tempElement = (CMDXMLElement*) iParentElement->ParentNode(); + TInt searchResult = KErrNotFound; + + while(tempElement != NULL && + searchResult == KErrNotFound && + tempElement->NodeName() != KXMLDocumentElementNodeName) + { + searchResult = ParseElementEndTag(*tempElement,iElementTag->Des()); + if(searchResult == KErrNone) + { + iParentElement = tempElement; + SetError(KErrXMLBadNesting, EXMLIndeterminate); + } + else + { + tempElement = (CMDXMLElement*) tempElement->ParentNode(); + } + } + if(searchResult != KErrNone) + { + SetError(KErrXMLBadNesting, EXMLIndeterminate); + } + } + else + { + SetError(error, EXMLIndeterminate); + } + } + // if a new element start tag or start/end tag + else + { + // NOTE ParseStartTagL destroys iElementTag + // so following check must be done first + + // if not single tag with close + if(!(iElementTag->Right(2) == KXMLEndStartTag)) + { + iNewElement = ParseStartTagL(); + CleanupStack::PushL(iNewElement); + error = iParentElement->AppendChild(iNewElement); + + if(error == KErrNone) + { + CleanupStack::Pop(); // iNewElement + iParentElement = (CMDXMLElement*) iParentElement->LastChild(); + } + else + { + SetError(error, EXMLWorkable); + CleanupStack::PopAndDestroy(iNewElement); // iNewElement + } + } + else + { + iNewElement = ParseStartTagL(); + CleanupStack::PushL(iNewElement); + error = iParentElement->AppendChild(iNewElement); + + if(error == KErrNone) + { + CleanupStack::Pop(iNewElement); // iNewElement + } + else + { + SetError(error, EXMLWorkable); + CleanupStack::PopAndDestroy(iNewElement); // iNewElement + } + } + } + iEndOfTag = ETrue; + } + + if(iEndOfTag) + { + iEndOfTag = EFalse; + iOpened = iClosed = EFalse; + iElementTag->Des().Zero(); + + // reduce size of ElementTag if increased beyond normal limits on last pass + if(iElementTag->Des().MaxLength() > KNominalTagLength) + { + iElementTag = iElementTag->ReAllocL(KNominalTagLength); + } + } + } + } + } // END OF WHILE LOOP + + if(getCharReturn == KError) + { + return EFalse; + } + else + { + // GetChar returned KWEaitForChar + // GetChar handles pushing the state and requesting more data for us, so we just go active. + return ETrue; + } + } + + + + +//================================================================================== + +void CMDXMLParser::RunL() + { + TRequestStatus* s=&iStatus; + TInt err = s->Int(); + + switch(iState) + { + case KInitFromFile: + delete iFileSource; + iFileSource = NULL; + if( iFileToParse == NULL ) + { + // iFileToParse is not set, file was passed by open file handle. + iFileSource = CMDXMLParserFileDataSource::NewL(iFileHandleToParse); + } + else + { + iFileSource = CMDXMLParserFileDataSource::NewL(iRFs, *iFileToParse); + + delete iFileToParse; + iFileToParse = NULL; + } + + ParseSource(iFileSource); // will go active itself + break; + + case KDetermineCharset: + if(!iBytesPerChar) + { + if(DetectFileType()) + { + iState = KParseData; + SetActive(); + User::RequestComplete(s, KErrNone); + } + else + { + if(!iBomBuffer) + { + iBomBuffer = HBufC8::NewL(KUTF8EdgeBufferLen); + } + + TPtr8 bomDes(iBomBuffer->Des()); + TInt newLength = bomDes.Length() + iCurrentInputBufferLen; + iBomBuffer->ReAlloc(newLength); + + bomDes.Append(iInputBufferPtr); + if(iBomBuffer->Length() >=3) + { + iInputBufferPtr.Set(bomDes); + iCurrentInputBufferLen = newLength; + + SetActive(); + User::RequestComplete(s, KErrNone); + } + else + { + GetMoreData(); + } + } + } + else + { + iState = KParseData; + SetActive(); + User::RequestComplete(s, KErrNone); + } + break; + + case KWaitingForData: + switch(err) + { + case MMDXMLParserDataProvider::KMoreData: + // We got more data this time, make sure all the parameters are correct. + iCurrentInputBufferLen = iInputBufferPtr.Length(); + + iState = iPreviousState; + SetActive(); + User::RequestComplete(s, KErrNone); + break; + + case MMDXMLParserDataProvider::KDataStreamEnd: + iState = KFinished; + SetActive(); + User::RequestComplete(s, KErrNone); + break; + + default: + case MMDXMLParserDataProvider::KDataStreamError: + User::Leave(KErrCorrupt); + break; + } + + break; + + case KParseData: + if(!iParentElement) // initialise the parsing + { + iOpened = EFalse; + iClosed = EFalse; + // If we're going through the tag and find a < we don't know whether + // it will be valid (eg it starts a CDataSection) or whether it's an + // illegal character. Store its index so that when we're a bit further + // along in the string we can check whether it was allowed and if not + // set an Illegal Character error. + delete iNewElement; + iNewElement = NULL; + + __ASSERT_DEBUG(iXMLDoc != NULL, Panic(ENullMemVarXMLDoc)); + iParentElement = iXMLDoc->DocumentElement(); + + delete iElementTag; + iElementTag = NULL; + iElementTag = HBufC::NewL(KNominalTagLength); + } + + if(!iBytesPerChar) + { + // assume ascii/UTF8 + iBytesPerChar = 1; + } + + if(!DoParseLoopL()) + iState = KFinished; + + break; + + case KFinished: + // Check for any errors that we can pick up now, like missing doctype, or incomplete content + CheckForErrors(); + + // we want to leave this instance in a safe state where it can be restarted again with a single call to ParseSource. + // cannot delete this element, as it belongs to the document... + iParentElement = NULL; + iState = KDetermineCharset; + + iDataSource->Disconnect(); + iDataSource=NULL; + __ASSERT_DEBUG(iParserObserver != NULL, Panic(ENullMemVarParserObserver)); + iParserObserver->ParseFileCompleteL(); + break; + + default: + User::Leave(KErrUnknown); + break; + } + } + +void CMDXMLParser::CheckForErrors() + { + if(iError == KErrNone) + { + if(iParentElement) + { + // if iParentElement is not pointing to dummy root node, there has been a problem + if( (iParentElement == NULL) || (iParentElement->NodeName() != KXMLDocumentElementNodeName) ) + { + SetError(KErrXMLIncomplete, EXMLWorkable); + } + else if(!iParentElement->CheckChildren()) + { + SetError(KErrXMLInvalidChild, EXMLWorkable); + } + else if(iParentElement->FirstChild() != NULL) + { + // multiple real (not dummy) root elements + TInt count = 0; + CMDXMLNode* iterator = iParentElement->FirstChild(); + do + { + if(iterator->NodeType() == CMDXMLNode::EElementNode) + { + count++; + } + iterator = iterator->NextSibling(); + } + while(iterator != NULL); + + if(count != 1) + { + SetError(KErrXMLMultipleRootElements, EXMLWorkable); + } + } + } + } + if(iError == KErrNone && !iDocTypeSet) + { + SetError(KErrXMLMissingDocTypeTag, EXMLWorkable); + } + + if(iError == KErrNone && !iVersionSet) + { + SetError(KErrXMLMissingVersionTag, EXMLWorkable); + } + } + + +void CMDXMLParser::CreateDocumentL() +// +// Creates a generic or DTD-specific document object +// @leave can Leave due to OOM +// + { + delete iXMLDoc; + iXMLDoc = NULL; + + if (iDtdRepresentation != NULL) + iXMLDoc = CMDXMLDocument::NewL(*iDtdRepresentation); + else + iXMLDoc = CMDXMLDocument::NewL(); + } + + +//================================================================================== + +TBool CMDXMLParser::DocTypeL() +// +// @return Returns true if the current tag is a doctype tag and sets the +// Document DocType member accordingly on the first pass of this function. +// + { + TBool returnValue = EFalse; + TInt tagIdLen = TPtrC(KXMLDocumentTypes).Length(); + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + if(iElementTag->Length() > tagIdLen + && iElementTag->Left(tagIdLen) == KXMLDocumentTypes) + { + if(iDocTypeSet) + { + SetError(KErrXMLDuplicateDocTypeTags, EXMLWorkable); + } + else + { + iXMLDoc->SetDocTypeTagL(iElementTag->Des()); + iDocTypeSet = ETrue; + } + returnValue = ETrue; + iEndOfTag = ETrue; + } + + return returnValue; + } + +//================================================================================== + +TBool CMDXMLParser::ProcessingInstructionL(CMDXMLElement* aParentElement) +// +// creates a new processing instruction if necessary and adds to document +// @return Returns true if the current tag is a processing instruction +// + { + TBool returnValue = EFalse; + TInt startPILen = TPtrC(KXMLStartProcessingInstruction).Length(); + TInt endPILen = TPtrC(KXMLEndProcessingInstruction).Length(); + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + if((iElementTag->Left(startPILen) == KXMLStartProcessingInstruction) && + (iElementTag->Right(endPILen) == KXMLEndProcessingInstruction)) + { + if(aParentElement != NULL) + { + CMDXMLProcessingInstruction* inst = CMDXMLProcessingInstruction::NewLC(iXMLDoc); + + TPtrC instStr = iElementTag->Des().Mid(startPILen, + iElementTag->Length() - (startPILen + endPILen)); + inst->SetDataL(instStr); + + __ASSERT_DEBUG(aParentElement != NULL, Panic(ENullParameterParentElement)); + TInt error = aParentElement->AppendChild(inst); + CleanupStack::Pop(inst); + + if(error != KErrNone) + { + SetError(error, EXMLWorkable); + } + } + + returnValue = ETrue; + iEndOfTag = ETrue; + } + + return returnValue; + } + + +//================================================================================== + +TBool CMDXMLParser::CDataSectionL(CMDXMLElement* aParentElement) +// +// creates a new CDataSection if necessary and adds to document +// @return Returns true if the current tag is a CDataSection +// + { + TBool returnValue = EFalse; + TInt instLen = TPtrC(KXMLStartCDataSection).Length(); + TInt endCDataLen = TPtrC(KXMLEndCDataSection).Length(); + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + if (iElementTag->Left(instLen) == KXMLStartCDataSection) + { + returnValue = ETrue; + if ((iElementTag->Right(endCDataLen) == KXMLEndCDataSection) && (aParentElement != NULL)) + { + CMDXMLCDATASection* inst = CMDXMLCDATASection::NewLC(iXMLDoc); + + TPtrC instStr = iElementTag->Des().Mid(instLen, + iElementTag->Length() - (instLen + endCDataLen)); + inst->SetDataL(instStr); + + __ASSERT_DEBUG(aParentElement != NULL, Panic(ENullParameterParentElement)); + TInt error = aParentElement->AppendChild(inst); + CleanupStack::Pop(); // inst + + if(error != KErrNone) + { + SetError(error, EXMLWorkable); + } + } + iEndOfTag = ETrue; + } + + return returnValue; + } + + +//================================================================================== + +TBool CMDXMLParser::VersionIDL() +// +// @return returns true if the current tag is a version id tag and sets the +// Document Version member accordingly on the first pass of this function. +// + { + TBool returnValue = EFalse; + TInt tagIdLen = TPtrC(KXMLVersion).Length(); + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + if(iElementTag->Length() > tagIdLen + && iElementTag->Left(tagIdLen) == KXMLVersion) + { + if(iVersionSet) + { + SetError(KErrXMLDuplicateVersionTags, EXMLWorkable); + } + else + { + iXMLDoc->SetVersionTagL(iElementTag->Des()); + iVersionSet = ETrue; + } + returnValue = ETrue; + iEndOfTag = ETrue; + } + + return returnValue; + } + +//================================================================================== + +TBool CMDXMLParser::CommentL(CMDXMLElement* aParentElement) +// +// creates a new comment if necessary and adds to document +// @return returns true if the current tag is a comment tag +// + { + TBool returnValue = EFalse; + TInt commentLen = TPtrC(KXMLStartComment).Length(); + TInt endCommentLen = TPtrC(KXMLEndComment).Length(); + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + if (iElementTag->Left(commentLen) == KXMLStartComment) + { + returnValue = ETrue; + if ((aParentElement != NULL) && (iElementTag->Right(endCommentLen) == KXMLEndComment)) + { + CMDXMLComment* comment = CMDXMLComment::NewLC(iXMLDoc); + + TPtrC commentStr = iElementTag->Des().Mid(commentLen, + iElementTag->Length() - (commentLen + endCommentLen)); + comment->SetDataL(commentStr); + + __ASSERT_DEBUG(aParentElement != NULL, Panic(ENullParameterParentElement)); + TInt error = aParentElement->AppendChild(comment); + CleanupStack::Pop(); // comment + + if(error != KErrNone) + { + SetError(error, EXMLWorkable); + } + iEndOfTag = ETrue; + } + } + + return returnValue; + } + +//================================================================================== + +EXPORT_C void CMDXMLParser::SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth) + { + iBytesPerChar = aWidth; + } + +//================================================================================== + +void CMDXMLParser::GetMoreData() + { + + // Prepare these variables. + iNextChar = 0; // reading from the start of the buffer + iCurrentInputBufferLen = 0; // we have no characters in the buffer + iUnicodeConversion.Zero(); // we have no characters in our UTF8->Unicode Conversion buffer. + iUnicodeConversionLen = 0; + iUnicodeReadPos = 0; + + // Request more data from the data provider + __ASSERT_DEBUG(iDataSource != NULL, Panic(ENullMemVarDataSource)); + iDataSource->GetData(iInputBufferPtr, iStatus); + SetActive(); + iPreviousState = iState; + iState = KWaitingForData; + } + +//================================================================================== + +CMDXMLParser::TGetCharReturn CMDXMLParser::GetDoubleByteChar(TDes& aChar) + // when inputing we have a pointer to an 8 bit buffer (iInputBufferPtr), for unicode + // input we point a 16 bit descriptor (tempUnicodeInput) at the 8 bit buffer to + // enable us to read the 2 x 8 bit chars as a single 16 bit char. + // However it isn't always this simple as the data provider interface makes no guarantees + // on the alignment of this data. It's perfectly possible for it to end up with a unicode + // character where the high byte comes from the previous buffer and the low byte comes from + // the current one. This will put the rest of the current buffer out of line, and also all + // subsequent buffers unless an odd length buffer is provided. Hopfully this won't happen often. + { + aChar.Zero(); + + if(iUnicodeInputMisaligned) + { + TUint16 tempOut; + TUint8* tempRead; + tempRead = (TUint8*)(iInputBufferPtr.Ptr()) + iNextChar; + + if(iCurrentInputBufferLen - iNextChar >=1) + { + // if we saved a byte last time, lets use that first + if(iSpareChar.Length()) + { + tempOut = iSpareChar[0]; + iSpareChar.Zero(); + } + else if(iCurrentInputBufferLen - iNextChar >=2) + { + // we didn't save a byte, so we read from the stream. + tempOut = *tempRead; + tempRead++; + iNextChar++; + } + else + { + // our input stream must have been an odd length - this might cause alignment problems, + // so we need to start reading bytewise for a while + iUnicodeInputMisaligned = ETrue; + + TUint8* tempRead = (TUint8*)(iInputBufferPtr.Ptr()) + iNextChar; + TUint16 tempVal = (TUint16)(*tempRead << 8); + iSpareChar.Copy(&tempVal,1); + + GetMoreData(); + return KWaitForChar; + + } + + // second byte (high byte) of our output comes from the input stream in all cases. + tempOut |= ((*tempRead & 0xFF) << 8); + iNextChar++; + + TPtrC16 readDes(&tempOut, 1); + aChar = readDes.Left(1); + } + } + else if(iCurrentInputBufferLen - iNextChar >= 2) + { + // we may be in a position where we don't know we're going to lose a byte + // so we'll test for that ahead of time, and then handle that in the normal way + + // if we execute this, it means that we have two bytes available to read. + const TUint16* word = reinterpret_cast((iInputBufferPtr.Ptr() + iNextChar)); + TPtrC16 tempUnicodeInput(word, 2); + aChar = tempUnicodeInput.Left(1); + iNextChar+=2; + return KCharReturned; + } + + TInt bytesRemaining = iCurrentInputBufferLen - iNextChar; + + switch(bytesRemaining) + { + case 1: + { + // our input stream must have been an odd length - this might cause alignment problems, + // so we need to start reading bytewise for a while + + TUint8* tempRead = (TUint8*)(iInputBufferPtr.Ptr()) + iNextChar; + TUint16 tempVal = *tempRead; + iSpareChar.Copy(&tempVal,1); + iUnicodeInputMisaligned = ETrue; + iNextChar++; + if(!aChar.Length()) + { + GetMoreData(); + return KWaitForChar; + } + } + break; + case 0: + { + // we're at the end of this block, and it's turned out to be re-aligned. + // we can read subsequent blocks in 16-bit chunks. + iUnicodeInputMisaligned = EFalse; + } + break; + } + + return KCharReturned; + } + +//================================================================================== + +CMDXMLParser::TGetCharReturn CMDXMLParser::GetSingleByteChar(TDes& aChar) + { + // We have UTF8/ASCII Source, and we must need to convert some more if we got here. + iUnicodeConversion.Zero(); + iUnicodeConversionLen = 0; + iUnicodeReadPos = 0; + + // if we are not operating out of the edge buffer yet, work on the real one + if(!iUTF8EdgeBuffer) + { + // This is an 8 bit encoding, probably UTF-8, but could be ASCII. Because + // ASCII is valid UTF-8 we can just convert what we have to Unicode. + // We're going to convert a number of characters at a time here. + TInt inputBytesRemaining = iCurrentInputBufferLen - iNextChar; + TInt convResult; + TPtrC8 tempPtr( (TUint8*)(iInputBufferPtr.Ptr()) + iNextChar, inputBytesRemaining ); + + convResult = CnvUtfConverter::ConvertToUnicodeFromUtf8(iUnicodeConversion, tempPtr); + if((convResult >= 0) || (convResult == KErrCorrupt)) + { + // Sometimes the UTF8 decoder might return corrupt if it only gets a single character + // in this case we ignore the error and report that we have converted 0 characters + if (convResult == KErrCorrupt) + convResult = tempPtr.Length(); + + // This is the number of bytes converted. + // Keep an eye out in case there is no change in the character consumed count. + TInt bytesConverted = inputBytesRemaining - convResult; + + // We consumed characters. Make our input buffer read position correct. + iNextChar += bytesConverted; + + // Make our intermediate buffering correct and return the first character out of our buffer + // subsequent calls will just return characters from this buffer. + iUnicodeConversionLen = iUnicodeConversion.Length(); + aChar = iUnicodeConversion.Left(1); + iUnicodeReadPos = 1; + + if(convResult && convResult < KUTF8EdgeBufferLen) + { + TUint8* multiByteCheck = (TUint8*)(iInputBufferPtr.Ptr()) + iNextChar; + + // There is a possibility that we've got an edge case here + //check if our left over character is in fact UTF8. + if((0x80 & *multiByteCheck) != 0) + { + // Shift 'convResult' characters off into the edge buffer. + delete iUTF8EdgeBuffer; + iUTF8EdgeBuffer = HBufC8::New(KUTF8EdgeBufferLen); + *iUTF8EdgeBuffer = iInputBufferPtr.Right(convResult); + + TUint8 bitMask = 0x80; + TInt byteCount = 0; + while(bitMask && (bitMask & (iUTF8EdgeBuffer->Des()[0])) != 0) + { + bitMask >>= 1; + byteCount++; + }; + + if(!bitMask) + { + // the utf8 stream appears to be corrupt. + SetError(KError, EXMLFatal); + return KError; + } + // we need to find byteCount characters to make up the character currently stored in the edge + // buffer. + iRequiredUTF8Bytes = byteCount - iUTF8EdgeBuffer->Length(); + + // set the variables up so that we return any converted characters, and then begin work + // on the edge buffer (where we've already cached the remaining bytes if any) + // NOTE: We will return all the characters which we preconverted into iUnicodeConversion *before* + // we begin dealing with the edge buffer, because of the structure of this function. + iNextChar = iCurrentInputBufferLen; + + if (bytesConverted == 0) + // If no bytes were converted then there is nothing to return, + // we need to wait for more data and then conbine it with what we have + // just put on the edge buffer. + { + // need more bytes to finish this character. + GetMoreData(); + return KWaitForChar; + } + } + } + } + + else + { + return KError; // something failed in the UTF8 Converter. + } + } + else + { + // We are converting the UTF8 Edge Buffer. We know that in it's current state, it + // can't be converted to Unicode. + // Decide if we have enough characters in our current input stream to convert to an + // output character (or two) yet. + + if(iUTF8EdgeBuffer->Length() >= KUTF8EdgeBufferLen) + { + // our edge buffer reached the maximum length for a UTF8 character + // and we haven't managed to convert a unicode output. + // this means that the input stream is corrupt. + delete iUTF8EdgeBuffer; + iUTF8EdgeBuffer = NULL; + + // Report a fatal error. + SetError(KError, EXMLFatal); + return KError; + } + else + { + TInt convResult; + + // we know how many bytes are required in order to complete the utf8 buffer + TInt bytesAvailable = iCurrentInputBufferLen - iNextChar; + + if(bytesAvailable >= iRequiredUTF8Bytes) + { + // we have enough bytes to complete this character. + // Go ahead and convert then return it. + iUTF8EdgeBuffer->Des().Append(iInputBufferPtr.Mid(iNextChar, iRequiredUTF8Bytes)); + iUnicodeConversion.Zero(); + convResult = CnvUtfConverter::ConvertToUnicodeFromUtf8(iUnicodeConversion, iUTF8EdgeBuffer->Des()); + + // regardless if we managed to convert this buffer or not, we don't need it any more. + delete iUTF8EdgeBuffer; + iUTF8EdgeBuffer = NULL; + + // make sure we report any error in the conversion + if(convResult != 0) + { + // we either incorrectly calculated the required number of bytes or the + // stream is corrupt. Either way we have a fatal error. + SetError(KError, EXMLFatal); + return KError; + } + + // Make our intermediate buffering correct and return the first character out of our buffer + // subsequent calls will just return characters from this buffer. + iUnicodeConversionLen = iUnicodeConversion.Length(); + aChar = iUnicodeConversion.Left(1); + iUnicodeReadPos = 1; + + // set up the main input buffers so that the next char comes from the input stream. + iNextChar += iRequiredUTF8Bytes; + iRequiredUTF8Bytes = 0; + } + else + { + // we haven't got enough bytes to complete this character, store the + // available byte(s) and request more data. + iUTF8EdgeBuffer->Des().Append(iInputBufferPtr.Mid(iNextChar, bytesAvailable)); + + // Move the next character index on for as many bytes as we have just added to the edge buffer + iNextChar += bytesAvailable; + // We can reduce the number of bytes require by the number of bytes added to the edge buffer + iRequiredUTF8Bytes -= bytesAvailable; + + // need more bytes to finish this character. + GetMoreData(); + return KWaitForChar; + } + } + } + + return KCharReturned; + } + +//================================================================================== + +CMDXMLParser::TGetCharReturn CMDXMLParser::GetChar(TDes& aChar) +// +// Fetch one character from the input file +// @param aChar the returned character. +// @return returns true if a character returned or false if the file is finished +// + { + // first test - see if we're providing preconverted characters. + if(iUnicodeConversionLen && iUnicodeReadPos < iUnicodeConversionLen) + { + // return one of the preconverted chars. + aChar = iUnicodeConversion.Mid(iUnicodeReadPos, 1); + iUnicodeReadPos++; + + return KCharReturned; + } + + // Second test - see if we require more data. If we have converted data and we require + // more data, this code is not intelligent enough to request data from the provider + // early, but that's ok. + + // Buffer length held as a member variable for performance reasons + // this function will be accessed thousands of times + if(iCurrentInputBufferLen <= iNextChar) + { + GetMoreData(); + return KWaitForChar; + } + + // return the character, handling any of the buffer shuffling we need to do. + if(iBytesPerChar == 2) + return GetDoubleByteChar(aChar); + else + return GetSingleByteChar(aChar); + } + +//================================================================================== + +CMDXMLElement* CMDXMLParser::ParseStartTagL() +// +// Parse a start of element tag and create an element with attributes set. +// @return Returns a pointer to the created element +// @leave can Leave due to OOM +// + { + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + + // there must be at least two angle brackets and a single character to be meaningful + if(iElementTag->Length() < 3) + return NULL; + + CMDXMLElement* newElement = NULL; + TPtr elementTagPtr = iElementTag->Des(); + + // remove the angle brackets and trim white space + if(iElementTag->Right(TPtrC(KXMLEndStartTag).Length()) == KXMLEndStartTag) + elementTagPtr = iElementTag->Left(iElementTag->Length() - TPtrC(KXMLEndStartTag).Length()); + else + elementTagPtr = iElementTag->Left(iElementTag->Length() - TPtrC(KXMLEndTag).Length()); + + elementTagPtr = iElementTag->Right(iElementTag->Length() - TPtrC(KXMLStartTag).Length()); + elementTagPtr.Trim(); + + // find out where the name ends and the attributes begin + TLex16 element(elementTagPtr); + element.SkipCharacters(); + TInt endOfName = element.Offset(); + + // separate out the name from the attributes + HBufC* elementName = (iElementTag->Left(endOfName)).AllocLC(); + TPtr elementNamePtr = elementName->Des(); + elementNamePtr.TrimRight(); + + TInt error = KErrNone; + + __ASSERT_DEBUG(iXMLDoc != NULL, Panic(ENullMemVarXMLDoc)); + + TBool validElement = iXMLDoc->ValidElementNameL(elementNamePtr); + if(validElement || iStoreInvalid) + { + // remove the actual name from the tag so we only pass on the attributes + elementTagPtr = iElementTag->Right(iElementTag->Length() - endOfName); + elementTagPtr.TrimLeft(); + newElement = CMDXMLElement::NewLC(iXMLDoc->CanElementHaveChildren(elementNamePtr), iXMLDoc, elementNamePtr); + + error = ParseElementAttributesL(*newElement, elementTagPtr); + CleanupStack::Pop(); + } + if(!validElement) + { + error = KErrXMLInvalidElement; + } + + CleanupStack::PopAndDestroy(elementName); // elementName + + if(error != KErrNone) + { + SetError(error, EXMLWorkable); + } + + return newElement; + + } + + +TInt CMDXMLParser::ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse) +// +// This function is used to parse the attributes. +// @param aElement The element to which the attributes belong +// @param aTagToParse The tag to be parsed +// @return Returns KErrNone if both attribute name & value are valid +// KErrXMLBadAttributeName if attribute name is invalid or KErrXMLBadAttributeValue is invalid +// @leave can Leave due to OOM +// + { + TInt error = KErrNone; + TInt attributeError = KErrNone; + HBufC* attributeName = NULL; + HBufC* attributeValue = NULL; + TBuf<1> attributeDelimiter; // may be " or ' + TInt offset = KErrNone; + + offset = aTagToParse.Find(KEqualSign); + while(offset != KErrNotFound) + { + attributeName = TPtrC(aTagToParse.Left(offset)).AllocLC(); + TPtr attributeNamePtr = attributeName->Des(); + attributeNamePtr.TrimRight(); // remove white space that existed between name and equal sign + + // remove current attribute name and equal sign from string + aTagToParse = aTagToParse.Right(aTagToParse.Length() - (offset + 1)); + aTagToParse.TrimLeft(); // remove white space that existed between equal sign and delimiter + + if(error == KErrNone && aTagToParse.Length() < 2) // a name must be at followed by at least 2 delimiters + { + error = KErrXMLBadAttributeName; + // In this case, there is insufficient tag left to contain any more attributes + } + else if(error == KErrNone && aElement.IsAttributeSpecified(*attributeName)) + { + error = KErrXMLDuplicateAttributeName; + // We need to remove the attribute value from the tag string or it will be + // picked up as part of the next attribute name. + attributeDelimiter = aTagToParse.Left(1); + + // Check in case we've got a missing " at the beginning of the attribute. If + // we do, we need to try a different strategy to find the end of this attribute + if (attributeDelimiter != KQuotation && attributeDelimiter != KApostrophe) + { + offset = LocateNextAttribute(aTagToParse); + } + else + { + // remove start delimiter then search for next one (end delimiter) + aTagToParse = aTagToParse.Right(aTagToParse.Length() - 1); + offset = FindDelimiter(aTagToParse, attributeDelimiter); + } + + if(offset != KErrNotFound) + { + // remove current attribute value and delimiter + aTagToParse = aTagToParse.Right(aTagToParse.Length() - (offset + 1)); + aTagToParse.TrimLeft(); // remove white space that existed between delimiter and next name + } + } + else + { + attributeDelimiter = aTagToParse.Left(1); + + if (attributeDelimiter != KQuotation && attributeDelimiter != KApostrophe) + { + // This attribute doesn't have a valid delimiter. Try and find the beginning of the next + // attribute and just cut this one. + TInt nextAttribute = LocateNextAttribute(aTagToParse); + if(nextAttribute > 0) + { + // Add one to next attribute because the offset includes the whitespace before it + aTagToParse = aTagToParse.Right(aTagToParse.Length() - (nextAttribute + 1)); + } + + if (error == KErrNone) + { + error = KErrXMLBadAttributeValue; + } + } + else + { + // remove start delimiter then search for next one (end delimiter) + aTagToParse = aTagToParse.Right(aTagToParse.Length() - 1); + + offset = FindDelimiter(aTagToParse, attributeDelimiter); + if(offset != KErrNotFound) + { + attributeValue = TPtrC(aTagToParse.Left(offset)).AllocLC(); + TPtr attributeValuePtr = attributeValue->Des(); + attributeValuePtr.TrimRight(); // remove white space that existed between value and delimiter + + // remove current attribute value and delimiter + aTagToParse = aTagToParse.Right(aTagToParse.Length() - (offset + 1)); + aTagToParse.TrimLeft(); // remove white space that existed between delimiter and next name + + // Entity convert this attribute + attributeError = ParseSingleAttributeL(attributeValuePtr); + if( attributeError != KErrNone && error == KErrNone) + { + error = attributeError; + } + + attributeError = aElement.SetAttributeL(*attributeName, *attributeValue, iStoreInvalid); + if( attributeError != KErrNone && error == KErrNone) + { + error = KErrXMLInvalidAttribute; + } + CleanupStack::PopAndDestroy(attributeValue); //attributeValue + } + else if(error == KErrNone) + { + error = KErrXMLBadAttributeValue; + } + } + } + + // next attribute pair + offset = aTagToParse.Find(KEqualSign); + CleanupStack::PopAndDestroy(attributeName); //attributeName + } + + if(error == KErrNone && aTagToParse.Length() != 0) + { + error = KErrXMLBadAttributeValue; + } + + return error; + } + +TInt CMDXMLParser::LocateNextAttribute(const TDesC& aTagToParse) + { + // Find the next attribute by looking for an = then search back for a ' '. + // This is useful when you've hit rubbish parsing the content of a start tag + // and are looking for somewhere sensible to start. + TInt nextAttribute = KErrNotFound; + TInt offset = aTagToParse.Find(KEqualSign); + + // If the = is the first character then there isn't space for a ' ' so + // don't bother looking + if(offset > 0) + { + TPtrC invalidText = aTagToParse.Left(offset); + nextAttribute = invalidText.LocateReverse(' '); + } + + return nextAttribute; + } + +TInt CMDXMLParser::ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse) +// +// Parses an end tag. In fact, at this point the end tag must match +// the tag name of the start tag. +// @param aTagToParse Text of the end tag. +// @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch. +// + { + // The tag should be of the form '' where tag is the name of this element so we will + // check and strip off the surrounding and then compare the remains with this # + // node name. + TInt retVal = KErrNone; + if( aTagToParse.Length() != (aElement.NodeName().Length()+3)) + { + retVal = KErrNotFound; + } + else + { + TInt startEndTagLen = TPtrC(KXMLStartEndTag).Length(); + TInt endTagLen = TPtrC(KXMLEndTag).Length(); + + if((aTagToParse.Left(startEndTagLen).Compare(KXMLStartEndTag) == 0) && + (aTagToParse.Right(endTagLen).Compare(KXMLEndTag) == 0)) + { + if(aElement.NodeName().Compare(aTagToParse.Mid(2, + aTagToParse.Length() - (startEndTagLen + endTagLen))) != 0) + { + retVal = KErrNotFound; + } + } + } + return retVal; + } + + +//================================================================================== + +EXPORT_C void CMDXMLParser::SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity) +// +// Sets iError to new errorcode if more serious than any error so far encountered +// + { + if(iSeverity > aSeverity) + { + iSeverity = aSeverity; + iError = aErrorCode; + } + } + +//================================================================================== + +EXPORT_C TInt CMDXMLParser::Error() const + { + return iError; + } + +//================================================================================== + +EXPORT_C TXMLErrorCodeSeverity CMDXMLParser::ErrorSeverity() const + { + return iSeverity; + } + +//================================================================================== + +void CMDXMLParser::HandleTextL(TDes& aChar) +// +// Called when a character is read in and found to bo outside of an element tag +// + { + // Save the text in a buffer. + // This text will get added as as a child element when the next tag is encounted. + if (iText == NULL) + iText = HBufC::NewL(KGMXMLDefaultTextBufferSize); + + if (iText->Length() == iText->Des().MaxLength()) + // The buffer will overflow if we add another character. + // Need to reallocate. + { + iText = iText->ReAllocL(iText->Des().MaxLength() + KGMXMLDefaultTextBufferSize); + } + + iText->Des().Append(aChar); + } + +TBool CMDXMLParser::EndOfCDataSection() + { + TBool endOfCData = EFalse; + TPtrC cdataEndSection(KXMLEndCDataSection); + TInt instLen = TPtrC(KXMLEndCDataSection).Length()-1; + + __ASSERT_DEBUG(iElementTag != NULL, Panic(ENullMemVarElementTag)); + if(iElementTag->Right(instLen) == cdataEndSection.Left(instLen)) + { + if(iElementTag->Left(instLen) == KXMLStartCDataSection) + endOfCData = ETrue; + } + + return endOfCData; + } + +TInt CMDXMLParser::CheckForStartCData(const TDesC& aTextToCheck) + { + TInt index; + index = aTextToCheck.Find(KXMLStartCDataSection); + return index; + } + +TInt CMDXMLParser::FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind) + { + TInt currentOffset = 0; + TInt nextDelimiter = KErrNotFound; + TBool valid = EFalse; + TPtrC unsearchedData(aDataToSearch); + + while (!valid && ((nextDelimiter = unsearchedData.Find(aDelimiterToFind)) != KErrNotFound)) + { + // If this isn't the first time round the loop (When currentOffset == 0) we're moved + // our attention to the character after the delimiter we found, so add one to currentOffset + if(currentOffset != 0) + { + currentOffset += 1; + } + // We have a delimiter, add the position of this to currentOffset + currentOffset += nextDelimiter; + + // Check whether this delimiter is in a CDataSection, it's valid if it isn't + TPtrC delimiterToCheck = aDataToSearch.Left(currentOffset); + valid = !InCDataSection(delimiterToCheck); + + // Move on to the next section of text in case this one wasn't valid + unsearchedData.Set(aDataToSearch.Mid(currentOffset + 1)); + } + + if ((nextDelimiter == KErrNotFound) && (!valid)) + { + return KErrNotFound; + } + else + { + return currentOffset; + } + } + +void CMDXMLParser::AddTextL(CMDXMLElement* aParentElement) + { + if ((aParentElement != NULL) && (iText != NULL)) + // Add any buffered text to the parent element unless it contains only whitespace + { + // Strip off any leading whitespace + TInt stripCounter = 0; + + if (!iPreserve) // GmXML consumes whitespace characters + { + TBool endOfWhitespace = EFalse; + while ((stripCounter < iText->Length()) && (!endOfWhitespace)) + { + // If character is not 0x20 (space) and not between 0x09 and 0x0d + // it isn't whitespace + if( ((*iText)[stripCounter] != 0x20) && + !((*iText)[stripCounter] >= 0x09 && (*iText)[stripCounter] <= 0x0d)) + { + endOfWhitespace = ETrue; + } + else + { + stripCounter++; + } + } + } + + HBufC* strippedText = TPtrC(iText->Right(iText->Length() - stripCounter)).AllocLC(); + + if (strippedText->Length() > 0) + // If there is anything left of the stripped text then entity convert and add it. + { + TPtr toConvert = strippedText->Des(); + TInt error = iEntityConverter->EntityToTextL(toConvert); + if( error != KErrNone ) + { + SetError(error, EXMLIndeterminate); + } + CMDXMLText* textElement = CMDXMLText::NewLC(iXMLDoc); + textElement->SetDataL(*strippedText); + CleanupStack::Pop(textElement); + TInt err = aParentElement->AppendChild(textElement); + if(err != KErrNone) + { + SetError(err, EXMLWorkable); + } + } + + CleanupStack::PopAndDestroy(strippedText); + iText->Des().Zero(); + } + } + +TBool CMDXMLParser::InCDataSection(TDesC& aDataToSearch) + { + TBool inCDataSection = EFalse; + TInt startCData = CheckForStartCData(aDataToSearch); + TInt endCData = 0; + + while ((startCData != KErrNotFound) && !inCDataSection) + { + // We only want to look for the end of the CDataSection in the part of + // aDataToSearch after the start of the CDataSection. We know that the + // first (TPtrC)KXMLStartCDataSection.Length() of the data we're looking + // at won't match because it's the start tag, but it's probably more + // efficient to check the extra few characters than to work out the + // length of the tag so we can ignore them. + startCData += endCData; + TPtrC afterStart = aDataToSearch.Mid(startCData); + endCData = afterStart.Find(KXMLEndCDataSection); + if (endCData == KErrNotFound) + { + // We haven't found a match for the start of the CDataSection so + // we must still be in it -> "<" is valid. + inCDataSection = ETrue; + } + else + { + // We found a match for the start of the CDataSection. Check to + // see if another one has started since then. + endCData += startCData; + TPtrC afterEnd = aDataToSearch.Mid(endCData); + startCData = CheckForStartCData(afterEnd); + } + } + + return inCDataSection; + } + +TInt CMDXMLParser::ParseSingleAttributeL(TDes& aAttributeValue) + { + TInt error = KErrNone; + TInt beginSection = 0; + TInt endSection = aAttributeValue.Find(KXMLStartCDataSection); + + // We've found at least one CDataSection + while(endSection != KErrNotFound) + { + // Entity convert this plain text section + HBufC* textToConvert = TPtrC(aAttributeValue.Mid(beginSection, endSection)).AllocLC(); + TPtr toConvert = textToConvert->Des(); + error = iEntityConverter->EntityToTextL(toConvert); + aAttributeValue.Replace(beginSection, endSection, *textToConvert); + CleanupStack::PopAndDestroy(textToConvert); + + // Move on our markers. We start the new section at the end of the old one. + beginSection += endSection; + // The end of this new section is the end of the CDataSection + endSection = TPtrC(aAttributeValue.Mid(beginSection)).Find(KXMLEndCDataSection); + + if(endSection != KErrNotFound) + { + // Now move on our markers again. Start at the end of the CDataSection, + // plus the length of the end tag, and continue to the beginning of the next one. + beginSection += endSection + TPtrC(KXMLEndCDataSection).Length(); + endSection = TPtrC(aAttributeValue.Mid(beginSection)).Find(KXMLStartCDataSection); + } + else + { + // There's an unterminated CDataSection in our attribute + error = KErrXMLBadAttributeValue; + } + } + + // There are no more CDataSections, entity convert the rest of the string + if(!error) + { + HBufC* textToConvert = TPtrC(aAttributeValue.Mid(beginSection)).AllocLC(); + TPtr toConvert = textToConvert->Des(); + error = iEntityConverter->EntityToTextL(toConvert); + aAttributeValue.Replace(beginSection, (aAttributeValue.Length()-beginSection), *textToConvert); + CleanupStack::PopAndDestroy(textToConvert); + } + + return error; + } + + +EXPORT_C void CMDXMLParser::PlaceholderForRemovedExport1(MMDXMLParserObserver* /*aParserObserver*/) + { + User::Panic(KLDRIMPORT, KLdrImportedOrdinalDoesNotExist); + } + +EXPORT_C void CMDXMLParser::PlaceholderForRemovedExport2(MMDXMLParserObserver* /*aParserObserver*/, MXMLDtd* /*aDtdRepresentation*/) + { + User::Panic(KLDRIMPORT, KLdrImportedOrdinalDoesNotExist); + } + +EXPORT_C void CMDXMLParser::PlaceholderForRemovedExport3() + { + User::Panic(KLDRIMPORT, KLdrImportedOrdinalDoesNotExist); + } + +// End of File