--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dependencies/xcfw/gmxmlparser.h Fri Mar 19 13:46:28 2010 +0200
@@ -0,0 +1,510 @@
+// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// This file contains the declaration of the generic CMDXMLParser class
+// which is responsible for creating a DOM structure
+// from a given XML file.
+//
+//
+
+/**
+ @file
+*/
+
+#ifndef __GMXMLPARSER_H__
+#define __GMXMLPARSER_H__
+
+#include <e32std.h>
+#include <txtetext.h>
+#include <gmxmlconstants.h>
+#include <f32file.h>
+
+//forward reference
+class CMDXMLDocument;
+class CMDXMLEntityConverter;
+class CMDXMLElement;
+class MXMLDtd;
+
+
+
+class MMDXMLParserObserver
+/** Abstract observer interface for notification when XML parsing is complete.
+
+It should be implemented by users of CMDXMLParser, whose factory functions take a pointer to it.
+@publishedAll
+@released*/
+ {
+public:
+ /**
+ Callback function used to inform a client of the parser when a parsing operation completes.
+ */
+ virtual void ParseFileCompleteL() = 0;
+ };
+
+class MMDXMLParserDataProvider
+/** Abstract data source interface for XML data source.
+
+The user of CMDXMLParser must build one of these to encapsulate the data source
+that they wish to parse. CMDXMLParser implements a file-based data source to
+implement the functionality of the ParseFile function.
+
+@publishedAll
+@released*/
+ {
+public:
+ /** Status codes with which GetData() implementations complete the request status. */
+ enum TDataProviderResults
+ {
+ KMoreData, ///< Returned by the interface implementation when it is returning more data.
+ KDataStreamError, ///< Returned by the interface when an unrecoverable error prevents obtaining more data. A recoverable error should be represented by KDataNotReady. NOTE(review): KDataNotReady is not declared in this enum - confirm the intended value.
+ KDataStreamEnd ///< Returned by the interface when there is no more data to come.
+ };
+
+public:
+ /**
+ The XML Parser calls this on a specific data provider to get more data
+ when required.
+
+ Note that the TPtrC8 supplied may be used by the parser at any time
+ between the return of this call and the next call that the parser
+ makes to the data provider.
+
+ Your data provider must not move the data pointed to until the
+ parser has indicated that it's done with that block by asking for
+ another.
+
+ Ownership of the data pointed to remains with the data provider.
+
+
+ General comments on efficiency
+ ------------------------------
+
+ The parser is designed such that it processes the whole data block
+ provided in one go. It will automatically become asynchronous when
+ another block is required - the data provider only needs to supply
+ data.
+
+ Because of this design, it allows the data provider to indirectly
+ control the amount of processing time that will be needed
+ in a single block.
+
+ It is a good idea to balance the need for the fastest possible
+ processing with the need for client application responsiveness by
+ ensuring that the amount of data passed in a single block is not
+ too large. However, it is worth bearing in mind that the parser
+ will convert UTF8 data streams in blocks of 32 characters, and
+ supplying blocks of smaller length than this will result in a
+ slight loss of efficiency.
+
+ @param aPtr On return, the data provided
+ @param aStatus Asynchronous status to be completed by the function with a
+ TDataProviderResults value
+ */
+ virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
+ /**
+ Called to indicate that use of the data source is complete.
+ */
+ virtual void Disconnect() = 0;
+ };
+
+class CMDXMLParserFileDataSource;
+
+class CMDXMLParser: public CActive
+/** Creates a DOM structure from a given XML file.
+
+The parsing operation is asynchronous and is initiated by a call to ParseFile() or ParseSourceL().
+On completion, the created DOM document can be retrieved through DetachXMLDoc().
+
+Note the following ownership rules for the DOM document:
+
+1. calling DetachXMLDoc() transfers ownership of the document to the client
+
+2. if the parser is asked to parse a new file while it still owns an existing
+DOM document, it will delete the old document.
+
+@publishedAll
+@released
+*/
+ {
+public:
+ /** Allocates and constructs a new XML parser, without specifying a DTD.
+
+ @param aParserObserver XML parser observer
+ @leave KErrNoMemory Out of memory
+ @return New XML parser */
+ IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
+
+ /** Allocates and constructs a new XML parser, specifying a DTD.
+
+ @param aParserObserver XML parser observer
+ @param aDtdRepresentation DTD validator
+ @leave KErrNoMemory Out of memory
+ @return New XML parser */
+ IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+ /** Allocates and constructs a new XML parser, leaving the object on the cleanup
+ stack.
+
+ @param aParserObserver XML parser observer
+ @leave KErrNoMemory Out of memory
+ @return New XML parser */
+ IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
+
+ /** Allocates and constructs a new XML parser, specifying a DTD, and leaves the
+ object on the cleanup stack.
+
+ @param aParserObserver XML parser observer
+ @param aDtdRepresentation DTD validator
+ @leave KErrNoMemory Out of memory
+ @return New XML parser */
+ IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+
+ /** Destructor. */
+ IMPORT_C ~CMDXMLParser();
+
+ /** Gets the last error found by the parser.
+
+ @return Error code
+ */
+ IMPORT_C TInt Error() const;
+
+ /**
+ Get the severity of the most severe error found.
+ @return the maximum error severity
+ */
+ IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const;
+
+ /** Gets the created DOM.
+
+ This should be called after the conclusion of the parser process.
+
+ Note that the function sets the internal variable pointing to the document
+ to NULL, so this function can only be called once per file parse. The caller
+ takes ownership of the document, and must delete it when its use is complete.
+
+ @return The created DOM */
+ IMPORT_C CMDXMLDocument* DetachXMLDoc();
+
+ /** Parses a specified XML file into a DOM object tree.
+
+ @param aRFs File server session
+ @param aFileToParse The file name to parse
+ @return KErrNone if success or a file read error code */
+ IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
+ /** Overload taking an RFile handle instead of a file name. NOTE(review): return codes presumably as for the overload above - confirm. */
+ IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
+
+ /** Parses a specified XML Data Source into a DOM object tree.
+ Use ParseSourceL() in preference: this inline wrapper runs it under TRAP_IGNORE, so any leave is discarded.
+ @param aSource MMDXMLParserDataProvider pointer
+ */
+ inline void ParseSource(MMDXMLParserDataProvider *aSource)
+ {
+ TRAP_IGNORE(ParseSourceL(aSource));
+ }
+
+ /** Parses a specified XML Data Source into a DOM object tree.
+ @param aSource MMDXMLParserDataProvider pointer
+ */
+ IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
+
+ /** Defines input stream character widths. */
+ enum TMDXMLParserInputCharWidth
+ {
+ EAscii = 0x01, ///< ASCII
+ EUnicode = 0x02 ///< Unicode
+ };
+
+ /** Sets the input stream character width.
+ *
+ * @param aWidth Character width for incoming stream. Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
+ *
+ */
+ IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
+
+ //Defect fix for INC036136- Enable the use of custom entity converters in GMXML
+ /**
+ * Sets the entity converter to be used for parsing
+ * and takes ownership of the passed entity converter.
+ * @param aEntityConverter the entity converter to be used.
+ */
+ IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
+ //End Defect fix for INC036136
+
+ /**
+ Controls whether invalid elements and attributes are added to the DOM.
+ @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
+ */
+ IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
+
+ /**
+ Controls whether whitespace is handled by the XML parser or by the client.
+ @param aPreserve ETrue if all whitespace should be preserved (handled by client), EFalse otherwise.
+ */
+ IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
+
+public: // public functions used by other classes within the .dll, not for Export.
+ /** Gets the entity converter.
+
+ @return The entity converter */
+ CMDXMLEntityConverter* EntityConverter();
+
+private:
+ IMPORT_C virtual void DoCancel(); // NOTE(review): presumably the CActive cancel hook for the outstanding data request - confirm
+
+ /*
+ * RunL function inherited from CActive base class - carries out the actual parsing.
+ * @leave can Leave due to OOM
+ */
+ virtual void RunL();
+
+ /*
+ * Helper function that does the parsing - called from inside RunL
+ */
+ TBool DoParseLoopL();
+
+ /*
+ * RunError function inherited from CActive base class - intercepts any Leave from
+ * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
+ */
+ IMPORT_C TInt RunError(TInt aError);
+
+ /*
+ * Constructors
+ */
+ CMDXMLParser(MMDXMLParserObserver* aParserObserver);
+
+ CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+ /*
+ * Called when a character is read in and found to be outside of an element tag
+ */
+ virtual void HandleTextL(TDes& aChar);
+
+ enum TGetCharReturn
+ {
+ KError = 0x00, // GetChar detected an error
+ KCharReturned, // GetChar returned a character
+ KWaitForChar // GetChar couldn't return a character this time, but might next time.
+ };
+
+ /*
+ * Fetch one character from the input file
+ * @param aChar the returned character.
+ * @return returns one of the values of TGetCharReturn
+ */
+ TGetCharReturn GetChar(TDes& aChar);
+
+ /* utility functions, called from GetChar to deal with the
+ * 2 types of input stream
+ */
+ TGetCharReturn GetDoubleByteChar(TDes& aChar);
+ TGetCharReturn GetSingleByteChar(TDes& aChar);
+
+ /*
+ * Fetch some more data from the data provider
+ * Declared void: no status is returned directly to the caller.
+ */
+ void GetMoreData();
+
+ /*
+ * @return Returns true if the current tag is a doctype tag and sets the
+ * Document DocType member accordingly on the first pass of this function.
+ */
+ TBool DocTypeL();
+
+ /*
+ * creates a new processing instruction if necessary and adds to document
+ * @return Returns true if the current tag is a processing instruction
+ */
+ TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
+
+ /*
+ * creates a new CDataSection if necessary and adds to document
+ * @return Returns true if the current tag is a CDataSection - TODO confirm against implementation
+ */
+ TBool CDataSectionL(CMDXMLElement* aParentElement);
+ TBool EndOfCDataSection();
+
+ /*
+ * @return returns true if the current tag is a version id tag and sets the
+ * Document Version member accordingly on the first pass of this function.
+ */
+ TBool VersionIDL();
+
+ /*
+ * creates a new comment if necessary and adds to document
+ * @return returns true if the current tag is a comment tag
+ */
+ TBool CommentL(CMDXMLElement* aParentElement);
+
+ /*
+ * Parse a start of element tag and create an element with attributes set.
+ * @return Returns a pointer to the created element
+ * @leave can Leave due to OOM
+ */
+ virtual CMDXMLElement* ParseStartTagL();
+
+ /*
+ * Detects the type of a file - can be Unicode or UTF-8
+ */
+ TBool DetectFileType();
+
+ /*
+ * Creates a generic or DTD-specific document object
+ * @leave can Leave due to OOM
+ */
+ virtual void CreateDocumentL();
+
+ /*
+ * Sets iError to new errorcode if more serious than any error so far encountered
+ */
+ IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
+
+ /*
+ * This function is used to parse the attributes.
+ * @param aElement The element to which the attributes belong
+ * @param aTagToParse The tag to be parsed
+ * @return Returns KErrNone if both attribute name & value are valid,
+ * KErrXMLBadAttributeName if the attribute name is invalid, or KErrXMLBadAttributeValue if the attribute value is invalid
+ * @leave can Leave due to OOM
+ */
+ TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
+
+ /**
+ This function locates the next attribute in the tag.
+ @param aTagToParse the tag to find the attribute in
+ @return the offset of the next attribute
+ */
+ TInt LocateNextAttribute(const TDesC& aTagToParse);
+
+ /*
+ * Parses an end tag, which at this point must match the tag name of the start tag.
+ * @param aElement Presumably the element whose start tag is being matched - TODO confirm.
+ * @param aTagToParse Text of the end tag.
+ * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
+ */
+ TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
+
+ TInt CheckForStartCData(const TDesC& aTextToCheck);
+ TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
+
+ /*
+ * Second stage constructor
+ */
+ void ConstructL(MXMLDtd* aDtdRepresentation);
+ void AddTextL(CMDXMLElement* aParentElement);
+
+ /*
+ * Checks whether the end of this tag is in a CDataSection.
+ * @param aDataToSearch The data to check
+ * @return Returns ETrue if the tag contains an unclosed CDataSection
+ */
+ TBool InCDataSection(TDesC& aDataToSearch);
+
+ /*
+ * Entity converts the sections of one attribute value that are not within a CDataSection.
+ * @param aAttributeValue one attribute value
+ * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
+ */
+ TInt ParseSingleAttributeL(TDes& aAttributeValue);
+
+ /*
+ * Prepares this class for use on another file.
+ *
+ */
+ void PrepareForReuseL();
+
+ /**
+ This should be called when parsing has been completed, before calling ParseFileCompleteL().
+ It checks for errors that can only be determined at the end of parsing, e.g. missing doctype or
+ incomplete content.
+ */
+ void CheckForErrors();
+
+ IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
+ IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+ IMPORT_C void PlaceholderForRemovedExport3();
+
+
+private:
+ enum TPanicCode { ENullMemVarDataSource,
+ ENullMemVarParserObserver,
+ ENullMemVarXMLDoc,
+ ENullMemVarElementTag,
+ ENullParameterParentElement };
+ void Panic(TPanicCode aReason) const;
+
+private:
+ MMDXMLParserObserver* iParserObserver;
+ MXMLDtd* iDtdRepresentation;
+ TInt iError; // Current error
+ TXMLErrorCodeSeverity iSeverity; // ErrorCode severity
+ CMDXMLDocument* iXMLDoc; // Document created by the parser
+ CMDXMLEntityConverter* iEntityConverter; // Entity converter used by the parser
+ HBufC* iElementTag; // Currently processed element tag
+ TBool iDocTypeSet;
+ TBool iVersionSet;
+ TInt iBytesPerChar;
+
+ /* member variables dealing with access to source data */
+ TPtrC8 iInputBufferPtr; // set during a call to get more data
+ TInt iCurrentInputBufferLen; // current length of the data block available
+ TInt iNextChar; // read position in the data block
+ TInt iInputBytesRemaining; // number of bytes remaining to read.
+ HBufC8 *iUTF8EdgeBuffer; // buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
+ HBufC8 *iBomBuffer; // buffer to hold data at the start of the stream so we may determine charset
+ TInt iRequiredUTF8Bytes; // number of bytes required to complete the character held in the edge buffer
+ TBool iUnicodeInputMisaligned; // Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
+ MMDXMLParserDataProvider* iDataSource; // XML Data Source being parsed.
+ CMDXMLParserFileDataSource* iFileSource; // We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
+
+ /* member variables dealing with chunked conversion into unicode output */
+ TBuf<32> iUnicodeConversion; // buffer to temporarily hold the results of conversion from UTF8 to Unicode
+ TInt iUnicodeConversionLen; // number of characters stored in our intermediate buffer
+ TInt iUnicodeReadPos; // next character to send from our intermediate buffer
+ TBuf<1> iSpareChar;
+
+ /* member variables used when parsing a local file */
+ TDesC *iFileToParse;
+ RFs iRFs;
+ RFile iFileHandleToParse;
+
+ TBool iEndOfTag;
+
+ /* member variables used in DoParseLoopL() */
+ TBool iOpened;
+ TBool iClosed;
+ CMDXMLElement* iNewElement;
+ CMDXMLElement* iParentElement;
+ HBufC* iText;
+ enum EParserStates
+ {
+ KInitFromFile,
+ KDetermineCharset,
+ KWaitingForData,
+ KParseData,
+ KSpanDataGap,
+ KFinished
+ };
+
+ EParserStates iState;
+ EParserStates iPreviousState;
+ TInt iSuspiciousCharacter;
+ TBool iStoreInvalid; // controls whether invalid elements and attributes are stored in the DOM.
+ TBool iPreserve;
+
+ };
+
+#endif