xml/legacyminidomparser/XMLParser/INC/GMXMLParser.h
changeset 34 c7e9f1c97567
parent 25 417699dc19c9
child 36 172b09aa4eb6
--- a/xml/legacyminidomparser/XMLParser/INC/GMXMLParser.h	Thu Jul 01 15:13:40 2010 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,510 +0,0 @@
-// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
-// All rights reserved.
-// This component and the accompanying materials are made available
-// under the terms of "Eclipse Public License v1.0"
-// which accompanies this distribution, and is available
-// at the URL "http://www.eclipse.org/legal/epl-v10.html".
-//
-// Initial Contributors:
-// Nokia Corporation - initial contribution.
-//
-// Contributors:
-//
-// Description:
-// This file contains the declaration of the generic CMDXMLParser class
-// which is responsible for creating a DOM structure
-// from a given XML file.
-// 
-//
-
-/**
- @file
-*/
-
-#ifndef __GMXMLPARSER_H__
-#define __GMXMLPARSER_H__
-
-#include <e32std.h>
-#include <txtetext.h>
-#include <gmxmlconstants.h>
-#include <f32file.h>
-
-//forward reference
-class CMDXMLDocument;
-class CMDXMLEntityConverter;
-class CMDXMLElement;
-class MXMLDtd;
-
-
-
-class MMDXMLParserObserver
-/** Abstract observer interface for notification when XML parsing is complete.
-
-It should be implemented by users of CMDXMLParser; a pointer to the
-implementation is passed to CMDXMLParser::NewL() or CMDXMLParser::NewLC().
-
-@publishedAll 
-@released*/
-	{
-public:
-	/**
-	Callback used to inform a client of the parser that a parsing operation has completed.
-
-	The callback takes no parameters and returns nothing; the outcome of the
-	parse is read back from the parser itself through CMDXMLParser::Error(),
-	CMDXMLParser::ErrorSeverity() and CMDXMLParser::DetachXMLDoc().
-	 */
-	virtual void ParseFileCompleteL() = 0;
-	};
-
-class MMDXMLParserDataProvider
-/** Abstract data source interface for XML data source.
-
-The user of CMDXMLParser must build one of these to encapsulate the data source
-that they wish to parse.  CMDXMLParser implements a file-based data source to
-implement the functionality of the ParseFile function.
-
-An implementation is handed to the parser through CMDXMLParser::ParseSourceL()
-(or the non-leaving CMDXMLParser::ParseSource() wrapper).
-
-@publishedAll 
-@released*/
-	{
-public:
-	/** Status codes returned by GetData() implementations. */
-	enum TDataProviderResults
-		{
-		KMoreData,		//< Returned by the interface implementation when it is returning more data.
-		KDataStreamError,	//< Returned by the interface when an unrecoverable error prevents obtaining more data.  A recoverable error should be represented by KDataNotReady. NOTE(review): no KDataNotReady value is declared in this enum - confirm how a recoverable error is meant to be reported.
-		KDataStreamEnd	//< Returned by the interface when there is no more data to come.
-		};
-
-public:
-	/** 
-	The XML Parser calls this on a specific data provider to get more data
-	when required.
-
-	Note that the TPtrC8 supplied may be used by the parser at any time
-	between the return of this call and the next call that the parser
-	makes out.
-
-	Your data provider must not move the data pointed to until the
-	parser has indicated that it's done with that block by asking for
-	another.
-
-	Ownership of the data pointed to remains with the data provider.
-
-
-	General comments on efficiency
-	------------------------------
-
-	The parser is designed such that it processes the whole data block
-	provided in one go.  It will automatically become asynchronous when
-	another block is required - the data provider only needs to supply
-	data.
-
-	Because of this design, it allows the data provider to indirectly
-	control the amount of processing time that will be needed
-	in a single block.
-
-	It is a good idea to balance the need for the fastest possible 
-	processing with the need for client application responsiveness by
-	ensuring that the amount of data passed in a single block is not 
-	too large.  However, it is worth bearing in mind that the parser
-	will convert UTF8 data streams in blocks of 32 characters, and
-	supplying blocks of smaller length than this will result in a
-	slight loss of efficiency.
-
-	@param aPtr On return, the data provided (an 8-bit pointer descriptor
-	referring to memory that stays owned by the data provider)
-	@param aStatus Asynchronous status to be completed by the function with a 
-	TDataProviderResults value
-	*/
-	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
-	/**
-	Called to indicate that use of the data source is complete.
-	*/
-	virtual void Disconnect() = 0;
-	};
-
-class CMDXMLParserFileDataSource;
-
-class CMDXMLParser: public CActive
-/** Creates a DOM structure from a given XML file.
-
-The parsing operation is asynchronous and is initiated by a call to ParseFile()
-or ParseSourceL(). Completion is reported to the MMDXMLParserObserver supplied
-at construction, through its ParseFileCompleteL() callback.
-On completion, the created DOM document can be retrieved through DetachXMLDoc().
-
-Note the following ownership rules for the DOM document:
-
-1. calling DetachXMLDoc() transfers ownership of the document to the client
-
-2. if the parser is asked to parse a new file while it still owns an existing 
-DOM document, it will delete the old document.
-
-@publishedAll
-@released
-*/
-	{
-public:
-	/** Allocates and constructs a new XML parser. This overload takes no DTD
-	representation.
-	
-	@param aParserObserver XML parser observer
-	@leave KErrNoMemory Out of memory
-	@return New XML parser */
-	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
-
-	/** Allocates and constructs a new XML parser, specifying a DTD.
-	
-	@param aParserObserver XML parser observer
-	@param aDtdRepresentation DTD validator
-	@leave KErrNoMemory Out of memory
-	@return New XML parser */
-	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
-
-	/** Allocates and constructs a new XML parser, leaving the object on the cleanup 
-	stack. This overload takes no DTD representation.
-	
-	@param aParserObserver XML parser observer
-	@leave KErrNoMemory Out of memory
-	@return New XML parser */
-	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
-
-	/** Allocates and constructs a new XML parser, specifying a DTD and leaving
-	the object on the cleanup stack.
-	
-	@param aParserObserver XML parser observer
-	@param aDtdRepresentation DTD validator
-	@leave KErrNoMemory Out of memory
-	@return New XML parser */
-	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
-
-
-	/** Destructor. */
-	IMPORT_C ~CMDXMLParser();
-
-	/** Gets the last error found by the parser.
-	
-	@return Error code
-	 */
-	IMPORT_C TInt Error() const;
-
-	/**
-	 Get the severity of the most severe error found.
-	 @return the maximum error severity
-	 */
-	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; 
-
-	/** Gets the created DOM.
-	
-	This should be called after the conclusion of the parser process.
-	
-	Note that the function sets the internal variable pointing to the document 
-	to NULL, so this function can only be called once per file parse. The caller 
-	takes ownership of the document, and must delete it when its use is complete.
-	
-	@return The created DOM */
-	IMPORT_C CMDXMLDocument* DetachXMLDoc();
-
-	/** Parses a specified XML file into a DOM object tree.
-
-	The ParseFile(RFile&) overload declared immediately below takes a file
-	handle instead of a file server session and file name; it carries no
-	documentation of its own (NOTE(review): presumably the same return
-	convention applies - confirm).
-	
-	@param aRFs File server session
-	@param aFileToParse The file name to parse
-	@return KErrNone if success or a file read error code */
-	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
-	
-	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
-
-	/** Parses a specified XML Data Source into a DOM object tree.
-	Use ParseSourceL() function in preference to ParseSource():
-	this inline wrapper calls ParseSourceL() under TRAP_IGNORE, so any
-	leave is discarded and the caller receives no indication of the failure.
-	@param aSource MMDXMLParserDataProvider pointer 
-	*/
-	inline void ParseSource(MMDXMLParserDataProvider *aSource)
-		{
-		TRAP_IGNORE(ParseSourceL(aSource));
-		} 
-				
-	/** Parses a specified XML Data Source into a DOM object tree.	
-	@param aSource MMDXMLParserDataProvider pointer 
-	*/
-	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
-
-	/** Defines input stream character widths. */
-	enum TMDXMLParserInputCharWidth
-		{
-		EAscii = 0x01, //< ASCII
-		EUnicode = 0x02 //<Unicode
-		};
-	
-	/** Sets the input stream character width.
-	 *
-	 * @param aWidth Character width for incoming stream.  Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
-	 *
-	 */
-	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
-
-	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
-	/**
-	 * Sets the entity converter to be used for parsing and takes
-	 * ownership of the passed entity converter.
-	 * @param aEntityConverter the entity converter to be used.
-	 */
-	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
-	//End Defect fix for INC036136
-
-	/**
-	 Controls whether invalid elements and attributes are added to the DOM.
-	 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
-	 */
-	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
-	
-	/**
-	 Controls whether whitespaces are handled by XML parser or by client.
-	 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise.
-	 */
-	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
-
-public: // public functions used by other classes within the .dll, not for Export.
-	/** Gets the entity converter.
-	
-	@return The entity converter */
-	CMDXMLEntityConverter* EntityConverter();
-
-private:
-	IMPORT_C virtual void DoCancel();
-
-	/*
-	 * RunL function inherited from CActive base class - carries out the actual parsing.
-	 * @leave can Leave due to OOM
-	 */
-	virtual void RunL();
-
-	/*
-	 * Helper function that does the parsing - called from inside RunL
-	 */
-	TBool DoParseLoopL();
-
-	/*
-	 * RunError function inherited from CActive base class - intercepts any Leave from
-	 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
-	 * @param aError the leave code from RunL()
-	 */
-	IMPORT_C TInt RunError(TInt aError);
-
-	/*
-	 * Constructors
-	 */
-	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
-
-	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
-
-	/*
-	 * Called when a character is read in and found to be outside of an element tag
-	 * @param aChar the character that was read
-	 */
-	virtual void HandleTextL(TDes& aChar);
-
-	enum TGetCharReturn
-		{
-		KError = 0x00,			// GetChar detected an error
-		KCharReturned,	// GetChar returned a character
-		KWaitForChar	// GetChar couldn't return a character this time, but might next time.
-		};
-
-	/*
-	 * Fetch one character from the input file
-	 * @param aChar the returned character.
-	 * @return returns one of the values of TGetCharReturn
-	 */
-	TGetCharReturn GetChar(TDes& aChar);
-
-	/* utility functions, called from GetChar to deal with the
-	 * 2 types of input stream
-	 */
-	TGetCharReturn GetDoubleByteChar(TDes& aChar);
-	TGetCharReturn GetSingleByteChar(TDes& aChar);
-
-	/*
-	 * Fetch some more data from the data provider.
-	 * Declared void: nothing is returned to the caller.
-	 */
-	void GetMoreData();
-
-	/*
-	 * @return Returns true if the current tag is a doctype tag and sets the
-	 * Document DocType member accordingly on the first pass of this function.
-	 */
-	TBool DocTypeL();
-
-	/*
-	 * creates a new processing instruction if necessary and adds to document
-	 * @return Returns true if the current tag is a processing instruction
-	 */
-	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
-
-	/*
-	 * creates a new CDataSection if necessary and adds to document
-	 * @return Returns true if the current tag is a CDataSection
-	 * (NOTE(review): the original text said "processing instruction", apparently
-	 * copied from ProcessingInstructionL - confirm against the implementation)
-	 */
-	TBool CDataSectionL(CMDXMLElement* aParentElement);
-	TBool EndOfCDataSection();
-
-	/*
-	 * @return returns true if the current tag is a version id tag and sets the
-	 * Document Version member accordingly on the first pass of this function.
-	 */
-	TBool VersionIDL();
-
-	/*
-	 * creates a new comment if necessary and adds to document
-	 * @return returns true if the current tag is a comment tag
-	 */
-	TBool CommentL(CMDXMLElement* aParentElement);
-
-	/*
-	 * Parse a start of element tag and create an element with attributes set.
-	 * @return Returns a pointer to the created element
-	 * @leave can Leave due to OOM
-	 */
-	virtual CMDXMLElement* ParseStartTagL();
-
-	/*
-	 * Detects the type of a file - can be Unicode or UTF-8
-	 */
-	TBool DetectFileType();
-
-	/*
-	 * Creates a generic or DTD-specific document object
-	 * @leave can Leave due to OOM
-	 */
-	virtual void CreateDocumentL();
-
-	/*
-	 * Sets iError to new errorcode if more serious than any error so far encountered
-	 * @param aErrorCode the candidate error code
-	 * @param aSeverity the severity of aErrorCode
-	 */
-	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
-
-	/*
-	 * This function is used to parse the attributes.
-     * @param aElement The element to which the attributes belong
-     * @param aTagToParse The tag to be parsed
-     * @return Returns KErrNone if both attribute name & value are valid, 
-	 * KErrXMLBadAttributeName if the attribute name is invalid, or
-	 * KErrXMLBadAttributeValue if the attribute value is invalid
-     * @leave can Leave due to OOM
-	 */
-	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
-
-	/** 
-	  This function locates the next attribute in the tag.
-	  @param aTagToParse the tag to find the attribute in
-	  @return the offset of the next attribute
-	 */
-	TInt LocateNextAttribute(const TDesC& aTagToParse);
-
-    /*
-     * Parses an end tag.  In fact, at this point the end tag must match
-     * the tag name of the start tag.  
-     * @param aElement NOTE(review): presumably the element whose start tag the end tag is matched against - confirm
-     * @param aTagToParse Text of the end tag.
-     * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
-     */
-	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
-
-	TInt CheckForStartCData(const TDesC& aTextToCheck);	// NOTE(review): undocumented in this header - contract to be confirmed
-	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);	// NOTE(review): undocumented in this header - contract to be confirmed
-
-	/*
-	 * Second stage constructor
-	 * @param aDtdRepresentation DTD representation handed on from NewL()/NewLC()
-	 * (NOTE(review): presumably NULL for the overloads without a DTD - confirm)
-	 */
-	void ConstructL(MXMLDtd* aDtdRepresentation);
-	void AddTextL(CMDXMLElement* aParentElement);
-
-	/*
-	 * Checks whether the end of this tag is in a CDataSection.
-	 * @param aDataToSearch The data to check
-	 * @return Returns ETrue if the tag contains an unclosed CDataSection
-	 */
-	TBool InCDataSection(TDesC& aDataToSearch);
-
-	/*
-	 * Entity converts the sections of one attribute value that are not within a CDataSection.
-	 * @param aAttributeValue one attribute value
-	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
-	 */
-	TInt ParseSingleAttributeL(TDes& aAttributeValue);
-
-	/*
-	 * Prepares this class for use on another file.
-	 *
-	 */
-	void PrepareForReuseL();
-
-	/**
-	 This should be called when parsing has been completed, before calling ParseFileCompleteL().
-	 It checks for errors that can only be determined at the end of parsing, eg missing doctype or 
-	 incomplete content.
-	 */
-	void CheckForErrors();
-
-	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);	// NOTE(review): the three placeholders presumably keep export ordinals stable after exports were removed - confirm
-	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
-	IMPORT_C void PlaceholderForRemovedExport3();
-
-
-private:
-	enum TPanicCode {	ENullMemVarDataSource, 
-						ENullMemVarParserObserver, 
-						ENullMemVarXMLDoc, 
-						ENullMemVarElementTag, 
-						ENullParameterParentElement };
-	void Panic(TPanicCode aReason) const;
-
-private:
-	MMDXMLParserObserver* iParserObserver;
-	MXMLDtd* iDtdRepresentation;
-	TInt iError;								// Current error
-	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
-	CMDXMLDocument* iXMLDoc;					// Document created by the parser
-	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
-	HBufC* iElementTag;							// Currently processed element tag
-	TBool iDocTypeSet;
-	TBool iVersionSet;
-	TInt iBytesPerChar;
-
-	/* member variables dealing with access to source data */
-	TPtrC8 iInputBufferPtr;						// set during a call to get more data
-	TInt iCurrentInputBufferLen;				// current length of the data block available
-	TInt iNextChar;								// read position in the data block
-	TInt iInputBytesRemaining;					// number of bytes remaining to read.
-	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
-	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
-	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
-	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
-	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
-	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
-
-	/* member variables dealing with chunked conversion into unicode output */
-	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
-	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
-	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
-	TBuf<1> iSpareChar;
-
-	/* member variables used when parsing a local file */
-	TDesC *iFileToParse;
-	RFs iRFs;
-	RFile iFileHandleToParse;
-
-	TBool iEndOfTag;
-	
-	/* member variables used in DoParseLoopL() */
-	TBool iOpened;
-	TBool iClosed;
-	CMDXMLElement* iNewElement;
-	CMDXMLElement* iParentElement;
-	HBufC* iText;
-	enum EParserStates
-		{
-		KInitFromFile,
-		KDetermineCharset,
-		KWaitingForData,
-		KParseData,
-		KSpanDataGap,
-		KFinished
-		};
-
-	EParserStates iState;
-	EParserStates iPreviousState;
-	TInt iSuspiciousCharacter;
-	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
-	TBool iPreserve;							// NOTE(review): presumably the whitespace flag set by SetWhiteSpaceHandlingMode() - confirm
-
-	};
-
-#endif