xml/legacyminidomparser/XMLParser/INC/GMXMLParser.h
changeset 34 c7e9f1c97567
parent 25 417699dc19c9
child 36 172b09aa4eb6
equal deleted inserted replaced
25:417699dc19c9 34:c7e9f1c97567
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // This file contains the declaration of the generic CMDXMLParser class
       
    15 // which is responsible for creating a DOM structure
       
    16 // from a given XML file.
       
    17 // 
       
    18 //
       
    19 
       
    20 /**
       
    21  @file
       
    22 */
       
    23 
       
    24 #ifndef __GMXMLPARSER_H__
       
    25 #define __GMXMLPARSER_H__
       
    26 
       
    27 #include <e32std.h>
       
    28 #include <txtetext.h>
       
    29 #include <gmxmlconstants.h>
       
    30 #include <f32file.h>
       
    31 
       
    32 // Forward declarations: these types are only used through pointers or references in this header.
       
    33 class CMDXMLDocument;
       
    34 class CMDXMLEntityConverter;
       
    35 class CMDXMLElement;
       
    36 class MXMLDtd;
       
    37 
       
    38 
       
    39 
       
    40 class MMDXMLParserObserver
       
    41 /** Abstract observer interface for notification when XML parsing is complete.
       
    42 
       
    43 It should be implemented by users of CMDXMLParser; a pointer to the observer is passed to CMDXMLParser::NewL()/NewLC().
       
    44 @publishedAll 
       
    45 @released*/
       
    46 	{
       
    47 public:
       
    48 	/**
       
    49 	Callback used to inform a client of the parser that a parsing operation has completed. Per the CMDXMLParser::RunError() comment, it is also called after a leave from RunL().
       
    50 	 */
       
    51 	virtual void ParseFileCompleteL() = 0;
       
    52 	};
       
    53 
       
    54 class MMDXMLParserDataProvider
       
    55 /** Abstract interface for a source of XML data.
       
    56 
       
    57 The user of CMDXMLParser must build one of these to encapsulate the data source
       
    58 that they wish to parse.  CMDXMLParser implements a file-based data source to
       
    59 implement the functionality of the ParseFile function.
       
    60 
       
    61 @publishedAll 
       
    62 @released*/
       
    63 	{
       
    64 public:
       
    65 	/** Status codes with which GetData() implementations complete the request status. */
       
    66 	enum TDataProviderResults
       
    67 		{
       
    68 		KMoreData,		//< Returned by the interface implementation when it is returning more data.
       
    69 		KDataStreamError,	//< Returned by the interface when an unrecoverable error prevents obtaining more data.  A recoverable error should be represented by KDataNotReady.  NOTE(review): KDataNotReady is not a member of this enum - confirm where it is defined.
       
    70 		KDataStreamEnd	//< Returned by the interface when there is no more data to come.
       
    71 		};
       
    72 
       
    73 public:
       
    74 	/** 
       
    75 	The XML Parser calls this on a specific data provider to get more data
       
    76 	when required.
       
    77 
       
    78 	Note that the TPtrC8 supplied may be used by the parser at any time
       
    79 	between the return of this call and the next call that the parser
       
    80 	makes out.
       
    81 
       
    82 	Your data provider must not move the data pointed to until the
       
    83 	parser has indicated that it's done with that block by asking for
       
    84 	another.
       
    85 
       
    86 	Ownership of the data pointed to remains with the data provider.
       
    87 
       
    88 
       
    89 	General comments on efficiency
       
    90 	------------------------------
       
    91 
       
    92 	The parser is designed such that it processes the whole data block
       
    93 	provided in one go.  It will automatically become asynchronous when
       
    94 	another block is required - the data provider only needs to supply
       
    95 	data.
       
    96 
       
    97 	Because of this design, it allows the data provider to indirectly
       
    98 	control the amount of processing time that will be needed
       
    99 	in a single block.
       
   100 
       
   101 	It is a good idea to balance the need for the fastest possible 
       
   102 	processing with the need for client application responsiveness by
       
   103 	ensuring that the amount of data passed in a single block is not 
       
   104 	too large.	However, it is worth bearing in mind that the parser
       
   105 	will convert UTF8 data streams in blocks of 32 characters, and
       
   106 	supplying blocks of smaller length than this will result in a
       
   107 	slight loss of efficiency.
       
   108 
       
   109 	@param aPtr On return, set to point at the block of data provided (the data itself stays owned by the provider)
       
   110 	@param aStatus Asynchronous status to be completed by the function with a 
       
   111 	TDataProviderResults value
       
   112 	*/
       
   113 	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
       
   114 	/**
       
   115 	Called to indicate that use of the data source is complete.
       
   116 	*/
       
   117 	virtual void Disconnect() = 0;
       
   118 	};
       
   119 
       
   120 class CMDXMLParserFileDataSource; // Forward declaration: data source wrapping a local file, owned by CMDXMLParser (see iFileSource).
       
   121 
       
   122 class CMDXMLParser: public CActive
       
   123 /** Creates a DOM structure from a given XML file or other XML data source.
       
   124 
       
   125 The parsing operation is asynchronous and is initiated by a call to ParseFile() or ParseSourceL(). 
       
   126 On completion (signalled through MMDXMLParserObserver::ParseFileCompleteL()), the created DOM document can be retrieved through DetachXMLDoc().
       
   127 
       
   128 Note the following ownership rules for the DOM document:
       
   129 
       
   130 1. calling DetachXMLDoc() transfers ownership of the document to the client
       
   131 
       
   132 2. if the parser is asked to parse a new file while it still owns an existing 
       
   133 DOM document, it will delete the old document.
       
   134 
       
   135 @publishedAll
       
   136 @released
       
   137 */
       
   138 	{
       
   139 public:
       
   140 	/** Allocates and constructs a new XML parser (this overload takes no DTD).
       
   141 	
       
   142 	@param aParserObserver XML parser observer
       
   143 	@leave KErrNoMemory Out of memory
       
   144 	@return New XML parser */
       
   145 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
       
   146 
       
   147 	/** Allocates and constructs a new XML parser, specifying a DTD.
       
   148 	
       
   149 	@param aParserObserver XML parser observer
       
   150 	@param aDtdRepresentation DTD validator
       
   151 	@leave KErrNoMemory Out of memory
       
   152 	@return New XML parser */
       
   153 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
   154 
       
   155 	/** Allocates and constructs a new XML parser (no DTD), leaving the object on the cleanup 
       
   156 	stack.
       
   157 	
       
   158 	@param aParserObserver XML parser observer
       
   159 	@leave KErrNoMemory Out of memory
       
   160 	@return New XML parser */
       
   161 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
       
   162 
       
   163 	/** Allocates and constructs a new XML parser, specifying a DTD, leaving the object on the cleanup 
       
   164 	stack.
       
   165 	
       
   166 	@param aParserObserver XML parser observer
       
   167 	@param aDtdRepresentation DTD validator
       
   168 	@leave KErrNoMemory Out of memory
       
   169 	@return New XML parser */
       
   170 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
   171 
       
   172 
       
   173 	/** Destructor. */
       
   174 	IMPORT_C ~CMDXMLParser();
       
   175 
       
   176 	/** Gets the last error found by the parser.
       
   177 	
       
   178 	@return Error code
       
   179 	 */
       
   180 	IMPORT_C TInt Error() const;
       
   181 
       
   182 	/**
       
   183 	 Gets the severity of the most severe error found.
       
   184 	 @return the maximum error severity
       
   185 	 */
       
   186 	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; 
       
   187 
       
   188 	/** Gets the created DOM.
       
   189 	
       
   190 	This should be called after the conclusion of the parser process.
       
   191 	
       
   192 	Note that the function sets the internal variable pointing to the document 
       
   193 	to NULL, so this function can only be called once per file parse. The caller 
       
   194 	takes ownership of the document, and must delete it when its use is complete.
       
   195 	
       
   196 	@return The created DOM */
       
   197 	IMPORT_C CMDXMLDocument* DetachXMLDoc();
       
   198 
       
   199 	/** Parses a specified XML file into a DOM object tree.
       
   200 	
       
   201 	@param aRFs File server session
       
   202 	@param aFileToParse The file name to parse
       
   203 	@return KErrNone if success or a file read error code */
       
   204 	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
       
   205 	/** Overload of ParseFile() taking a file handle (aFileHandleToParse) instead of a session and file name. NOTE(review): presumably returns KErrNone or an error code like the overload above - confirm against the implementation. */
       
   206 	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
       
   207 
       
   208 	/** Parses a specified XML Data Source into a DOM object tree.
       
   209 	Use ParseSourceL() in preference to ParseSource(): this wrapper calls ParseSourceL() under TRAP_IGNORE, so any leave is silently discarded.
       
   210 	@param aSource MMDXMLParserDataProvider pointer 
       
   211 	*/
       
   212 	inline void ParseSource(MMDXMLParserDataProvider *aSource)
       
   213 		{
       
   214 		TRAP_IGNORE(ParseSourceL(aSource));
       
   215 		} 
       
   216 				
       
   217 	/** Parses a specified XML Data Source into a DOM object tree.	
       
   218 	@param aSource MMDXMLParserDataProvider pointer 
       
   219 	*/
       
   220 	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
       
   221 
       
   222 	/** Defines input stream character widths. */
       
   223 	enum TMDXMLParserInputCharWidth
       
   224 		{
       
   225 		EAscii = 0x01, //< ASCII
       
   226 		EUnicode = 0x02 //< Unicode
       
   227 		};
       
   228 	
       
   229 	/** Sets the input stream character width.
       
   230 	 *
       
   231 	 * @param aWidth Character width for incoming stream.  Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
       
   232 	 *
       
   233 	 */
       
   234 	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
       
   235 
       
   236 	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
       
   237 	/**
       
   238 	 * Sets the entity converter to be used for parsing
       
   239 	 * and takes ownership of the passed entity converter.
       
   240 	 * @param aEntityConverter the entity converter to be used.
       
   241 	 */
       
   242 	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
       
   243 	//End Defect fix for INC036136
       
   244 
       
   245 	/**
       
   246 	 Controls whether invalid elements and attributes are added to the DOM.
       
   247 	 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
       
   248 	 */
       
   249 	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
       
   250 	
       
   251 	/**
       
   252 	 Controls whether whitespaces are handled by XML parser or by client.
       
   253 	 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise.
       
   254 	 */
       
   255 	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
       
   256 
       
   257 public: // public functions used by other classes within the .dll, not for Export.
       
   258 	/** Gets the entity converter.
       
   259 	
       
   260 	@return The entity converter */
       
   261 	CMDXMLEntityConverter* EntityConverter();
       
   262 
       
   263 private:
       
   264 	IMPORT_C virtual void DoCancel();
       
   265 
       
   266 	/*
       
   267 	 * RunL function inherited from CActive base class - carries out the actual parsing.
       
   268 	 * @leave can Leave due to OOM
       
   269 	 */
       
   270 	virtual void RunL();
       
   271 
       
   272 	/*
       
   273 	 * Helper function that does the parsing - called from inside RunL
       
   274 	 */
       
   275 	TBool DoParseLoopL();
       
   276 
       
   277 	/*
       
   278 	 * RunError function inherited from CActive base class - intercepts any Leave from
       
   279 	 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
       
   280 	 */
       
   281 	IMPORT_C TInt RunError(TInt aError);
       
   282 
       
   283 	/*
       
   284 	 * Constructors
       
   285 	 */
       
   286 	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
       
   287 
       
   288 	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
   289 
       
   290 	/*
       
   291 	 * Called when a character is read in and found to be outside of an element tag
       
   292 	 */
       
   293 	virtual void HandleTextL(TDes& aChar);
       
   294 
       
   295 	enum TGetCharReturn
       
   296 		{
       
   297 		KError = 0x00,			// GetChar detected an error
       
   298 		KCharReturned,	// GetChar returned a character
       
   299 		KWaitForChar	// GetChar couldn't return a character this time, but might next time.
       
   300 		};
       
   301 
       
   302 	/*
       
   303 	 * Fetch one character from the input file
       
   304 	 * @param aChar the returned character.
       
   305 	 * @return returns one of the values of TGetCharReturn
       
   306 	 */
       
   307 	TGetCharReturn GetChar(TDes& aChar);
       
   308 
       
   309 	/* utility functions, called from GetChar to deal with the
       
   310 	 * 2 types of input stream
       
   311 	 */
       
   312 	TGetCharReturn GetDoubleByteChar(TDes& aChar);
       
   313 	TGetCharReturn GetSingleByteChar(TDes& aChar);
       
   314 
       
   315 	/*
       
   316 	 * Fetch some more data from the data provider
       
   317 	 * (no return value: the function is declared void)
       
   318 	 */
       
   319 	void GetMoreData();
       
   320 
       
   321 	/*
       
   322 	 * @return Returns true if the current tag is a doctype tag and sets the
       
   323 	 * Document DocType member accordingly on the first pass of this function.
       
   324 	 */
       
   325 	TBool DocTypeL();
       
   326 
       
   327 	/*
       
   328 	 * creates a new processing instruction if necessary and adds to document
       
   329 	 * @return Returns true if the current tag is a processing instruction
       
   330 	 */
       
   331 	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
       
   332 
       
   333 	/*
       
   334 	 * creates a new CDataSection if necessary and adds to document
       
   335 	 * @return Returns true if the current tag is a CDataSection. NOTE(review): the comment previously said "processing instruction", apparently copied from ProcessingInstructionL - confirm.
       
   336 	 */
       
   337 	TBool CDataSectionL(CMDXMLElement* aParentElement);
       
   338 	TBool EndOfCDataSection();
       
   339 
       
   340 	/*
       
   341 	 * @return returns true if the current tag is a version id tag and sets the
       
   342 	 * Document Version member accordingly on the first pass of this function.
       
   343 	 */
       
   344 	TBool VersionIDL();
       
   345 
       
   346 	/*
       
   347 	 * creates a new comment if necessary and adds to document
       
   348 	 * @return returns true if the current tag is a comment tag
       
   349 	 */
       
   350 	TBool CommentL(CMDXMLElement* aParentElement);
       
   351 
       
   352 	/*
       
   353 	 * Parse a start of element tag and create an element with attributes set.
       
   354 	 * @return Returns a pointer to the created element
       
   355 	 * @leave can Leave due to OOM
       
   356 	 */
       
   357 	virtual CMDXMLElement* ParseStartTagL();
       
   358 
       
   359 	/*
       
   360 	 * Detects the type of a file - can be Unicode or UTF-8
       
   361 	 */
       
   362 	TBool DetectFileType();
       
   363 
       
   364 	/*
       
   365 	 * Creates a generic or DTD-specific document object
       
   366 	 * @leave can Leave due to OOM
       
   367 	 */
       
   368 	virtual void CreateDocumentL();
       
   369 
       
   370 	/*
       
   371 	 * Sets iError to new errorcode if more serious than any error so far encountered
       
   372 	 */
       
   373 	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
       
   374 
       
   375 	/*
       
   376 	 * This function is used to parse the attributes.
       
   377      * @param aElement The element to which the attributes belong
       
   378      * @param aTagToParse The tag to be parsed
       
   379      * @return Returns KErrNone if both attribute name & value are valid, 
       
   380 	 * KErrXMLBadAttributeName if the attribute name is invalid or KErrXMLBadAttributeValue if the attribute value is invalid
       
   381      * @leave can Leave due to OOM
       
   382 	 */
       
   383 	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
       
   384 
       
   385 	/** 
       
   386 	  This function locates the next attribute in the tag.
       
   387 	  @param aTagToParse the tag to find the attribute in
       
   388 	  @return the offset of the next attribute
       
   389 	 */
       
   390 	TInt LocateNextAttribute(const TDesC& aTagToParse);
       
   391 
       
   392     /*
       
   393      * Parses an end tag.  In fact, at this point the end tag must match
       
   394      * the tag name of the start tag.  
       
   395      * @param aTagToParse Text of the end tag.
       
   396      * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
       
   397      */
       
   398 	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
       
   399 
       
   400 	TInt CheckForStartCData(const TDesC& aTextToCheck);
       
   401 	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
       
   402 
       
   403 	/*
       
   404 	 * Second stage constructor
       
   405 	 */
       
   406 	void ConstructL(MXMLDtd* aDtdRepresentation);
       
   407 	void AddTextL(CMDXMLElement* aParentElement);
       
   408 
       
   409 	/*
       
   410 	 * Checks whether the end of this tag is in a CDataSection.
       
   411 	 * @param aDataToSearch The data to check
       
   412 	 * @return Returns ETrue if the tag contains an unclosed CDataSection
       
   413 	 */
       
   414 	TBool InCDataSection(TDesC& aDataToSearch);
       
   415 
       
   416 	/*
       
   417 	 * Entity converts the sections of one attribute value that are not within a CDataSection.
       
   418 	 * @param aAttributeValue one attribute value
       
   419 	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
       
   420 	 */
       
   421 	TInt ParseSingleAttributeL(TDes& aAttributeValue);
       
   422 
       
   423 	/*
       
   424 	 * Prepares this class for use on another file.
       
   425 	 *
       
   426 	 */
       
   427 	void PrepareForReuseL();
       
   428 
       
   429 	/**
       
   430 	 This should be called when parsing has been completed, before calling ParseFileCompleteL().
       
   431 	 It checks for errors that can only be determined at the end of parsing, eg missing doctype or 
       
   432 	 incomplete content.
       
   433 	 */
       
   434 	void CheckForErrors();
       
   435 
       
   436 	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
       
   437 	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
   438 	IMPORT_C void PlaceholderForRemovedExport3();
       
   439 
       
   440 
       
   441 private:
       
   442 	enum TPanicCode {	ENullMemVarDataSource, 
       
   443 						ENullMemVarParserObserver, 
       
   444 						ENullMemVarXMLDoc, 
       
   445 						ENullMemVarElementTag, 
       
   446 						ENullParameterParentElement };
       
   447 	void Panic(TPanicCode aReason) const;
       
   448 
       
   449 private:
       
   450 	MMDXMLParserObserver* iParserObserver;
       
   451 	MXMLDtd* iDtdRepresentation;
       
   452 	TInt iError;								// Current error
       
   453 	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
       
   454 	CMDXMLDocument* iXMLDoc;					// Document created by the parser
       
   455 	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
       
   456 	HBufC* iElementTag;							// Currently processed element tag
       
   457 	TBool iDocTypeSet;							// presumably records that DocTypeL() has already set the document DocType - confirm
       
   458 	TBool iVersionSet;							// presumably records that VersionIDL() has already set the document version - confirm
       
   459 	TInt iBytesPerChar;							// presumably the input character width (cf. TMDXMLParserInputCharWidth) - confirm
       
   460 
       
   461 	/* member variables dealing with access to source data */
       
   462 	TPtrC8 iInputBufferPtr;						// set during a call to get more data
       
   463 	TInt iCurrentInputBufferLen;				// current length of the data block available
       
   464 	TInt iNextChar;								// read position in the data block
       
   465 	TInt iInputBytesRemaining;					// number of bytes remaining to read.
       
   466 	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
       
   467 	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
       
   468 	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
       
   469 	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
       
   470 	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
       
   471 	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
       
   472 
       
   473 	/* member variables dealing with chunked conversion into unicode output */
       
   474 	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
       
   475 	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
       
   476 	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
       
   477 	TBuf<1> iSpareChar;
       
   478 
       
   479 	/* member variables used when parsing a local file */
       
   480 	TDesC *iFileToParse;
       
   481 	RFs iRFs;
       
   482 	RFile iFileHandleToParse;
       
   483 
       
   484 	TBool iEndOfTag;
       
   485 	
       
   486 	/* member variables used in DoParseLoopL() */
       
   487 	TBool iOpened;
       
   488 	TBool iClosed;
       
   489 	CMDXMLElement* iNewElement;
       
   490 	CMDXMLElement* iParentElement;
       
   491 	HBufC* iText;
       
   492 	enum EParserStates
       
   493 		{
       
   494 		KInitFromFile,
       
   495 		KDetermineCharset,
       
   496 		KWaitingForData,
       
   497 		KParseData,
       
   498 		KSpanDataGap,
       
   499 		KFinished
       
   500 		};
       
   501 
       
   502 	EParserStates iState;
       
   503 	EParserStates iPreviousState;
       
   504 	TInt iSuspiciousCharacter;
       
   505 	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
       
   506 	TBool iPreserve;							// presumably the whitespace-preservation flag set by SetWhiteSpaceHandlingMode() - confirm
       
   507 
       
   508 	};
       
   509 
       
   510 #endif