epoc32/include/gmxmlparser.h
branchSymbian2
changeset 2 2fe1408b6811
child 4 837f303aceeb
equal deleted inserted replaced
1:666f914201fb 2:2fe1408b6811
       
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // This file contains the declaration of the generic CMDXMLParser class
       
    15 // which is responsible for creating a DOM structure
       
    16 // from a given XML file.
       
    17 // 
       
    18 //
       
    19 
       
    20 
       
    21 
       
    22 /**
       
    23  @file
       
    24 */
       
    25 
       
    26 #ifndef __GMXMLPARSER_H__
       
    27 #define __GMXMLPARSER_H__
       
    28 
       
    29 #include <e32std.h>
       
    30 #include <eikenv.h>
       
    31 #include <gmxmlconstants.h>
       
    32 
       
    33 //forward reference
       
    34 class CMDXMLDocument;
       
    35 class CMDXMLEntityConverter;
       
    36 class CMDXMLElement;
       
    37 class MXMLDtd;
       
    38 
       
    39 
       
    40 
       
     41 class MMDXMLParserObserver
       
     42 /** Abstract observer interface used to notify a client when XML parsing is complete.
       
     43 
       
     44 It should be implemented by users of CMDXMLParser.
       
     45 @publishedAll 
       
     46 @released*/
       
     47 	{
       
     48 public:
       
     49 	/**
       
     50 	Callback used to inform a client of the parser that a parsing operation has completed.
       
     51 	 */
       
     52 	virtual void ParseFileCompleteL() = 0;
       
     53 	};
       
    54 
       
     55 class MMDXMLParserDataProvider
       
     56 /** Abstract interface for an XML data source.
       
     57 
       
     58 The user of CMDXMLParser must build one of these to encapsulate the data source
       
     59 that they wish to parse.  CMDXMLParser implements a file-based data source to
       
     60 implement the functionality of the ParseFile function.
       
     61 
       
     62 @publishedAll 
       
     63 @released*/
       
     64 	{
       
     65 public:
       
     66 	/** Status codes with which GetData() implementations complete the request status. */
       
     67 	enum TDataProviderResults
       
     68 		{
       
     69 		KMoreData,		///< Used by the interface implementation when it is returning more data.
       
     70 		KDataStreamError,	///< Used by the interface when an unrecoverable error prevents obtaining more data.  A recoverable error should be represented by KDataNotReady.  NOTE(review): no KDataNotReady enumerator is declared in this enum - confirm the intended value.
       
     71 		KDataStreamEnd	///< Used by the interface when there is no more data to come.
       
     72 		};
       
     73 
       
     74 public:
       
     75 	/** 
       
     76 	The XML Parser calls this on a specific data provider to get more data
       
     77 	when required.
       
     78 
       
     79 	Note that the TPtrC8 supplied may be used by the parser at any time
       
     80 	between the return of this call and the next call that the parser
       
     81 	makes out.
       
     82 
       
     83 	Your data provider must not move the data pointed to until the
       
     84 	parser has indicated that it's done with that block by asking for
       
     85 	another.
       
     86 
       
     87 	Ownership of the data pointed to remains with the data provider.
       
     88 
       
     89 
       
     90 	General comments on efficiency
       
     91 	------------------------------
       
     92 
       
     93 	The parser is designed such that it processes the whole data block
       
     94 	provided in one go.  It will automatically become asynchronous when
       
     95 	another block is required - the data provider only needs to supply
       
     96 	data.
       
     97 
       
     98 	Because of this design, it allows the data provider to indirectly
       
     99 	control the amount of processing time that will be needed
       
    100 	in a single block.
       
    101 
       
    102 	It is a good idea to balance the need for the fastest possible 
       
    103 	processing with the need for client application responsiveness by
       
    104 	ensuring that the amount of data passed in a single block is not 
       
    105 	too large.  However, it is worth bearing in mind that the parser
       
    106 	will convert UTF8 data streams in blocks of 32 characters, and
       
    107 	supplying blocks of smaller length than this will result in a
       
    108 	slight loss of efficiency.
       
    109 
       
    110 	@param aPtr On return, the data provided
       
    111 	@param aStatus Asynchronous status to be completed by the function with a 
       
    112 	TDataProviderResults value
       
    113 	*/
       
    114 	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
       
    115 	/**
       
    116 	Called to indicate that use of the data source is complete.
       
    117 	*/
       
    118 	virtual void Disconnect() = 0;
       
    119 	};
       
   120 
       
   121 class CMDXMLParserFileDataSource;
       
   122 
       
    123 class CMDXMLParser: public CActive
       
    124 /** Creates a DOM structure from a given XML file.
       
    125 
       
    126 The parsing operation is asynchronous and is initiated by a call to ParseFile(). 
       
    127 On completion, the created DOM document can be retrieved through DetachXMLDoc().
       
    128 
       
    129 Note the following ownership rules for the DOM document:
       
    130 
       
    131 1. calling DetachXMLDoc() transfers ownership of the document to the client
       
    132 
       
    133 2. if the parser is asked to parse a new file while it still owns an existing 
       
    134 DOM document, it will delete the old document.
       
    135 
       
    136 @publishedAll
       
    137 @released
       
    138 */
       
    139 	{
       
    140 public:
       
    141 	/** Allocates and constructs a new XML parser.
       
    142 	
       
    143 	@param aParserObserver XML parser observer
       
    144 	@leave KErrNoMemory Out of memory
       
    145 	@return New XML parser */
       
    146 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
       
    147 
       
    148 	/** Allocates and constructs a new XML parser, specifying a DTD.
       
    149 	
       
    150 	@param aParserObserver XML parser observer
       
    151 	@param aDtdRepresentation DTD validator
       
    152 	@leave KErrNoMemory Out of memory
       
    153 	@return New XML parser */
       
    154 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
    155 
       
    156 	/** Allocates and constructs a new XML parser, leaving the object on the cleanup 
       
    157 	stack.
       
    158 	
       
    159 	@param aParserObserver XML parser observer
       
    160 	@leave KErrNoMemory Out of memory
       
    161 	@return New XML parser */
       
    162 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
       
    163 
       
    164 	/** Allocates and constructs a new XML parser, specifying a DTD and leaving the object on the cleanup 
       
    165 	stack.
       
    166 	
       
    167 	@param aParserObserver XML parser observer
       
    168 	@param aDtdRepresentation DTD validator
       
    169 	@leave KErrNoMemory Out of memory
       
    170 	@return New XML parser */
       
    171 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
    172 
       
    173 
       
    174 	/** Destructor. */
       
    175 	IMPORT_C ~CMDXMLParser();
       
    176 
       
    177 	/** Gets the last error found by the parser.
       
    178 	
       
    179 	@return Error code
       
    180 	 */
       
    181 	IMPORT_C TInt Error() const;
       
    182 
       
    183 	/**
       
    184 	 Gets the severity of the most severe error found.
       
    185 	 @return the maximum error severity
       
    186 	 */
       
    187 	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; 
       
    188 
       
    189 	/** Gets the created DOM.
       
    190 	
       
    191 	This should be called after the conclusion of the parser process.
       
    192 	
       
    193 	Note that the function sets the internal variable pointing to the document 
       
    194 	to NULL, so this function can only be called once per file parse. The caller 
       
    195 	takes ownership of the document, and must delete it when its use is complete.
       
    196 	
       
    197 	@return The created DOM */
       
    198 	IMPORT_C CMDXMLDocument* DetachXMLDoc();
       
    199 
       
    200 	/** Parses a specified XML file into a DOM object tree.
       
    201 	
       
    202 	@param aRFs File server session
       
    203 	@param aFileToParse The file name to parse
       
    204 	@return KErrNone if success or a file read error code */
       
    205 	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
       
    206 	/** Parses an XML file, identified by a file handle, into a DOM object tree. NOTE(review): the return value presumably mirrors the file-name overload (KErrNone or an error code) - confirm. */
       
    207 	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
       
    208 
       
    209 	/** Parses a specified XML Data Source into a DOM object tree.
       
    210 	Use ParseSourceL() in preference to ParseSource(): this wrapper traps and ignores any leave from ParseSourceL().
       
    211 	@param aSource MMDXMLParserDataProvider pointer 
       
    212 	*/
       
    213 	inline void ParseSource(MMDXMLParserDataProvider *aSource)
       
    214 		{
       
    215 		TRAP_IGNORE(ParseSourceL(aSource));
       
    216 		} 
       
    217 				
       
    218 	/** Parses a specified XML Data Source into a DOM object tree.	
       
    219 	@param aSource MMDXMLParserDataProvider pointer 
       
    220 	*/
       
    221 	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
       
    222 
       
    223 	/** Defines input stream character widths. */
       
    224 	enum TMDXMLParserInputCharWidth
       
    225 		{
       
    226 		EAscii = 0x01, ///< ASCII
       
    227 		EUnicode = 0x02 ///< Unicode
       
    228 		};
       
    229 	
       
    230 	/** Sets the input stream character width.
       
    231 	 *
       
    232 	 * @param aWidth Character width for incoming stream.  Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
       
    233 	 *
       
    234 	 */
       
    235 	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
       
    236 
       
    237 	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
       
    238 	/**
       
    239 	 * Sets the entity converter to be used for parsing
       
    240 	 * and takes ownership of the passed entity converter.
       
    241 	 * @param aEntityConverter the entity converter to be used.
       
    242 	 */
       
    243 	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
       
    244 	//End Defect fix for INC036136
       
    245 
       
    246 	/**
       
    247 	 Controls whether invalid elements and attributes are added to the DOM.
       
    248 	 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
       
    249 	 */
       
    250 	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
       
    251 	
       
    252 	/**
       
    253 	 Controls whether whitespace is handled by the XML parser or by the client.
       
    254 	 @param aPreserve ETrue if all whitespace should be preserved (handled by client), EFalse otherwise.
       
    255 	 */
       
    256 	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
       
    257 
       
    258 public: // public functions used by other classes within the .dll, not for Export.
       
    259 	/** Gets the entity converter.
       
    260 	
       
    261 	@return The entity converter */
       
    262 	CMDXMLEntityConverter* EntityConverter();
       
    263 
       
    264 private:
       
    265 	IMPORT_C virtual void DoCancel();
       
    266 
       
    267 	/*
       
    268 	 * RunL function inherited from CActive base class - carries out the actual parsing.
       
    269 	 * @leave can Leave due to OOM
       
    270 	 */
       
    271 	virtual void RunL();
       
    272 
       
    273 	/*
       
    274 	 * Helper function that does the parsing - called from inside RunL
       
    275 	 */
       
    276 	TBool DoParseLoopL();
       
    277 
       
    278 	/*
       
    279 	 * RunError function inherited from CActive base class - intercepts any Leave from
       
    280 	 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
       
    281 	 */
       
    282 	IMPORT_C TInt RunError(TInt aError);
       
    283 
       
    284 	/*
       
    285 	 * Constructors
       
    286 	 */
       
    287 	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
       
    288 
       
    289 	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
    290 
       
    291 	/*
       
    292 	 * Called when a character is read in and found to be outside of an element tag
       
    293 	 */
       
    294 	virtual void HandleTextL(TDes& aChar);
       
    295 
       
    296 	enum TGetCharReturn
       
    297 		{
       
    298 		KError = 0x00,			// GetChar detected an error
       
    299 		KCharReturned,	// GetChar returned a character
       
    300 		KWaitForChar	// GetChar couldn't return a character this time, but might next time.
       
    301 		};
       
    302 
       
    303 	/*
       
    304 	 * Fetch one character from the input file
       
    305 	 * @param aChar the returned character.
       
    306 	 * @return returns one of the values of TGetCharReturn
       
    307 	 */
       
    308 	TGetCharReturn GetChar(TDes& aChar);
       
    309 
       
    310 	/* utility functions, called from GetChar to deal with the
       
    311 	 * 2 types of input stream
       
    312 	 */
       
    313 	TGetCharReturn GetDoubleByteChar(TDes& aChar);
       
    314 	TGetCharReturn GetSingleByteChar(TDes& aChar);
       
    315 
       
    316 	/*
       
    317 	 * Fetch some more data from the data provider
       
    318 	 * No value is returned (the function is declared void).
       
    319 	 */
       
    320 	void GetMoreData();
       
    321 
       
    322 	/*
       
    323 	 * @return Returns true if the current tag is a doctype tag and sets the
       
    324 	 * Document DocType member accordingly on the first pass of this function.
       
    325 	 */
       
    326 	TBool DocTypeL();
       
    327 
       
    328 	/*
       
    329 	 * creates a new processing instruction if necessary and adds to document
       
    330 	 * @return Returns true if the current tag is a processing instruction
       
    331 	 */
       
    332 	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
       
    333 
       
    334 	/*
       
    335 	 * creates a new CDataSection if necessary and adds to document
       
    336 	 * @return Returns true if the current tag is a CDATA section (NOTE(review): inferred from the function name; confirm against the implementation)
       
    337 	 */
       
    338 	TBool CDataSectionL(CMDXMLElement* aParentElement);
       
    339 	TBool EndOfCDataSection();
       
    340 
       
    341 	/*
       
    342 	 * @return returns true if the current tag is a version id tag and sets the
       
    343 	 * Document Version member accordingly on the first pass of this function.
       
    344 	 */
       
    345 	TBool VersionIDL();
       
    346 
       
    347 	/*
       
    348 	 * creates a new comment if necessary and adds to document
       
    349 	 * @return returns true if the current tag is a comment tag
       
    350 	 */
       
    351 	TBool CommentL(CMDXMLElement* aParentElement);
       
    352 
       
    353 	/*
       
    354 	 * Parse a start of element tag and create an element with attributes set.
       
    355 	 * @return Returns a pointer to the created element
       
    356 	 * @leave can Leave due to OOM
       
    357 	 */
       
    358 	virtual CMDXMLElement* ParseStartTagL();
       
    359 
       
    360 	/*
       
    361 	 * Detects the type of a file - can be Unicode or UTF-8
       
    362 	 */
       
    363 	TBool DetectFileType();
       
    364 
       
    365 	/*
       
    366 	 * Creates a generic or DTD-specific document object
       
    367 	 * @leave can Leave due to OOM
       
    368 	 */
       
    369 	virtual void CreateDocumentL();
       
    370 
       
    371 	/*
       
    372 	 * Sets iError to new errorcode if more serious than any error so far encountered
       
    373 	 */
       
    374 	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
       
    375 
       
    376 	/*
       
    377 	 * This function is used to parse the attributes.
       
    378      * @param aElement The element to which the attributes belong
       
    379      * @param aTagToParse The tag to be parsed
       
    380      * @return Returns KErrNone if both attribute name & value are valid, 
       
    381 	 * KErrXMLBadAttributeName if the attribute name is invalid or KErrXMLBadAttributeValue if the attribute value is invalid
       
    382      * @leave can Leave due to OOM
       
    383 	 */
       
    384 	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
       
    385 
       
    386 	/** 
       
    387 	  This function locates the next attribute in the tag.
       
    388 	  @param aTagToParse the tag to find the attribute in
       
    389 	  @return the offset of the next attribute
       
    390 	 */
       
    391 	TInt LocateNextAttribute(const TDesC& aTagToParse);
       
    392 
       
    393     /*
       
    394      * Parses an end tag.  In fact, at this point the end tag must match
       
    395      * the tag name of the start tag.  
       
    396      * @param aTagToParse Text of the end tag.
       
    397      * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
       
    398      */
       
    399 	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
       
    400 
       
    401 	TInt CheckForStartCData(const TDesC& aTextToCheck);
       
    402 	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
       
    403 
       
    404 	/*
       
    405 	 * Second stage constructor
       
    406 	 */
       
    407 	void ConstructL(MXMLDtd* aDtdRepresentation);
       
    408 	void AddTextL(CMDXMLElement* aParentElement);
       
    409 
       
    410 	/*
       
    411 	 * Checks whether the end of this tag is in a CDataSection.
       
    412 	 * @param aDataToSearch The data to check
       
    413 	 * @return Returns ETrue if the tag contains an unclosed CDataSection
       
    414 	 */
       
    415 	TBool InCDataSection(TDesC& aDataToSearch);
       
    416 
       
    417 	/*
       
    418 	 * Entity converts the sections of one attribute value that are not within a CDataSection.
       
    419 	 * @param aAttributeValue one attribute value
       
    420 	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
       
    421 	 */
       
    422 	TInt ParseSingleAttributeL(TDes& aAttributeValue);
       
    423 
       
    424 	/*
       
    425 	 * Prepares this class for use on another file.
       
    426 	 *
       
    427 	 */
       
    428 	void PrepareForReuseL();
       
    429 
       
    430 	/**
       
    431 	 This should be called when parsing has been completed, before calling ParseFileCompleteL().
       
    432 	 It checks for errors that can only be determined at the end of parsing, eg missing doctype or 
       
    433 	 incomplete content.
       
    434 	 */
       
    435 	void CheckForErrors();
       
    436 
       
    437 	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
       
    438 	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
       
    439 	IMPORT_C void PlaceholderForRemovedExport3();
       
    440 
       
    441 
       
    442 private:
       
    443 	enum TPanicCode {	ENullMemVarDataSource, 
       
    444 						ENullMemVarParserObserver, 
       
    445 						ENullMemVarXMLDoc, 
       
    446 						ENullMemVarElementTag, 
       
    447 						ENullParameterParentElement };
       
    448 	void Panic(TPanicCode aReason) const;
       
    449 
       
    450 private:
       
    451 	MMDXMLParserObserver* iParserObserver;
       
    452 	MXMLDtd* iDtdRepresentation;
       
    453 	TInt iError;								// Current error
       
    454 	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
       
    455 	CMDXMLDocument* iXMLDoc;					// Document created by the parser
       
    456 	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
       
    457 	HBufC* iElementTag;							// Currently processed element tag
       
    458 	TBool iDocTypeSet;
       
    459 	TBool iVersionSet;
       
    460 	TInt iBytesPerChar;							// NOTE(review): presumably derived from the source character width (see SetSourceCharacterWidth) - confirm
       
    461 
       
    462 	/* member variables dealing with access to source data */
       
    463 	TPtrC8 iInputBufferPtr;						// set during a call to get more data
       
    464 	TInt iCurrentInputBufferLen;				// current length of the data block available
       
    465 	TInt iNextChar;								// read position in the data block
       
    466 	TInt iInputBytesRemaining;					// number of bytes remaining to read.
       
    467 	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
       
    468 	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
       
    469 	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
       
    470 	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
       
    471 	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
       
    472 	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
       
    473 
       
    474 	/* member variables dealing with chunked conversion into unicode output */
       
    475 	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
       
    476 	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
       
    477 	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
       
    478 	TBuf<1> iSpareChar;
       
    479 
       
    480 	/* member variables used when parsing a local file */
       
    481 	TDesC *iFileToParse;
       
    482 	RFs iRFs;
       
    483 	RFile iFileHandleToParse;
       
    484 
       
    485 	TBool iEndOfTag;
       
    486 	
       
    487 	/* member variables used in DoParseLoopL() */
       
    488 	TBool iOpened;
       
    489 	TBool iClosed;
       
    490 	CMDXMLElement* iNewElement;
       
    491 	CMDXMLElement* iParentElement;
       
    492 	HBufC* iText;
       
    493 	enum EParserStates
       
    494 		{
       
    495 		KInitFromFile,
       
    496 		KDetermineCharset,
       
    497 		KWaitingForData,
       
    498 		KParseData,
       
    499 		KSpanDataGap,
       
    500 		KFinished
       
    501 		};
       
    502 
       
    503 	EParserStates iState;
       
    504 	EParserStates iPreviousState;
       
    505 	TInt iSuspiciousCharacter;
       
    506 	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
       
    507 	TBool iPreserve;							// NOTE(review): presumably the whitespace-preservation flag set by SetWhiteSpaceHandlingMode() - confirm
       
    508 
       
    509 	};
       
   510 
       
   511 #endif