|
// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
// which accompanies this distribution, and is available
// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// This file contains the declaration of the generic CMDXMLParser class
// which is responsible for creating a DOM structure
// from a given XML file.
//
//
|
19 |
|
20 |
|
21 |
|
/**
 @file
*/
|
25 |
|
26 #ifndef __GMXMLPARSER_H__ |
|
27 #define __GMXMLPARSER_H__ |
|
28 |
|
29 #include <e32std.h> |
|
30 #include <eikenv.h> |
|
31 #include <gmxmlconstants.h> |
|
32 |
|
// Forward declarations: this header only refers to these types through
// pointers and references, so their full definitions are not needed here.
class CMDXMLDocument;
class CMDXMLEntityConverter;
class CMDXMLElement;
class MXMLDtd;
|
38 |
|
39 |
|
40 |
|
class MMDXMLParserObserver
/** Abstract observer interface through which a client is told that XML
parsing has completed.

Users of CMDXMLParser are expected to implement this interface and pass the
implementation to the parser when it is created.

@publishedAll
@released*/
    {
public:
    /**
    Callback invoked to inform a client of the parser that a parsing
    operation has completed.

    The trailing 'L' follows the Symbian naming convention for functions
    that may leave.
    */
    virtual void ParseFileCompleteL() = 0;
    };
|
54 |
|
55 class MMDXMLParserDataProvider |
|
56 /** Abstract data source interface for XML data source. |
|
57 |
|
58 The user of CMDXMLParser must build one of these to encapsulate the data source |
|
59 that they wish to parse. CMDXMLParser implements a file-based data source to |
|
60 implement the functionality of the ParseFile function. |
|
61 |
|
62 @publishedAll |
|
63 @released*/ |
|
64 { |
|
65 public: |
|
66 /** Status codes returned by GetData() implementations. */ |
|
67 enum TDataProviderResults |
|
68 { |
|
69 KMoreData, ///< Returned by the interface implementation when it is returning more data. |
|
70 KDataStreamError, ///< Returned by the interface when an unrecoverable error prevents obtaining more data. A recoverable error should be represented by KDataNotReady. |
|
71 KDataStreamEnd ///< Returned by the interface when there is no more data to come. |
|
72 }; |
|
73 |
|
74 public: |
|
75 /** |
|
76 The XML Parser calls this on a specific data provider to get more data |
|
77 when required. |
|
78 |
|
79 Note that the TPtrC supplied may be used by the parser at any time |
|
80 between the return of this call and the next call that the parser |
|
81 makes out. |
|
82 |
|
83 Your data provider must not move the data pointed to until the |
|
84 parser has indicated that it's done with that block by asking for |
|
85 another. |
|
86 |
|
87 Ownership of the data pointed to remains with the data provider. |
|
88 |
|
89 |
|
90 General comments on efficiency |
|
91 ------------------------------ |
|
92 |
|
93 The parser is designed such that it processes the whole data block |
|
94 provided in one go. It will automatically become asynchronous when |
|
95 another block is required - the data provider only needs to supply |
|
96 data. |
|
97 |
|
98 Because of this design, it allows the data provider to indirectly |
|
99 control the amount of processing time that will be needed |
|
100 in a single block. |
|
101 |
|
102 It is a good idea to balance the need for the fastest possible |
|
103 processing with the need for client application responsiveness by |
|
104 ensuring that the amount of data passed in a single block is not |
|
105 too large. However, it is worth bearing in mind that the parser |
|
106 will convert UTF8 data streams in blocks of 32 characters, and |
|
107 supplying blocks of smaller length than this will result in a |
|
108 slight loss of efficiency. |
|
109 |
|
110 @param aPtr On return, the data provided |
|
111 @param aStatus Asynchronous status to be completed by the function with a |
|
112 TDataProviderResults value |
|
113 */ |
|
114 virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0; |
|
115 /** |
|
116 Called to indicate that use of the data source is complete. |
|
117 */ |
|
118 virtual void Disconnect() = 0; |
|
119 }; |
|
120 |
|
// Forward declaration: this header only holds a pointer to the file data source.
class CMDXMLParserFileDataSource;
|
122 |
|
123 class CMDXMLParser: public CActive |
|
124 /** Creates a DOM structure from a given XML file. |
|
125 |
|
126 The parsing operation is asynchronous and is initiated by a call to ParseFile(). |
|
127 On completion, the created DOM document can be retrieved through DetachXMLDoc(). |
|
128 |
|
129 Note the following ownership rules for the DOM document: |
|
130 |
|
131 1. calling DetachXMLDoc() transfers ownership of the document to the client |
|
132 |
|
133 2. if the parser is asked to parse a new file while it still owns an existing |
|
134 DOM document, it will delete the old document. |
|
135 |
|
136 @publishedAll |
|
137 @released |
|
138 */ |
|
139 { |
|
140 public: |
|
141 /** Allocates and constructs a new XML parser, specifying a DTD. |
|
142 |
|
143 @param aParserObserver XML parser observer |
|
144 @leave KErrNoMemory Out of memory |
|
145 @return New XML parser */ |
|
146 IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver); |
|
147 |
|
148 /** Allocates and constructs a new XML parser, specifying a DTD. |
|
149 |
|
150 @param aParserObserver XML parser observer |
|
151 @param aDtdRepresentation DTD validator |
|
152 @leave KErrNoMemory Out of memory |
|
153 @return New XML parser */ |
|
154 IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
155 |
|
156 /** Allocates and constructs a new XML parser, leaving the object on the cleanup |
|
157 stack. |
|
158 |
|
159 @param aParserObserver XML parser observer |
|
160 @leave KErrNoMemory Out of memory |
|
161 @return New XML parser */ |
|
162 IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver); |
|
163 |
|
164 /** Allocates and constructs a new XML parser, leaving the object on the cleanup |
|
165 stack. |
|
166 |
|
167 @param aParserObserver XML parser observer |
|
168 @param aDtdRepresentation DTD validator |
|
169 @leave KErrNoMemory Out of memory |
|
170 @return New XML parser */ |
|
171 IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
172 |
|
173 |
|
174 /** Destructor. */ |
|
175 IMPORT_C ~CMDXMLParser(); |
|
176 |
|
177 /** Gets the last error found by the parser. |
|
178 |
|
179 @return Error code |
|
180 */ |
|
181 IMPORT_C TInt Error() const; |
|
182 |
|
183 /** |
|
184 Get the severity of the most severe error found. |
|
185 @return the maximum error severity |
|
186 */ |
|
187 IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; |
|
188 |
|
189 /** Gets the created DOM. |
|
190 |
|
191 This should be called after the conclusion of the parser process. |
|
192 |
|
193 Note that the function sets the internal variable pointing to the document |
|
194 to NULL, so this function can only be called once per file parse. The caller |
|
195 takes ownership of the document, and must delete it when its use is complete. |
|
196 |
|
197 @return The created DOM */ |
|
198 IMPORT_C CMDXMLDocument* DetachXMLDoc(); |
|
199 |
|
200 /** Parses a specified XML file into a DOM object tree. |
|
201 |
|
202 @param aRFs File server session |
|
203 @param aFileToParse The file name to parse |
|
204 @return KErrNone if success or a file read error code */ |
|
205 IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse); |
|
206 |
|
207 IMPORT_C TInt ParseFile(RFile& aFileHandleToParse); |
|
208 |
|
209 /** Parses a specified XML Data Source into a DOM object tree. |
|
210 Use ParseSourceL() function in preference to ParseSource() |
|
211 @param aSource MMDXMLParserDataProvider pointer |
|
212 */ |
|
213 inline void ParseSource(MMDXMLParserDataProvider *aSource) |
|
214 { |
|
215 TRAP_IGNORE(ParseSourceL(aSource)); |
|
216 } |
|
217 |
|
218 /** Parses a specified XML Data Source into a DOM object tree. |
|
219 @param aSource MMDXMLParserDataProvider pointer |
|
220 */ |
|
221 IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource); |
|
222 |
|
223 /** Defines input stream character widths. */ |
|
224 enum TMDXMLParserInputCharWidth |
|
225 { |
|
226 EAscii = 0x01, ///< ASCII |
|
227 EUnicode = 0x02 ///<Unicode |
|
228 }; |
|
229 |
|
230 /** Sets the input stream character width. |
|
231 * |
|
232 * @param aWidth Character width for incoming stream. Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively). |
|
233 * |
|
234 */ |
|
235 IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth); |
|
236 |
|
237 //Defect fix for INC036136- Enable the use of custom entity converters in GMXML |
|
238 /** |
|
239 * Sets the entity converter to be used for parsing. |
|
240 * and take ownership of the passed entity converter |
|
241 * @param aEntityConverter the entity converter to be used. |
|
242 */ |
|
243 IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter); |
|
244 //End Defect fix for INC036136 |
|
245 |
|
246 /** |
|
247 Controls whether invalid elements and attributes are added to the DOM. |
|
248 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise. |
|
249 */ |
|
250 IMPORT_C void SetStoreInvalid(TBool aStoreInvalid); |
|
251 |
|
252 /** |
|
253 Controls whether whitespaces are handled by XML parser or by client. |
|
254 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise. |
|
255 */ |
|
256 IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve); |
|
257 |
|
258 public: // public functions used by other classes within the .dll, not for Export. |
|
259 /** Gets the entity converter. |
|
260 |
|
261 @return The entity converter */ |
|
262 CMDXMLEntityConverter* EntityConverter(); |
|
263 |
|
264 private: |
|
265 IMPORT_C virtual void DoCancel(); |
|
266 |
|
267 /* |
|
268 * RunL function inherited from CActive base class - carries out the actual parsing. |
|
269 * @leave can Leave due to OOM |
|
270 */ |
|
271 virtual void RunL(); |
|
272 |
|
273 /* |
|
274 * Helper function that does the parsing - called from inside RunL |
|
275 */ |
|
276 TBool DoParseLoopL(); |
|
277 |
|
278 /* |
|
279 * RunError function inherited from CActive base class - intercepts any Leave from |
|
280 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL |
|
281 */ |
|
282 IMPORT_C TInt RunError(TInt aError); |
|
283 |
|
284 /* |
|
285 * Constructors |
|
286 */ |
|
287 CMDXMLParser(MMDXMLParserObserver* aParserObserver); |
|
288 |
|
289 CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
290 |
|
291 /* |
|
292 * Called when a character is read in and found to bo outside of an element tag |
|
293 */ |
|
294 virtual void HandleTextL(TDes& aChar); |
|
295 |
|
296 enum TGetCharReturn |
|
297 { |
|
298 KError = 0x00, // GetChar detected an error |
|
299 KCharReturned, // GetChar returned a character |
|
300 KWaitForChar // GetChar couldn't return a character this time, but might next time. |
|
301 }; |
|
302 |
|
303 /* |
|
304 * Fetch one character from the input file |
|
305 * @param aChar the returned character. |
|
306 * @return returns one of the values of TCharReturn |
|
307 */ |
|
308 TGetCharReturn GetChar(TDes& aChar); |
|
309 |
|
310 /* utility functions, called from GetChar to deal with the |
|
311 * 2 types of input stream |
|
312 */ |
|
313 TGetCharReturn GetDoubleByteChar(TDes& aChar); |
|
314 TGetCharReturn GetSingleByteChar(TDes& aChar); |
|
315 |
|
316 /* |
|
317 * Fetch some more data from the data provider |
|
318 * @return returns one of the values of TCharReturn |
|
319 */ |
|
320 void GetMoreData(); |
|
321 |
|
322 /* |
|
323 * @return Returns true if the current tag is a doctype tag and sets the |
|
324 * Document DocType member accordingly on the first pass of this function. |
|
325 */ |
|
326 TBool DocTypeL(); |
|
327 |
|
328 /* |
|
329 * creates a new processing instruction if necessary and adds to document |
|
330 * @return Returns true if the current tag is a processing instruction |
|
331 */ |
|
332 TBool ProcessingInstructionL(CMDXMLElement* aParentElement); |
|
333 |
|
334 /* |
|
335 * creates a new CDataSection if necessary and adds to document |
|
336 * @return Returns true if the current tag is a processing instruction |
|
337 */ |
|
338 TBool CDataSectionL(CMDXMLElement* aParentElement); |
|
339 TBool EndOfCDataSection(); |
|
340 |
|
341 /* |
|
342 * @return returns true if the current tag is a version id tag and sets the |
|
343 * Document Version member accordingly on the first pass of this function. |
|
344 */ |
|
345 TBool VersionIDL(); |
|
346 |
|
347 /* |
|
348 * creates a new comment if necessary and adds to document |
|
349 * @return returns true if the current tag is a comment tag |
|
350 */ |
|
351 TBool CommentL(CMDXMLElement* aParentElement); |
|
352 |
|
353 /* |
|
354 * Parse a start of element tag and create an element with attributes set. |
|
355 * @return Returns a pointer to the created element |
|
356 * @leave can Leave due to OOM |
|
357 */ |
|
358 virtual CMDXMLElement* ParseStartTagL(); |
|
359 |
|
360 /* |
|
361 * Detects the type of a file - can be Unicode or UTF-8 |
|
362 */ |
|
363 TBool DetectFileType(); |
|
364 |
|
365 /* |
|
366 * Creates a generic or DTD-specific document object |
|
367 * @leave can Leave due to OOM |
|
368 */ |
|
369 virtual void CreateDocumentL(); |
|
370 |
|
371 /* |
|
372 * Sets iError to new errorcode if more serious than any error so far encountered |
|
373 */ |
|
374 IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity); |
|
375 |
|
376 /* |
|
377 * This function is used to parse the attributes. |
|
378 * @param aElement The element to which the attributes belong |
|
379 * @param aTagToParse The tag to be parsed |
|
380 * @return Returns KErrNone if both attribute name & value are valid |
|
381 * KErrXMLBadAttributeName if attribute name is invalid or KErrXMLBadAttributeValue is invalid |
|
382 * @leave can Leave due to OOM |
|
383 */ |
|
384 TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse); |
|
385 |
|
386 /** |
|
387 This function locates the next attribute in the tag. |
|
388 @param aTagToParse the tag to find the attribute in |
|
389 @return the offset of the next attribute |
|
390 */ |
|
391 TInt LocateNextAttribute(const TDesC& aTagToParse); |
|
392 |
|
393 /* |
|
394 * Parses an end tag. In fact, at this point the end tag must match |
|
395 * the tag name of the start tag. |
|
396 * @param aTagToParse Text of the end tag. |
|
397 * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch. |
|
398 */ |
|
399 TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse); |
|
400 |
|
401 TInt CheckForStartCData(const TDesC& aTextToCheck); |
|
402 TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind); |
|
403 |
|
404 /* |
|
405 * Second stage constructor |
|
406 */ |
|
407 void ConstructL(MXMLDtd* aDtdRepresentation); |
|
408 void AddTextL(CMDXMLElement* aParentElement); |
|
409 |
|
410 /* |
|
411 * Checks whether the end of this tag is in a CDataSection. |
|
412 * @param aDataToSearch The data to check |
|
413 * @return Returns ETrue if the tag contains an unclosed CDataSection |
|
414 */ |
|
415 TBool InCDataSection(TDesC& aDataToSearch); |
|
416 |
|
417 /* |
|
418 * Entity converts the sections of one attribute value that are not within a CDataSection. |
|
419 * @param aAttributeValue one attribute value |
|
420 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone |
|
421 */ |
|
422 TInt ParseSingleAttributeL(TDes& aAttributeValue); |
|
423 |
|
424 /* |
|
425 * Prepares this class for use on another file. |
|
426 * |
|
427 */ |
|
428 void PrepareForReuseL(); |
|
429 |
|
430 /** |
|
431 This should be called when parsing has been completed, before calling ParseFileCompleteL(). |
|
432 It checks for errors that can only be determined at the end of parsing, eg missing doctype or |
|
433 incomplete content. |
|
434 */ |
|
435 void CheckForErrors(); |
|
436 |
|
437 IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver); |
|
438 IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
439 IMPORT_C void PlaceholderForRemovedExport3(); |
|
440 |
|
441 |
|
442 private: |
|
443 enum TPanicCode { ENullMemVarDataSource, |
|
444 ENullMemVarParserObserver, |
|
445 ENullMemVarXMLDoc, |
|
446 ENullMemVarElementTag, |
|
447 ENullParameterParentElement }; |
|
448 void Panic(TPanicCode aReason) const; |
|
449 |
|
450 private: |
|
451 MMDXMLParserObserver* iParserObserver; |
|
452 MXMLDtd* iDtdRepresentation; |
|
453 TInt iError; // Current error |
|
454 TXMLErrorCodeSeverity iSeverity; // ErrorCode severity |
|
455 CMDXMLDocument* iXMLDoc; // Document created by the parser |
|
456 CMDXMLEntityConverter* iEntityConverter; // Entity converter used by the parser |
|
457 HBufC* iElementTag; // Currently processed element tag |
|
458 TBool iDocTypeSet; |
|
459 TBool iVersionSet; |
|
460 TInt iBytesPerChar; |
|
461 |
|
462 /* member variables dealing with access to source data */ |
|
463 TPtrC8 iInputBufferPtr; // set during a call to get more data |
|
464 TInt iCurrentInputBufferLen; // current length of the data block available |
|
465 TInt iNextChar; // read position in the data block |
|
466 TInt iInputBytesRemaining; // number of bytes remaining to read. |
|
467 HBufC8 *iUTF8EdgeBuffer; // buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks |
|
468 HBufC8 *iBomBuffer; // buffer to hold data at the start of the stream so we may determine charset |
|
469 TInt iRequiredUTF8Bytes; // number of bytes required to complete the character held in the edge buffer |
|
470 TBool iUnicodeInputMisaligned; // Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries |
|
471 MMDXMLParserDataProvider* iDataSource; // XML Data Source being parsed. |
|
472 CMDXMLParserFileDataSource* iFileSource; // We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file. |
|
473 |
|
474 /* member variables dealing with chunked conversion into unicode output */ |
|
475 TBuf<32> iUnicodeConversion; // buffer to temporarily hold the results of conversion from UTF8 to Unicode |
|
476 TInt iUnicodeConversionLen; // number of characters stored in our intermediate buffer |
|
477 TInt iUnicodeReadPos; // next character to send from our intermediate buffer |
|
478 TBuf<1> iSpareChar; |
|
479 |
|
480 /* member variables used when parsing a local file */ |
|
481 TDesC *iFileToParse; |
|
482 RFs iRFs; |
|
483 RFile iFileHandleToParse; |
|
484 |
|
485 TBool iEndOfTag; |
|
486 |
|
487 /* member variables used in DoParseLoopL() */ |
|
488 TBool iOpened; |
|
489 TBool iClosed; |
|
490 CMDXMLElement* iNewElement; |
|
491 CMDXMLElement* iParentElement; |
|
492 HBufC* iText; |
|
493 enum EParserStates |
|
494 { |
|
495 KInitFromFile, |
|
496 KDetermineCharset, |
|
497 KWaitingForData, |
|
498 KParseData, |
|
499 KSpanDataGap, |
|
500 KFinished |
|
501 }; |
|
502 |
|
503 EParserStates iState; |
|
504 EParserStates iPreviousState; |
|
505 TInt iSuspiciousCharacter; |
|
506 TBool iStoreInvalid; // controls whether invalid elements and attributes are stored in the DOM. |
|
507 TBool iPreserve; |
|
508 |
|
509 }; |
|
510 |
|
511 #endif |