|
// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// This file contains the declaration of the generic CMDXMLParser class
// which is responsible for creating a DOM structure
// from a given XML file.
//
//

/**
 @file
*/
|
23 |
|
24 #ifndef __GMXMLPARSER_H__ |
|
25 #define __GMXMLPARSER_H__ |
|
26 |
|
27 #include <e32std.h> |
|
28 #include <txtetext.h> |
|
29 #include <gmxmlconstants.h> |
|
30 #include <f32file.h> |
|
31 |
|
32 //forward reference |
|
33 class CMDXMLDocument; |
|
34 class CMDXMLEntityConverter; |
|
35 class CMDXMLElement; |
|
36 class MXMLDtd; |
|
37 |
|
38 |
|
39 |
|
class MMDXMLParserObserver
/** Abstract observer interface for notification when XML parsing is complete.

It should be implemented by users of CMDXMLParser.

@publishedAll
@released*/
	{
public:
	/**
	Callback used to inform a client of the parser that a parsing operation
	has completed.
	*/
	virtual void ParseFileCompleteL() = 0;
	};
|
53 |
|
54 class MMDXMLParserDataProvider |
|
55 /** Abstract data source interface for XML data source. |
|
56 |
|
57 The user of CMDXMLParser must build one of these to encapsulate the data source |
|
58 that they wish to parse. CMDXMLParser implements a file-based data source to |
|
59 implement the functionality of the ParseFile function. |
|
60 |
|
61 @publishedAll |
|
62 @released*/ |
|
63 { |
|
64 public: |
|
65 /** Status codes returned by GetData() implementations. */ |
|
66 enum TDataProviderResults |
|
67 { |
|
68 KMoreData, //< Returned by the interface implementation when it is returning more data. |
|
69 KDataStreamError, //< Returned by the interface when an unrecoverable error prevents obtaining more data. A recoverable error should be represented by KDataNotReady. |
|
70 KDataStreamEnd //< Returned by the interface when there is no more data to come. |
|
71 }; |
|
72 |
|
73 public: |
|
74 /** |
|
75 The XML Parser calls this on a specific data provider to get more data |
|
76 when required. |
|
77 |
|
78 Note that the TPtrC supplied may be used by the parser at any time |
|
79 between the return of this call and the next call that the parser |
|
80 makes out. |
|
81 |
|
82 Your data provider must not move the data pointed to until the |
|
83 parser has indicated that it's done with that block by asking for |
|
84 another. |
|
85 |
|
86 Ownership of the data pointed to remains with the data provider. |
|
87 |
|
88 |
|
89 General comments on efficiency |
|
90 ------------------------------ |
|
91 |
|
92 The parser is designed such that it processes the whole data block |
|
93 provided in one go. It will automatically become asynchronous when |
|
94 another block is required - the data provider only needs to supply |
|
95 data. |
|
96 |
|
97 Because of this design, it allows the data provider to indirectly |
|
98 control the amount of processing time that will be needed |
|
99 in a single block. |
|
100 |
|
101 It is a good idea to balance the need for the fastest possible |
|
102 processing with the need for client application responsiveness by |
|
103 ensuring that the amount of data passed in a single block is not |
|
104 too large. However, it is worth bearing in mind that the parser |
|
105 will convert UTF8 data streams in blocks of 32 characters, and |
|
106 supplying blocks of smaller length than this will result in a |
|
107 slight loss of efficiency. |
|
108 |
|
109 @param aPtr On return, the data provided |
|
110 @param aStatus Asynchronous status to be completed by the function with a |
|
111 TDataProviderResults value |
|
112 */ |
|
113 virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0; |
|
114 /** |
|
115 Called to indicate that use of the data source is complete. |
|
116 */ |
|
117 virtual void Disconnect() = 0; |
|
118 }; |
|
119 |
|
120 class CMDXMLParserFileDataSource; |
|
121 |
|
122 class CMDXMLParser: public CActive |
|
123 /** Creates a DOM structure from a given XML file. |
|
124 |
|
125 The parsing operation is asynchronous and is initiated by a call to ParseFile(). |
|
126 On completion, the created DOM document can be retrieved through DetachXMLDoc(). |
|
127 |
|
128 Note the following ownership rules for the DOM document: |
|
129 |
|
130 1. calling DetachXMLDoc() transfers ownership of the document to the client |
|
131 |
|
132 2. if the parser is asked to parse a new file while it still owns an existing |
|
133 DOM document, it will delete the old document. |
|
134 |
|
135 @publishedAll |
|
136 @released |
|
137 */ |
|
138 { |
|
139 public: |
|
140 /** Allocates and constructs a new XML parser, specifying a DTD. |
|
141 |
|
142 @param aParserObserver XML parser observer |
|
143 @leave KErrNoMemory Out of memory |
|
144 @return New XML parser */ |
|
145 IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver); |
|
146 |
|
147 /** Allocates and constructs a new XML parser, specifying a DTD. |
|
148 |
|
149 @param aParserObserver XML parser observer |
|
150 @param aDtdRepresentation DTD validator |
|
151 @leave KErrNoMemory Out of memory |
|
152 @return New XML parser */ |
|
153 IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
154 |
|
155 /** Allocates and constructs a new XML parser, leaving the object on the cleanup |
|
156 stack. |
|
157 |
|
158 @param aParserObserver XML parser observer |
|
159 @leave KErrNoMemory Out of memory |
|
160 @return New XML parser */ |
|
161 IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver); |
|
162 |
|
163 /** Allocates and constructs a new XML parser, leaving the object on the cleanup |
|
164 stack. |
|
165 |
|
166 @param aParserObserver XML parser observer |
|
167 @param aDtdRepresentation DTD validator |
|
168 @leave KErrNoMemory Out of memory |
|
169 @return New XML parser */ |
|
170 IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
171 |
|
172 |
|
173 /** Destructor. */ |
|
174 IMPORT_C ~CMDXMLParser(); |
|
175 |
|
176 /** Gets the last error found by the parser. |
|
177 |
|
178 @return Error code |
|
179 */ |
|
180 IMPORT_C TInt Error() const; |
|
181 |
|
182 /** |
|
183 Get the severity of the most severe error found. |
|
184 @return the maximum error severity |
|
185 */ |
|
186 IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; |
|
187 |
|
188 /** Gets the created DOM. |
|
189 |
|
190 This should be called after the conclusion of the parser process. |
|
191 |
|
192 Note that the function sets the internal variable pointing to the document |
|
193 to NULL, so this function can only be called once per file parse. The caller |
|
194 takes ownership of the document, and must delete it when its use is complete. |
|
195 |
|
196 @return The created DOM */ |
|
197 IMPORT_C CMDXMLDocument* DetachXMLDoc(); |
|
198 |
|
199 /** Parses a specified XML file into a DOM object tree. |
|
200 |
|
201 @param aRFs File server session |
|
202 @param aFileToParse The file name to parse |
|
203 @return KErrNone if success or a file read error code */ |
|
204 IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse); |
|
205 |
|
206 IMPORT_C TInt ParseFile(RFile& aFileHandleToParse); |
|
207 |
|
208 /** Parses a specified XML Data Source into a DOM object tree. |
|
209 Use ParseSourceL() function in preference to ParseSource() |
|
210 @param aSource MMDXMLParserDataProvider pointer |
|
211 */ |
|
212 inline void ParseSource(MMDXMLParserDataProvider *aSource) |
|
213 { |
|
214 TRAP_IGNORE(ParseSourceL(aSource)); |
|
215 } |
|
216 |
|
217 /** Parses a specified XML Data Source into a DOM object tree. |
|
218 @param aSource MMDXMLParserDataProvider pointer |
|
219 */ |
|
220 IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource); |
|
221 |
|
222 /** Defines input stream character widths. */ |
|
223 enum TMDXMLParserInputCharWidth |
|
224 { |
|
225 EAscii = 0x01, //< ASCII |
|
226 EUnicode = 0x02 //<Unicode |
|
227 }; |
|
228 |
|
229 /** Sets the input stream character width. |
|
230 * |
|
231 * @param aWidth Character width for incoming stream. Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively). |
|
232 * |
|
233 */ |
|
234 IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth); |
|
235 |
|
236 //Defect fix for INC036136- Enable the use of custom entity converters in GMXML |
|
237 /** |
|
238 * Sets the entity converter to be used for parsing. |
|
239 * and take ownership of the passed entity converter |
|
240 * @param aEntityConverter the entity converter to be used. |
|
241 */ |
|
242 IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter); |
|
243 //End Defect fix for INC036136 |
|
244 |
|
245 /** |
|
246 Controls whether invalid elements and attributes are added to the DOM. |
|
247 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise. |
|
248 */ |
|
249 IMPORT_C void SetStoreInvalid(TBool aStoreInvalid); |
|
250 |
|
251 /** |
|
252 Controls whether whitespaces are handled by XML parser or by client. |
|
253 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise. |
|
254 */ |
|
255 IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve); |
|
256 |
|
257 public: // public functions used by other classes within the .dll, not for Export. |
|
258 /** Gets the entity converter. |
|
259 |
|
260 @return The entity converter */ |
|
261 CMDXMLEntityConverter* EntityConverter(); |
|
262 |
|
263 private: |
|
264 IMPORT_C virtual void DoCancel(); |
|
265 |
|
266 /* |
|
267 * RunL function inherited from CActive base class - carries out the actual parsing. |
|
268 * @leave can Leave due to OOM |
|
269 */ |
|
270 virtual void RunL(); |
|
271 |
|
272 /* |
|
273 * Helper function that does the parsing - called from inside RunL |
|
274 */ |
|
275 TBool DoParseLoopL(); |
|
276 |
|
277 /* |
|
278 * RunError function inherited from CActive base class - intercepts any Leave from |
|
279 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL |
|
280 */ |
|
281 IMPORT_C TInt RunError(TInt aError); |
|
282 |
|
283 /* |
|
284 * Constructors |
|
285 */ |
|
286 CMDXMLParser(MMDXMLParserObserver* aParserObserver); |
|
287 |
|
288 CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
289 |
|
290 /* |
|
291 * Called when a character is read in and found to bo outside of an element tag |
|
292 */ |
|
293 virtual void HandleTextL(TDes& aChar); |
|
294 |
|
295 enum TGetCharReturn |
|
296 { |
|
297 KError = 0x00, // GetChar detected an error |
|
298 KCharReturned, // GetChar returned a character |
|
299 KWaitForChar // GetChar couldn't return a character this time, but might next time. |
|
300 }; |
|
301 |
|
302 /* |
|
303 * Fetch one character from the input file |
|
304 * @param aChar the returned character. |
|
305 * @return returns one of the values of TCharReturn |
|
306 */ |
|
307 TGetCharReturn GetChar(TDes& aChar); |
|
308 |
|
309 /* utility functions, called from GetChar to deal with the |
|
310 * 2 types of input stream |
|
311 */ |
|
312 TGetCharReturn GetDoubleByteChar(TDes& aChar); |
|
313 TGetCharReturn GetSingleByteChar(TDes& aChar); |
|
314 |
|
315 /* |
|
316 * Fetch some more data from the data provider |
|
317 * @return returns one of the values of TCharReturn |
|
318 */ |
|
319 void GetMoreData(); |
|
320 |
|
321 /* |
|
322 * @return Returns true if the current tag is a doctype tag and sets the |
|
323 * Document DocType member accordingly on the first pass of this function. |
|
324 */ |
|
325 TBool DocTypeL(); |
|
326 |
|
327 /* |
|
328 * creates a new processing instruction if necessary and adds to document |
|
329 * @return Returns true if the current tag is a processing instruction |
|
330 */ |
|
331 TBool ProcessingInstructionL(CMDXMLElement* aParentElement); |
|
332 |
|
333 /* |
|
334 * creates a new CDataSection if necessary and adds to document |
|
335 * @return Returns true if the current tag is a processing instruction |
|
336 */ |
|
337 TBool CDataSectionL(CMDXMLElement* aParentElement); |
|
338 TBool EndOfCDataSection(); |
|
339 |
|
340 /* |
|
341 * @return returns true if the current tag is a version id tag and sets the |
|
342 * Document Version member accordingly on the first pass of this function. |
|
343 */ |
|
344 TBool VersionIDL(); |
|
345 |
|
346 /* |
|
347 * creates a new comment if necessary and adds to document |
|
348 * @return returns true if the current tag is a comment tag |
|
349 */ |
|
350 TBool CommentL(CMDXMLElement* aParentElement); |
|
351 |
|
352 /* |
|
353 * Parse a start of element tag and create an element with attributes set. |
|
354 * @return Returns a pointer to the created element |
|
355 * @leave can Leave due to OOM |
|
356 */ |
|
357 virtual CMDXMLElement* ParseStartTagL(); |
|
358 |
|
359 /* |
|
360 * Detects the type of a file - can be Unicode or UTF-8 |
|
361 */ |
|
362 TBool DetectFileType(); |
|
363 |
|
364 /* |
|
365 * Creates a generic or DTD-specific document object |
|
366 * @leave can Leave due to OOM |
|
367 */ |
|
368 virtual void CreateDocumentL(); |
|
369 |
|
370 /* |
|
371 * Sets iError to new errorcode if more serious than any error so far encountered |
|
372 */ |
|
373 IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity); |
|
374 |
|
375 /* |
|
376 * This function is used to parse the attributes. |
|
377 * @param aElement The element to which the attributes belong |
|
378 * @param aTagToParse The tag to be parsed |
|
379 * @return Returns KErrNone if both attribute name & value are valid |
|
380 * KErrXMLBadAttributeName if attribute name is invalid or KErrXMLBadAttributeValue is invalid |
|
381 * @leave can Leave due to OOM |
|
382 */ |
|
383 TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse); |
|
384 |
|
385 /** |
|
386 This function locates the next attribute in the tag. |
|
387 @param aTagToParse the tag to find the attribute in |
|
388 @return the offset of the next attribute |
|
389 */ |
|
390 TInt LocateNextAttribute(const TDesC& aTagToParse); |
|
391 |
|
392 /* |
|
393 * Parses an end tag. In fact, at this point the end tag must match |
|
394 * the tag name of the start tag. |
|
395 * @param aTagToParse Text of the end tag. |
|
396 * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch. |
|
397 */ |
|
398 TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse); |
|
399 |
|
400 TInt CheckForStartCData(const TDesC& aTextToCheck); |
|
401 TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind); |
|
402 |
|
403 /* |
|
404 * Second stage constructor |
|
405 */ |
|
406 void ConstructL(MXMLDtd* aDtdRepresentation); |
|
407 void AddTextL(CMDXMLElement* aParentElement); |
|
408 |
|
409 /* |
|
410 * Checks whether the end of this tag is in a CDataSection. |
|
411 * @param aDataToSearch The data to check |
|
412 * @return Returns ETrue if the tag contains an unclosed CDataSection |
|
413 */ |
|
414 TBool InCDataSection(TDesC& aDataToSearch); |
|
415 |
|
416 /* |
|
417 * Entity converts the sections of one attribute value that are not within a CDataSection. |
|
418 * @param aAttributeValue one attribute value |
|
419 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone |
|
420 */ |
|
421 TInt ParseSingleAttributeL(TDes& aAttributeValue); |
|
422 |
|
423 /* |
|
424 * Prepares this class for use on another file. |
|
425 * |
|
426 */ |
|
427 void PrepareForReuseL(); |
|
428 |
|
429 /** |
|
430 This should be called when parsing has been completed, before calling ParseFileCompleteL(). |
|
431 It checks for errors that can only be determined at the end of parsing, eg missing doctype or |
|
432 incomplete content. |
|
433 */ |
|
434 void CheckForErrors(); |
|
435 |
|
436 IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver); |
|
437 IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation); |
|
438 IMPORT_C void PlaceholderForRemovedExport3(); |
|
439 |
|
440 |
|
441 private: |
|
442 enum TPanicCode { ENullMemVarDataSource, |
|
443 ENullMemVarParserObserver, |
|
444 ENullMemVarXMLDoc, |
|
445 ENullMemVarElementTag, |
|
446 ENullParameterParentElement }; |
|
447 void Panic(TPanicCode aReason) const; |
|
448 |
|
449 private: |
|
450 MMDXMLParserObserver* iParserObserver; |
|
451 MXMLDtd* iDtdRepresentation; |
|
452 TInt iError; // Current error |
|
453 TXMLErrorCodeSeverity iSeverity; // ErrorCode severity |
|
454 CMDXMLDocument* iXMLDoc; // Document created by the parser |
|
455 CMDXMLEntityConverter* iEntityConverter; // Entity converter used by the parser |
|
456 HBufC* iElementTag; // Currently processed element tag |
|
457 TBool iDocTypeSet; |
|
458 TBool iVersionSet; |
|
459 TInt iBytesPerChar; |
|
460 |
|
461 /* member variables dealing with access to source data */ |
|
462 TPtrC8 iInputBufferPtr; // set during a call to get more data |
|
463 TInt iCurrentInputBufferLen; // current length of the data block available |
|
464 TInt iNextChar; // read position in the data block |
|
465 TInt iInputBytesRemaining; // number of bytes remaining to read. |
|
466 HBufC8 *iUTF8EdgeBuffer; // buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks |
|
467 HBufC8 *iBomBuffer; // buffer to hold data at the start of the stream so we may determine charset |
|
468 TInt iRequiredUTF8Bytes; // number of bytes required to complete the character held in the edge buffer |
|
469 TBool iUnicodeInputMisaligned; // Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries |
|
470 MMDXMLParserDataProvider* iDataSource; // XML Data Source being parsed. |
|
471 CMDXMLParserFileDataSource* iFileSource; // We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file. |
|
472 |
|
473 /* member variables dealing with chunked conversion into unicode output */ |
|
474 TBuf<32> iUnicodeConversion; // buffer to temporarily hold the results of conversion from UTF8 to Unicode |
|
475 TInt iUnicodeConversionLen; // number of characters stored in our intermediate buffer |
|
476 TInt iUnicodeReadPos; // next character to send from our intermediate buffer |
|
477 TBuf<1> iSpareChar; |
|
478 |
|
479 /* member variables used when parsing a local file */ |
|
480 TDesC *iFileToParse; |
|
481 RFs iRFs; |
|
482 RFile iFileHandleToParse; |
|
483 |
|
484 TBool iEndOfTag; |
|
485 |
|
486 /* member variables used in DoParseLoopL() */ |
|
487 TBool iOpened; |
|
488 TBool iClosed; |
|
489 CMDXMLElement* iNewElement; |
|
490 CMDXMLElement* iParentElement; |
|
491 HBufC* iText; |
|
492 enum EParserStates |
|
493 { |
|
494 KInitFromFile, |
|
495 KDetermineCharset, |
|
496 KWaitingForData, |
|
497 KParseData, |
|
498 KSpanDataGap, |
|
499 KFinished |
|
500 }; |
|
501 |
|
502 EParserStates iState; |
|
503 EParserStates iPreviousState; |
|
504 TInt iSuspiciousCharacter; |
|
505 TBool iStoreInvalid; // controls whether invalid elements and attributes are stored in the DOM. |
|
506 TBool iPreserve; |
|
507 |
|
508 }; |
|
509 |
|
510 #endif |