|
// Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// HTML parser header
// @publishedAll
// @released
//
//
|
19 |
|
20 #ifndef _HTMLPARSER_H |
|
21 #define _HTMLPARSER_H |
|
22 |
|
23 #include <xml/parserfeature.h> |
|
24 #include <xml/plugins/parserinitparams.h> |
|
25 |
|
// Forward declarations: this header only uses these types through pointers
// (CDesC8ArrayFlat*) and references (CStringParser&), so their full
// definitions are not required here.
class CDesC8ArrayFlat;
class CStringParser;
|
28 |
|
29 class CHtmlParser : public CBase |
|
30 { |
|
31 public: |
|
32 enum TTagState |
|
33 { |
|
34 ENoTag, |
|
35 EOpeningTag, |
|
36 EClosingTag, |
|
37 EText |
|
38 }; |
|
39 |
|
40 enum TParserState |
|
41 { |
|
42 EInitialState, |
|
43 ESeeStartOfTag, |
|
44 ESeeEndOfTag, |
|
45 ESeeClosingTagIndicator, |
|
46 ESeeEquals, |
|
47 ESeeExclamationMark, |
|
48 ESeeEndOfTagWhileReadingJavascript, |
|
49 EReadingOpeningTag, |
|
50 EReadingClosingTag, |
|
51 EReadingAttribute, |
|
52 EReadingAttributeValue, |
|
53 EReadingAttributeValueWithinApos, |
|
54 EReadingAttributeValueWithinQuot, |
|
55 EReadingText, |
|
56 EReadingJavascript, |
|
57 EFinishedReadingTag, |
|
58 EFinishedReadingAttribute, |
|
59 EFinishedReadingAttributeValue |
|
60 }; |
|
61 |
|
62 static CHtmlParser* NewL ( Xml::MContentHandler& aContentHandler, RStringPool& aStringPool ); |
|
63 ~CHtmlParser (); |
|
64 |
|
65 void ParseL ( const TDesC8& aBuffer, TBool aLastChunk = EFalse ); |
|
66 void SetContentSink(class Xml::MContentHandler& ); |
|
67 |
|
68 private: |
|
69 CHtmlParser ( Xml::MContentHandler& aContentHandler, RStringPool& aStringPool ); |
|
70 void ConstructL (); |
|
71 |
|
72 void CreateTagInfoLC ( Xml::RTagInfo& aTagInfo, const TDesC8& aTag ); |
|
73 |
|
74 void ParseContentL ( const TDesC8& aContent, TBool aLastChunk = EFalse ); |
|
75 |
|
76 void ParseTaggedBufferL ( const TPtrC8& aTaggedBuffer, TBool aPartial = EFalse ); |
|
77 |
|
78 void ProcessPartialContentL ( CStringParser& aParser ); |
|
79 void AddToPartialContentL ( const TPtrC8& aContent, TBool aAppend = EFalse ); |
|
80 |
|
81 void ParseStartDocumentL ( CStringParser& aParser ); |
|
82 |
|
83 void ProcessTextL ( const TPtrC8& aText ); |
|
84 |
|
85 TBool InspectCurrentCharacter( TChar aChar ); |
|
86 |
|
87 void SeeWhiteSpaceCharacterInTag( TBool& aBool ); |
|
88 void SeeSpecialCharactersInTag( TChar aChar, TBool& aBool ); |
|
89 void SeeOtherCharactersInTag(); |
|
90 void SeeOtherCharactersNotInTag( TBool& aBool ); |
|
91 |
|
92 TBool CheckAndRemoveTagL ( const TPtrC8& aTag ); |
|
93 |
|
94 void CallStartElementL ( const TDesC8& aTag ); |
|
95 void CallEndElementL ( const TDesC8& aTag ); |
|
96 |
|
97 void CallStartDocumentL ( const TDesC8& aCharset ); |
|
98 |
|
99 TBool IsOptionalTagL ( const TDesC8& aTag ); |
|
100 void CheckAndProcessLastOptionalTagL ( const TDesC8& aTag ); |
|
101 TBool IsForbiddenTagL ( const TDesC8& aTag ); |
|
102 void CheckAndProcessForbiddenTagL ( const TDesC8& aTag ); |
|
103 |
|
104 void ExtractCharsetL ( const TDesC8& aContent, TPtrC8& aCharset, TBool& aXMLFound ); |
|
105 void ExtractCharsetValueL ( const TDesC8& aContent, const TDesC8& aSearchValue, TPtrC8& aCharset ); |
|
106 |
|
107 private: |
|
108 Xml::MContentHandler* iContentHandler; |
|
109 RStringPool& iStringPool; |
|
110 |
|
111 TTagState iTagState; |
|
112 TParserState iParserState; |
|
113 TBool iInTag; |
|
114 |
|
115 HBufC8* iPartialContent; // Content which is pending to parse. |
|
116 |
|
117 CDesC8ArrayFlat* iTagAttribute; |
|
118 CDesC8ArrayFlat* iTagAttributeValue; |
|
119 CDesC8ArrayFlat* iTagQueue; |
|
120 |
|
121 TBool iFirst; |
|
122 TBool iEndDocCalled; |
|
123 }; |
|
124 |
|
125 #endif // _HTMLPARSER_H |