|
1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #ifndef __CHTTPMESSAGEPARSER_H__ |
|
17 #define __CHTTPMESSAGEPARSER_H__ |
|
18 |
|
19 #include <e32base.h> |
|
20 |
|
21 #include "thttpdataparser.h" |
|
22 #include "mhttpbuffersupplier.h" |
|
23 |
|
24 class MHttpMessageParserObserver; |
|
25 |
|
26 class CHttpMessageParser : public CActive, |
|
27 public MHttpBufferSupplier |
|
28 /** |
|
29 The CHttpMessageParser class provides parsing functionality for HTTP/1.1 |
|
30 messages as defined in RFC2616. The HTTP/1.1 protocol specifies that the CR |
|
31 LF sequence is the end of line (eol) marker for all protocol elements except |
|
32 the entity-body. The parser tolerates some deviation from this, as the RFC |
|
33 recommends in section 19.3 - a LF without the leading CR can also be parsed |
|
34 as an eol marker. |
|
35 |
|
36 The parser does not process any header fields. Therefore it needs to be told |
|
37 if an entity body is expected. If one is then the parser needs to know if it |
|
38 is chunk-encoded and if not what the expected size is. |
|
39 |
|
40 With a chunked entity body the parser de-chunks the body data - the observer |
|
41 does not need to parse a chunked body. |
|
42 |
|
43 The parser needs an observer (MHttpMessageParserObserver). The observer |
|
44 supplies the buffers containing the unparsed http message data. The observer |
|
45 must ensure that the buffers it supplies to the parser remain valid until the |
|
46 parser notifies it that it has parsed the entire contents of that buffer. |
|
47 |
|
48 The parser is initially in the Idle state waiting to be notified of available |
|
49 meesage data. When it is notified, the parser obtains the data and moves |
|
50 into the ParsingStartLine state. |
|
51 |
|
52 Whichever state it is in, the parser will notify the observer when it has |
|
53 parsed the entire contents of the current data buffer. It will then wait to |
|
54 be told that there is more data available. When it has been given the next |
|
55 part of the message data it continues parsing. |
|
56 |
|
57 In the ParsingStartLine state the parser looks for the first eol marker. |
|
58 This delimits the start-line. Once found the observer is notified and the |
|
59 parser moves into the ParsingHeaders state. |
|
60 |
|
61 In the ParsingHeaders state the parser searches for header field lines. |
|
62 These lines are delimited by eol markers. In HTTP/1.1 it is possible to fold |
|
63 header field values over multiple lines if the continuation line is followed |
|
64 by a SP or HT. In this case the eol marker is part of LWS and is ignored. |
|
65 Also any eol markers that are part of LWS are omitted from the header field |
|
66 data given to the observer. |
|
67 |
|
68 The header field section is completed once an empty line is parsed. The |
|
69 observer is informed and it should supply the parser with the necessary |
|
70 info about the entity body. If no entity body is expected then the parser |
|
71 moves to the MessageComplete state. If a non-encoded entity body is expected |
|
72 then the parser moves to the ReadingBodyData state. If a chunk-encoded |
|
73 entity body is expected then the parser moves to the ParsingChunkSize state. |
|
74 |
|
75 In the ReadingBodyData state the parser extracts the specified length of |
|
76 entity body data. The observer is notified of each chunk body data parsed. |
|
77 It is possible for the entity body data to be received in several parts due |
|
78 to segmentation at the transport layer, Once all the entity body data has |
|
79 been received the parser notifies the observer that the entity body is |
|
80 complete. The parser moves to MessageComplete state. |
|
81 |
|
82 Note that although the parser will have notified the observer that it has |
|
83 finished with the current data packet that held some entity body data, the |
|
84 observer should only release that data packet once it itself has no more use |
|
85 for the entity body data it has been given. Failure to do this will result |
|
86 in the buffer containing the received entity body chunks being invalid. The |
|
87 same is true when receiving a chunked entity body. |
|
88 |
|
89 In the ParsingChunkSize the parser searches for a chunk-size component as |
|
90 defined in RFC2616 section 3.6.1. The chunk-size component is delimited by |
|
91 an eol marker. An optional chunk-extension component can be present between |
|
92 the chunk-size and the eol marker - the parser will ignore any |
|
93 chunk-extension components. The chunk-size is a hex number specifying the |
|
94 size of the subsequent chunk-data component. A chunk-size of value zero |
|
95 indicates a last-chunk token - there are no subsequent chunk-data components |
|
96 and the parser moves to the ParsingTrailerHeaders state. A chunk-size of any |
|
97 other value indicates a subsequent chunk-data component and the parser moves |
|
98 to the ParsingChunkData state. |
|
99 |
|
100 In the ParsingChunkData state the parser extracts the length of entity body |
|
101 data specified in the preceeding chunk-size component. The observer is |
|
102 notified of each chunk of entity body data. Note that a chunk-data component |
|
103 can be segmented in the transport layer resulting in the observer being |
|
104 notified more than once for a given chunk-data component. The observer will |
|
105 not notice any disparate behaviour between receiving an entity body that has |
|
106 not been chunk-encoded and one that has. Once the entire chunk-data has been |
|
107 received the parser moves to the ParsingChunkSize state. |
|
108 |
|
109 The ParsingTrailerHeaders state is very similar to the ParsingHeaders state. |
|
110 The observer is notified of each header field and the trailer headers |
|
111 section is delimited by an empty line. The observer is notified of the end |
|
112 of the trailer headers section and the parser moves to MessageComplete state. |
|
113 |
|
114 In the MessageComplete state the parser notifies the observer that the |
|
115 message is complete. The observer is presented with any remaining data in |
|
116 the current data packet. This can be either excess data indicating a mal- |
|
117 formed message or the start of a pipelined message. In either case if the |
|
118 observer wishes to use that data it must not release the current data packet |
|
119 as that will invalidate the data. |
|
120 @internalComponent |
|
121 @see MHttpMessageParserObserver |
|
122 */ |
|
123 { |
|
124 public: // methods |
|
125 |
|
126 static CHttpMessageParser* NewL(MHttpMessageParserObserver& aObserver); |
|
127 virtual ~CHttpMessageParser(); |
|
128 |
|
129 void ReceivedMessageData(); |
|
130 void CompletedBodyDataL(); |
|
131 void Reset(); |
|
132 void Flush (); |
|
133 |
|
134 TBool CompleteMessage ( const TDesC8& aData ); |
|
135 |
|
136 private: // methods from CActive |
|
137 |
|
138 virtual void RunL(); |
|
139 virtual void DoCancel(); |
|
140 virtual TInt RunError(TInt aError); |
|
141 |
|
142 private: // methods from MHttpBufferSupplier |
|
143 |
|
144 virtual void ReAllocBufferL(TInt aRequiredSize, TPtr8& aBuffer); |
|
145 virtual void DeleteBuffer(); |
|
146 |
|
147 private: // enums |
|
148 |
|
149 enum TParserState |
|
150 /** |
|
151 The TParserState enumeration defines the state machine for the http message |
|
152 parser. |
|
153 */ |
|
154 { |
|
155 /** The parser is idle. A new message can be parsed. |
|
156 */ |
|
157 EIdle = 0, |
|
158 |
|
159 /** The parser has started parsing and is looking for the start-line of |
|
160 the http message. This is delimited by the first eol marker. |
|
161 */ |
|
162 EParsingStartLine, |
|
163 |
|
164 /** The parser is looking for header fields. Each header field is |
|
165 delimited by an eol marker. The parser continues to parser for |
|
166 header fields until an empty line is found - this marks the end of |
|
167 the header fields section. |
|
168 */ |
|
169 EParsingHeaders, |
|
170 |
|
171 /** The parser is looking for the chunk-size component. A chunk-size of |
|
172 value zero indicates that no more chunk-data is expected. |
|
173 */ |
|
174 EParsingChunkSize, |
|
175 |
|
176 /** The parser is looking for trailer header fields. Each trailer header |
|
177 field is delimited by an eol marker. The parser continues to parser |
|
178 for trailer header fields until an empty line is found - this marks the |
|
179 end of the trailer header fields section. |
|
180 */ |
|
181 EParsingTrailerHeaders, |
|
182 |
|
183 /** The parser is reading body data. The size of the body data was |
|
184 supplied to the parser by its observer. |
|
185 */ |
|
186 EReadingBodyData, |
|
187 |
|
188 /** The parser is reading chunk-data. The size of the this chunk-data |
|
189 was specified in the previous chunk-size. |
|
190 */ |
|
191 EReadingChunkData, |
|
192 |
|
193 /** The parser has parsed the http message. |
|
194 */ |
|
195 EMessageComplete |
|
196 }; |
|
197 |
|
198 |
|
199 enum TDataState |
|
200 /** |
|
201 The TDataState enumeration defines the state of the message parser as |
|
202 regards to the current data packet. |
|
203 */ |
|
204 { |
|
205 /** The parser has parsed the previous data packet and is now expecting |
|
206 to be given more data to continue with its parsing. |
|
207 */ |
|
208 EWaitingForData = 0, |
|
209 |
|
210 /** The parser is parsing the current data packet. |
|
211 */ |
|
212 EGotData, |
|
213 |
|
214 /** The parser has been reset. The current data packet is no longer |
|
215 valid. |
|
216 */ |
|
217 EReset |
|
218 }; |
|
219 |
|
220 enum TParsingStatus |
|
221 /** |
|
222 The TParsingStatus enumeration defines the status of parsing for the current |
|
223 state of the parser. |
|
224 */ |
|
225 { |
|
226 /** The current state has been completed. |
|
227 */ |
|
228 ESectionDone = 0, |
|
229 |
|
230 /** The current state has not been completed. |
|
231 */ |
|
232 ESectionNotDone, |
|
233 |
|
234 /** The current data packet has been completely parsed. More data is |
|
235 required to continue parsing the message. |
|
236 */ |
|
237 EBufferEmpty, |
|
238 |
|
239 /** The parser should not continue parsing - the message has been parsed. |
|
240 */ |
|
241 EStop |
|
242 }; |
|
243 |
|
244 private: // methods |
|
245 |
|
246 CHttpMessageParser(MHttpMessageParserObserver& aObserver); |
|
247 |
|
248 TParsingStatus ParseStartLineL(); |
|
249 TParsingStatus ParseSingleHeaderL(); |
|
250 TParsingStatus ParseHeadersL(); |
|
251 TParsingStatus ReadBodyData(TPtrC8& aData); |
|
252 TParsingStatus ParseChunkSizeL(); |
|
253 TParsingStatus ParseChunkDataL(TPtrC8& aData); |
|
254 |
|
255 void CompleteSelf(); |
|
256 void DoReset(); |
|
257 |
|
258 private: // attributes |
|
259 |
|
260 MHttpMessageParserObserver& iObserver; |
|
261 THttpDataParser iDataParser; |
|
262 TParserState iParserState; |
|
263 TDataState iDataState; |
|
264 TPtrC8 iData; |
|
265 HBufC8* iLineBuffer; |
|
266 TInt iDataSizeLeft; |
|
267 |
|
268 }; |
|
269 |
|
270 #endif // __CHTTPMESSAGEPARSER_H__ |