|
1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #include "thttpdataparser.h" |
|
17 |
|
18 #include "mhttpbuffersupplier.h" |
|
19 #include "thttpmessagepanic.h" |
|
20 |
|
21 const TUint KCarriageReturn = '\r'; |
|
22 const TUint KLineFeed = '\n'; |
|
23 const TUint KSpace = ' '; |
|
24 const TUint KTab = '\t'; |
|
25 |
|
26 THttpDataParser::THttpDataParser(MHttpBufferSupplier& aBufferSupplier) |
|
27 : iBufferSupplier(aBufferSupplier), iLineBuffer(TPtr8(NULL, 0)) |
|
28 /** |
|
29 Constructor. |
|
30 @param aBufferSupplier The buffer supplier. |
|
31 */ |
|
32 { |
|
33 } |
|
34 |
|
35 void THttpDataParser::Reset() |
|
36 /** |
|
37 Resets the data parser. The line buffer and current data packet buffer are |
|
38 reset to a zero length buffer and the parser moves to the Idle state. The |
|
39 observer is notified that the buffer supplying the line buffer should be |
|
40 deleted. |
|
41 */ |
|
42 { |
|
43 // Move to the Idle state and reset the line buffer and data chunk. |
|
44 iState = EIdle; |
|
45 iLineBuffer.Set(NULL, 0, 0); |
|
46 iData.Set(NULL, 0); |
|
47 iBufferSupplier.DeleteBuffer(); |
|
48 } |
|
49 |
|
50 THttpDataParser::TParseResult THttpDataParser::GetHeaderLineL(TPtrC8& aLine) |
|
51 /** |
|
52 Extracts a header field line. The data parser attempts to parse a line of |
|
53 data where the end of line marker is defined by the CRLF sequence. In |
|
54 HTTP/1.1 header field values can be folded onto multiple lines if the |
|
55 continuation line begins with a SP or HT. The CRLF sequence in this case is |
|
56 part of LWS and is ignored - the CRLF will not be preset in the returned |
|
57 line buffer. The delimiting CRLF is not given in the return data. |
|
58 |
|
59 If a line cannot be extracted (due to lack of data) then the value |
|
60 EPartialData is returned. In this case the output argument is not valid. If |
|
61 an empty line is found then the value EEmptyLine is returned. If the line is |
|
62 extracted then the value ELineParsed is returned. |
|
63 @param aLine The output argument set to the buffer containing the |
|
64 extracted header field line. This will only be valid if |
|
65 the return value is either ELineParsed or EEmptyLine. |
|
66 @return A value of EPartialData if the requested line type cannot be |
|
67 found in the current data packet. If an empty line is found then |
|
68 EEmptyLine is returned otherwise if the line is found then |
|
69 ELineParsed is returned. |
|
70 */ |
|
71 { |
|
72 return GetLineL(aLine, EHeaderLine); |
|
73 } |
|
74 |
|
75 THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine) |
|
76 /** |
|
77 Extracts a line. The data parser attempts to parse a line of data where the |
|
78 end of line marker is defined by the CRLF sequence. The delimiting CRLF is |
|
79 not given in the return data. |
|
80 |
|
81 If a line cannot be extracted (due to lack of data) then the value |
|
82 EPartialData is returned. In this case the output argument is not valid. If |
|
83 an empty line is found then the value EEmptyLine is returned. If the line is |
|
84 extracted then the value ELineParsed is returned. |
|
85 @param aLine The output argument set to the buffer containing the |
|
86 extracted header field line. This will only be valid if |
|
87 the return value is either ELineParsed or EEmptyLine. |
|
88 @return A value of EPartialData if the requested line type cannot be |
|
89 found in the current data packet. If an empty line is found then |
|
90 EEmptyLine is returned otherwise if the line is found then |
|
91 ELineParsed is returned. |
|
92 */ |
|
93 { |
|
94 return GetLineL(aLine, EStandardLine); |
|
95 } |
|
96 |
|
97 THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine, TLineType aLineType) |
|
98 /** |
|
99 Parses data for the specified line type. In the HTTP/1.1 protocol there are |
|
100 two types of line. The standard line is delimited by the CRLF sequence. The |
|
101 header field line is delimited by a CRLF sequence that is not followed by a |
|
102 SP or HT. When parsing for a header field line any CRLF sequences followed |
|
103 by a SP or HT are classed as part of LWS and ignored - header field values |
|
104 can be folded onto multiple line using this LWS in this way. The parser is |
|
105 robust to spurious CRs in the data and eol markers that are missing the |
|
106 leading CR. |
|
107 |
|
108 Initially the data parser is in the Idle state when starting to parse for a |
|
109 line. In the Idle state it clears the line buffer and moves to the |
|
110 PendingMoreData state. |
|
111 |
|
112 In the PendingMoreData state the data parser looks for an end of line marker. |
|
113 The eol marker should can be either a CRLF sequence or just a LF in cases |
|
114 where there has been a deviation from the protocol - this deviation is |
|
115 tolerated for robustness. |
|
116 |
|
117 If an eol marker is not found one in the current data packet, the parser |
|
118 appends the data packet to the line buffer and returns EPartialData. The |
|
119 current data packet is then discarded. |
|
120 |
|
121 If an eol maker is found at the start of the data packet this indicates a |
|
122 possible empty line. The data parser moves to the PendingEmptyLine state if |
|
123 the eol marker was a CR. If the eol marker was a LF then an empty line has |
|
124 been found - the parser moves to the Idle state and returns the EEmptyLine |
|
125 value. The current data packet is updated to skip past the eol marker. |
|
126 |
|
127 If an eol marker is found but not at the start of the data packet this |
|
128 indicates a possible line. The data parser moves to the PendingLF state if |
|
129 the eol marker was a CR. If the eol marker was a LF then the parser moves to |
|
130 the PendingFoundLine state. The data upto, but not including the eol marker, |
|
131 is appended to the line buffer and the current data packet is updated to |
|
132 skip past the eol marker. |
|
133 |
|
134 In the PendingLF state the data parser expects a LF to be present at the |
|
135 start of the data packet. If the LF is found then the parser moves to the |
|
136 PendingFoundLine state and the data packet is updated to skip past the LF. |
|
137 If a CR is found at the start of the data packet then it is ignored and the |
|
138 parser remains in the PendingLF state. If the any other character is found |
|
139 then the initial CR is deemed to be spurious and the parser moves back to |
|
140 the PendingMoreData state. |
|
141 |
|
142 In the PendingEmptyLine state the data parser expects a LF to be present at |
|
143 the start of the data packet. If the LF is found then the parser moves to |
|
144 the Idle state and the data packet is updated to skip past the LF. The value |
|
145 EEmptyLine is returned. If a CR is found at the start of the data packet |
|
146 then it is ignored and the parser remains in the PendingLF state. If the any |
|
147 other character is found then the initial CR is deemed to be spurious and |
|
148 the parser moves back to the PendingMoreData state. |
|
149 |
|
150 In the PendingFoundLine state the data parser checks the first character in |
|
151 the data packet. If it is a SP or HT and the a header field line was |
|
152 requested then the parser moves to the PendingMoreData state. Otherwise a |
|
153 line has been found. The line buffer will contain the line less any CRLF |
|
154 sequences either as the end of line marker or as part of LWS. The output |
|
155 argument aLine is set to the line buffer. The value ELineParsed is returned |
|
156 and the parser moves to the Idle state. |
|
157 |
|
158 The current data packet is checked before processing each state. If there is |
|
159 no data then the parser does not continue as it requires more data. The |
|
160 value EPartialData is returned. |
|
161 @param aLine The output argument that is set to the parsed line |
|
162 when the value ELineParsed is returned. |
|
163 @param aLineType The type of line requested to be parsed. |
|
164 @return A value of EPartial data if a the requested line type cannot be |
|
165 found in the current data packet. If an empty line is found then |
|
166 EEmptyLine is returned otherwise if a line is found then |
|
167 ELineParsed is returned. |
|
168 @panic EHttpMessagePanicBadDataParserState The parser is in an invalid |
|
169 state. |
|
170 */ |
|
171 { |
|
172 // Parse the current data for a 'line' |
|
173 TBool done = EFalse; |
|
174 TParseResult result = EPartialData; |
|
175 while( !done && iData.Length() > 0 ) |
|
176 { |
|
177 // The data buffer always has data - no need to check... |
|
178 switch( iState ) |
|
179 { |
|
180 case EIdle: |
|
181 { |
|
182 // Not waiting for more data - clear the buffer. |
|
183 iLineBuffer.Zero(); |
|
184 |
|
185 // Drop through to PendingMoreData case... |
|
186 } |
|
187 case EPendingMoreData: |
|
188 { |
|
189 // Locate end of line marker - need to be tolerant of protocol |
|
190 // deviations. Therefore support eol termination by just a LF. |
|
191 TChar eol = 0; |
|
192 TInt posEOL = FindEOLMarker(eol); |
|
193 |
|
194 if( posEOL != KErrNotFound ) |
|
195 { |
|
196 if( posEOL == 0 && iState == EIdle ) |
|
197 { |
|
198 // Data was at start of a new line and the eol marker was |
|
199 // found at the start - possible empty line. Check what the |
|
200 // eol marker was |
|
201 if( eol == KCarriageReturn ) |
|
202 { |
|
203 // Move to the PendingEmptyLine state. |
|
204 iState = EPendingEmptyLine; |
|
205 } |
|
206 else |
|
207 { |
|
208 // An empty line has been found - can stop parsing and |
|
209 // move to the Idle state. |
|
210 done = ETrue; |
|
211 result = EEmptyLine; |
|
212 iState = EIdle; |
|
213 } |
|
214 } |
|
215 else |
|
216 { |
|
217 // Append the data found upto the eol marker. |
|
218 AppendToBufferL(iData.Left(posEOL)); |
|
219 |
|
220 // Check what the eol marker was. |
|
221 if( eol == KCarriageReturn ) |
|
222 { |
|
223 // Expecting a LF - move to the PendingLF state. |
|
224 iState = EPendingLF; |
|
225 } |
|
226 else |
|
227 { |
|
228 // The eol marker was a LF - move to the PendingFoundLine state. |
|
229 iState = EPendingFoundLine; |
|
230 } |
|
231 } |
|
232 // Skip past the eol marker... |
|
233 iData.Set(iData.Mid(posEOL + 1)); |
|
234 } |
|
235 else |
|
236 { |
|
237 // Not enough data - append the data to the buffer. |
|
238 AppendToBufferL(iData); |
|
239 |
|
240 // Move to the PendingMoreData state and wait for next data |
|
241 // packet |
|
242 done = ETrue; |
|
243 iState = EPendingMoreData; |
|
244 } |
|
245 } break; |
|
246 case EPendingLF: |
|
247 { |
|
248 // Expecting a LF at the start of the data |
|
249 if( iData[0] == KLineFeed ) |
|
250 { |
|
251 // Skip past the LF and move to PendingFoundLine |
|
252 iData.Set(iData.Mid(1)); |
|
253 iState = EPendingFoundLine; |
|
254 } |
|
255 else if( iData[0] == KCarriageReturn ) |
|
256 { |
|
257 // Need to be robust to spurious amounts of CRs - skip past the |
|
258 // CR and remain in PendingLF state. |
|
259 iData.Set(iData.Mid(1)); |
|
260 } |
|
261 else |
|
262 { |
|
263 // Data is wrong - had a spurious CR in the data. Need to be so |
|
264 // return to the PendingMoreData state. |
|
265 iState = EPendingMoreData; |
|
266 } |
|
267 } break; |
|
268 case EPendingEmptyLine: |
|
269 { |
|
270 // Expecting a LF at the start of the data |
|
271 if( iData[0] == KLineFeed ) |
|
272 { |
|
273 // This CRLF delimits an empty line - no need to check that it |
|
274 // is part of LWS. |
|
275 result = EEmptyLine; |
|
276 done = ETrue; |
|
277 |
|
278 // Move to the Idle state and skip past the LF. |
|
279 iData.Set(iData.Mid(1)); |
|
280 iState = EIdle; |
|
281 } |
|
282 else if( iData[0] == KCarriageReturn ) |
|
283 { |
|
284 // Need to be robust to spurious amounts of CRs - skip past the |
|
285 // CR and remain in PendingEmptyLine state. |
|
286 iData.Set(iData.Mid(1)); |
|
287 } |
|
288 else |
|
289 { |
|
290 // Data is wrong - had a spurious CR in the data. Need to be so |
|
291 // return to the PendingMoreData state. |
|
292 iState = EPendingMoreData; |
|
293 } |
|
294 } break; |
|
295 case EPendingFoundLine: |
|
296 { |
|
297 // Expecting a leading char tha is not a SP or a HT |
|
298 TChar ch = iData[0]; |
|
299 if( aLineType == EHeaderLine && (ch == KSpace || ch == KTab) ) |
|
300 { |
|
301 // This CRLF is part of LWS between tokens in a header field |
|
302 // line - move to PendingMoreData state and search for the next CR. |
|
303 iState = EPendingMoreData; |
|
304 } |
|
305 else if( aLineType == EHeaderLine && (ch == KCarriageReturn) ) |
|
306 { |
|
307 // Need to be robust to spurious amounts of CRs - skip past the |
|
308 // CR and remain in EPendingFoundLine state. |
|
309 iData.Set(iData.Mid(1)); |
|
310 } |
|
311 else |
|
312 { |
|
313 // This CRLF delimits the end of a line - move to Idle state |
|
314 done = ETrue; |
|
315 result = ELineParsed; |
|
316 iState = EIdle; |
|
317 } |
|
318 } break; |
|
319 default: |
|
320 THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState); |
|
321 break; |
|
322 } |
|
323 } |
|
324 // Set the output argument aLine |
|
325 if( result == ELineParsed ) |
|
326 { |
|
327 // Line was found - set buffer to the line buffer |
|
328 aLine.Set(iLineBuffer); |
|
329 } |
|
330 else |
|
331 { |
|
332 // Either an empty line was found or waiting for more data - set buffer |
|
333 // to empty buffer. |
|
334 aLine.Set(KNullDesC8()); |
|
335 } |
|
336 return result; |
|
337 } |
|
338 |
|
339 THttpDataParser::TParseResult THttpDataParser::GetData(TPtrC8& aData, TInt aMaxSize) |
|
340 /** |
|
341 Attempts to supply the data requested. The argument aMaxSize specifies the |
|
342 amount of data required. If the amount is available the data returns a value |
|
343 of EGotData, otherwise EPartialData is returned. |
|
344 @param aData The output argument that is set to the buffer with |
|
345 the requested data. |
|
346 @param aMaxSize The amount of data required. |
|
347 @return A value of EGotData if all the requested data was supplied or |
|
348 EPartialData if was not. |
|
349 @panic EHttpMessagePanicBadDataParserState The parser was not in the |
|
350 Idle state. |
|
351 */ |
|
352 { |
|
353 __ASSERT_DEBUG( iState == EIdle, THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState) ); |
|
354 |
|
355 // Set the length of the data to copy - intially set to available amount |
|
356 TInt copyLength = iData.Length(); |
|
357 TParseResult result = EPartialData; |
|
358 |
|
359 // Make sure that not too much data is given... |
|
360 if( copyLength >= aMaxSize ) |
|
361 { |
|
362 // The available data is same or more than the requested amount - adjust |
|
363 // the copy size to copy just the requested amount. |
|
364 copyLength = aMaxSize; |
|
365 result = EGotData; |
|
366 } |
|
367 // Copy over the data specified. |
|
368 aData.Set(iData.Left(copyLength)); |
|
369 |
|
370 // Update the data buffer... |
|
371 iData.Set(iData.Mid(copyLength)); |
|
372 |
|
373 return result; |
|
374 } |
|
375 |
|
376 void THttpDataParser::SetData(const TDesC8& aData) |
|
377 /** |
|
378 Sets the current data packet. |
|
379 @param aData The buffer containing the current data packet. |
|
380 */ |
|
381 { |
|
382 iData.Set(aData); |
|
383 } |
|
384 |
|
385 void THttpDataParser::UnparsedData(TPtrC8& aData) |
|
386 /** |
|
387 Accessor to the unparsed data in the current data packet. The data parser |
|
388 can supply the unparsed data that is in the current data packet. |
|
389 @param aData The output argument that is set to the unparsed data. |
|
390 */ |
|
391 { |
|
392 aData.Set(iData); |
|
393 } |
|
394 |
|
395 void THttpDataParser::AppendToBufferL(const TDesC8& aData) |
|
396 /** |
|
397 Appends data to line buffer. If the line buffer is not big enough to hold |
|
398 the extra data the parser gets the observer to reallocate a larger buffer. |
|
399 @param aData The data to be appended to the line buffer. |
|
400 */ |
|
401 { |
|
402 // Check to see if there is enough space |
|
403 TInt requiredLength = iLineBuffer.Length() + aData.Length(); |
|
404 if( requiredLength > iLineBuffer.MaxLength() ) |
|
405 { |
|
406 // Need more space... |
|
407 iBufferSupplier.ReAllocBufferL(requiredLength, iLineBuffer); |
|
408 } |
|
409 // Append the data. |
|
410 iLineBuffer.Append(aData); |
|
411 } |
|
412 |
|
413 TInt THttpDataParser::FindEOLMarker(TChar& aEOLMarker) |
|
414 /** |
|
415 Locates end of line marker in current data packet. Need to be tolerant of a |
|
416 LF without a leading CR being used as a end of line marker. This function |
|
417 searches for either a CR of a LF as the eol marker and returns its position |
|
418 if it is found and the what the marker was. If neither a CR of a LF was |
|
419 found then KErrNotFound is returned. |
|
420 @param aEOLMarker An output argument that is set to the eol marker |
|
421 that was found. Is not valid if the return value is |
|
422 KErrNotFound. |
|
423 @return A value of KErrNotFound is returned if an eol marker was not |
|
424 found in the current data packet. If an eol marker was found |
|
425 then its position in the data packet is returned. |
|
426 */ |
|
427 { |
|
428 const TUint len = iData.Length(); |
|
429 TUint pos = 0; |
|
430 TInt ret = KErrNotFound; |
|
431 for(;pos < len;++pos) |
|
432 { |
|
433 const TUint8& ch = iData[pos]; |
|
434 if(ch == KLineFeed || ch == KCarriageReturn) |
|
435 { |
|
436 aEOLMarker = ch; |
|
437 ret = pos; |
|
438 break; |
|
439 } |
|
440 } |
|
441 return ret; |
|
442 } |
|
443 |
|
444 |