applayerprotocols/httptransportfw/httpmessage/chttpmessageparser.h
changeset 0 b16258d2340f
equal deleted inserted replaced
-1:000000000000 0:b16258d2340f
       
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #ifndef __CHTTPMESSAGEPARSER_H__
       
    17 #define __CHTTPMESSAGEPARSER_H__
       
    18 
       
    19 #include <e32base.h>
       
    20 
       
    21 #include "thttpdataparser.h"
       
    22 #include "mhttpbuffersupplier.h"
       
    23 
       
    24 class MHttpMessageParserObserver;
       
    25 
       
     26 class CHttpMessageParser : public CActive,
       
     27 						   public MHttpBufferSupplier
       
     28 /**	
       
     29 	The CHttpMessageParser class provides parsing functionality for HTTP/1.1 
       
     30 	messages as defined in RFC2616. The HTTP/1.1 protocol specifies that the CR
       
     31 	LF sequence is the end of line (eol) marker for all protocol elements except
       
     32 	the entity-body. The parser tolerates some deviation from this, as the RFC
       
     33 	recommends in section 19.3 - a LF without the leading CR can also be parsed
       
     34 	as an eol marker.
       
     35 
       
     36 	The parser does not process any header fields. Therefore it needs to be told
       
     37 	if an entity body is expected. If one is then the parser needs to know if it
       
     38 	is chunk-encoded and if not what the expected size is.
       
     39 
       
     40 	With a chunked entity body the parser de-chunks the body data - the observer
       
     41 	does not need to parse a chunked body.
       
     42 
       
     43 	The parser needs an observer (MHttpMessageParserObserver). The observer 
       
     44 	supplies the buffers containing the unparsed http message data. The observer
       
     45 	must ensure that the buffers it supplies to the parser remain valid until the
       
     46 	parser notifies it that it has parsed the entire contents of that buffer.
       
     47 
       
     48 	The parser is initially in the Idle state waiting to be notified of available
       
     49 	message data. When it is notified, the parser obtains the data and moves
       
     50 	into the ParsingStartLine state.
       
     51 
       
     52 	Whichever state it is in, the parser will notify the observer when it has
       
     53 	parsed the entire contents of the current data buffer. It will then wait to
       
     54 	be told that there is more data available. When it has been given the next 
       
     55 	part of the message data it continues parsing.
       
     56 
       
     57 	In the ParsingStartLine state the parser looks for the first eol marker. 
       
     58 	This delimits the start-line. Once found the observer is notified and the
       
     59 	parser moves into the ParsingHeaders state.
       
     60 
       
     61 	In the ParsingHeaders state the parser searches for header field lines. 
       
     62 	These lines are delimited by eol markers. In HTTP/1.1 it is possible to fold
       
     63 	header field values over multiple lines if each continuation line begins
       
     64 	with a SP or HT. In this case the eol marker is part of LWS and is ignored. 
       
     65 	Also any eol markers that are part of LWS are omitted from the header field
       
     66 	data given to the observer.
       
     67 
       
     68 	The header field section is completed once an empty line is parsed. The
       
     69 	observer is informed and it should supply the parser with the necessary
       
     70 	info about the entity body. If no entity body is expected then the parser 
       
     71 	moves to the MessageComplete state. If a non-encoded entity body is expected
       
     72 	then the parser moves to the ReadingBodyData state. If a chunk-encoded 
       
     73 	entity body is expected then the parser moves to the ParsingChunkSize state.
       
     74 
       
     75 	In the ReadingBodyData state the parser extracts the specified length of 
       
     76 	entity body data. The observer is notified of each chunk of body data parsed. 
       
     77 	It is possible for the entity body data to be received in several parts due
       
     78 	to segmentation at the transport layer. Once all the entity body data has 
       
     79 	been received the parser notifies the observer that the entity body is 
       
     80 	complete. The parser moves to MessageComplete state.
       
     81 
       
     82 	Note that although the parser will have notified the observer that it has 
       
     83 	finished with the current data packet that held some entity body data, the
       
     84 	observer should only release that data packet once it itself has no more use
       
     85 	for the entity body data it has been given. Failure to do this will result
       
     86 	in the buffer containing the received entity body chunks being invalid. The
       
     87 	same is true when receiving a chunked entity body.
       
     88 
       
     89 	In the ParsingChunkSize state the parser searches for a chunk-size component as 
       
     90 	defined in RFC2616 section 3.6.1. The chunk-size component is delimited by
       
     91 	an eol marker. An optional chunk-extension component can be present between
       
     92 	the chunk-size and the eol marker - the parser will ignore any 
       
     93 	chunk-extension components. The chunk-size is a hex number specifying the 
       
     94 	size of the subsequent chunk-data component. A chunk-size of value zero 
       
     95 	indicates a last-chunk token - there are no subsequent chunk-data components
       
     96 	and the parser moves to the ParsingTrailerHeaders state. A chunk-size of any
       
     97 	other value indicates a subsequent chunk-data component and the parser moves
       
     98 	to the ReadingChunkData state.
       
     99 
       
    100 	In the ReadingChunkData state the parser extracts the length of entity body
       
    101 	data specified in the preceding chunk-size component. The observer is 
       
    102 	notified of each chunk of entity body data. Note that a chunk-data component
       
    103 	can be segmented in the transport layer resulting in the observer being 
       
    104 	notified more than once for a given chunk-data component. The observer will
       
    105 	not notice any disparate behaviour between receiving an entity body that has 
       
    106 	not been chunk-encoded and one that has. Once the entire chunk-data has been 
       
    107 	received the parser moves to the ParsingChunkSize state.
       
    108 
       
    109 	The ParsingTrailerHeaders state is very similar to the ParsingHeaders state.
       
    110 	The observer is notified of each header field and the trailer headers 
       
    111 	section is delimited by an empty line. The observer is notified of the end
       
    112 	of the trailer headers section and the parser moves to MessageComplete state.
       
    113 
       
    114 	In the MessageComplete state the parser notifies the observer that the
       
    115 	message is complete. The observer is presented with any remaining data in 
       
    116 	the current data packet. This can be either excess data indicating a mal-
       
    117 	formed message or the start of a pipelined message. In either case if the 
       
    118 	observer wishes to use that data it must not release the current data packet
       
    119 	as that will invalidate the data.
       
    120 	@internalComponent
       
    121 	@see		MHttpMessageParserObserver
       
    122 */
       
    123 	{
       
    124 public:	// methods
       
    125 
       
    126 	static CHttpMessageParser* NewL(MHttpMessageParserObserver& aObserver);	
       
    127 	virtual ~CHttpMessageParser();
       
    128 
       
    129 	void ReceivedMessageData();
       
    130 	void CompletedBodyDataL();
       
    131 	void Reset();
       
    132 	void Flush ();
       
    133 
       
    134 	TBool CompleteMessage ( const TDesC8& aData );
       
    135 
       
    136 private:	// methods from CActive
       
    137 
       
    138 	virtual void RunL();
       
    139 	virtual void DoCancel();
       
    140 	virtual TInt RunError(TInt aError);
       
    141 
       
    142 private:	// methods from MHttpBufferSupplier
       
    143 
       
    144 	virtual void ReAllocBufferL(TInt aRequiredSize, TPtr8& aBuffer);
       
    145 	virtual void DeleteBuffer();
       
    146 
       
    147 private:	// enums
       
    148 
       
    149 	enum TParserState
       
    150 /**
       
    151 	The TParserState enumeration defines the state machine for the http message
       
    152 	parser.
       
    153 */
       
    154 		{
       
    155 		/**	The parser is idle. A new message can be parsed.
       
    156 		*/
       
    157 		EIdle					= 0,
       
    158 
       
    159 		/**	The parser has started parsing and is looking for the start-line of
       
    160 			the http message. This is delimited by the first eol marker.
       
    161 		*/
       
    162 		EParsingStartLine,
       
    163 
       
    164 		/**	The parser is looking for header fields. Each header field is 
       
    165 			delimited by an eol marker. The parser continues to parse for 
       
    166 			header fields until an empty line is found - this marks the end of
       
    167 			the header fields section.
       
    168 		*/
       
    169 		EParsingHeaders,
       
    170 
       
    171 		/** The parser is looking for the chunk-size component. A chunk-size of
       
    172 			value zero indicates that no more chunk-data is expected.			
       
    173 		*/
       
    174 		EParsingChunkSize,
       
    175 
       
    176 		/**	The parser is looking for trailer header fields. Each trailer header
       
    177 			field is delimited by an eol marker. The parser continues to parse 
       
    178 			for trailer header fields until an empty line is found - this marks the 
       
    179 			end of the trailer header fields section.
       
    180 		*/
       
    181 		EParsingTrailerHeaders,
       
    182 
       
    183 		/** The parser is reading body data. The size of the body data was 
       
    184 			supplied to the parser by its observer.
       
    185 		*/
       
    186 		EReadingBodyData,
       
    187 
       
    188 		/**	The parser is reading chunk-data. The size of this chunk-data 
       
    189 			was specified in the previous chunk-size.
       
    190 		*/
       
    191 		EReadingChunkData,
       
    192 
       
    193 		/** The parser has parsed the http message.
       
    194 		*/
       
    195 		EMessageComplete
       
    196 		};
       
    197 
       
    198 
       
    199 	enum TDataState
       
    200 /**	
       
    201 	The TDataState enumeration defines the state of the message parser with 
       
    202 	regard to the current data packet.
       
    203 */
       
    204 		{
       
    205 		/**	The parser has parsed the previous data packet and is now expecting
       
    206 			to be given more data to continue with its parsing.
       
    207 		*/
       
    208 		EWaitingForData			= 0,
       
    209 
       
    210 		/**	The parser is parsing the current data packet.
       
    211 		*/
       
    212 		EGotData,
       
    213 
       
    214 		/**	The parser has been reset. The current data packet is no longer 
       
    215 			valid.
       
    216 		*/
       
    217 		EReset
       
    218 		};
       
    219 
       
    220 	enum TParsingStatus
       
    221 /**	
       
    222 	The TParsingStatus enumeration defines the status of parsing for the current
       
    223 	state of the parser.
       
    224 */
       
    225 		{
       
    226 		/**	The current state has been completed.
       
    227 		*/
       
    228 		ESectionDone		= 0,
       
    229 
       
    230 		/** The current state has not been completed.
       
    231 		*/
       
    232 		ESectionNotDone,
       
    233 
       
    234 		/** The current data packet has been completely parsed. More data is
       
    235 			required to continue parsing the message.
       
    236 		*/
       
    237 		EBufferEmpty,
       
    238 
       
    239 		/** The parser should not continue parsing - the message has been parsed.
       
    240 		*/
       
    241 		EStop
       
    242 		};
       
    243 
       
    244 private:	// methods
       
    245 
       
    246 	CHttpMessageParser(MHttpMessageParserObserver& aObserver);
       
    247 	
       
    248 	TParsingStatus ParseStartLineL();
       
    249 	TParsingStatus ParseSingleHeaderL();
       
    250 	TParsingStatus ParseHeadersL();
       
    251 	TParsingStatus ReadBodyData(TPtrC8& aData);
       
    252 	TParsingStatus ParseChunkSizeL();
       
    253 	TParsingStatus ParseChunkDataL(TPtrC8& aData);
       
    254 
       
    255 	void CompleteSelf();
       
    256 	void DoReset();
       
    257 
       
    258 private:	// attributes
       
    259 
       
    260 	MHttpMessageParserObserver&	iObserver;
       
    261 	THttpDataParser				iDataParser;
       
    262 	TParserState				iParserState;
       
    263 	TDataState					iDataState;
       
    264 	TPtrC8						iData;		
       
    265 	HBufC8*						iLineBuffer;
       
    266 	TInt						iDataSizeLeft;
       
    267 
       
    268 	};
       
   269 	
       
   270 #endif // __CHTTPMESSAGEPARSER_H__