applayerprotocols/httptransportfw/httpmessage/thttpdataparser.cpp
changeset 0 b16258d2340f
equal deleted inserted replaced
-1:000000000000 0:b16258d2340f
       
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include "thttpdataparser.h"
       
    17 
       
    18 #include "mhttpbuffersupplier.h"
       
    19 #include "thttpmessagepanic.h"
       
    20 
       
    21 const TUint KCarriageReturn	= '\r';
       
    22 const TUint KLineFeed		= '\n';
       
    23 const TUint KSpace			= ' ';
       
    24 const TUint KTab			= '\t';
       
    25 
       
    26 THttpDataParser::THttpDataParser(MHttpBufferSupplier& aBufferSupplier)
       
    27 : iBufferSupplier(aBufferSupplier), iLineBuffer(TPtr8(NULL, 0))
       
    28 /**
       
    29 	Constructor.
       
    30 	@param		aBufferSupplier	The buffer supplier.
       
    31 */
       
    32 	{
       
    33 	}
       
    34 
       
    35 void THttpDataParser::Reset()
       
    36 /**
       
    37 	Resets the data parser. The line buffer and current data packet buffer are
       
    38 	reset to a zero length buffer and the parser moves to the Idle state. The 
       
    39 	observer is notified that the buffer supplying the line buffer should be 
       
    40 	deleted.
       
    41 */
       
    42 	{
       
    43 	// Move to the Idle state and reset the line buffer and data chunk.
       
    44 	iState = EIdle;
       
    45 	iLineBuffer.Set(NULL, 0, 0);
       
    46 	iData.Set(NULL, 0);
       
    47 	iBufferSupplier.DeleteBuffer();
       
    48 	}
       
    49 
       
    50 THttpDataParser::TParseResult THttpDataParser::GetHeaderLineL(TPtrC8& aLine)
       
    51 /**
       
    52 	Extracts a header field line. The data parser attempts to parse a line of
       
    53 	data where the end of line marker is defined by the CRLF sequence. In 
       
    54 	HTTP/1.1 header field values can be folded onto multiple lines if the 
       
    55 	continuation line begins with a SP or HT. The CRLF sequence in this case is
       
    56 	part of LWS and is ignored - the CRLF will not be preset in the returned 
       
    57 	line buffer. The delimiting CRLF is not given in the return data. 
       
    58 	
       
    59 	If a line cannot be extracted (due to lack of data) then the value 
       
    60 	EPartialData is returned. In this case the output argument is not valid. If 
       
    61 	an empty line is found then the value EEmptyLine is returned. If the line is
       
    62 	extracted then the value ELineParsed is returned.
       
    63 	@param		aLine	The output argument set to the buffer containing the 
       
    64 						extracted header field line. This will only be valid if
       
    65 						the return value is either ELineParsed or EEmptyLine.
       
    66 	@return		A value of EPartialData if the requested line type cannot be 
       
    67 				found in the current data packet. If an empty line is found then
       
    68 				EEmptyLine is returned otherwise if the line is found then
       
    69 				ELineParsed is returned.
       
    70 */
       
    71 	{
       
    72 	return GetLineL(aLine, EHeaderLine);
       
    73 	}
       
    74 
       
    75 THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine)
       
    76 /**
       
    77 	Extracts a line. The data parser attempts to parse a line of data where the
       
    78 	end of line marker is defined by the CRLF sequence. The delimiting CRLF is
       
    79 	not given in the return data. 
       
    80 	
       
    81 	If a line cannot be extracted (due to lack of data) then the value 
       
    82 	EPartialData is returned. In this case the output argument is not valid. If 
       
    83 	an empty line is found then the value EEmptyLine is returned. If the line is
       
    84 	extracted then the value ELineParsed is returned.
       
    85 	@param		aLine	The output argument set to the buffer containing the 
       
    86 						extracted header field line. This will only be valid if
       
    87 						the return value is either ELineParsed or EEmptyLine.
       
    88 	@return		A value of EPartialData if the requested line type cannot be 
       
    89 				found in the current data packet. If an empty line is found then
       
    90 				EEmptyLine is returned otherwise if the line is found then
       
    91 				ELineParsed is returned.
       
    92 */
       
    93 	{
       
    94 	return GetLineL(aLine, EStandardLine);
       
    95 	}
       
    96 
       
    97 THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine, TLineType aLineType)
       
    98 /**
       
    99 	Parses data for the specified line type. In the HTTP/1.1 protocol there are
       
   100 	two types of line. The standard line is delimited by the CRLF sequence. The
       
   101 	header field line is delimited by a CRLF sequence that is not followed by a 
       
   102 	SP or HT. When parsing for a header field line any CRLF sequences followed 
       
   103 	by a SP or HT are classed as part of LWS and ignored - header field values 
       
   104 	can be folded onto multiple line using this LWS in this way. The parser is
       
   105 	robust to spurious CRs in the data and eol markers that are missing the
       
   106 	leading CR.
       
   107 
       
   108 	Initially the data parser is in the Idle state when starting to parse for a
       
   109 	line. In the Idle state it clears the line buffer and moves to the 
       
   110 	PendingMoreData state.
       
   111 
       
   112 	In the PendingMoreData state the data parser looks for an end of line marker.
       
   113 	The eol marker should can be either a CRLF sequence or just a LF in cases 
       
   114 	where there has been a deviation from the protocol - this deviation is 
       
   115 	tolerated for robustness.
       
   116 	 
       
   117 	If an eol marker is not found one in the current data packet, the parser 
       
   118 	appends the data packet to the line buffer and returns EPartialData. The 
       
   119 	current data packet is then discarded. 
       
   120 	
       
   121 	If an eol maker is found at the start of the data packet this indicates a
       
   122 	possible empty line. The data parser moves to the PendingEmptyLine state if
       
   123 	the eol marker was a CR. If the eol marker was a LF then an empty line has
       
   124 	been found - the parser moves to the Idle state and returns the EEmptyLine 
       
   125 	value. The current data packet is updated to skip past the eol marker. 
       
   126 	
       
   127 	If an eol marker is found but not at the start of the data packet this 
       
   128 	indicates a possible line. The data parser moves to the PendingLF state if 
       
   129 	the eol marker was a CR. If the eol marker was a LF then the parser moves to
       
   130 	the PendingFoundLine state. The data upto, but not including the eol marker,
       
   131 	is appended to the line buffer and the current data packet is updated to 
       
   132 	skip past the eol marker.
       
   133 	
       
   134 	In the PendingLF state the data parser expects a LF to be present at the 
       
   135 	start of the data packet. If the LF is found then the parser moves to the 
       
   136 	PendingFoundLine state and the data packet is updated to skip past the LF. 
       
   137 	If a CR is found at the start of the data packet then it is ignored and the
       
   138 	parser remains in the PendingLF state. If the any other character is found
       
   139 	then the initial CR is deemed to be spurious and the parser moves back to 
       
   140 	the PendingMoreData state.
       
   141 
       
   142 	In the PendingEmptyLine state the data parser expects a LF to be present at
       
   143 	the start of the data packet. If the LF is found then the parser moves to 
       
   144 	the Idle state and the data packet is updated to skip past the LF. The value
       
   145 	EEmptyLine is returned.	If a CR is found at the start of the data packet 
       
   146 	then it is ignored and the parser remains in the PendingLF state. If the any
       
   147 	other character is found then the initial CR is deemed to be spurious and 
       
   148 	the parser moves back to the PendingMoreData state.
       
   149 
       
   150 	In the PendingFoundLine state the data parser checks the first character in 
       
   151 	the data packet. If it is a SP or HT and the a header field line was 
       
   152 	requested then the parser moves to the PendingMoreData state. Otherwise a 
       
   153 	line has been found. The line buffer will contain the line less any CRLF 
       
   154 	sequences either as the end of line marker or as part of LWS. The output 
       
   155 	argument aLine is set to the line buffer. The value ELineParsed is returned
       
   156 	and the parser moves to the Idle state.
       
   157 
       
   158 	The current data packet is checked before processing each state. If there is
       
   159 	no data then the parser does not continue as it requires more data. The 
       
   160 	value EPartialData is returned.
       
   161 	@param		aLine		The output argument that is set to the parsed line 
       
   162 							when the value ELineParsed is returned.
       
   163 	@param		aLineType	The type of line requested to be parsed.
       
   164 	@return		A value of EPartial data if a the requested line type cannot be 
       
   165 				found in the current data packet. If an empty line is found then
       
   166 				EEmptyLine is returned otherwise if a line is found then
       
   167 				ELineParsed is returned.
       
   168 	@panic		EHttpMessagePanicBadDataParserState	The parser is in an invalid
       
   169 													state.
       
   170 */
       
   171 	{
       
   172 	// Parse the current data for a 'line'
       
   173 	TBool done	= EFalse;
       
   174 	TParseResult result = EPartialData;
       
   175 	while( !done && iData.Length() > 0 )
       
   176 		{
       
   177 		// The data buffer always has data - no need to check...
       
   178 		switch( iState )
       
   179 			{
       
   180 		case EIdle:
       
   181 			{	
       
   182 			// Not waiting for more data - clear the buffer.
       
   183 			iLineBuffer.Zero();
       
   184 
       
   185 			// Drop through to PendingMoreData case...
       
   186 			}
       
   187 		case EPendingMoreData:
       
   188 			{
       
   189 			// Locate end of line marker - need to be tolerant of protocol 
       
   190 			// deviations. Therefore support eol termination by just a LF.
       
   191 			TChar eol = 0;
       
   192 			TInt posEOL = FindEOLMarker(eol);
       
   193 
       
   194 			if( posEOL != KErrNotFound )
       
   195 				{
       
   196 				if( posEOL == 0 && iState == EIdle )
       
   197 					{
       
   198 					// Data was at start of a new line and the eol marker was 
       
   199 					// found at the start - possible empty line. Check what the
       
   200 					// eol marker was
       
   201 					if( eol == KCarriageReturn )
       
   202 						{
       
   203 						// Move to the PendingEmptyLine state.
       
   204 						iState = EPendingEmptyLine;
       
   205 						}
       
   206 					else
       
   207 						{
       
   208 						// An empty line has been found - can stop parsing and
       
   209 						// move to the Idle state.
       
   210 						done = ETrue;
       
   211 						result = EEmptyLine;
       
   212 						iState = EIdle;
       
   213 						}
       
   214 					}
       
   215 				else
       
   216 					{
       
   217 					// Append the data found upto the eol marker.
       
   218 					AppendToBufferL(iData.Left(posEOL));
       
   219 
       
   220 					// Check what the eol marker was.
       
   221 					if( eol == KCarriageReturn )
       
   222 						{
       
   223 						// Expecting a LF - move to the PendingLF state.
       
   224 						iState = EPendingLF;
       
   225 						}
       
   226 					else
       
   227 						{
       
   228 						// The eol marker was a LF - move to the PendingFoundLine state.
       
   229 						iState = EPendingFoundLine;
       
   230 						}
       
   231 					}
       
   232 				// Skip past the eol marker...
       
   233 				iData.Set(iData.Mid(posEOL + 1));
       
   234 				}
       
   235 			else
       
   236 				{
       
   237 				// Not enough data - append the data to the buffer.
       
   238 				AppendToBufferL(iData);
       
   239 
       
   240 				// Move to the PendingMoreData state and wait for next data 
       
   241 				// packet				
       
   242 				done = ETrue;
       
   243 				iState = EPendingMoreData;
       
   244 				}
       
   245 			} break;
       
   246 		case EPendingLF:
       
   247 			{
       
   248 			// Expecting a LF at the start of the data
       
   249 			if( iData[0] == KLineFeed )
       
   250 				{
       
   251 				// Skip past the LF and move to PendingFoundLine
       
   252 				iData.Set(iData.Mid(1));
       
   253 				iState = EPendingFoundLine;
       
   254 				}
       
   255 			else if( iData[0] == KCarriageReturn )
       
   256 				{
       
   257 				// Need to be robust to spurious amounts of CRs - skip past the 
       
   258 				// CR and remain in PendingLF state.
       
   259 				iData.Set(iData.Mid(1));
       
   260 				}
       
   261 			else
       
   262 				{
       
   263 				// Data is wrong - had a spurious CR in the data. Need to be so
       
   264 				// return to the PendingMoreData state.
       
   265 				iState = EPendingMoreData;
       
   266 				}
       
   267 			} break;
       
   268 		case EPendingEmptyLine:
       
   269 			{
       
   270 			// Expecting a LF at the start of the data
       
   271 			if( iData[0] == KLineFeed )
       
   272 				{
       
   273 				// This CRLF delimits an empty line - no need to check that it 
       
   274 				// is part of LWS. 
       
   275 				result = EEmptyLine;
       
   276 				done = ETrue;
       
   277 
       
   278 				// Move to the Idle state and skip past the LF.
       
   279 				iData.Set(iData.Mid(1));
       
   280 				iState = EIdle;
       
   281 				}
       
   282 			else if( iData[0] == KCarriageReturn )
       
   283 				{
       
   284 				// Need to be robust to spurious amounts of CRs - skip past the 
       
   285 				// CR and remain in PendingEmptyLine state.
       
   286 				iData.Set(iData.Mid(1));
       
   287 				}
       
   288 			else
       
   289 				{
       
   290 				// Data is wrong - had a spurious CR in the data. Need to be so
       
   291 				// return to the PendingMoreData state.
       
   292 				iState = EPendingMoreData;
       
   293 				}
       
   294 			} break;
       
   295 		case EPendingFoundLine:
       
   296 			{
       
   297 			// Expecting a leading char tha is not a SP or a HT
       
   298 			TChar ch = iData[0];
       
   299 			if( aLineType == EHeaderLine && (ch == KSpace || ch == KTab) )
       
   300 				{
       
   301 				// This CRLF is part of LWS between tokens in a header field 
       
   302 				// line - move to PendingMoreData state and search for the next CR.
       
   303 				iState = EPendingMoreData;
       
   304 				}
       
   305 			else if( aLineType == EHeaderLine && (ch == KCarriageReturn) )
       
   306 				{
       
   307 				// Need to be robust to spurious amounts of CRs - skip past the 
       
   308 				// CR and remain in EPendingFoundLine state.
       
   309 				iData.Set(iData.Mid(1));
       
   310 				}
       
   311 			else
       
   312 				{
       
   313 				// This CRLF delimits the end of a line - move to Idle state
       
   314 				done = ETrue;
       
   315 				result = ELineParsed;
       
   316 				iState = EIdle;
       
   317 				}
       
   318 			} break;
       
   319 		default:
       
   320 			THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState);
       
   321 			break;
       
   322 			}
       
   323 		}
       
   324 	// Set the output argument aLine
       
   325 	if( result == ELineParsed )
       
   326 		{
       
   327 		// Line was found - set buffer to the line buffer
       
   328 		aLine.Set(iLineBuffer);
       
   329 		}
       
   330 	else
       
   331 		{
       
   332 		// Either an empty line was found or waiting for more data - set buffer
       
   333 		// to empty buffer.
       
   334 		aLine.Set(KNullDesC8());
       
   335 		}
       
   336 	return result;
       
   337 	}
       
   338 
       
   339 THttpDataParser::TParseResult THttpDataParser::GetData(TPtrC8& aData, TInt aMaxSize)
       
   340 /**
       
   341 	Attempts to supply the data requested. The argument aMaxSize specifies the 
       
   342 	amount of data required. If the amount is available the data returns a value
       
   343 	of EGotData, otherwise EPartialData is returned.
       
   344 	@param		aData		The output argument that is set to the buffer with 
       
   345 							the requested data.
       
   346 	@param		aMaxSize	The amount of data required.
       
   347 	@return		A value of EGotData if all the requested data was supplied or 
       
   348 				EPartialData if was not.
       
   349 	@panic		EHttpMessagePanicBadDataParserState The parser was not in the 
       
   350 													Idle state.
       
   351 */
       
   352 	{
       
   353 	__ASSERT_DEBUG( iState == EIdle, THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState) );
       
   354 
       
   355 	// Set the length of the data to copy - intially set to available amount
       
   356 	TInt copyLength = iData.Length();
       
   357 	TParseResult result = EPartialData;
       
   358 
       
   359 	// Make sure that not too much data is given...
       
   360 	if( copyLength >= aMaxSize )
       
   361 		{
       
   362 		// The available data is same or more than the requested amount - adjust
       
   363 		// the copy size to copy just the requested amount.
       
   364 		copyLength = aMaxSize;
       
   365 		result = EGotData;
       
   366 		}
       
   367 	// Copy over the data specified.
       
   368 	aData.Set(iData.Left(copyLength));
       
   369 
       
   370 	// Update the data buffer...
       
   371 	iData.Set(iData.Mid(copyLength));
       
   372 
       
   373 	return result;
       
   374 	}
       
   375 
       
   376 void THttpDataParser::SetData(const TDesC8& aData)
       
   377 /**
       
   378 	Sets the current data packet.
       
   379 	@param		aData	The buffer containing the current data packet.
       
   380 */
       
   381 	{
       
   382 	iData.Set(aData);
       
   383 	}
       
   384 
       
   385 void THttpDataParser::UnparsedData(TPtrC8& aData)
       
   386 /**
       
   387 	Accessor to the unparsed data in the current data packet. The data parser 
       
   388 	can supply the unparsed data that is in the current data packet.
       
   389 	@param		aData	The output argument that is set to the unparsed data.
       
   390 */
       
   391 	{
       
   392 	aData.Set(iData);
       
   393 	}
       
   394 
       
   395 void THttpDataParser::AppendToBufferL(const TDesC8& aData)
       
   396 /**
       
   397 	Appends data to line buffer. If the line buffer is not big enough to hold 
       
   398 	the extra data the parser gets the observer to reallocate a larger buffer.
       
   399 	@param		aData	The data to be appended to the line buffer.
       
   400 */
       
   401 	{
       
   402 	// Check to see if there is enough space
       
   403 	TInt requiredLength = iLineBuffer.Length() + aData.Length();
       
   404 	if( requiredLength > iLineBuffer.MaxLength() )
       
   405 		{
       
   406 		// Need more space...
       
   407 		iBufferSupplier.ReAllocBufferL(requiredLength, iLineBuffer);
       
   408 		}
       
   409 	// Append the data.
       
   410 	iLineBuffer.Append(aData);
       
   411 	}
       
   412 
       
   413 TInt THttpDataParser::FindEOLMarker(TChar& aEOLMarker)
       
   414 /**
       
   415 	Locates end of line marker in current data packet. Need to be tolerant of a 
       
   416 	LF without a leading CR being used as a end of line marker. This function
       
   417 	searches for either a CR of a LF as the eol marker and returns its position
       
   418 	if it is found and the what the marker was. If neither a CR of a LF was 
       
   419 	found then KErrNotFound is returned.
       
   420 	@param		aEOLMarker	An output argument that is set to the eol marker
       
   421 							that was found. Is not valid if the return value is
       
   422 							KErrNotFound.
       
   423 	@return		A value of KErrNotFound is returned if an eol marker was not 
       
   424 				found in the current data packet. If an eol marker was found 
       
   425 				then its position in the data packet is returned.
       
   426 */
       
   427 	{
       
   428 	const TUint len = iData.Length();
       
   429 	TUint pos = 0;
       
   430 	TInt ret = KErrNotFound;
       
   431 	for(;pos < len;++pos)
       
   432 		{
       
   433 		const TUint8& ch = iData[pos];
       
   434 		if(ch == KLineFeed || ch == KCarriageReturn)
       
   435 			{
       
   436 			aEOLMarker = ch;
       
   437 			ret = pos;
       
   438 			break;
       
   439 			}
       
   440 		}
       
   441 	return ret;
       
   442 	}
       
   443 
       
   444