applayerprotocols/httptransportfw/httpmessage/thttpdataparser.cpp
changeset 0 b16258d2340f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/applayerprotocols/httptransportfw/httpmessage/thttpdataparser.cpp	Tue Feb 02 01:09:52 2010 +0200
@@ -0,0 +1,444 @@
+// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#include "thttpdataparser.h"
+
+#include "mhttpbuffersupplier.h"
+#include "thttpmessagepanic.h"
+
+const TUint KCarriageReturn	= '\r';
+const TUint KLineFeed		= '\n';
+const TUint KSpace			= ' ';
+const TUint KTab			= '\t';
+
+THttpDataParser::THttpDataParser(MHttpBufferSupplier& aBufferSupplier)
+: iBufferSupplier(aBufferSupplier), iLineBuffer(TPtr8(NULL, 0))
+/**
+	Constructor.
+	@param		aBufferSupplier	The buffer supplier.
+*/
+	{
+	}
+
+void THttpDataParser::Reset()
+/**
+	Resets the data parser. The line buffer and current data packet buffer are
+	reset to a zero length buffer and the parser moves to the Idle state. The 
+	observer is notified that the buffer supplying the line buffer should be 
+	deleted.
+*/
+	{
+	// Move to the Idle state and reset the line buffer and data chunk.
+	iState = EIdle;
+	iLineBuffer.Set(NULL, 0, 0);
+	iData.Set(NULL, 0);
+	iBufferSupplier.DeleteBuffer();
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetHeaderLineL(TPtrC8& aLine)
+/**
+	Extracts a header field line. The data parser attempts to parse a line of
+	data where the end of line marker is defined by the CRLF sequence. In 
+	HTTP/1.1 header field values can be folded onto multiple lines if the 
+	continuation line begins with a SP or HT. The CRLF sequence in this case is
+	part of LWS and is ignored - the CRLF will not be preset in the returned 
+	line buffer. The delimiting CRLF is not given in the return data. 
+	
+	If a line cannot be extracted (due to lack of data) then the value 
+	EPartialData is returned. In this case the output argument is not valid. If 
+	an empty line is found then the value EEmptyLine is returned. If the line is
+	extracted then the value ELineParsed is returned.
+	@param		aLine	The output argument set to the buffer containing the 
+						extracted header field line. This will only be valid if
+						the return value is either ELineParsed or EEmptyLine.
+	@return		A value of EPartialData if the requested line type cannot be 
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if the line is found then
+				ELineParsed is returned.
+*/
+	{
+	return GetLineL(aLine, EHeaderLine);
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine)
+/**
+	Extracts a line. The data parser attempts to parse a line of data where the
+	end of line marker is defined by the CRLF sequence. The delimiting CRLF is
+	not given in the return data. 
+	
+	If a line cannot be extracted (due to lack of data) then the value 
+	EPartialData is returned. In this case the output argument is not valid. If 
+	an empty line is found then the value EEmptyLine is returned. If the line is
+	extracted then the value ELineParsed is returned.
+	@param		aLine	The output argument set to the buffer containing the 
+						extracted header field line. This will only be valid if
+						the return value is either ELineParsed or EEmptyLine.
+	@return		A value of EPartialData if the requested line type cannot be 
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if the line is found then
+				ELineParsed is returned.
+*/
+	{
+	return GetLineL(aLine, EStandardLine);
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine, TLineType aLineType)
+/**
+	Parses data for the specified line type. In the HTTP/1.1 protocol there are
+	two types of line. The standard line is delimited by the CRLF sequence. The
+	header field line is delimited by a CRLF sequence that is not followed by a 
+	SP or HT. When parsing for a header field line any CRLF sequences followed 
+	by a SP or HT are classed as part of LWS and ignored - header field values 
+	can be folded onto multiple line using this LWS in this way. The parser is
+	robust to spurious CRs in the data and eol markers that are missing the
+	leading CR.
+
+	Initially the data parser is in the Idle state when starting to parse for a
+	line. In the Idle state it clears the line buffer and moves to the 
+	PendingMoreData state.
+
+	In the PendingMoreData state the data parser looks for an end of line marker.
+	The eol marker should can be either a CRLF sequence or just a LF in cases 
+	where there has been a deviation from the protocol - this deviation is 
+	tolerated for robustness.
+	 
+	If an eol marker is not found one in the current data packet, the parser 
+	appends the data packet to the line buffer and returns EPartialData. The 
+	current data packet is then discarded. 
+	
+	If an eol maker is found at the start of the data packet this indicates a
+	possible empty line. The data parser moves to the PendingEmptyLine state if
+	the eol marker was a CR. If the eol marker was a LF then an empty line has
+	been found - the parser moves to the Idle state and returns the EEmptyLine 
+	value. The current data packet is updated to skip past the eol marker. 
+	
+	If an eol marker is found but not at the start of the data packet this 
+	indicates a possible line. The data parser moves to the PendingLF state if 
+	the eol marker was a CR. If the eol marker was a LF then the parser moves to
+	the PendingFoundLine state. The data upto, but not including the eol marker,
+	is appended to the line buffer and the current data packet is updated to 
+	skip past the eol marker.
+	
+	In the PendingLF state the data parser expects a LF to be present at the 
+	start of the data packet. If the LF is found then the parser moves to the 
+	PendingFoundLine state and the data packet is updated to skip past the LF. 
+	If a CR is found at the start of the data packet then it is ignored and the
+	parser remains in the PendingLF state. If the any other character is found
+	then the initial CR is deemed to be spurious and the parser moves back to 
+	the PendingMoreData state.
+
+	In the PendingEmptyLine state the data parser expects a LF to be present at
+	the start of the data packet. If the LF is found then the parser moves to 
+	the Idle state and the data packet is updated to skip past the LF. The value
+	EEmptyLine is returned.	If a CR is found at the start of the data packet 
+	then it is ignored and the parser remains in the PendingLF state. If the any
+	other character is found then the initial CR is deemed to be spurious and 
+	the parser moves back to the PendingMoreData state.
+
+	In the PendingFoundLine state the data parser checks the first character in 
+	the data packet. If it is a SP or HT and the a header field line was 
+	requested then the parser moves to the PendingMoreData state. Otherwise a 
+	line has been found. The line buffer will contain the line less any CRLF 
+	sequences either as the end of line marker or as part of LWS. The output 
+	argument aLine is set to the line buffer. The value ELineParsed is returned
+	and the parser moves to the Idle state.
+
+	The current data packet is checked before processing each state. If there is
+	no data then the parser does not continue as it requires more data. The 
+	value EPartialData is returned.
+	@param		aLine		The output argument that is set to the parsed line 
+							when the value ELineParsed is returned.
+	@param		aLineType	The type of line requested to be parsed.
+	@return		A value of EPartial data if a the requested line type cannot be 
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if a line is found then
+				ELineParsed is returned.
+	@panic		EHttpMessagePanicBadDataParserState	The parser is in an invalid
+													state.
+*/
+	{
+	// Parse the current data for a 'line'
+	TBool done	= EFalse;
+	TParseResult result = EPartialData;
+	while( !done && iData.Length() > 0 )
+		{
+		// The data buffer always has data - no need to check...
+		switch( iState )
+			{
+		case EIdle:
+			{	
+			// Not waiting for more data - clear the buffer.
+			iLineBuffer.Zero();
+
+			// Drop through to PendingMoreData case...
+			}
+		case EPendingMoreData:
+			{
+			// Locate end of line marker - need to be tolerant of protocol 
+			// deviations. Therefore support eol termination by just a LF.
+			TChar eol = 0;
+			TInt posEOL = FindEOLMarker(eol);
+
+			if( posEOL != KErrNotFound )
+				{
+				if( posEOL == 0 && iState == EIdle )
+					{
+					// Data was at start of a new line and the eol marker was 
+					// found at the start - possible empty line. Check what the
+					// eol marker was
+					if( eol == KCarriageReturn )
+						{
+						// Move to the PendingEmptyLine state.
+						iState = EPendingEmptyLine;
+						}
+					else
+						{
+						// An empty line has been found - can stop parsing and
+						// move to the Idle state.
+						done = ETrue;
+						result = EEmptyLine;
+						iState = EIdle;
+						}
+					}
+				else
+					{
+					// Append the data found upto the eol marker.
+					AppendToBufferL(iData.Left(posEOL));
+
+					// Check what the eol marker was.
+					if( eol == KCarriageReturn )
+						{
+						// Expecting a LF - move to the PendingLF state.
+						iState = EPendingLF;
+						}
+					else
+						{
+						// The eol marker was a LF - move to the PendingFoundLine state.
+						iState = EPendingFoundLine;
+						}
+					}
+				// Skip past the eol marker...
+				iData.Set(iData.Mid(posEOL + 1));
+				}
+			else
+				{
+				// Not enough data - append the data to the buffer.
+				AppendToBufferL(iData);
+
+				// Move to the PendingMoreData state and wait for next data 
+				// packet				
+				done = ETrue;
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingLF:
+			{
+			// Expecting a LF at the start of the data
+			if( iData[0] == KLineFeed )
+				{
+				// Skip past the LF and move to PendingFoundLine
+				iData.Set(iData.Mid(1));
+				iState = EPendingFoundLine;
+				}
+			else if( iData[0] == KCarriageReturn )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the 
+				// CR and remain in PendingLF state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// Data is wrong - had a spurious CR in the data. Need to be so
+				// return to the PendingMoreData state.
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingEmptyLine:
+			{
+			// Expecting a LF at the start of the data
+			if( iData[0] == KLineFeed )
+				{
+				// This CRLF delimits an empty line - no need to check that it 
+				// is part of LWS. 
+				result = EEmptyLine;
+				done = ETrue;
+
+				// Move to the Idle state and skip past the LF.
+				iData.Set(iData.Mid(1));
+				iState = EIdle;
+				}
+			else if( iData[0] == KCarriageReturn )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the 
+				// CR and remain in PendingEmptyLine state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// Data is wrong - had a spurious CR in the data. Need to be so
+				// return to the PendingMoreData state.
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingFoundLine:
+			{
+			// Expecting a leading char tha is not a SP or a HT
+			TChar ch = iData[0];
+			if( aLineType == EHeaderLine && (ch == KSpace || ch == KTab) )
+				{
+				// This CRLF is part of LWS between tokens in a header field 
+				// line - move to PendingMoreData state and search for the next CR.
+				iState = EPendingMoreData;
+				}
+			else if( aLineType == EHeaderLine && (ch == KCarriageReturn) )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the 
+				// CR and remain in EPendingFoundLine state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// This CRLF delimits the end of a line - move to Idle state
+				done = ETrue;
+				result = ELineParsed;
+				iState = EIdle;
+				}
+			} break;
+		default:
+			THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState);
+			break;
+			}
+		}
+	// Set the output argument aLine
+	if( result == ELineParsed )
+		{
+		// Line was found - set buffer to the line buffer
+		aLine.Set(iLineBuffer);
+		}
+	else
+		{
+		// Either an empty line was found or waiting for more data - set buffer
+		// to empty buffer.
+		aLine.Set(KNullDesC8());
+		}
+	return result;
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetData(TPtrC8& aData, TInt aMaxSize)
+/**
+	Attempts to supply the data requested. The argument aMaxSize specifies the 
+	amount of data required. If the amount is available the data returns a value
+	of EGotData, otherwise EPartialData is returned.
+	@param		aData		The output argument that is set to the buffer with 
+							the requested data.
+	@param		aMaxSize	The amount of data required.
+	@return		A value of EGotData if all the requested data was supplied or 
+				EPartialData if was not.
+	@panic		EHttpMessagePanicBadDataParserState The parser was not in the 
+													Idle state.
+*/
+	{
+	__ASSERT_DEBUG( iState == EIdle, THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState) );
+
+	// Set the length of the data to copy - intially set to available amount
+	TInt copyLength = iData.Length();
+	TParseResult result = EPartialData;
+
+	// Make sure that not too much data is given...
+	if( copyLength >= aMaxSize )
+		{
+		// The available data is same or more than the requested amount - adjust
+		// the copy size to copy just the requested amount.
+		copyLength = aMaxSize;
+		result = EGotData;
+		}
+	// Copy over the data specified.
+	aData.Set(iData.Left(copyLength));
+
+	// Update the data buffer...
+	iData.Set(iData.Mid(copyLength));
+
+	return result;
+	}
+
+void THttpDataParser::SetData(const TDesC8& aData)
+/**
+	Sets the current data packet.
+	@param		aData	The buffer containing the current data packet.
+*/
+	{
+	iData.Set(aData);
+	}
+
+void THttpDataParser::UnparsedData(TPtrC8& aData)
+/**
+	Accessor to the unparsed data in the current data packet. The data parser 
+	can supply the unparsed data that is in the current data packet.
+	@param		aData	The output argument that is set to the unparsed data.
+*/
+	{
+	aData.Set(iData);
+	}
+
+void THttpDataParser::AppendToBufferL(const TDesC8& aData)
+/**
+	Appends data to line buffer. If the line buffer is not big enough to hold 
+	the extra data the parser gets the observer to reallocate a larger buffer.
+	@param		aData	The data to be appended to the line buffer.
+*/
+	{
+	// Check to see if there is enough space
+	TInt requiredLength = iLineBuffer.Length() + aData.Length();
+	if( requiredLength > iLineBuffer.MaxLength() )
+		{
+		// Need more space...
+		iBufferSupplier.ReAllocBufferL(requiredLength, iLineBuffer);
+		}
+	// Append the data.
+	iLineBuffer.Append(aData);
+	}
+
+TInt THttpDataParser::FindEOLMarker(TChar& aEOLMarker)
+/**
+	Locates end of line marker in current data packet. Need to be tolerant of a 
+	LF without a leading CR being used as a end of line marker. This function
+	searches for either a CR of a LF as the eol marker and returns its position
+	if it is found and the what the marker was. If neither a CR of a LF was 
+	found then KErrNotFound is returned.
+	@param		aEOLMarker	An output argument that is set to the eol marker
+							that was found. Is not valid if the return value is
+							KErrNotFound.
+	@return		A value of KErrNotFound is returned if an eol marker was not 
+				found in the current data packet. If an eol marker was found 
+				then its position in the data packet is returned.
+*/
+	{
+	const TUint len = iData.Length();
+	TUint pos = 0;
+	TInt ret = KErrNotFound;
+	for(;pos < len;++pos)
+		{
+		const TUint8& ch = iData[pos];
+		if(ch == KLineFeed || ch == KCarriageReturn)
+			{
+			aEOLMarker = ch;
+			ret = pos;
+			break;
+			}
+		}
+	return ret;
+	}
+
+