diff -r 000000000000 -r b16258d2340f applayerprotocols/httptransportfw/httpmessage/thttpdataparser.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/applayerprotocols/httptransportfw/httpmessage/thttpdataparser.cpp Tue Feb 02 01:09:52 2010 +0200
@@ -0,0 +1,444 @@
+// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#include "thttpdataparser.h"
+
+#include "mhttpbuffersupplier.h"
+#include "thttpmessagepanic.h"
+
+const TUint KCarriageReturn = '\r';
+const TUint KLineFeed = '\n';
+const TUint KSpace = ' ';
+const TUint KTab = '\t';
+
+THttpDataParser::THttpDataParser(MHttpBufferSupplier& aBufferSupplier)
+: iBufferSupplier(aBufferSupplier), iLineBuffer(TPtr8(NULL, 0))
+/**
+	Constructor. Binds the buffer supplier; the line buffer starts as (NULL, 0).
+	@param		aBufferSupplier	The buffer supplier.
+*/
+	{
+	}
+
+void THttpDataParser::Reset()
+/**
+	Resets the data parser. The line buffer and current data packet buffer are
+	reset to a zero length buffer and the parser moves to the Idle state. The
+	buffer supplier is asked, via DeleteBuffer(), to delete the buffer that was
+	supplying the line buffer.
+*/
+	{
+	// Move to the Idle state and reset the line buffer and data chunk.
+	iState = EIdle;
+	iLineBuffer.Set(NULL, 0, 0);
+	iData.Set(NULL, 0);
+	iBufferSupplier.DeleteBuffer();
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetHeaderLineL(TPtrC8& aLine)
+/**
+	Extracts a header field line. The data parser attempts to parse a line of
+	data where the end of line marker is defined by the CRLF sequence. In
+	HTTP/1.1 header field values can be folded onto multiple lines if the
+	continuation line begins with a SP or HT. The CRLF sequence in this case is
+	part of LWS and is ignored - the CRLF will not be present in the returned
+	line buffer. The delimiting CRLF is not given in the return data.
+
+	If a line cannot be extracted (due to lack of data) then the value
+	EPartialData is returned. In this case the output argument is not valid. If
+	an empty line is found then the value EEmptyLine is returned. If the line is
+	extracted then the value ELineParsed is returned.
+	@param		aLine	The output argument set to the buffer containing the
+						extracted header field line. It is set to an empty
+						descriptor unless the return value is ELineParsed.
+	@return		A value of EPartialData if the requested line type cannot be
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if the line is found then
+				ELineParsed is returned.
+*/
+	{
+	return GetLineL(aLine, EHeaderLine);
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine)
+/**
+	Extracts a line. The data parser attempts to parse a line of data where the
+	end of line marker is defined by the CRLF sequence. The delimiting CRLF is
+	not given in the return data.
+
+	If a line cannot be extracted (due to lack of data) then the value
+	EPartialData is returned. In this case the output argument is not valid. If
+	an empty line is found then the value EEmptyLine is returned. If the line is
+	extracted then the value ELineParsed is returned.
+	@param		aLine	The output argument set to the buffer containing the
+						extracted (standard) line. It is set to an empty
+						descriptor unless the return value is ELineParsed.
+	@return		A value of EPartialData if the requested line type cannot be
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if the line is found then
+				ELineParsed is returned.
+*/
+	{
+	return GetLineL(aLine, EStandardLine);
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetLineL(TPtrC8& aLine, TLineType aLineType)
+/**
+	Parses data for the specified line type. In the HTTP/1.1 protocol there are
+	two types of line. The standard line is delimited by the CRLF sequence. The
+	header field line is delimited by a CRLF sequence that is not followed by a
+	SP or HT. When parsing for a header field line any CRLF sequences followed
+	by a SP or HT are classed as part of LWS and ignored - header field values
+	can be folded onto multiple lines using LWS in this way. The parser is
+	robust to spurious CRs in the data and eol markers that are missing the
+	leading CR.
+
+	Initially the data parser is in the Idle state when starting to parse for a
+	line. In the Idle state it clears the line buffer and drops straight through
+	to the PendingMoreData processing.
+
+	In the PendingMoreData state the data parser looks for an end of line marker.
+	The eol marker can be either a CRLF sequence or just a LF in cases where
+	there has been a deviation from the protocol - this deviation is tolerated
+	for robustness.
+
+	If an eol marker is not found in the current data packet, the parser
+	appends the whole data packet to the line buffer and returns EPartialData.
+	NOTE(review): iData is not advanced here - presumably SetData() comes next.
+
+	If an eol marker is found at the start of the data packet while still in the
+	Idle state this indicates a possible empty line. The parser moves to the
+	PendingEmptyLine state if the eol marker was a CR. If it was a LF then an
+	empty line has been found - the parser stays Idle and returns the EEmptyLine
+	value. The current data packet is updated to skip past the eol marker.
+
+	If an eol marker is found in any other case (not at the start, or not in the
+	Idle state) this indicates a possible line. The data parser moves to the
+	PendingLF state if the eol marker was a CR. If the eol marker was a LF then
+	the parser moves to the PendingFoundLine state. The data up to, but not
+	including the eol marker, is appended to the line buffer and the current
+	data packet is updated to skip past the eol marker.
+
+	In the PendingLF state the data parser expects a LF to be present at the
+	start of the data packet. If the LF is found then the parser moves to the
+	PendingFoundLine state and the data packet is updated to skip past the LF.
+	If a CR is found at the start of the data packet then it is ignored and the
+	parser remains in the PendingLF state. If any other character is found
+	then the initial CR is deemed to be spurious and the parser moves back to
+	the PendingMoreData state.
+
+	In the PendingEmptyLine state the data parser expects a LF to be present at
+	the start of the data packet. If the LF is found then the parser moves to
+	the Idle state and the data packet is updated to skip past the LF. The value
+	EEmptyLine is returned. If a CR is found at the start of the data packet
+	then it is ignored and the parser remains in the PendingEmptyLine state. If
+	any other character is found then the initial CR is deemed to be spurious
+	and the parser moves back to the PendingMoreData state.
+
+	In the PendingFoundLine state the data parser checks the first character in
+	the data packet. If a header field line was requested and it is a SP or HT
+	then the parser moves to the PendingMoreData state; if it is a CR then the
+	CR is skipped and the parser stays in this state. Otherwise a line has been
+	found. The line buffer will contain the line less any CRLF sequences either
+	as the end of line marker or as part of LWS. The output argument aLine is
+	set to the line buffer, ELineParsed is returned and the parser goes Idle.
+
+	The current data packet is checked before processing each state. If there is
+	no data then the parser does not continue as it requires more data. The
+	value EPartialData is returned.
+	@param		aLine		Output argument - set to the parsed line when ELineParsed
+							is returned, otherwise set to an empty descriptor.
+	@param		aLineType	The type of line requested to be parsed.
+	@return		A value of EPartialData if the requested line type cannot be
+				found in the current data packet. If an empty line is found then
+				EEmptyLine is returned otherwise if a line is found then
+				ELineParsed is returned.
+	@panic		EHttpMessagePanicBadDataParserState	The parser is in an invalid
+													state.
+*/
+	{
+	// Parse the current data for a 'line'
+	TBool done = EFalse;
+	TParseResult result = EPartialData;
+	while( !done && iData.Length() > 0 )
+		{
+		// The loop condition guarantees iData is non-empty in every case below.
+		switch( iState )
+			{
+		case EIdle:
+			{
+			// Not waiting for more data - clear the buffer.
+			iLineBuffer.Zero();
+
+			// Deliberate drop through to PendingMoreData (iState is still EIdle).
+			}
+		case EPendingMoreData:
+			{
+			// Locate end of line marker - need to be tolerant of protocol
+			// deviations. Therefore support eol termination by just a LF.
+			TChar eol = 0;
+			TInt posEOL = FindEOLMarker(eol);
+
+			if( posEOL != KErrNotFound )
+				{
+				if( posEOL == 0 && iState == EIdle )
+					{
+					// Data was at start of a new line and the eol marker was
+					// found at the start - possible empty line. Check what the
+					// eol marker was.
+					if( eol == KCarriageReturn )
+						{
+						// Move to the PendingEmptyLine state.
+						iState = EPendingEmptyLine;
+						}
+					else
+						{
+						// An empty line has been found - can stop parsing and
+						// move to the Idle state.
+						done = ETrue;
+						result = EEmptyLine;
+						iState = EIdle;
+						}
+					}
+				else
+					{
+					// Append the data found up to (not including) the eol marker.
+					AppendToBufferL(iData.Left(posEOL));
+
+					// Check what the eol marker was.
+					if( eol == KCarriageReturn )
+						{
+						// Expecting a LF - move to the PendingLF state.
+						iState = EPendingLF;
+						}
+					else
+						{
+						// The eol marker was a LF - move to the PendingFoundLine state.
+						iState = EPendingFoundLine;
+						}
+					}
+				// Skip past the eol marker (done for both branches above).
+				iData.Set(iData.Mid(posEOL + 1));
+				}
+			else
+				{
+				// Not enough data - append the data to the buffer.
+				AppendToBufferL(iData);
+
+				// Move to the PendingMoreData state and wait for next data
+				// packet. Note that iData itself is left untouched here.
+				done = ETrue;
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingLF:
+			{
+			// Expecting a LF at the start of the data
+			if( iData[0] == KLineFeed )
+				{
+				// Skip past the LF and move to PendingFoundLine
+				iData.Set(iData.Mid(1));
+				iState = EPendingFoundLine;
+				}
+			else if( iData[0] == KCarriageReturn )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the
+				// CR and remain in PendingLF state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// Data is wrong - had a spurious CR in the data. Need to be
+				// robust, so return to the PendingMoreData state.
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingEmptyLine:
+			{
+			// Expecting a LF at the start of the data
+			if( iData[0] == KLineFeed )
+				{
+				// This CRLF delimits an empty line - no need to check that it
+				// is part of LWS.
+				result = EEmptyLine;
+				done = ETrue;
+
+				// Move to the Idle state and skip past the LF.
+				iData.Set(iData.Mid(1));
+				iState = EIdle;
+				}
+			else if( iData[0] == KCarriageReturn )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the
+				// CR and remain in PendingEmptyLine state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// Data is wrong - had a spurious CR in the data. Need to be
+				// robust, so return to the PendingMoreData state.
+				iState = EPendingMoreData;
+				}
+			} break;
+		case EPendingFoundLine:
+			{
+			// Expecting a leading char that is not a SP or a HT
+			TChar ch = iData[0];
+			if( aLineType == EHeaderLine && (ch == KSpace || ch == KTab) )
+				{
+				// This CRLF is part of LWS between tokens in a header field
+				// line - move to PendingMoreData state and search for the next eol.
+				iState = EPendingMoreData;
+				}
+			else if( aLineType == EHeaderLine && (ch == KCarriageReturn) )
+				{
+				// Need to be robust to spurious amounts of CRs - skip past the
+				// CR and remain in EPendingFoundLine state.
+				iData.Set(iData.Mid(1));
+				}
+			else
+				{
+				// This CRLF delimits the end of a line - move to Idle state
+				done = ETrue;
+				result = ELineParsed;
+				iState = EIdle;
+				}
+			} break;
+		default:
+			THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState);
+			break;
+			}
+		}
+	// Set the output argument aLine
+	if( result == ELineParsed )
+		{
+		// Line was found - set buffer to the line buffer
+		aLine.Set(iLineBuffer);
+		}
+	else
+		{
+		// Either an empty line was found or waiting for more data - set buffer
+		// to empty buffer.
+		aLine.Set(KNullDesC8());
+		}
+	return result;
+	}
+
+THttpDataParser::TParseResult THttpDataParser::GetData(TPtrC8& aData, TInt aMaxSize)
+/**
+	Attempts to supply the data requested. The argument aMaxSize specifies the
+	amount of data required. If the amount is available the parser returns a
+	value of EGotData, otherwise EPartialData is returned.
+	@param		aData		Output argument - set to the requested data, or to all
+							of the available data if less than aMaxSize is held.
+	@param		aMaxSize	The amount of data required.
+	@return		A value of EGotData if all the requested data was supplied or
+				EPartialData if it was not.
+	@panic		EHttpMessagePanicBadDataParserState	The parser was not in the
+				Idle state (checked with __ASSERT_DEBUG).
+*/
+	{
+	__ASSERT_DEBUG( iState == EIdle, THttpMessagePanic::Panic(THttpMessagePanic::EHttpMessagePanicBadDataParserState) );
+
+	// Set the length of the data to copy - initially set to available amount
+	TInt copyLength = iData.Length();
+	TParseResult result = EPartialData;
+
+	// Make sure that not too much data is given...
+	if( copyLength >= aMaxSize )
+		{
+		// The available data is same or more than the requested amount - adjust
+		// the copy size to copy just the requested amount.
+		copyLength = aMaxSize;
+		result = EGotData;
+		}
+	// Point the output at the data specified (no bytes are copied).
+	aData.Set(iData.Left(copyLength));
+
+	// Update the data buffer to skip past what was handed out.
+	iData.Set(iData.Mid(copyLength));
+
+	return result;
+	}
+
+void THttpDataParser::SetData(const TDesC8& aData)
+/**
+	Sets the current data packet.
+	@param		aData	The buffer containing the current data packet.
+*/
+	{
+	iData.Set(aData);
+	}
+
+void THttpDataParser::UnparsedData(TPtrC8& aData)
+/**
+	Accessor to the unparsed data in the current data packet. The data parser
+	can supply the unparsed data that is in the current data packet.
+	@param		aData	The output argument that is set to the unparsed data.
+*/
+	{
+	aData.Set(iData);
+	}
+
+void THttpDataParser::AppendToBufferL(const TDesC8& aData)
+/**
+	Appends data to line buffer. If the line buffer is not big enough to hold
+	the extra data the parser asks the buffer supplier for a larger buffer.
+	@param		aData	The data to be appended to the line buffer.
+*/
+	{
+	// Check to see if there is enough space
+	TInt requiredLength = iLineBuffer.Length() + aData.Length();
+	if( requiredLength > iLineBuffer.MaxLength() )
+		{
+		// Need more space...
+		iBufferSupplier.ReAllocBufferL(requiredLength, iLineBuffer);
+		}
+	// Append the data.
+	iLineBuffer.Append(aData);
+	}
+
+TInt THttpDataParser::FindEOLMarker(TChar& aEOLMarker)
+/**
+	Locates end of line marker in current data packet. Need to be tolerant of a
+	LF without a leading CR being used as an end of line marker. This function
+	searches for either a CR or a LF as the eol marker and returns its position
+	if it is found and what the marker was. If neither a CR nor a LF was
+	found then KErrNotFound is returned.
+	@param		aEOLMarker	An output argument that is set to the eol marker
+							that was found. Is not valid if the return value is
+							KErrNotFound.
+	@return		A value of KErrNotFound is returned if an eol marker was not
+				found in the current data packet. If an eol marker was found
+				then its position in the data packet is returned.
+*/
+	{
+	const TUint len = iData.Length();
+	TUint pos = 0;
+	TInt ret = KErrNotFound;
+	for(;pos < len;++pos)
+		{
+		const TUint8& ch = iData[pos];
+		if(ch == KLineFeed || ch == KCarriageReturn)
+			{
+			aEOLMarker = ch;
+			ret = pos;
+			break;
+			}
+		}
+	return ret;
+	}
+
+