filehandling/htmltorichtextconverter/src/CHtmlToCrtConvParser.cpp
changeset 0 2e3d3ce01487
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filehandling/htmltorichtextconverter/src/CHtmlToCrtConvParser.cpp	Tue Feb 02 10:12:00 2010 +0200
@@ -0,0 +1,500 @@
+// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+//========================================================================================
+//the html to be parsed looks like:
+//<tag attribute=attribute_value attribute = "attribute value">text ... etc
+//
+//(1)'tag' is written to iTagBuffer
+//(2)'attribute' is written to iAttributeBuffer and 'attribute value' is written
+//   to iAttributeValueBuffer, and each pair are appended to the array iAttributes
+//(3)iStartOfTextPosition is set to the start of 'text'
+//
+//when < is seen, 'text' is written to the richtext object
+//when > is seen, 'iTagBuffer' is recognised, and any required action is performed; similarly for iAttributes
+//========================================================================================
+
+#include "CHtmlToCrtConvParser.h"
+#include "CHtmlToCrtConvBuffer.h"
+#include "CHtmlToCrtConvHashTable.h"
+#include "CHtmlToCrtConvHash.h"
+#include "CHtmlToCrtConvActionProcessor.h"
+#include "MHtmlToCrtConvResourceFile.h"
+
+const TInt KAttributeValueBufferGranularity = 10;
+const TInt KDefaultTextPosition				= -1;
+
+// Two-phase factory. Allocates the parser bound to aBuffer, then runs
+// ConstructL with aRichText and aResourceFile. The half-built object is kept
+// on the cleanup stack while ConstructL runs so that a leave does not leak it.
+// Returns the fully constructed parser, no longer on the cleanup stack.
+CHtmlToCrtConvParser* CHtmlToCrtConvParser::NewL(CRichText& aRichText, CHtmlToCrtConvBuffer& aBuffer, MHtmlToCrtConvResourceFile& aResourceFile)
+	{
+	CHtmlToCrtConvParser* parser = new(ELeave) CHtmlToCrtConvParser(aBuffer);
+
+	CleanupStack::PushL(parser);
+	parser->ConstructL(aRichText, aResourceFile);
+	CleanupStack::Pop(parser);
+
+	return parser;
+	}
+
+// Second-phase constructor: creates the three heap-owned helpers.
+// Any of the NewL calls may leave.
+void CHtmlToCrtConvParser::ConstructL(CRichText& aRichText, MHtmlToCrtConvResourceFile& aResourceFile)
+	{
+	// growable byte buffer that collects the current attribute value
+	iAttributeValueBuffer = CBufFlat::NewL(KAttributeValueBufferGranularity);
+
+	// lookup table used for tags, attributes and character entities
+	iHashTable = CHtmlToCrtConvHashTable::NewL();
+
+	// receives the tag/attribute/text actions produced by the parser
+	iActionProcessor = CHtmlToCrtConvActionProcessor::NewL(aRichText, aResourceFile);
+	}
+
+// First-phase constructor. Marks both text positions as "no text pending"
+// (KDefaultTextPosition) and binds the parser to the buffer it reads from.
+CHtmlToCrtConvParser::CHtmlToCrtConvParser(CHtmlToCrtConvBuffer& aBuffer)
+	: iStartOfTextPosition(KDefaultTextPosition),
+	  iEndOfTextPosition(KDefaultTextPosition),
+	  iBuffer(aBuffer)
+	{
+	}
+
+// Releases everything the parser owns: the helpers created in ConstructL and
+// any attribute objects still queued in iAttributes.
+CHtmlToCrtConvParser::~CHtmlToCrtConvParser()
+	{
+	// helper objects
+	delete iHashTable;
+	delete iActionProcessor;
+
+	// queued attributes: delete the pointed-to objects, then release the array
+	iAttributes.ResetAndDestroy();
+	iAttributes.Close();
+
+	// scratch buffer for attribute values
+	delete iAttributeValueBuffer;
+	}
+
+//=============================================================
+//DoOneStepL
+//=============================================================
+// Processes up to KCharsProcessedInOneStep characters from iBuffer.
+// Each character is classified by InspectCurrentCharacter and, when that
+// returns ETrue, acted upon by DoActionL. Pending text is flushed to the rich
+// text both when the input is exhausted and when the buffer signals that its
+// end has been reached.
+// Returns the last result of ReadCharacterL: EFalse once no more input is
+// available, ETrue otherwise.
+TBool CHtmlToCrtConvParser::DoOneStepL()
+	{
+	TChar ch;
+	TBool moreToRead=ETrue;
+
+	for (TInt remaining=KCharsProcessedInOneStep; remaining>0; --remaining)
+		{
+		moreToRead=iBuffer.ReadCharacterL(ch, iBufferPosition, iEndOfBufferReached);
+
+		if (!moreToRead)
+			{
+			// no more input: write out whatever text is still outstanding
+			const TBool textOutstanding=(iStartOfTextPosition!=KDefaultTextPosition);
+			if (textOutstanding)
+				{
+				iBuffer.GetToEndOfBufferL(iTextBuffer, iStartOfTextPosition);
+				WriteToRichTextL();
+				}
+			break;
+			}
+
+		const TBool actOnCharacter=InspectCurrentCharacter(ch);
+		if (actOnCharacter)
+			{
+			DoActionL(ch);
+			}
+
+		// The current buffer is about to be exhausted while text is pending:
+		// flush the text up to and including the current position and start
+		// over in the initial state.
+		const TBool textPending=(iStartOfTextPosition!=KDefaultTextPosition);
+		if (iEndOfBufferReached && textPending && moreToRead)
+			{
+			iEndOfTextPosition=iBufferPosition;
+			iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition);
+			WriteToRichTextL();
+
+			iParserState=EInitialState;
+			iStartOfTextPosition=KDefaultTextPosition;
+			iEndOfTextPosition=KDefaultTextPosition;
+			}
+		}
+
+	return moreToRead;
+	}
+//======================================================================
+//InspectCurrentCharacter - updates iParserState, iTagState, iInTag and iInCharacterEntity
+//======================================================================
+// Classifies aChar and moves the state machine on accordingly.
+// Returns ETrue when the caller should follow up with DoActionL for this
+// character, EFalse when the character needs no further action.
+TBool CHtmlToCrtConvParser::InspectCurrentCharacter(TChar aChar)
+	{
+	// While skipping a <!- ... section only '>' is of interest; DoActionL then
+	// decides whether it really terminates the section.
+	if (iParserState==EReadingJavascript)
+		{
+		if (aChar=='>')
+			{
+			iParserState=ESeeEndOfTagWhileReadingJavascript;
+			}
+		return ETrue;
+		}
+
+	// '<' always opens a new tag
+	if (aChar=='<')
+		{
+		iInTag		=ETrue;
+		iTagState	=EOpeningTag;
+		iParserState=ESeeStartOfTag;
+		return ETrue;
+		}
+
+	TBool actOnCharacter=ETrue;
+
+	if (iInTag)
+		{
+		const TBool isSpecial=(aChar=='/' || aChar=='=' || aChar=='"' || aChar=='>' || aChar=='!');
+		if (isSpecial)
+			{
+			SeeSpecialCharactersInTag(aChar, actOnCharacter);
+			}
+		else if (aChar.IsSpace())
+			{
+			SeeWhiteSpaceCharacterInTag(actOnCharacter);
+			}
+		else if (iParserState==ESeeExclamationMark && aChar=='-')
+			{
+			// "<!-" : start skipping until "-->" is seen
+			iParserState=EReadingJavascript;
+			actOnCharacter=EFalse;
+			}
+		else
+			{
+			SeeOtherCharactersInTag();
+			}
+		return actOnCharacter;
+		}
+
+	// '&' starts a character entity
+	if (aChar=='&')
+		{
+		iParserState=EStartOfCharacterEntity;
+		iInCharacterEntity=ETrue;
+		// NOTE(review): the start-of-text marker is cleared here and DoActionL is
+		// not invoked for '&', so text collected immediately before the entity
+		// appears never to be written to the rich text - confirm.
+		iStartOfTextPosition=KDefaultTextPosition;
+		return EFalse;
+		}
+
+	// inside an entity: ';' terminates it, anything else is part of its name
+	if (iInCharacterEntity)
+		{
+		const TBool terminator=(aChar==';');
+		iParserState=terminator ? EEndOfCharacterEntity : EReadingCharacterEntity;
+		if (terminator)
+			{
+			iInCharacterEntity=EFalse;
+			}
+		return ETrue;
+		}
+
+	// plain text outside of any tag or entity
+	SeeOtherCharactersNotInTag(actOnCharacter);
+	return actOnCharacter;
+	}
+//=============================================================
+//functions used by InspectCurrentCharacter
+//=============================================================
+// State transition for a white space character seen inside a tag.
+// aBool is set to EFalse when the white space merely terminates a tag name or
+// an attribute name; it is left untouched in every other case.
+void CHtmlToCrtConvParser::SeeWhiteSpaceCharacterInTag(TBool& aBool)
+	{
+	if (iParserState==EReadingOpeningTag || iParserState==EReadingClosingTag)
+		{
+		// end of the tag name
+		iParserState=EFinishedReadingTag;
+		aBool=EFalse;
+		}
+	else if (iParserState==EReadingAttribute)
+		{
+		// end of an attribute name
+		iParserState=EFinishedReadingAttribute;
+		aBool=EFalse;
+		}
+	else if (iParserState==EReadingAttributeValue)
+		{
+		// end of an unquoted attribute value
+		iParserState=EFinishedReadingAttributeValue;
+		}
+	// In every other state, including inside a quoted attribute value, the
+	// state is left as it is.
+	}
+
+// State transition for one of the characters / = " > ! seen inside a tag.
+// aBool is set to EFalse when the character is pure markup and needs no
+// action, and to ETrue when '/' or '=' turns out to be part of an attribute
+// value; otherwise it is left untouched.
+void CHtmlToCrtConvParser::SeeSpecialCharactersInTag(TChar aChar, TBool& aBool)
+	{
+	const TBool readingValue=(iParserState==EReadingAttributeValue
+							|| iParserState==EReadingAttributeValueWithinInvCommas);
+
+	if (aChar=='/')
+		{
+		if (readingValue)
+			{
+			aBool=ETrue;
+			}
+		else if (iParserState==ESeeStartOfTag)
+			{
+			// "</" : this is a closing tag
+			iTagState=EClosingTag;
+			iParserState=ESeeClosingTagIndicator;
+			aBool=EFalse;
+			}
+		}
+	else if (aChar=='=')
+		{
+		if (readingValue)
+			{
+			aBool=ETrue;
+			}
+		else
+			{
+			iParserState=ESeeEquals;
+			aBool=EFalse;
+			}
+		}
+	else if (aChar=='"')
+		{
+		if (iParserState==ESeeEquals)
+			{
+			// opening quote of an attribute value
+			iParserState=EReadingAttributeValueWithinInvCommas;
+			aBool=EFalse;
+			}
+		else
+			{
+			// any other quote is taken as the end of an attribute value
+			iParserState=EFinishedReadingAttributeValue;
+			}
+		}
+	else if (aChar=='>')
+		{
+		// '>' ends the tag in every state, also inside a quoted value
+		iInTag=EFalse;
+		iParserState=ESeeEndOfTag;
+		}
+	else if (aChar=='!' && iParserState==ESeeStartOfTag)
+		{
+		// "<!"
+		iParserState=ESeeExclamationMark;
+		aBool=EFalse;
+		}
+	}
+
+// State transition for an ordinary (non-special, non-space) character seen
+// inside a tag: it starts a tag name, an attribute name or an attribute value
+// depending on what was seen just before. States not listed are left alone.
+void CHtmlToCrtConvParser::SeeOtherCharactersInTag()
+	{
+	switch (iParserState)
+		{
+		case ESeeStartOfTag:				// first character after '<'
+			iParserState=EReadingOpeningTag;
+			break;
+
+		case ESeeClosingTagIndicator:		// first character after "</"
+			iParserState=EReadingClosingTag;
+			break;
+
+		case ESeeEquals:					// first character of an unquoted value
+			iParserState=EReadingAttributeValue;
+			break;
+
+		case EFinishedReadingAttribute:
+			// the previous attribute had no value: drop its name
+			iAttributeBuffer.Zero();
+			// fall through
+		case EFinishedReadingTag:
+		case EFinishedReadingAttributeValue:
+			iParserState=EReadingAttribute;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+// State transition for a character outside of any tag or entity.
+// While a text run is already being read, aBool is set to EFalse so that no
+// action is taken for the character. After a tag, after an entity or in the
+// initial state, the character starts a new text run and aBool is untouched.
+void CHtmlToCrtConvParser::SeeOtherCharactersNotInTag(TBool& aBool)
+	{
+	if (iParserState==EReadingText)
+		{
+		aBool=EFalse;
+		return;
+		}
+
+	const TBool startsTextRun=(iParserState==ESeeEndOfTag
+							|| iParserState==EEndOfCharacterEntity
+							|| iParserState==EInitialState);
+	if (startsTextRun)
+		{
+		iParserState=EReadingText;
+		}
+	}
+//=============================================================
+//DoActionL
+//=============================================================
+// Performs the work that belongs to the state InspectCurrentCharacter has
+// just selected for aChar: collecting tag, attribute and entity characters,
+// flushing pending text at the start of a tag, and triggering the tag or
+// entity operation once a tag or entity is complete.
+// Leaves if any of the called ...L functions leaves.
+void CHtmlToCrtConvParser::DoActionL(TChar aChar)
+	{
+	switch(iParserState)
+		{
+		case EReadingText:
+			// remember where the text run starts; the text itself is fetched
+			// from iBuffer later, when the run ends
+			iStartOfTextPosition=iBufferPosition;
+			break;
+		case EReadingOpeningTag:
+		case EReadingClosingTag:
+			WriteToTagBufferL(aChar);
+			break;
+		case EEndOfCharacterEntity:
+			DoEntityOperationL();
+			break;
+		case EReadingCharacterEntity:
+			WriteToEntityBufferL(aChar);
+			break;
+		case EReadingAttribute:
+			WriteToAttributeBufferL(aChar);
+			break;
+		case EReadingAttributeValue:
+		case EReadingAttributeValueWithinInvCommas:
+			WriteToAttributeValueBufferL(aChar);
+			break;
+		case EFinishedReadingAttributeValue:
+			WriteToAttributeArrayL();
+			iAttributeBuffer.Zero();
+			iAttributeValueBuffer->Reset();
+			break;
+		case ESeeStartOfTag:
+			// a tag begins: write out the text that precedes the '<'
+			if (iStartOfTextPosition!=KDefaultTextPosition)
+				{
+				iEndOfTextPosition=iBufferPosition - 1;
+				iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition);
+				WriteToRichTextL();
+				iStartOfTextPosition=KDefaultTextPosition;
+				iEndOfTextPosition=KDefaultTextPosition;
+				}
+			break;
+		case ESeeEndOfTag:
+			// an unquoted value that runs up to the '>' has not been stored yet
+			if(iAttributeValueBuffer->Size())
+				{
+				WriteToAttributeArrayL();
+				}
+			// Always clear the attribute scratch buffers at the end of a tag.
+			// Otherwise the name of a trailing attribute without a value (as in
+			// <input disabled>) would be prepended to the first attribute name
+			// of the next tag.
+			iAttributeBuffer.Zero();
+			iAttributeValueBuffer->Reset();
+			DoTagOperationL();
+			break;
+		case ESeeEndOfTagWhileReadingJavascript:
+			{
+			_LIT(KJavascriptEnd, "--");
+			if(!(iLastTwoCharacters.Compare(KJavascriptEnd)))
+				{
+				iParserState=EInitialState;//finished reading javascript
+				iInTag=EFalse;
+				// do not let this section's "--" influence the next one
+				iLastTwoCharacters.Zero();
+				}
+			else
+				{
+				iParserState=EReadingJavascript;
+				// this '>' is ordinary content: record it like any other
+				// character so that e.g. "->->" is not mistaken for "-->"
+				if(iLastTwoCharacters.Length()==2)
+					iLastTwoCharacters.Copy(iLastTwoCharacters.Right(1));
+
+				iLastTwoCharacters.Append(aChar);
+				}
+			break;
+			}
+		case EReadingJavascript:
+			//keep record of last two characters encountered, for comparing with -- when > seen
+			if(iLastTwoCharacters.Length()==2)
+				iLastTwoCharacters.Copy(iLastTwoCharacters.Right(1));
+
+			iLastTwoCharacters.Append(aChar);
+			break;
+		default:
+			break;
+		};
+	}
+//=============================================================
+//DoTagOperationL
+//=============================================================
+// Called when '>' completes a tag. Looks up the collected tag name and, if a
+// name was collected, passes the tag and then each queued attribute to the
+// action processor. If any attribute requests it, the image resource is
+// inserted afterwards.
+// The queued attributes are discarded on every normal return, even when no tag
+// name was collected, so attributes picked up from malformed markup (for
+// example <"a=b>) cannot be applied to the following tag.
+// Leaves if the action processor leaves.
+void  CHtmlToCrtConvParser::DoTagOperationL()
+	{
+	const TInt count=iAttributes.Count();
+	THtmlToCrtConvTagType tagType=iHashTable->LookupTag(iTagBuffer);
+
+	if (iTagBuffer.Length())
+		{
+		iActionProcessor->DoTagActionL(tagType, iTagState);
+		iTagBuffer.Zero();
+
+		// set by DoAttributeActionL when an image resource has to be inserted
+		TBool imgTagResourceReqd=EFalse;
+		for(TInt ii=0; ii<count; ii++)
+			{
+			THtmlToCrtConvAttributeType attributeType=(iAttributes)[ii]->Type();
+			const TDesC8& attributeValue=(iAttributes)[ii]->Value();
+			iActionProcessor->DoAttributeActionL(tagType, iTagState, attributeType, attributeValue, imgTagResourceReqd);
+			}
+		if(imgTagResourceReqd)
+			{
+			iActionProcessor->InsertImgTagResourceL();
+			}
+		}
+
+	// the attributes belong to this tag only
+	iAttributes.ResetAndDestroy();
+	}
+
+// Called when ';' completes a character entity. Looks the collected entity
+// name up in the hash table and, unless the result is KHtmlEntityUnknown,
+// writes the replacement text to the rich text. The entity buffer is emptied
+// in either case.
+void CHtmlToCrtConvParser::DoEntityOperationL()
+	{
+	const TDesC16& replacement = iHashTable->LookupEntity(iEntityBuffer);
+	const TBool recognised = (replacement != KHtmlEntityUnknown);
+
+	if (recognised)
+		{
+		iTextBuffer.Set(replacement);
+		WriteToRichTextL();
+		}
+
+	iEntityBuffer.Zero();
+	}
+
+// Hands the text currently referenced by iTextBuffer to the action processor.
+// Callers are expected to supply non-empty text (checked by ASSERT); empty
+// text is ignored rather than written.
+void  CHtmlToCrtConvParser::WriteToRichTextL()
+	{
+	ASSERT(iTextBuffer.Length() > 0);
+
+	if (iTextBuffer.Length() == 0)
+		{
+		return;
+		}
+
+	iActionProcessor->DoWriteTextL(iTextBuffer);
+	}
+
+// Appends aChar to the tag name being collected. When the buffer already
+// holds KTagBufferLength characters it is emptied first, so the characters
+// collected so far are discarded instead of overflowing the buffer.
+void  CHtmlToCrtConvParser::WriteToTagBufferL(TChar aChar)
+	{
+	const TBool bufferFull = (iTagBuffer.Length()==KTagBufferLength);
+	if (bufferFull)
+		iTagBuffer.Zero();
+
+	iTagBuffer.Append(aChar);
+	}
+
+// Appends aChar to the entity name being collected. When the buffer already
+// holds KEntityBufferLength characters it is emptied first, so the characters
+// collected so far are discarded instead of overflowing the buffer.
+void  CHtmlToCrtConvParser::WriteToEntityBufferL(TChar aChar)
+	{
+	const TBool bufferFull = (iEntityBuffer.Length()==KEntityBufferLength);
+	if (bufferFull)
+		{
+		iEntityBuffer.Zero();
+		}
+	iEntityBuffer.Append(aChar);
+	}
+
+// Appends aChar to the attribute name being collected. When the buffer
+// already holds KAttributeBufferLength characters it is emptied first, so the
+// characters collected so far are discarded instead of overflowing the buffer.
+void  CHtmlToCrtConvParser::WriteToAttributeBufferL(TChar aChar)
+	{
+	const TBool bufferFull = (iAttributeBuffer.Length()==KAttributeBufferLength);
+	if (bufferFull)
+		iAttributeBuffer.Zero();
+
+	iAttributeBuffer.Append(aChar);
+	}
+
+// Appends aChar, as a single 8-bit character, to the end of the growable
+// attribute value buffer. Leaves if the buffer cannot be extended.
+void  CHtmlToCrtConvParser::WriteToAttributeValueBufferL(TChar aChar)
+	{
+	TBuf8<1> oneCharacter;
+	oneCharacter.Append(aChar);
+
+	// inserting at the current size means appending at the end
+	const TInt endOfValue = iAttributeValueBuffer->Size();
+	iAttributeValueBuffer->InsertL(endOfValue, oneCharacter);
+	}
+
+// Turns the collected attribute name and value into a CHtmlToCrtConvAttribute
+// and queues it in iAttributes. Nothing is queued unless both a name and a
+// value have been collected. The scratch buffers are left for the caller to
+// clear. Leaves if the attribute cannot be created or appended.
+void  CHtmlToCrtConvParser::WriteToAttributeArrayL()
+	{
+	iAttributeValueBuffer->Compress();
+
+	const TBool nameMissing  = !iAttributeBuffer.Length();
+	const TBool valueMissing = !iAttributeValueBuffer->Size();
+	if (nameMissing || valueMissing)
+		{
+		return;
+		}
+
+	THtmlToCrtConvAttributeType type = iHashTable->LookupAttribute(iAttributeBuffer);
+	TPtr8 value(iAttributeValueBuffer->Ptr(0));
+
+	// the new attribute stays on the cleanup stack until the array owns it
+	CHtmlToCrtConvAttribute* newAttribute = CHtmlToCrtConvAttribute::NewLC(type, value);
+	User::LeaveIfError(iAttributes.Append(newAttribute));
+	CleanupStack::Pop(newAttribute);
+	}
+
+//=============================================================
+//CHtmlToCrtConvAttribute class
+//=============================================================
+// Two-phase factory. Creates an attribute of type aType holding a copy of
+// aValue. The returned object is left on the cleanup stack.
+CHtmlToCrtConvAttribute* CHtmlToCrtConvAttribute::NewLC(THtmlToCrtConvAttributeType aType, TDesC8& aValue)
+	{
+	CHtmlToCrtConvAttribute* attribute = new(ELeave) CHtmlToCrtConvAttribute(aType);
+
+	CleanupStack::PushL(attribute);
+	attribute->ConstructL(aValue);
+
+	return attribute;
+	}
+
+// First-phase constructor: records the attribute type only.
+CHtmlToCrtConvAttribute::CHtmlToCrtConvAttribute(THtmlToCrtConvAttributeType aType)
+	: iType(aType)
+	{
+	}
+
+// Second-phase constructor: stores a heap copy of aValue in iValue.
+// Leaves if the copy cannot be allocated.
+void CHtmlToCrtConvAttribute::ConstructL(TDesC8& aValue)
+	{
+	iValue = aValue.AllocL();
+	}
+
+// Frees the heap copy of the attribute value.
+CHtmlToCrtConvAttribute::~CHtmlToCrtConvAttribute()
+	{
+	delete iValue;
+	}