diff -r 000000000000 -r 2e3d3ce01487 filehandling/htmltorichtextconverter/src/CHtmlToCrtConvParser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filehandling/htmltorichtextconverter/src/CHtmlToCrtConvParser.cpp Tue Feb 02 10:12:00 2010 +0200 @@ -0,0 +1,500 @@ +// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). +// All rights reserved. +// This component and the accompanying materials are made available +// under the terms of "Eclipse Public License v1.0" +// which accompanies this distribution, and is available +// at the URL "http://www.eclipse.org/legal/epl-v10.html". +// +// Initial Contributors: +// Nokia Corporation - initial contribution. +// +// Contributors: +// +// Description: +// + +//======================================================================================== +//the html to be parsed looks like: +//text ... etc (NOTE(review): the markup on this line appears to have been stripped - presumably an element named tag carrying one attribute-value pair and enclosing text; confirm against the original file) +// +//(1)'tag' is written to iTagBuffer +//(2)'attribute' is written to iAttributeBuffer and 'attribute value' is written +// to iAttributeValueBuffer, and each pair are appended to the array iAttributes +//(3)iStartOfTextPosition is set to the start of 'text' +// +//when < is seen, 'text' is written to the richtext object +//when > is seen, 'iTagBuffer' is recognised, and any required action is performed; similarly for iAttributes +//======================================================================================== + +#include "CHtmlToCrtConvParser.h" +#include "CHtmlToCrtConvBuffer.h" +#include "CHtmlToCrtConvHashTable.h" +#include "CHtmlToCrtConvHash.h" +#include "CHtmlToCrtConvActionProcessor.h" +#include "MHtmlToCrtConvResourceFile.h" + +const TInt KAttributeValueBufferGranularity = 10; +const TInt KDefaultTextPosition = -1; +/* NewL: allocates the parser with new(ELeave), runs ConstructL while the object is on the cleanup stack, then pops it and returns it. */ +CHtmlToCrtConvParser* CHtmlToCrtConvParser::NewL(CRichText& aRichText, CHtmlToCrtConvBuffer& aBuffer, MHtmlToCrtConvResourceFile& aResourceFile) + { + CHtmlToCrtConvParser* self=new(ELeave) CHtmlToCrtConvParser(aBuffer); + CleanupStack::PushL(self); + 
self->ConstructL(aRichText, aResourceFile); + CleanupStack::Pop(self); + return self; + } +/* ConstructL: creates the attribute value buffer (CBufFlat with granularity KAttributeValueBufferGranularity), the hash table and the action processor. */ +void CHtmlToCrtConvParser::ConstructL(CRichText& aRichText, MHtmlToCrtConvResourceFile& aResourceFile) + { + iAttributeValueBuffer=CBufFlat::NewL(KAttributeValueBufferGranularity); + iHashTable=CHtmlToCrtConvHashTable::NewL(); + iActionProcessor=CHtmlToCrtConvActionProcessor::NewL(aRichText, aResourceFile); + } +/* Constructor: stores the buffer reference and sets both text positions to KDefaultTextPosition. */ +CHtmlToCrtConvParser::CHtmlToCrtConvParser(CHtmlToCrtConvBuffer& aBuffer) +:iStartOfTextPosition(KDefaultTextPosition) +,iEndOfTextPosition(KDefaultTextPosition) +,iBuffer(aBuffer) + { + } +/* Destructor: deletes the hash table, the action processor and the attribute value buffer; destroys the entries of iAttributes and closes the array. */ +CHtmlToCrtConvParser::~CHtmlToCrtConvParser() + { + delete iHashTable; + delete iActionProcessor; + iAttributes.ResetAndDestroy(); + iAttributes.Close(); + delete iAttributeValueBuffer; + } + +//============================================================= +//DoOneStepL (NOTE(review): text appears to be missing below, between the loop condition that starts with ii and the character comparison that follows it; the remainder of DoOneStepL and the opening of the next function - presumably InspectCurrentCharacterL - are not present. Restore from the original file before relying on this code.) +//============================================================= +TBool CHtmlToCrtConvParser::DoOneStepL() + { + TChar currentCharacter; + TBool moreProcessingReqd=ETrue; + + for (TInt ii=0; ii') + iParserState=ESeeEndOfTagWhileReadingJavascript; + } + + else if (aChar=='<') + { + iTagState =EOpeningTag; + iParserState=ESeeStartOfTag; + iInTag =ETrue; + } + else if (iInTag) + { + if (aChar=='/' || aChar=='=' || aChar=='"' || aChar=='>' || aChar=='!') + { + SeeSpecialCharactersInTag(aChar, processCharacter); + } + else if (aChar.IsSpace()) + { + SeeWhiteSpaceCharacterInTag(processCharacter); + } + else if (iParserState==ESeeExclamationMark && aChar=='-') + { + iParserState=EReadingJavascript; + processCharacter=EFalse; + } + else + { + SeeOtherCharactersInTag(); + } + } + else if (aChar == '&') + { + iParserState = EStartOfCharacterEntity; + iInCharacterEntity = ETrue; + processCharacter = EFalse; + iStartOfTextPosition = KDefaultTextPosition; + } + else if (iInCharacterEntity) + { + if (aChar == ';') + { + iParserState = EEndOfCharacterEntity; + iInCharacterEntity = EFalse; + } + else + { + iParserState = 
EReadingCharacterEntity; + } + } + else + { + SeeOtherCharactersNotInTag(processCharacter); + } + + return processCharacter; + } +//============================================================= +//functions used by InspectCurrentCharacterL (they switch on the current state or character and may update iParserState; those taking aBool may overwrite it). SeeWhiteSpaceCharacterInTag: whitespace ends a tag name or attribute name (aBool set to EFalse) or an unquoted attribute value (aBool untouched); inside inverted commas nothing changes. +//============================================================= +void CHtmlToCrtConvParser::SeeWhiteSpaceCharacterInTag(TBool& aBool) + { + switch (iParserState) + { + case EReadingOpeningTag: + iParserState=EFinishedReadingTag; + aBool=EFalse; + break; + case EReadingClosingTag: + iParserState=EFinishedReadingTag; + aBool=EFalse; + break; + case EReadingAttribute: + iParserState=EFinishedReadingAttribute; + aBool=EFalse; + break; + case EReadingAttributeValue: + iParserState=EFinishedReadingAttributeValue; + break; + case EReadingAttributeValueWithinInvCommas: + break; + default: + break; + } + }; +/* SeeSpecialCharactersInTag: handles the slash, equals, double quote, closing angle bracket and exclamation mark cases inside a tag. Depending on iParserState it may change iParserState, iTagState or iInTag; aBool is written only on some paths and otherwise left as passed in. */ +void CHtmlToCrtConvParser::SeeSpecialCharactersInTag(TChar aChar, TBool& aBool) + { + switch (aChar) + { + case '/': + if (iParserState==EReadingAttributeValue || iParserState==EReadingAttributeValueWithinInvCommas) + aBool=ETrue; + else if (iParserState==ESeeStartOfTag) + { + iTagState=EClosingTag; + iParserState=ESeeClosingTagIndicator; + aBool=EFalse; + } + break; + case '=': + if(iParserState!=EReadingAttributeValue && iParserState!=EReadingAttributeValueWithinInvCommas) + { + iParserState=ESeeEquals; + aBool=EFalse; + } + else + { + aBool=ETrue; + } + break; + case '"': + if (iParserState==ESeeEquals) + { + iParserState=EReadingAttributeValueWithinInvCommas; + aBool=EFalse; + } + else + { + iParserState=EFinishedReadingAttributeValue; + } + break; + case '>': + iParserState=ESeeEndOfTag; + iInTag=EFalse; + break; + case '!': + if(iParserState==ESeeStartOfTag) + { + iParserState=ESeeExclamationMark; + aBool=EFalse; + } + break; + default: + break; + } + }; +/* SeeOtherCharactersInTag: for any other character inside a tag, moves from a seen or finished state to the corresponding reading state; the attribute buffer is emptied in the EFinishedReadingAttribute case. Other states are left unchanged. */ +void CHtmlToCrtConvParser::SeeOtherCharactersInTag() + { + switch (iParserState) + { + case ESeeStartOfTag: + iParserState=EReadingOpeningTag; + break; + case 
ESeeClosingTagIndicator: + iParserState=EReadingClosingTag; + break; + case ESeeEquals: + iParserState=EReadingAttributeValue; + break; + case EFinishedReadingTag: + iParserState=EReadingAttribute; + break; + case EFinishedReadingAttribute: + iAttributeBuffer.Zero(); + iParserState=EReadingAttribute; + break; + case EFinishedReadingAttributeValue: + iParserState=EReadingAttribute; + break; + default: + break; + } + }; +/* SeeOtherCharactersNotInTag: outside a tag, sets aBool to EFalse when already in EReadingText; from ESeeEndOfTag, EEndOfCharacterEntity or EInitialState it switches to EReadingText and leaves aBool untouched. */ +void CHtmlToCrtConvParser::SeeOtherCharactersNotInTag(TBool& aBool) + { + switch (iParserState) + { + case EReadingText: + aBool=EFalse; + break; + case ESeeEndOfTag: + case EEndOfCharacterEntity: + iParserState=EReadingText; + break; + case EInitialState: + iParserState=EReadingText; + break; + default: + break; + } + }; +//============================================================= +//DoActionL - performs the work associated with the current iParserState: records the text start position, appends aChar to the tag, entity, attribute or attribute value buffer, stores a finished attribute, flushes pending text when a tag starts, runs the tag or entity operation, and tracks the last two characters while in EReadingJavascript +//============================================================= +void CHtmlToCrtConvParser::DoActionL(TChar aChar) + { + switch(iParserState) + { + case EReadingText: + iStartOfTextPosition=iBufferPosition; + break; + case EReadingOpeningTag: + WriteToTagBufferL(aChar); + break; + case EReadingClosingTag: + WriteToTagBufferL(aChar); + break; + case EEndOfCharacterEntity: + DoEntityOperationL(); + break; + case EReadingCharacterEntity: + WriteToEntityBufferL(aChar); + break; + case EReadingAttribute: + WriteToAttributeBufferL(aChar); + break; + case EReadingAttributeValue: + WriteToAttributeValueBufferL(aChar); + break; + case EReadingAttributeValueWithinInvCommas: + WriteToAttributeValueBufferL(aChar); + break; + case EFinishedReadingAttributeValue: + WriteToAttributeArrayL(); + iAttributeBuffer.Zero(); + iAttributeValueBuffer->Reset(); + break; + case ESeeStartOfTag: + if (iStartOfTextPosition!=KDefaultTextPosition) + { + iEndOfTextPosition=iBufferPosition - 1; + iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition); + WriteToRichTextL(); + iStartOfTextPosition=KDefaultTextPosition; + iEndOfTextPosition=KDefaultTextPosition; + 
} + break; + case ESeeEndOfTag: + if(iAttributeValueBuffer->Size()) + { + WriteToAttributeArrayL(); + iAttributeBuffer.Zero(); + iAttributeValueBuffer->Reset(); + } + DoTagOperationL(); + break; + case ESeeEndOfTagWhileReadingJavascript: + { + _LIT(KJavascriptEnd, "--"); + if(!(iLastTwoCharacters.Compare(KJavascriptEnd))) + { + iParserState=EInitialState;//finished reading javascript + iInTag=EFalse; + } + else + iParserState=EReadingJavascript; + break; + } + case EReadingJavascript: + //keep record of last two characters encountered, for comparing with -- when > seen + if(iLastTwoCharacters.Length()==2) + iLastTwoCharacters.Copy(iLastTwoCharacters.Right(1)); + + iLastTwoCharacters.Append(aChar); + break; + default: + break; + }; + } +//============================================================= +//DoTagOperationL - looks up iTagBuffer in the hash table and, when the tag buffer is non-empty, runs the tag action followed by the attribute actions, then empties iAttributes. (NOTE(review): text appears to be missing inside the attribute loop below, between the loop condition starting with ii and the call to Type(); restore from the original file. Also note that iAttributes is reset only when iTagBuffer is non-empty.) +//============================================================= +void CHtmlToCrtConvParser::DoTagOperationL() + { + const TInt count=iAttributes.Count(); + THtmlToCrtConvTagType tagType=iHashTable->LookupTag(iTagBuffer); + + if (iTagBuffer.Length()) + { + iActionProcessor->DoTagActionL(tagType, iTagState); + iTagBuffer.Zero(); + + TBool imgTagResourceReqd=EFalse; + for(TInt ii=0; iiType(); + const TDesC8& attributeValue=(iAttributes)[ii]->Value(); + iActionProcessor->DoAttributeActionL(tagType, iTagState, attributeType, attributeValue, imgTagResourceReqd); + } + if(imgTagResourceReqd) + { + iActionProcessor->InsertImgTagResourceL(); + } + iAttributes.ResetAndDestroy(); + } + } +/* DoEntityOperationL: looks up iEntityBuffer in the hash table; unless the result equals KHtmlEntityUnknown it is set as iTextBuffer and written out. The entity buffer is emptied in either case. */ +void CHtmlToCrtConvParser::DoEntityOperationL() + { + const TDesC16& entity = iHashTable->LookupEntity(iEntityBuffer); + if(entity != KHtmlEntityUnknown) + { + iTextBuffer.Set(entity); + WriteToRichTextL(); + } + iEntityBuffer.Zero(); + } +/* WriteToRichTextL: asserts that iTextBuffer is non-empty, then passes it to the action processor (the length is checked again before the call). */ +void CHtmlToCrtConvParser::WriteToRichTextL() + { + ASSERT(iTextBuffer.Length() > 0); + if (iTextBuffer.Length()) + { + iActionProcessor->DoWriteTextL(iTextBuffer); + } + } +/* WriteToTagBufferL: appends aChar to iTagBuffer; if the buffer length already equals KTagBufferLength the buffer is emptied first, so the earlier characters are discarded. */ +void CHtmlToCrtConvParser::WriteToTagBufferL(TChar aChar) + { 
+ if (iTagBuffer.Length()==KTagBufferLength) + { + iTagBuffer.Zero(); + } + iTagBuffer.Append(aChar); + } +/* WriteToEntityBufferL: appends aChar to iEntityBuffer; if the buffer length already equals KEntityBufferLength the buffer is emptied first, so the earlier characters are discarded. */ +void CHtmlToCrtConvParser::WriteToEntityBufferL(TChar aChar) + { + if (iEntityBuffer.Length()==KEntityBufferLength) + iEntityBuffer.Zero(); + + iEntityBuffer.Append(aChar); + } +/* WriteToAttributeBufferL: appends aChar to iAttributeBuffer; if the buffer length already equals KAttributeBufferLength the buffer is emptied first, so the earlier characters are discarded. */ +void CHtmlToCrtConvParser::WriteToAttributeBufferL(TChar aChar) + { + if (iAttributeBuffer.Length()==KAttributeBufferLength) + { + iAttributeBuffer.Zero(); + } + iAttributeBuffer.Append(aChar); + } +/* WriteToAttributeValueBufferL: appends aChar to a one-element TBuf8 and inserts that at the current end (Size) of iAttributeValueBuffer. */ +void CHtmlToCrtConvParser::WriteToAttributeValueBufferL(TChar aChar) + { + TInt size=iAttributeValueBuffer->Size(); + TBuf8<1> temp; + temp.Append(aChar); + iAttributeValueBuffer->InsertL(size, temp); + } +/* WriteToAttributeArrayL: compresses the value buffer; when both the attribute buffer and the value buffer are non-empty, looks up the attribute type, creates a CHtmlToCrtConvAttribute from the value and appends it to iAttributes. The new object is popped from the cleanup stack only after Append has succeeded. */ +void CHtmlToCrtConvParser::WriteToAttributeArrayL() + { + iAttributeValueBuffer->Compress(); + if (iAttributeBuffer.Length() && iAttributeValueBuffer->Size()) + { + THtmlToCrtConvAttributeType attType=iHashTable->LookupAttribute(iAttributeBuffer); + TPtr8 pAttributeTag(iAttributeValueBuffer->Ptr(0)); + CHtmlToCrtConvAttribute* attribute=CHtmlToCrtConvAttribute::NewLC(attType, pAttributeTag); + + User::LeaveIfError(iAttributes.Append(attribute)); + CleanupStack::Pop(attribute); + } + } + +//============================================================= +//CHtmlToCrtConvAttribute class - holds an attribute type (iType) and a value (iValue) obtained from AllocL and deleted by the destructor. NewLC returns with the new object still on the cleanup stack. +//============================================================= +CHtmlToCrtConvAttribute* CHtmlToCrtConvAttribute::NewLC(THtmlToCrtConvAttributeType aType, TDesC8& aValue) + { + CHtmlToCrtConvAttribute* self=new(ELeave) CHtmlToCrtConvAttribute(aType); + CleanupStack::PushL(self); + self -> ConstructL(aValue); + return self; + } +/* Constructor: records the attribute type in iType. */ +CHtmlToCrtConvAttribute::CHtmlToCrtConvAttribute(THtmlToCrtConvAttributeType aType) +:iType(aType) + { + } +/* ConstructL: sets iValue to the result of aValue.AllocL(). */ +void CHtmlToCrtConvAttribute::ConstructL(TDesC8& aValue) + { + iValue=aValue.AllocL(); + } +/* Destructor: deletes iValue. */ +CHtmlToCrtConvAttribute::~CHtmlToCrtConvAttribute() + { + delete iValue; + }