filehandling/htmltorichtextconverter/src/CHtmlToCrtConvParser.cpp
changeset 0 2e3d3ce01487
equal deleted inserted replaced
-1:000000000000 0:2e3d3ce01487
       
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 //========================================================================================
       
    17 //the html to be parsed looks like:
       
    18 //<tag attribute=attribute_value attribute = "attribute value">text ... etc
       
    19 //
       
    20 //(1)'tag' is written to iTagBuffer
       
    21 //(2)'attribute' is written to iAttributeBuffer and 'attribute value' is written
       
    22 //   to iAttributeValueBuffer, and each pair are appended to the array iAttributes
       
    23 //(3)iStartOfTextPosition is set to the start of 'text'
       
    24 //
       
    25 //when < is seen, 'text' is written to the richtext object
       
    26 //when > is seen, 'iTagBuffer' is recognised, and any required action is performed; similarly for iAttributes
       
    27 //========================================================================================
       
    28 
       
    29 #include "CHtmlToCrtConvParser.h"
       
    30 #include "CHtmlToCrtConvBuffer.h"
       
    31 #include "CHtmlToCrtConvHashTable.h"
       
    32 #include "CHtmlToCrtConvHash.h"
       
    33 #include "CHtmlToCrtConvActionProcessor.h"
       
    34 #include "MHtmlToCrtConvResourceFile.h"
       
    35 
       
    36 const TInt KAttributeValueBufferGranularity = 10;
       
    37 const TInt KDefaultTextPosition				= -1;
       
    38 
       
    39 CHtmlToCrtConvParser* CHtmlToCrtConvParser::NewL(CRichText& aRichText, CHtmlToCrtConvBuffer& aBuffer, MHtmlToCrtConvResourceFile& aResourceFile)
       
    40 	{
       
    41 	CHtmlToCrtConvParser* self=new(ELeave) CHtmlToCrtConvParser(aBuffer);
       
    42 	CleanupStack::PushL(self);
       
    43 	self->ConstructL(aRichText, aResourceFile);
       
    44 	CleanupStack::Pop(self);
       
    45 	return self;
       
    46 	}
       
    47 
       
    48 void CHtmlToCrtConvParser::ConstructL(CRichText& aRichText, MHtmlToCrtConvResourceFile& aResourceFile)
       
    49 	{
       
    50 	iAttributeValueBuffer=CBufFlat::NewL(KAttributeValueBufferGranularity);
       
    51 	iHashTable=CHtmlToCrtConvHashTable::NewL();
       
    52 	iActionProcessor=CHtmlToCrtConvActionProcessor::NewL(aRichText, aResourceFile);
       
    53 	}
       
    54 
       
    55 CHtmlToCrtConvParser::CHtmlToCrtConvParser(CHtmlToCrtConvBuffer& aBuffer)
       
    56 :iStartOfTextPosition(KDefaultTextPosition)
       
    57 ,iEndOfTextPosition(KDefaultTextPosition)
       
    58 ,iBuffer(aBuffer)
       
    59 	{
       
    60 	}
       
    61 
       
    62 CHtmlToCrtConvParser::~CHtmlToCrtConvParser()
       
    63 	{
       
    64 	delete iHashTable;
       
    65 	delete iActionProcessor;
       
    66 	iAttributes.ResetAndDestroy();
       
    67 	iAttributes.Close();
       
    68 	delete iAttributeValueBuffer;
       
    69 	}
       
    70 
       
    71 //=============================================================
       
    72 //DoOneStepL
       
    73 //=============================================================
       
    74 TBool CHtmlToCrtConvParser::DoOneStepL()
       
    75 	{
       
    76 	TChar currentCharacter;
       
    77 	TBool moreProcessingReqd=ETrue;
       
    78 
       
    79 	for (TInt ii=0; ii<KCharsProcessedInOneStep; ii++)
       
    80 		{
       
    81 		moreProcessingReqd=iBuffer.ReadCharacterL(currentCharacter, iBufferPosition, iEndOfBufferReached);
       
    82 
       
    83 		if (!moreProcessingReqd)
       
    84 			{
       
    85 			if (iStartOfTextPosition!=KDefaultTextPosition)
       
    86 				{
       
    87 				iBuffer.GetToEndOfBufferL(iTextBuffer, iStartOfTextPosition);
       
    88 				WriteToRichTextL();
       
    89 				}
       
    90 			break;
       
    91 			}
       
    92 
       
    93 		if (InspectCurrentCharacter(currentCharacter))
       
    94 			{
       
    95 			DoActionL(currentCharacter);
       
    96 			}
       
    97 
       
    98  		if (iEndOfBufferReached && (iStartOfTextPosition != KDefaultTextPosition) && moreProcessingReqd)
       
    99 			{
       
   100 			iEndOfTextPosition=iBufferPosition;
       
   101 			iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition);
       
   102 			WriteToRichTextL();
       
   103 			iParserState=EInitialState;
       
   104 			iStartOfTextPosition=KDefaultTextPosition;
       
   105 			iEndOfTextPosition=KDefaultTextPosition;
       
   106 			}
       
   107 		}
       
   108 	return moreProcessingReqd;
       
   109 	}
       
   110 //======================================================================
       
   111 //InspectCurrentCharacter - updates iParserState, iTagState and iInTag
       
   112 //======================================================================
       
   113 TBool CHtmlToCrtConvParser::InspectCurrentCharacter(TChar aChar)
       
   114 	{
       
   115 	TBool processCharacter=ETrue;
       
   116 
       
   117 	if (iParserState==EReadingJavascript)
       
   118 		{
       
   119 		if (aChar=='>')
       
   120 			iParserState=ESeeEndOfTagWhileReadingJavascript;
       
   121 		}
       
   122 
       
   123 	else if (aChar=='<')
       
   124 		{
       
   125 		iTagState	=EOpeningTag;
       
   126 	 	iParserState=ESeeStartOfTag;
       
   127 		iInTag		=ETrue;
       
   128 		}
       
   129 	else if (iInTag)
       
   130 		{
       
   131 		if (aChar=='/' || aChar=='=' || aChar=='"' || aChar=='>' || aChar=='!')
       
   132 			{
       
   133 			SeeSpecialCharactersInTag(aChar, processCharacter);
       
   134 			}
       
   135 		else if (aChar.IsSpace())
       
   136 			{
       
   137 			SeeWhiteSpaceCharacterInTag(processCharacter);
       
   138 			}
       
   139 		else if (iParserState==ESeeExclamationMark && aChar=='-')
       
   140 			{
       
   141 			iParserState=EReadingJavascript;
       
   142 			processCharacter=EFalse;
       
   143 			}
       
   144 		else
       
   145 			{
       
   146 			SeeOtherCharactersInTag();
       
   147 			}
       
   148 		}
       
   149 	else if (aChar == '&')
       
   150 		{
       
   151 		iParserState = EStartOfCharacterEntity;
       
   152 		iInCharacterEntity = ETrue;
       
   153 		processCharacter = EFalse;
       
   154 		iStartOfTextPosition = KDefaultTextPosition;
       
   155 		}
       
   156 	else if (iInCharacterEntity)
       
   157 		{
       
   158 		if (aChar == ';')
       
   159 			{
       
   160 			iParserState = EEndOfCharacterEntity;
       
   161 			iInCharacterEntity = EFalse;
       
   162 			}
       
   163  		else
       
   164  			{
       
   165 			iParserState = EReadingCharacterEntity;
       
   166 			}
       
   167 		}
       
   168 	else
       
   169 		{
       
   170 		SeeOtherCharactersNotInTag(processCharacter);
       
   171 		}
       
   172 
       
   173 	return processCharacter;
       
   174 	}
       
   175 //=============================================================
       
   176 //functions used by InspectCurrentCharacter
       
   177 //=============================================================
       
   178 void CHtmlToCrtConvParser::SeeWhiteSpaceCharacterInTag(TBool& aBool)
       
   179 	{		
       
   180 	switch (iParserState)
       
   181 		{
       
   182 		case EReadingOpeningTag:
       
   183 			iParserState=EFinishedReadingTag;
       
   184 			aBool=EFalse;
       
   185 			break;
       
   186 		case EReadingClosingTag:
       
   187 			iParserState=EFinishedReadingTag;
       
   188 			aBool=EFalse;
       
   189 			break;
       
   190 		case EReadingAttribute:
       
   191 			iParserState=EFinishedReadingAttribute;
       
   192 			aBool=EFalse;
       
   193 			break;
       
   194 		case EReadingAttributeValue:
       
   195 			iParserState=EFinishedReadingAttributeValue;
       
   196 			break;
       
   197 		case EReadingAttributeValueWithinInvCommas:
       
   198 			break;
       
   199 		default:
       
   200 			break;
       
   201 		}
       
   202 	};
       
   203 
       
   204 void CHtmlToCrtConvParser::SeeSpecialCharactersInTag(TChar aChar, TBool& aBool)
       
   205 	{
       
   206 	switch (aChar)
       
   207 		{
       
   208 		case '/':
       
   209 			if (iParserState==EReadingAttributeValue || iParserState==EReadingAttributeValueWithinInvCommas)
       
   210 				aBool=ETrue;
       
   211 			else if (iParserState==ESeeStartOfTag)
       
   212 				{
       
   213 				iTagState=EClosingTag;
       
   214 	 			iParserState=ESeeClosingTagIndicator;
       
   215 				aBool=EFalse;
       
   216 				}
       
   217 			break;
       
   218 		case '=':
       
   219 			if(iParserState!=EReadingAttributeValue && iParserState!=EReadingAttributeValueWithinInvCommas)
       
   220 				{
       
   221 				iParserState=ESeeEquals;
       
   222 				aBool=EFalse;
       
   223 				}
       
   224 			else
       
   225 				{
       
   226 				aBool=ETrue;
       
   227 				}
       
   228 			break;
       
   229 		case '"':
       
   230 			if (iParserState==ESeeEquals)
       
   231 				{
       
   232 				iParserState=EReadingAttributeValueWithinInvCommas;
       
   233 				aBool=EFalse;
       
   234 				}
       
   235 			else
       
   236 				{
       
   237 				iParserState=EFinishedReadingAttributeValue;
       
   238 				}
       
   239 			break;
       
   240 		case '>':
       
   241 			iParserState=ESeeEndOfTag;
       
   242 			iInTag=EFalse;
       
   243 			break;
       
   244 		case '!':
       
   245 			if(iParserState==ESeeStartOfTag)
       
   246 				{
       
   247 				iParserState=ESeeExclamationMark;
       
   248 				aBool=EFalse;
       
   249 				}
       
   250 			break;
       
   251 		default:
       
   252 			break;
       
   253 		}	
       
   254 	};
       
   255 
       
   256 void CHtmlToCrtConvParser::SeeOtherCharactersInTag()
       
   257 	{
       
   258 	switch (iParserState)
       
   259 		{
       
   260 		case ESeeStartOfTag:
       
   261 			iParserState=EReadingOpeningTag;
       
   262 			break;
       
   263 		case ESeeClosingTagIndicator:
       
   264 			iParserState=EReadingClosingTag;
       
   265 			break;
       
   266 		case ESeeEquals:
       
   267 			iParserState=EReadingAttributeValue;
       
   268 			break;
       
   269 		case EFinishedReadingTag:
       
   270 			iParserState=EReadingAttribute;
       
   271 			break;
       
   272 		case EFinishedReadingAttribute:
       
   273 			iAttributeBuffer.Zero();
       
   274 			iParserState=EReadingAttribute;
       
   275 			break;
       
   276 		case EFinishedReadingAttributeValue:
       
   277 			iParserState=EReadingAttribute;
       
   278 			break;
       
   279 		default:
       
   280 			break;
       
   281 		}
       
   282 	};
       
   283 
       
   284 void CHtmlToCrtConvParser::SeeOtherCharactersNotInTag(TBool& aBool)
       
   285 	{
       
   286 	switch (iParserState)
       
   287 		{
       
   288 		case EReadingText:
       
   289 			aBool=EFalse;
       
   290 			break;
       
   291 		case ESeeEndOfTag:
       
   292 		case EEndOfCharacterEntity:
       
   293 			iParserState=EReadingText;
       
   294 			break;
       
   295 		case EInitialState:
       
   296 			iParserState=EReadingText;
       
   297 			break;
       
   298 		default:
       
   299 			break;
       
   300 		}
       
   301 	};
       
   302 //=============================================================
       
   303 //DoActionL
       
   304 //=============================================================
       
   305 void CHtmlToCrtConvParser::DoActionL(TChar aChar)
       
   306 	{
       
   307 	switch(iParserState)
       
   308 		{
       
   309 		case EReadingText:
       
   310 			iStartOfTextPosition=iBufferPosition;
       
   311 			break;
       
   312 		case EReadingOpeningTag:
       
   313 			WriteToTagBufferL(aChar);
       
   314 			break;
       
   315 		case EReadingClosingTag:
       
   316 			WriteToTagBufferL(aChar);
       
   317 			break;
       
   318 		case EEndOfCharacterEntity:
       
   319 			DoEntityOperationL();
       
   320 			break;
       
   321 		case EReadingCharacterEntity:
       
   322 			WriteToEntityBufferL(aChar);
       
   323 			break;
       
   324 		case EReadingAttribute:
       
   325 			WriteToAttributeBufferL(aChar);
       
   326 			break;
       
   327 		case EReadingAttributeValue:
       
   328 			WriteToAttributeValueBufferL(aChar);
       
   329 			break;
       
   330 		case EReadingAttributeValueWithinInvCommas:
       
   331 			WriteToAttributeValueBufferL(aChar);
       
   332 			break;
       
   333 		case EFinishedReadingAttributeValue:
       
   334 			WriteToAttributeArrayL();
       
   335 			iAttributeBuffer.Zero();
       
   336 			iAttributeValueBuffer->Reset();
       
   337 			break;
       
   338 		case ESeeStartOfTag:
       
   339 			if (iStartOfTextPosition!=KDefaultTextPosition)
       
   340 				{
       
   341 				iEndOfTextPosition=iBufferPosition - 1;
       
   342 				iBuffer.GetPartOfBufferL(iTextBuffer, iStartOfTextPosition, iEndOfTextPosition);
       
   343 				WriteToRichTextL();
       
   344 				iStartOfTextPosition=KDefaultTextPosition;
       
   345 				iEndOfTextPosition=KDefaultTextPosition;
       
   346 				}
       
   347 			break;				
       
   348 		case ESeeEndOfTag:
       
   349 			if(iAttributeValueBuffer->Size())
       
   350 				{
       
   351 				WriteToAttributeArrayL();
       
   352 				iAttributeBuffer.Zero();
       
   353 				iAttributeValueBuffer->Reset();
       
   354 				}
       
   355 			DoTagOperationL();
       
   356 			break;
       
   357 		case ESeeEndOfTagWhileReadingJavascript:
       
   358 			{
       
   359 			_LIT(KJavascriptEnd, "--");
       
   360 			if(!(iLastTwoCharacters.Compare(KJavascriptEnd)))
       
   361 				{
       
   362 				iParserState=EInitialState;//finished reading javascript
       
   363 				iInTag=EFalse;
       
   364 				}
       
   365 			else
       
   366 				iParserState=EReadingJavascript;
       
   367 			break;
       
   368 			}
       
   369 		case EReadingJavascript:
       
   370 			//keep record of last two characters encountered, for comparing with -- when > seen
       
   371 			if(iLastTwoCharacters.Length()==2)
       
   372 				iLastTwoCharacters.Copy(iLastTwoCharacters.Right(1));
       
   373 
       
   374 			iLastTwoCharacters.Append(aChar);
       
   375 			break;
       
   376 		default:
       
   377 			break;
       
   378 		};
       
   379 	}
       
   380 //=============================================================
       
   381 //DoTagOperationL
       
   382 //=============================================================
       
   383 void  CHtmlToCrtConvParser::DoTagOperationL()
       
   384 	{
       
   385 	const TInt count=iAttributes.Count();
       
   386 	THtmlToCrtConvTagType tagType=iHashTable->LookupTag(iTagBuffer);
       
   387 
       
   388 	if (iTagBuffer.Length())
       
   389 		{
       
   390 		iActionProcessor->DoTagActionL(tagType, iTagState);
       
   391 		iTagBuffer.Zero();
       
   392 
       
   393 		TBool imgTagResourceReqd=EFalse;
       
   394 		for(TInt ii=0; ii<count; ii++)
       
   395 			{
       
   396 			THtmlToCrtConvAttributeType attributeType=(iAttributes)[ii]->Type();
       
   397 			const TDesC8& attributeValue=(iAttributes)[ii]->Value();
       
   398 			iActionProcessor->DoAttributeActionL(tagType, iTagState, attributeType, attributeValue, imgTagResourceReqd);
       
   399 			}
       
   400 		if(imgTagResourceReqd)
       
   401 			{
       
   402 			iActionProcessor->InsertImgTagResourceL();
       
   403 			}
       
   404 		iAttributes.ResetAndDestroy();
       
   405 		}
       
   406 	}
       
   407 
       
   408 void CHtmlToCrtConvParser::DoEntityOperationL()
       
   409 	{
       
   410 	const TDesC16& entity = iHashTable->LookupEntity(iEntityBuffer);
       
   411 	if(entity != KHtmlEntityUnknown)
       
   412 		{
       
   413 		iTextBuffer.Set(entity);
       
   414 		WriteToRichTextL();
       
   415 		}
       
   416 	iEntityBuffer.Zero();
       
   417 	}
       
   418 
       
   419 void  CHtmlToCrtConvParser::WriteToRichTextL()
       
   420 	{
       
   421 	ASSERT(iTextBuffer.Length() > 0);
       
   422 	if (iTextBuffer.Length())
       
   423 		{
       
   424 		iActionProcessor->DoWriteTextL(iTextBuffer);
       
   425 		}
       
   426 	}
       
   427 
       
   428 void  CHtmlToCrtConvParser::WriteToTagBufferL(TChar aChar)
       
   429 	{
       
   430 	if (iTagBuffer.Length()==KTagBufferLength)
       
   431 		{
       
   432 		iTagBuffer.Zero();
       
   433 		}
       
   434 	iTagBuffer.Append(aChar);
       
   435 	}
       
   436 
       
   437 void  CHtmlToCrtConvParser::WriteToEntityBufferL(TChar aChar)
       
   438 	{
       
   439 	if (iEntityBuffer.Length()==KEntityBufferLength)
       
   440 		iEntityBuffer.Zero();
       
   441 
       
   442 	iEntityBuffer.Append(aChar);
       
   443 	}
       
   444 
       
   445 void  CHtmlToCrtConvParser::WriteToAttributeBufferL(TChar aChar)
       
   446 	{
       
   447 	if (iAttributeBuffer.Length()==KAttributeBufferLength)
       
   448 		{
       
   449 		iAttributeBuffer.Zero();
       
   450 		}
       
   451 	iAttributeBuffer.Append(aChar);
       
   452 	}
       
   453 
       
   454 void  CHtmlToCrtConvParser::WriteToAttributeValueBufferL(TChar aChar)
       
   455 	{
       
   456 	TInt size=iAttributeValueBuffer->Size();
       
   457 	TBuf8<1> temp;
       
   458 	temp.Append(aChar);
       
   459 	iAttributeValueBuffer->InsertL(size, temp);
       
   460 	}
       
   461 
       
   462 void  CHtmlToCrtConvParser::WriteToAttributeArrayL()
       
   463 	{
       
   464 	iAttributeValueBuffer->Compress();
       
   465 	if (iAttributeBuffer.Length() && iAttributeValueBuffer->Size())
       
   466 		{
       
   467 		THtmlToCrtConvAttributeType attType=iHashTable->LookupAttribute(iAttributeBuffer);
       
   468 		TPtr8 pAttributeTag(iAttributeValueBuffer->Ptr(0));
       
   469 		CHtmlToCrtConvAttribute* attribute=CHtmlToCrtConvAttribute::NewLC(attType, pAttributeTag);
       
   470 
       
   471 		User::LeaveIfError(iAttributes.Append(attribute));		
       
   472 		CleanupStack::Pop(attribute);
       
   473 		}
       
   474 	}
       
   475 
       
   476 //=============================================================
       
   477 //CHtmlToCrtConvAttribute class
       
   478 //=============================================================
       
   479 CHtmlToCrtConvAttribute* CHtmlToCrtConvAttribute::NewLC(THtmlToCrtConvAttributeType aType, TDesC8& aValue)
       
   480 	{
       
   481 	CHtmlToCrtConvAttribute* self=new(ELeave) CHtmlToCrtConvAttribute(aType);
       
   482 	CleanupStack::PushL(self);
       
   483 	self -> ConstructL(aValue);
       
   484 	return self;
       
   485 	}
       
   486 
       
   487 CHtmlToCrtConvAttribute::CHtmlToCrtConvAttribute(THtmlToCrtConvAttributeType aType)
       
   488 :iType(aType)
       
   489 	{
       
   490 	}
       
   491 
       
   492 void CHtmlToCrtConvAttribute::ConstructL(TDesC8& aValue)
       
   493 	{
       
   494 	iValue=aValue.AllocL();
       
   495 	}
       
   496 
       
   497 CHtmlToCrtConvAttribute::~CHtmlToCrtConvAttribute()
       
   498 	{
       
   499 	delete iValue;
       
   500 	}