applayerprotocols/httpexamples/TestWebBrowser/htmlparserplugin/src/HtmlParser.cpp
changeset 0 b16258d2340f
equal deleted inserted replaced
-1:000000000000 0:b16258d2340f
       
     1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include <xml/xmlparsererrors.h>
       
    17 #include <xml/taginfo.h>
       
    18 #include <xml/documentparameters.h>
       
    19 #include <xml/contenthandler.h>
       
    20 #include <e32test.h>
       
    21 
       
    22 #include "HtmlParser.h"
       
    23 #include "StringParser.h"
       
    24 #include "badesca.h"
       
    25 #include "HtmlTagsTable.h"
       
    26 
       
    27 using namespace Xml;
       
    28 
       
    29 const TInt KPartialContenttSize = 256;
       
    30 const TInt KDefaultGranularity = 4;
       
    31 const TInt KStartTag = '<' ;
       
    32 const TInt KEndTag = '>';
       
    33 
       
    34 _LIT8 ( KMetaName, "meta" );
       
    35 _LIT8 ( KDefaultCharset, "UTF-8" );
       
    36 _LIT8 ( KXMLName, "?xml" );
       
    37 
       
    38 LOCAL_C void AttributeArrayDelete( TAny* aPtr );
       
    39 
       
    40 
       
    41 CHtmlParser::CHtmlParser ( Xml::MContentHandler& aContentHandler, RStringPool& aStringPool )
       
    42  :iContentHandler ( &aContentHandler ),
       
    43   iStringPool ( aStringPool ),
       
    44   iTagState ( ENoTag ),
       
    45   iParserState ( EInitialState ),
       
    46   iFirst ( ETrue ),
       
    47   iEndDocCalled ( EFalse )
       
    48 	{	
       
    49 	}
       
    50 
       
    51 CHtmlParser::~CHtmlParser ()
       
    52 	{	
       
    53 	delete iPartialContent;			
       
    54 	delete iTagAttribute;		
       
    55 	delete iTagAttributeValue;	
       
    56 	delete iTagQueue;	
       
    57 	}
       
    58 
       
    59 CHtmlParser* CHtmlParser::NewL ( Xml::MContentHandler& aContentHandler, RStringPool& aStringPool )
       
    60 	{
       
    61 	CHtmlParser* self = new (ELeave) CHtmlParser ( aContentHandler, aStringPool );
       
    62 	CleanupStack::PushL ( self );
       
    63 	
       
    64 	self->ConstructL ();
       
    65 	
       
    66 	CleanupStack::Pop ();
       
    67 	return self;
       
    68 	}
       
    69 
       
    70 
       
    71 void CHtmlParser::ConstructL ()
       
    72 	{		
       
    73 	iStringPool.OpenL ( HtmlTagsStringTable::Table );
       
    74 	iTagAttribute = new (ELeave) CDesC8ArrayFlat ( KDefaultGranularity );
       
    75 	iTagAttributeValue = new (ELeave) CDesC8ArrayFlat ( KDefaultGranularity );
       
    76 	iTagQueue = new (ELeave) CDesC8ArrayFlat ( KDefaultGranularity );
       
    77 	
       
    78 	iPartialContent = HBufC8::NewL ( KPartialContenttSize );	
       
    79 	}
       
    80 
       
    81 void CHtmlParser::ParseL ( const TDesC8&  aBuffer, TBool aLastChunk /* = EFalse */  )
       
    82 	{
       
    83 	ParseContentL ( aBuffer, aLastChunk );
       
    84 	}
       
    85 
       
    86 void CHtmlParser::CreateTagInfoLC ( RTagInfo& aTagInfo, const TDesC8& aTag )
       
    87 	{
       
    88 	RString uriString;
       
    89 	RString nameString;
       
    90 	RString prefixString;		
       
    91 		
       
    92 	// For HTML there is no uri and prefix. So make it null
       
    93 	uriString = iStringPool.OpenStringL( KNullDesC8 );
       
    94 	CleanupClosePushL ( uriString );
       
    95 	prefixString = iStringPool.OpenStringL( KNullDesC8 );
       
    96 	CleanupClosePushL ( prefixString );
       
    97 	
       
    98 	// name string ( tag )
       
    99 	nameString = iStringPool.OpenStringL ( aTag );
       
   100 	CleanupClosePushL ( nameString );
       
   101 	
       
   102 	// Create tag info with the uri, localname and prefix values.
       
   103 	aTagInfo.Open( uriString, prefixString, nameString );	
       
   104 	
       
   105 	CleanupStack::Pop ( 3 ); 
       
   106 		
       
   107 	CleanupClosePushL( aTagInfo );	
       
   108 	}
       
   109 
       
   110 void CHtmlParser::SetContentSink( class Xml::MContentHandler& aContentHandler )
       
   111 	{
       
   112 	iContentHandler = &aContentHandler;	
       
   113 	}
       
   114 
       
   115 // InspectCurrentCharacterL - updates iParserState, iTagState and iInTag	
       
   116 TBool CHtmlParser::InspectCurrentCharacter( TChar aChar )
       
   117 	{
       
   118 	TBool processCharacter = ETrue;
       
   119 
       
   120 	if ( iParserState == EReadingJavascript )
       
   121 		{
       
   122 		if ( aChar == '>' )
       
   123 			{
       
   124 			iParserState = ESeeEndOfTagWhileReadingJavascript;			
       
   125 			}
       
   126 
       
   127 		}
       
   128 	else if ( aChar == '<' )
       
   129 		{
       
   130 		iTagState	=EOpeningTag;
       
   131 	 	iParserState = ESeeStartOfTag;
       
   132 		iInTag		=ETrue;
       
   133 		}
       
   134 	else if ( iInTag )
       
   135 		{
       
   136 		if ( aChar == '/' || aChar == '=' || aChar == '"' || aChar == '>' || aChar == '!' || aChar == '\'' )
       
   137 			{
       
   138 			SeeSpecialCharactersInTag( aChar, processCharacter );
       
   139 			}
       
   140 		else if ( aChar.IsSpace() )
       
   141 			{
       
   142 			SeeWhiteSpaceCharacterInTag( processCharacter );
       
   143 			}
       
   144 		else if ( iParserState == ESeeExclamationMark && aChar=='-' )
       
   145 			{
       
   146 			iParserState = EReadingJavascript;
       
   147 			processCharacter = EFalse;
       
   148 			}
       
   149 		else
       
   150 			{
       
   151 			SeeOtherCharactersInTag();
       
   152 			}
       
   153 		}
       
   154 	else
       
   155 		{
       
   156 		SeeOtherCharactersNotInTag(processCharacter);
       
   157 		}
       
   158 
       
   159 	return processCharacter;	
       
   160 	}
       
   161 
       
   162 /**
       
   163 	Set the new parser state depends on the previous, when the parser finds a 
       
   164 	space in between the tags. 
       
   165 	eg: <a href="/AudioVideo/TT/" class="navlink" shape="rect">
       
   166 	1. Read a -> State EReadingOpeningTag
       
   167 	2. Read space -> State EFinishedReadingTag
       
   168 	so on...	
       
   169 	
       
   170 	@param aBool& [out]
       
   171  */
       
   172 void  CHtmlParser::SeeWhiteSpaceCharacterInTag( TBool& aBool )
       
   173 	{
       
   174 	switch (iParserState)
       
   175 		{
       
   176 		case EReadingOpeningTag:
       
   177 			iParserState = EFinishedReadingTag;
       
   178 			aBool = EFalse;
       
   179 			break;
       
   180 		case EReadingClosingTag:
       
   181 			iParserState = EFinishedReadingTag;
       
   182 			aBool = EFalse;
       
   183 			break;
       
   184 		case EReadingAttribute:
       
   185 			iParserState = EFinishedReadingAttribute;
       
   186 			aBool = EFalse;
       
   187 			break;
       
   188 		case EReadingAttributeValue:
       
   189 			iParserState = EFinishedReadingAttributeValue;
       
   190 			break;
       
   191 		case EReadingAttributeValueWithinQuot:
       
   192 			break;		
       
   193 		default:
       
   194 			break;
       
   195 		}	
       
   196 	}
       
   197 
       
   198 void  CHtmlParser::SeeSpecialCharactersInTag( TChar aChar, TBool& aBool )
       
   199 	{
       
   200 	switch (aChar)
       
   201 		{
       
   202 		case '/':
       
   203 			if ( ( iParserState == EReadingAttributeValue ) || ( iParserState == EReadingAttributeValueWithinQuot ) ||	( iParserState == EReadingAttributeValueWithinApos ) )
       
   204 				{
       
   205 				aBool = ETrue;				
       
   206 				}
       
   207 			else if ( iParserState == ESeeStartOfTag )
       
   208 				{
       
   209 				iTagState = EClosingTag;
       
   210 	 			iParserState = ESeeClosingTagIndicator;
       
   211 				aBool = ETrue;
       
   212 				}
       
   213 			else if ( iParserState == ESeeEquals )
       
   214 				{
       
   215 				// special case. '/' comming after =. The parser will not understand this situation and 
       
   216 				// will not parse the element.
       
   217 				// eg: <form action=/search name=f ... >
       
   218 				iParserState = EReadingAttributeValue;					
       
   219 				}	
       
   220 			else if ( ( iParserState == EFinishedReadingAttributeValue ) ||	( iParserState == EFinishedReadingTag )	)
       
   221 				{	
       
   222 				iParserState = ESeeClosingTagIndicator;					
       
   223 				}			
       
   224 			break;
       
   225 		case '=':
       
   226 			if( ( iParserState != EReadingAttributeValue ) && ( iParserState != EReadingAttributeValueWithinQuot ) && ( iParserState != EReadingAttributeValueWithinApos ) )
       
   227 				{
       
   228 				iParserState = ESeeEquals;
       
   229 				aBool = EFalse;
       
   230 				}
       
   231 			else
       
   232 				{
       
   233 				aBool = ETrue;
       
   234 				}
       
   235 			break;
       
   236 		case '"':
       
   237 			if ( iParserState == ESeeEquals )
       
   238 				{
       
   239 				iParserState = EReadingAttributeValueWithinQuot;
       
   240 				aBool = EFalse;
       
   241 				}
       
   242 			else
       
   243 				{
       
   244 				iParserState = EFinishedReadingAttributeValue;
       
   245 				}
       
   246 			break;
       
   247 		case '\'':
       
   248 			if ( iParserState == ESeeEquals )
       
   249 				{
       
   250 				iParserState = EReadingAttributeValueWithinApos;
       
   251 				aBool = EFalse;
       
   252 				}
       
   253 			else
       
   254 				{
       
   255 				iParserState = EFinishedReadingAttributeValue;
       
   256 				}
       
   257 			break;
       
   258 			
       
   259 		case '>':
       
   260 			iParserState = ESeeEndOfTag;
       
   261 			iTagState = EClosingTag;
       
   262 			iInTag = EFalse;
       
   263 			break;
       
   264 		case '!':
       
   265 			if(iParserState == ESeeStartOfTag)
       
   266 				{
       
   267 				iParserState = ESeeExclamationMark;
       
   268 				aBool = EFalse;
       
   269 				}
       
   270 			break;
       
   271 					
       
   272 		default:
       
   273 			break;
       
   274 		}	
       
   275 	}
       
   276 
       
   277 void  CHtmlParser::SeeOtherCharactersInTag()
       
   278 	{
       
   279 	switch (iParserState)
       
   280 		{
       
   281 		case ESeeStartOfTag:
       
   282 			iParserState = EReadingOpeningTag;
       
   283 			break;
       
   284 		case ESeeClosingTagIndicator:
       
   285 			iParserState = EReadingClosingTag;
       
   286 			break;
       
   287 		case ESeeEquals:
       
   288 			iParserState = EReadingAttributeValue;
       
   289 			break;
       
   290 		case EFinishedReadingTag:
       
   291 			iParserState = EReadingAttribute;
       
   292 			break;
       
   293 		case EFinishedReadingAttribute:
       
   294 			iParserState = EReadingAttribute;
       
   295 			break;
       
   296 		case EFinishedReadingAttributeValue:
       
   297 			iParserState = EReadingAttribute;
       
   298 			break;
       
   299 		default:
       
   300 			break;
       
   301 		}
       
   302 	}
       
   303 
       
   304 void  CHtmlParser::SeeOtherCharactersNotInTag( TBool& aBool )
       
   305 	{
       
   306 	switch (iParserState)
       
   307 		{
       
   308 		case EReadingText:
       
   309 			aBool = EFalse;
       
   310 			break;
       
   311 		case ESeeEndOfTag:
       
   312 			iParserState = EReadingText;
       
   313 			iTagState = EText;
       
   314 			break;
       
   315 		case EInitialState:
       
   316 			iParserState = EReadingText;
       
   317 			break;
       
   318 		default:
       
   319 			break;
       
   320 		}
       
   321 	}
       
   322 
       
   323 /**
       
   324 	Parses the content and fetches the buffer in between tags or HTML text. 
       
   325 	The string can be partial ie unable to find the end tag or unable to find 
       
   326 	the opening tag in case of HTML text. In this case the pending buffer will be 
       
   327 	saved and will be parsed during the next call.	
       
   328 	
       
   329 	@param aContent [out] Content to be parsed. 
       
   330  */
       
   331 void CHtmlParser::ParseContentL ( const TDesC8& aContent, TBool aLastChunk /* = EFalse */ )
       
   332 	{
       
   333 	
       
   334 	CStringParser* parser = CStringParser::NewL ( aContent );	
       
   335 	CleanupStack::PushL ( parser );
       
   336 
       
   337 	if ( iFirst )
       
   338 		{	
       
   339 		TBool xmlFound;	
       
   340 		TPtrC8 charset ( KDefaultCharset );
       
   341 		TRAPD ( err, ExtractCharsetL ( aContent, charset, xmlFound ) );
       
   342 		TChar ch;
       
   343 		parser->GetCurrentCharacter ( ch );
       
   344 		if ( ( err == KErrNotFound ) && ( ch != KStartTag ) )
       
   345 			{
       
   346 			CleanupStack::PopAndDestroy ( parser );
       
   347 			// Maybe an invalid html document. First character must be '<'
       
   348 			return;
       
   349 			}
       
   350 		
       
   351 		CallStartDocumentL ( charset );
       
   352 		iFirst = EFalse;	
       
   353 		
       
   354 		if ( xmlFound )		
       
   355 			{
       
   356 			// ignore that line ( <?xml ... ?> )
       
   357 			parser->ParseTill ( charset, KEndTag );
       
   358 			parser->SkipLength ( 1 ); // skip '>'
       
   359 			}
       
   360 		}
       
   361 	
       
   362 			
       
   363 	// Process the partial content which is saved in the last call.
       
   364 	TRAPD ( err, ProcessPartialContentL ( *parser ) );
       
   365 	if ( err != KErrNone )  
       
   366 	{		
       
   367 		AddToPartialContentL ( aContent, ETrue );
       
   368 		CleanupStack::PopAndDestroy ( parser );
       
   369 		return; 
       
   370 	}
       
   371 		
       
   372 	TChar ch;
       
   373 	while ( parser->GetCurrentCharacter( ch ) )
       
   374 		{	
       
   375 		
       
   376 		if ( InspectCurrentCharacter ( ch ) )
       
   377 			{
       
   378 			switch ( iTagState )				
       
   379 				{
       
   380 				case EOpeningTag:
       
   381 					{
       
   382 					TPtrC8 TaggedBuffer;
       
   383 					
       
   384 					if ( ch == KStartTag )
       
   385 						{
       
   386 						parser->SkipLength ( 1 ); // skip '<'						
       
   387 						}
       
   388 					// The parser relies on the assumption that there will not be any '>' in between the
       
   389 					// tagged content. But there is a chance that attribute value can contain a '<'. The
       
   390 					// parsing will not happen properly in that case.
       
   391 					// eg. <tag name="value>abc" >
       
   392 					TBool success = parser->ParseTill ( TaggedBuffer, KEndTag );
       
   393 					ParseTaggedBufferL ( TaggedBuffer, !success ); // handle tagged buffer
       
   394 					break;					
       
   395 					}
       
   396 				case EClosingTag:
       
   397 					parser->SkipLength ( 1 ); // skip '>'
       
   398 					break;
       
   399 				case EText:
       
   400 					{
       
   401 					TPtrC8 Text;	
       
   402 					if ( parser->ParseTill ( Text, KStartTag ) )
       
   403 						{
       
   404 						ProcessTextL ( Text ); // Handle text.
       
   405 						}
       
   406 					else
       
   407 						{						
       
   408 						AddToPartialContentL ( Text ); 
       
   409 						}
       
   410 					}
       
   411 					break;
       
   412 				default:
       
   413 					parser->SkipLength ( 1 );
       
   414 					break;
       
   415 				}
       
   416 			}
       
   417 		else
       
   418 			{
       
   419 			parser->SkipLength ( 1 );
       
   420 			}	
       
   421 			
       
   422 		if ( iEndDocCalled )				
       
   423 			{
       
   424 			break; // </html> has already been reached. No further processing.				
       
   425 			}
       
   426 		}
       
   427 	
       
   428 	// This is the last chunk. OnEndDocumentL may have been called when it arrives for </html>
       
   429 	// In case if it cannot find </html> call OnEndDocumentL here. 
       
   430 	if ( aLastChunk && !iEndDocCalled )
       
   431 		{
       
   432 		iContentHandler->OnEndDocumentL ( KErrNone );			
       
   433 		}
       
   434 	CleanupStack::Pop ( parser );
       
   435 	delete parser;	
       
   436 	}
       
   437 
       
   438 /**
       
   439 	Parses the string inbetween the start tag and end tag. 
       
   440 	Fetches the HTML tags, attribute, values or java content.	
       
   441 	
       
   442 	@param aTaggedBuffer [in] Content inbetween start and end tag. 
       
   443 	@Param aPartial [in] ETrue for partial tagged content.
       
   444  */
       
   445 void CHtmlParser::ParseTaggedBufferL ( const TPtrC8& aTaggedBuffer, TBool  aPartial  /* = EFalse */ )
       
   446 	{	
       
   447 	
       
   448 	// Will add to the partial content variable and return.
       
   449 	if ( aPartial )
       
   450 		{
       
   451 		AddToPartialContentL ( aTaggedBuffer, ETrue ); 
       
   452 		return;
       
   453 		}
       
   454 
       
   455 	CStringParser* parser = CStringParser::NewL ( aTaggedBuffer );
       
   456 	CleanupStack::PushL ( parser );			
       
   457 			
       
   458 	TPtrC8 htmltag;	
       
   459 	TBool callEndElement = EFalse;
       
   460 	TBool callStartElement = EFalse;
       
   461 	TChar ch;	
       
   462 
       
   463 	while ( parser->GetCurrentCharacter( ch ) )
       
   464 		{
       
   465 		InspectCurrentCharacter ( ch );
       
   466 		
       
   467 		switch ( iParserState )
       
   468 			{		
       
   469 			case ESeeExclamationMark:
       
   470 				{
       
   471 				TPtrC8 text;
       
   472 				
       
   473 				TChar ch;
       
   474 				parser->GetNextCharacter( ch );
       
   475 				
       
   476 				if ( ch == '-' )
       
   477 					{
       
   478 					parser->SkipLength ( 2 ); // skip --
       
   479 					parser->ParseTill ( text, _L8 ("/-") ); 
       
   480 					}
       
   481 				else
       
   482 					{
       
   483 					parser->GetRemainder ( text );
       
   484 					}
       
   485 				
       
   486 				iContentHandler->OnContentL( text, KErrNone ); // This is a java script.				
       
   487 				
       
   488 				parser->GetRemainder ( text );
       
   489 				}
       
   490 			break;
       
   491 							
       
   492 			case ESeeClosingTagIndicator:
       
   493 				{
       
   494 				TPtrC8 tag;				
       
   495 				parser->SkipLength ( 1 ); // skip '/'
       
   496 				parser->GetRemainder ( tag );			
       
   497 				// check the tag in the array and remove it.	
       
   498 				if ( !CheckAndRemoveTagL ( tag ) )
       
   499 					{
       
   500 					iContentHandler->OnError( EXmlTagMismatch );	
       
   501 					}
       
   502 				else
       
   503 					{
       
   504 					if ( tag.Length () )
       
   505 						{
       
   506 						htmltag.Set ( tag );						
       
   507 						}
       
   508 					callEndElement = ETrue;		
       
   509 					}
       
   510 				}
       
   511 			break;
       
   512 			
       
   513 			case EReadingOpeningTag:
       
   514 				{
       
   515 				parser->ParseTill( htmltag, _L8 ("\t\n ") );
       
   516 				
       
   517 				// check whether it is an optional tag.
       
   518 				// Only the tags which is defined as optional in the HTML 4.01 specs is 
       
   519 				// considered. Any other tag will not be considered optional.
       
   520 				// In case of any other tag ( other than defined in specs ) which has optional behaviour 
       
   521 				// and if no end tag specified for it, the end tag will can get nested.
       
   522 				if ( IsOptionalTagL ( htmltag ) )	
       
   523 					{
       
   524 					CheckAndProcessLastOptionalTagL ( htmltag );
       
   525 					}				
       
   526 								
       
   527 				iTagQueue->AppendL (  htmltag ); // append to tag array				
       
   528 				callStartElement = ETrue;			
       
   529 				}
       
   530 			break;
       
   531 			
       
   532 			case EReadingAttribute:
       
   533 				{
       
   534 				TPtrC8 attribute;
       
   535 				parser->ParseTill( attribute, _L8 (" \n\t\r=") );
       
   536 				iTagAttribute->AppendL ( attribute );	// append to attribute array.
       
   537 				
       
   538 				TChar ch;
       
   539 				TBool success = parser->GetCurrentCharacter ( ch );
       
   540 				
       
   541 				if ( !success || ( ch != '=') ) 
       
   542 					{
       
   543 					// This attribute doesn't have an attribute value.
       
   544 					// The attribute value will be same as attribute name.
       
   545 					iTagAttributeValue->AppendL ( attribute );
       
   546 					}				
       
   547 				}
       
   548 			break;
       
   549 		
       
   550 			case EReadingAttributeValue:
       
   551 				{
       
   552 				TPtrC8 attributeval;										
       
   553 				parser->ParseTill( attributeval, _L8 ( " \t\r\n" ) );
       
   554 				iTagAttributeValue->AppendL ( attributeval ); // append to attribute value array
       
   555 				}
       
   556 			break;	
       
   557 			
       
   558 			case EReadingAttributeValueWithinQuot:
       
   559 				{
       
   560 				parser->SkipLength ( 1 ); // skip '"' 
       
   561 				TPtrC8 attributeval;										
       
   562 				parser->ParseTill( attributeval, '"' );
       
   563 				iTagAttributeValue->AppendL ( attributeval ); // append to attribute value array									
       
   564 				}
       
   565 			break;
       
   566 			
       
   567 			case EReadingAttributeValueWithinApos:
       
   568 				{
       
   569 				parser->SkipLength ( 1 ); // skip ''' 
       
   570 				TPtrC8 attributeval;										
       
   571 				parser->ParseTill( attributeval, '\'' );
       
   572 				iTagAttributeValue->AppendL ( attributeval ); // append to attribute value array														
       
   573 				}
       
   574 			break;
       
   575 						
       
   576 			default:
       
   577 				parser->SkipLength ( 1 );
       
   578 			break;
       
   579 			}
       
   580 		};
       
   581 	
       
   582 	// Forbidden tags need to be closed immediatly. Check whether it
       
   583 	// has already closed otherwise close it.
       
   584 	if ( !callEndElement && IsForbiddenTagL ( htmltag ) )
       
   585 		{
       
   586 		CheckAndRemoveTagL ( htmltag );
       
   587 		callEndElement = ETrue;
       
   588 		}
       
   589 		
       
   590 	// Tell the XML framework about the tag and attribute/value.
       
   591 	if ( callStartElement )
       
   592 		{
       
   593 		CallStartElementL ( htmltag );
       
   594 		}
       
   595 		
       
   596 	// Calling end element... ( End tag ) </html>
       
   597 	if ( callEndElement )
       
   598 		{
       
   599 		CallEndElementL ( htmltag );
       
   600 		
       
   601 		if ( !htmltag.CompareF ( _L8 ("html" ) ) )
       
   602 			{
       
   603 			iContentHandler->OnEndDocumentL ( KErrNone );
       
   604 			iEndDocCalled = ETrue;
       
   605 			}
       
   606 		}
       
   607 
       
   608 	
       
   609 	iTagAttribute->Delete ( 0, iTagAttribute->Count () );
       
   610 	iTagAttributeValue->Delete ( 0, iTagAttributeValue->Count () );
       
   611 	
       
   612 	CleanupStack::Pop ( parser );
       
   613 	delete parser;
       
   614 	}
       
   615 
       
   616 /**
       
   617 	Inform the CHtmlParser about the HTML text.	
       
   618 	
       
   619 	@param aText [in] Document text. 
       
   620  */
       
   621 void CHtmlParser::ProcessTextL ( const TPtrC8& aText )
       
   622 	{
       
   623 	iContentHandler->OnContentL ( aText, KErrNone );
       
   624 	}
       
   625 
       
   626 /**
       
   627 	Process the partial content, which is saved in the last call with the new content.	
       
   628 	
       
   629 	@param aParser [in,out] String parser object. 
       
   630  */
       
   631 void CHtmlParser::ProcessPartialContentL ( CStringParser& aParser )
       
   632 	{
       
   633 	if ( iPartialContent->Des().Length () > 0 )
       
   634 		{
       
   635 		switch ( iTagState )											
       
   636 			{
       
   637 			case EOpeningTag:
       
   638 				{				
       
   639 				TPtrC8 TaggedBuffer;
       
   640 				TBool success = aParser.ParseTill ( TaggedBuffer, KEndTag );
       
   641 				if ( success )
       
   642 					{
       
   643 					AddToPartialContentL ( TaggedBuffer, ETrue );
       
   644 					ParseTaggedBufferL ( iPartialContent->Des(), EFalse );
       
   645 					}
       
   646 				else
       
   647 					{
       
   648 					User::Leave ( KErrNotFound );
       
   649 					}
       
   650 				}			
       
   651 			break;						
       
   652 			case EText:
       
   653 				{
       
   654 				TPtrC8 Text;	
       
   655 				if ( aParser.ParseTill ( Text, KStartTag ) )
       
   656 					{
       
   657 					AddToPartialContentL ( Text, ETrue );
       
   658 					ProcessTextL ( iPartialContent->Des() ); // Handle text.
       
   659 					}
       
   660 				else
       
   661 					{						
       
   662 					User::Leave ( KErrNotFound );
       
   663 					}					
       
   664 				}
       
   665 			break;			
       
   666 			default:			
       
   667 			// shouldn't come here.
       
   668 			User::Panic ( _L ("Html parser - partial content processing error."), KErrUnknown );
       
   669 			break;
       
   670 			}
       
   671 		}		
       
   672 		iPartialContent->Des().Zero (); // zero the partial content length.		
       
   673 	}
       
   674 
       
   675 /**
       
   676 	Add or append to the partial buffer. 
       
   677 	
       
   678 	@param aContent [in] Partial content to save.
       
   679 	@param aAppend [in] ETrue to append to the existing partial buffer.
       
   680  */
       
   681 void CHtmlParser::AddToPartialContentL ( const TPtrC8& aContent, TBool aAppend /* = EFalse */ )
       
   682 	{
       
   683 	if ( aContent.Length () == 0 )
       
   684 		{
       
   685 		return;			
       
   686 		}
       
   687 
       
   688 	TInt len = aContent.Length () + iPartialContent->Des().Length();
       
   689 	if ( iPartialContent->Des().MaxSize () < len )
       
   690 		{
       
   691 		iPartialContent = iPartialContent->ReAllocL ( len );			
       
   692 		}
       
   693 
       
   694 	
       
   695 	if ( aAppend )
       
   696 		{
       
   697 		iPartialContent->Des().Append( aContent );		
       
   698 		}
       
   699 	else
       
   700 		{
       
   701 		*iPartialContent = aContent; // Partial text. Save the text.		
       
   702 		}		
       
   703 	}
       
   704 
       
   705 /**
       
   706 	Check the aTag ( if it is not empty ) in the tag array and will	remove it from the array.
       
   707 	
       
   708 	@param aTag [in] Tag value need to be checked.
       
   709 
       
   710 	@return TBool ETrue Found and successfully removed.
       
   711  */
       
   712 TBool CHtmlParser::CheckAndRemoveTagL ( const TPtrC8& aTag )
       
   713 	{
       
   714 	
       
   715 	TInt count = iTagQueue->Count ();	
       
   716 	if ( !count )
       
   717 		{
       
   718 		return EFalse;			
       
   719 		}
       
   720 	
       
   721 	if ( !aTag.Length () )
       
   722 		{
       
   723 		// Remove the last tag added. 
       
   724 		// For eg: <img src="picture.jpg"/> 
       
   725 		// "img" tag has been added and need to remove it from the array.		
       
   726 		TPtrC8 temp ( iTagQueue->MdcaPoint ( count - 1 ) );		
       
   727 		iTagQueue->Delete ( count - 1 );		
       
   728 		return ETrue;					
       
   729 		}
       
   730 		
       
   731 	// Iterate through the tag array from end till it finds the tag.
       
   732 	// Remove the elements that is mis-matching from the array. If the tag array 
       
   733 	// count is zero and if it is unable to find the tag, then there is a tag
       
   734 	// mis-match.		
       
   735 	while ( count )
       
   736 		{
       
   737 		TBool found = ETrue;
       
   738 		TPtrC8 temp ( iTagQueue->MdcaPoint ( count - 1 ) );
       
   739 		if ( aTag.Compare ( temp ) )
       
   740 			{
       
   741 			// Tag mis-match. 
       
   742 			found = EFalse;		
       
   743 			CallEndElementL ( temp ); // no end tag. Client should know about this.						
       
   744 			}			
       
   745 		// Remove the tag.
       
   746 		iTagQueue->Delete ( count - 1 );		
       
   747 					
       
   748 		if ( found )
       
   749 			{
       
   750 			return ETrue;					
       
   751 			}
       
   752 		count = iTagQueue->Count ();	
       
   753 		}
       
   754 
       
   755 	return EFalse; // Tag is not present. 				
       
   756 	}
       
   757 
       
   758 /**
       
   759 	Creates RTagInfo & RAttributeArray and tell XML framework
       
   760 	
       
   761 	@param aTag [in] Tag value.
       
   762  */
       
   763 void CHtmlParser::CallStartElementL ( const TDesC8& aTag )
       
   764 	{	
       
   765 	// Create tag info with the uri, localname and prefix values.
       
   766 	RTagInfo tagInfo;	
       
   767 	CreateTagInfoLC	( tagInfo, aTag );
       
   768 
       
   769 	RAttributeArray attributes;
       
   770 	CleanupStack::PushL( TCleanupItem( AttributeArrayDelete, &attributes ) );
       
   771 	
       
   772 	for ( TInt i  = 0; i < iTagAttribute->Count(); ++i )
       
   773 		{
       
   774 		RString nameString;
       
   775 		RString valueString;
       
   776 		RString uriString;
       
   777 		RString prefixString;
       
   778 		
       
   779 		// Create RString's for attribute name/value		
       
   780 		nameString = iStringPool.OpenStringL ( (*iTagAttribute)[i] );
       
   781 		CleanupClosePushL ( nameString );
       
   782 		valueString = iStringPool.OpenStringL ( (*iTagAttributeValue)[i] );
       
   783 		CleanupClosePushL ( valueString );
       
   784 		
       
   785 		// For HTML there is no uri and prefix. So make it null
       
   786 		uriString = iStringPool.OpenStringL( KNullDesC8 );
       
   787 		CleanupClosePushL ( uriString );
       
   788 		prefixString = iStringPool.OpenStringL( KNullDesC8 );
       
   789 		CleanupClosePushL ( prefixString );
       
   790 		
       
   791 		RAttribute attribute;
       
   792 		attribute.Open( uriString, prefixString, nameString, valueString );		
       
   793 
       
   794 		CleanupStack::Pop ( 4 ); 
       
   795 		CleanupClosePushL(attribute);
       
   796 		
       
   797 		User::LeaveIfError(attributes.Append(attribute));
       
   798 		
       
   799 		CleanupStack::Pop();
       
   800 		}
       
   801 	
       
   802 
       
   803 	// Tell the framework.
       
   804 	iContentHandler->OnStartElementL ( tagInfo, attributes, KErrNone );
       
   805 	CleanupStack::PopAndDestroy ( &attributes );
       
   806 	CleanupStack::PopAndDestroy ( &tagInfo ); 
       
   807 	}
       
   808 
       
   809 /**
       
   810 	Creates RTagInfo and tells the framework.
       
   811 	
       
   812 	@param aTag [in] Tag value.
       
   813  */
       
   814 void CHtmlParser::CallEndElementL ( const TDesC8& aTag )
       
   815 	{
       
   816 	
       
   817 	if ( !aTag.Length () )
       
   818 		{
       
   819 		return;			
       
   820 		}
       
   821 
       
   822 	// Create tag info with the uri, localname and prefix values.
       
   823 	RTagInfo tagInfo;	
       
   824 
       
   825 	CreateTagInfoLC( tagInfo, aTag );
       
   826 	
       
   827 	// Tell the framework
       
   828 	iContentHandler->OnEndElementL ( tagInfo, KErrNone );
       
   829 	
       
   830 	CleanupStack::PopAndDestroy ( &tagInfo ); 	
       
   831 	}
       
   832 
       
   833 /**
       
   834 	Creates RDocumentParams and tells the framework.	
       
   835  */
       
   836 void CHtmlParser::CallStartDocumentL ( const TDesC8& aCharset )
       
   837 	{
       
   838 	RString encodingString = iStringPool.OpenStringL ( aCharset );
       
   839 	CleanupClosePushL ( encodingString );
       
   840 	
       
   841 	RDocumentParameters docparams;	
       
   842 	
       
   843 	docparams.Open ( encodingString );
       
   844 	CleanupStack::Pop (); // pop encodingString
       
   845 	CleanupClosePushL ( docparams );
       
   846 	
       
   847 	// Tell the framework.
       
   848 	iContentHandler->OnStartDocumentL( docparams, KErrNone );
       
   849 	
       
   850 	CleanupStack::PopAndDestroy ();
       
   851 	}
       
   852 
       
   853 void CHtmlParser::CheckAndProcessLastOptionalTagL ( const TDesC8& aTag )
       
   854 	{
       
   855 	TInt count = iTagQueue->Count();
       
   856 	// Tag queue is empty. Nothing to do.
       
   857 	if ( !count )		
       
   858 		{
       
   859 		return;
       
   860 		}
       
   861 	
       
   862 	TPtrC8 temp( iTagQueue->MdcaPoint ( count - 1 ) );
       
   863 	
       
   864 	if ( !temp.Compare ( aTag ) )
       
   865 		{		
       
   866 		CallEndElementL ( aTag );		//  Optional tag without an end tag. Should be closed.
       
   867 		iTagQueue->Delete( count  - 1 );// delete it from the array.
       
   868 		}
       
   869 	}
       
   870 
       
   871 TBool CHtmlParser::IsOptionalTagL ( const TDesC8& aTag )
       
   872 	{
       
   873 	for ( TInt i = HtmlTagsStringTable::EBody; i < HtmlTagsStringTable::EArea; ++i )
       
   874 		{
       
   875 		RString string = iStringPool.String( i, HtmlTagsStringTable::Table );
       
   876 		CleanupClosePushL ( string );
       
   877 		TInt result = aTag.Compare ( string.DesC() );
       
   878 		CleanupStack::PopAndDestroy ( 1 );
       
   879 		if ( !result )
       
   880 			{
       
   881 			return ETrue;
       
   882 			}
       
   883 		}
       
   884 	return EFalse;
       
   885 	}
       
   886 
       
   887 TBool CHtmlParser::IsForbiddenTagL ( const TDesC8& aTag )
       
   888 	{
       
   889 
       
   890 	for ( TInt i = HtmlTagsStringTable::EArea; i <= HtmlTagsStringTable::EParam; ++i )
       
   891 		{
       
   892 		RString string = iStringPool.String( i, HtmlTagsStringTable::Table );
       
   893 		CleanupClosePushL ( string );
       
   894 		TInt result = aTag.Compare ( string.DesC() );
       
   895 		CleanupStack::PopAndDestroy ( 1 );
       
   896 		if ( !result )
       
   897 			{
       
   898 			return ETrue;			
       
   899 			}
       
   900 
       
   901 		}
       
   902 	return EFalse;
       
   903 	}
       
   904 
       
   905 void CHtmlParser::CheckAndProcessForbiddenTagL ( const TDesC8& aTag )
       
   906 	{
       
   907 	TInt count = iTagQueue->Count();
       
   908 	if ( count )		
       
   909 		{
       
   910 		CallEndElementL ( aTag );		//  Forbidden tag without an end tag. Should be closed.
       
   911 		iTagQueue->Delete( count  - 1 );// 	delete it from the array.	
       
   912 		}		
       
   913 	}
       
   914 
       
   915 
       
   916 void CHtmlParser::ExtractCharsetL ( const TDesC8& aContent, TPtrC8& aCharset, TBool& aXMLFound )
       
   917 	{
       
   918 	aXMLFound = EFalse;
       
   919 	TBool meta = ETrue;
       
   920 	TInt pos = aContent.FindF ( KMetaName );		
       
   921 	if ( pos == KErrNotFound )
       
   922 		{
       
   923 		pos = aContent.FindF ( KXMLName );
       
   924 		if ( pos == KErrNotFound )
       
   925 			{
       
   926 			User::Leave ( pos );
       
   927 			}		
       
   928 		meta = EFalse;
       
   929 		aXMLFound = ETrue;
       
   930 		}
       
   931 				
       
   932 	TPtrC8 contentPtr ( aContent.Mid ( pos ) );	
       
   933 	ExtractCharsetValueL ( contentPtr, meta ? _L8 ( "charset" ) : _L8( "encoding" ), aCharset );
       
   934 	}
       
   935 
       
   936 /**
       
   937 	Extract the charset value from the HTML content. This function will be called only
       
   938 	if the document is of HTML type. Function will leave if is unable to find "charset" or "encoding".
       
   939  
       
   940  */
       
   941 void CHtmlParser::ExtractCharsetValueL ( const TDesC8& aContent, const TDesC8& aSearchValue, TPtrC8& aCharset )
       
   942 	{
       
   943 	
       
   944 	TInt pos = aContent.FindF ( aSearchValue );
       
   945 	if ( pos == KErrNotFound )
       
   946 		{
       
   947 		User::Leave ( pos );
       
   948 		}
       
   949 		
       
   950 	TPtrC8 contentPtr ( aContent.Mid ( pos ) );
       
   951 	CStringParser* parser = CStringParser::NewLC ( contentPtr );
       
   952 	TPtrC8 value;
       
   953 	
       
   954 	if ( parser->ParseTill ( value, '=' ) )	
       
   955 		{
       
   956 		parser->SkipLength ( 1 ); // skip '='
       
   957 		TChar ch;
       
   958 		parser->GetCurrentCharacter ( ch );
       
   959 		if ( ch == '\"' )
       
   960 			{
       
   961 			parser->SkipLength ( 1 ); // skip '\"'	
       
   962 			}
       
   963 		
       
   964 		parser->ParseTill ( aCharset, _L8 ("\"\r\n" ) );		
       
   965 		}
       
   966 	else
       
   967 		{
       
   968 		CleanupStack::PopAndDestroy ( parser );
       
   969 		User::Leave ( pos );
       
   970 		}				
       
   971 
       
   972 	CleanupStack::PopAndDestroy ( parser );								
       
   973 	}	
       
   974 
       
   975 LOCAL_C void AttributeArrayDelete( TAny* aPtr )
       
   976 	{
       
   977 	RAttributeArray& attributes = *( RAttributeArray* )aPtr;
       
   978 
       
   979 	TInt nAttributes = attributes.Count();
       
   980 	for( TInt i=0; i < nAttributes; ++i )
       
   981 		{
       
   982 		attributes[i].Close();
       
   983 		}		
       
   984 	attributes.Close();
       
   985 	}
       
   986 	
       
   987 
       
   988