xml/legacyminidomparser/xmlparser/src/gmxmlentityconverter.cpp
changeset 34 c7e9f1c97567
parent 0 e35f40988205
equal deleted inserted replaced
25:417699dc19c9 34:c7e9f1c97567
       
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // MDXMLEntityConverter.cpp
       
    15 // @file
       
    16 // This class represents a generic entity converter for an XML parser or composer.
       
    17 // 
       
    18 //
       
    19 
       
    20 #include <f32file.h>
       
    21 #include <stringpool.h>
       
    22 
       
    23 #include <gmxmlconstants.h>
       
    24 #include <gmxmlcomposer.h>
       
    25 #include <gmxmlentityconverter.h>
       
    26 #include "xmlentityreferences.h"
       
    27 
       
    28 const TInt KMaxBuiltinEntityLength = 20;
       
    29 
       
    30 //
       
    31 // CMDXMLEntityConverter			//
       
    32 //
       
    33 
       
    34 EXPORT_C CMDXMLEntityConverter::CMDXMLEntityConverter()
       
    35 //
       
    36 // Constructor
       
    37 //
       
    38 	{
       
    39 	}
       
    40 
       
    41 EXPORT_C CMDXMLEntityConverter::~CMDXMLEntityConverter()
       
    42 	{
       
    43 	}
       
    44 
       
    45 //==================================================================================
       
    46 
       
    47 TInt CMDXMLEntityConverter::EntityToTextL(TDes& aTextToConvert)
       
    48 //
       
    49 // Takes a block of text and converts any entity references found to the 
       
    50 // appropriate text.  Because built-in and character entity references are
       
    51 // longer than the replacement text, this takes place in-situ.
       
    52 // @param aTextToConvert Text to be converted - replacement text goes
       
    53 // out in the same
       
    54 //
       
    55 	{
       
    56 	TInt error = KErrNone;
       
    57 	TInt beginEntityOffset = 0;
       
    58 	TInt beginSearchOffset = 0;
       
    59 	TPtrC searchPtr = TPtrC(aTextToConvert);
       
    60 
       
    61 	// Need to convert all entity refs in this bit of data.  Don't want to convert
       
    62 	// things more than once so we keep a marker and move it on so each time round
       
    63 	// the loop we start looking for entity references after the end of the previous
       
    64 	// one.
       
    65 	while((beginEntityOffset = searchPtr.Locate('&')) != KErrNotFound)
       
    66 		{
       
    67 		searchPtr.Set(aTextToConvert.Mid(beginSearchOffset + beginEntityOffset));
       
    68 		TInt endEntityOffset = searchPtr.Locate(';');
       
    69 		if(endEntityOffset == KErrNotFound)
       
    70 			{
       
    71 			// No point continuing past here - even if we find more & they can't be
       
    72 			// terminated either. 
       
    73 			error = KErrXMLBadEntity;
       
    74 			break;
       
    75 			}
       
    76 		endEntityOffset += beginEntityOffset;
       
    77 		// Can't construct a modifiable descriptor that's just part of another one.  Looks
       
    78 		// like we need to copy the section containing the reference and twiddle that, then
       
    79 		// copy the result back if it's valid.
       
    80 		HBufC* entityRef = searchPtr.Mid(0, (endEntityOffset - beginEntityOffset)+1).AllocLC();
       
    81 		TInt convertErr = KErrNone;
       
    82 		TPtr entityRefDes = entityRef->Des();
       
    83 		if((convertErr = ConvertEntityRefL(entityRefDes)) == KErrNotSupported)
       
    84 			{
       
    85 			convertErr = DTDEntityToText(entityRefDes);
       
    86 			}
       
    87 
       
    88 		if(!convertErr)
       
    89 			{
       
    90 			// We need to replace this section of aTextToConvert with entityRefDes
       
    91 			// we don't know whether DTDEntityToText has actually changed anything -
       
    92 			// because of constraints on the published behaviour of DTDEntityToText we
       
    93 			// can't compel DTDEntityToText to return KErrUnsupported - it thinks it 
       
    94 			// could be dealing with multiple entity refs and just cos it doesn't support
       
    95 			// one of them doesn't mean it doesn't support the rest.  Probably true that
       
    96 			// the entity ref will be a builtin, so won't need to call DTDEntityToText 
       
    97 			// anyway.  ConvertEntityRefL will return KErrNone only if it converted so
       
    98 			// we probably won't be copying things gratuitously in the common case. 
       
    99 			aTextToConvert.Replace(beginEntityOffset + beginSearchOffset, (endEntityOffset - beginEntityOffset)+1, entityRefDes);
       
   100 			}
       
   101 		else
       
   102 			{
       
   103 			error = KErrXMLBadEntity;
       
   104 			}
       
   105 
       
   106 		beginSearchOffset += beginEntityOffset + entityRefDes.Length();
       
   107 		searchPtr.Set(aTextToConvert.Mid(beginSearchOffset));
       
   108 		CleanupStack::PopAndDestroy(entityRef);
       
   109 		}
       
   110 
       
   111 	return error;
       
   112 	}
       
   113 
       
   114 //==================================================================================
       
   115 
       
   116 EXPORT_C TInt CMDXMLEntityConverter::OutputComposedTextL( CMDXMLComposer* aComposer, const TDesC& aTextToConvert)
       
   117 //
       
   118 // Outputs a block of text to a composer with offending characters replaced by
       
   119 // entity references.
       
   120 // @param aComposer the composer to be used for output
       
   121 // @param aTextToConvert The text to be converted.
       
   122 // @return Returns KErrNone if successful or a file write error
       
   123 // @leave can Leave due to OOM
       
   124 //
       
   125 	{
       
   126 	TBuf<1> oneChar;
       
   127 	TInt offset = 0;
       
   128 	TInt error = KErrNone;
       
   129 	TInt textLen = aTextToConvert.Length();
       
   130 
       
   131 	while(error == KErrNone && (offset < textLen))
       
   132 		{
       
   133 		oneChar = aTextToConvert.Mid(offset, 1);
       
   134 		offset++;
       
   135 
       
   136 	// Built in entities
       
   137 		RStringPool	pool;
       
   138 		CleanupClosePushL(pool);
       
   139 		pool.OpenL(XMLEntityReferences::Table);
       
   140 
       
   141 
       
   142 		TBool found = EFalse;
       
   143 		for (TUint i=0; i<XMLEntityReferences::Table.iCount; i++)
       
   144 			{
       
   145 			RStringF entity = pool.StringF(i, XMLEntityReferences::Table);
       
   146 			TBuf<KMaxBuiltinEntityLength> buf;
       
   147 			buf.Copy(entity.DesC());
       
   148 			TLex string(buf);
       
   149 
       
   150 			TPtrC entityRef = string.NextToken(); 
       
   151 			if(oneChar.Compare(string.NextToken()) == 0)
       
   152 				{
       
   153 				error = aComposer->OutputDataL(entityRef);
       
   154 				found = ETrue;
       
   155 				break;
       
   156 				}
       
   157 
       
   158 			}
       
   159 
       
   160 		CleanupStack::PopAndDestroy(); // close pool
       
   161 
       
   162 		if(!found)
       
   163 			error = aComposer->OutputDataL(oneChar);
       
   164 		else
       
   165 			found = EFalse;
       
   166 		
       
   167 		}
       
   168 
       
   169 	return error;
       
   170 	}
       
   171 
       
   172 //==================================================================================
       
   173 
       
   174 TInt CMDXMLEntityConverter::DTDEntityToText(TDes& /*aTextToConvert*/)
       
   175 //
       
   176 // DTD Specific entity to text converter
       
   177 // Takes a block of text and converts any entity references found to the 
       
   178 // appropriate text.  We hope that this can happen in-situ.
       
   179 // @param aTextToConvert Text to be converted - replacement text goes
       
   180 // out in the same
       
   181 //
       
   182 	{ // Nothing in the generic case.
       
   183 	return KErrNone;
       
   184 	}
       
   185 
       
   186 //==================================================================================
       
   187 
       
   188 TInt CMDXMLEntityConverter::ConvertEntityRefL(TDes& aTextToConvert)
       
   189 	{
       
   190 #ifdef _DEBUG
       
   191 	_LIT(KEntityConverter, "GMXML Entity Converter");
       
   192 	__ASSERT_ALWAYS(aTextToConvert.Length() > 1, User::Panic(KEntityConverter, KErrArgument));
       
   193 #endif
       
   194 
       
   195 	TInt err = KErrNotSupported;
       
   196 	TBool charRef = EFalse;
       
   197 	TInt idLen = 0;
       
   198 	TRadix elementRadix = EDecimal;
       
   199 
       
   200 	if(aTextToConvert.Find(KXMLHexidecimalId) != KErrNotFound)
       
   201 		{
       
   202 		charRef = ETrue;
       
   203 		idLen = TPtrC(KXMLHexidecimalId).Length();
       
   204 		elementRadix = EHex;
       
   205 		}
       
   206 	else if(aTextToConvert.Find(KXMLDecimalId) != KErrNotFound)
       
   207 		{
       
   208 		charRef = ETrue;
       
   209 		idLen = TPtrC(KXMLDecimalId).Length();
       
   210 		elementRadix = EDecimal;
       
   211 		}
       
   212 
       
   213 	if(charRef)
       
   214 		{
       
   215 		// Character entities
       
   216 		/*
       
   217 			algorithm explanation:
       
   218 			The entity reference in the descriptor holding the string is known by it's positions.
       
   219 				offset = offset to beginning of '&#x' from start of supplied descriptor. (0 indicates start of descriptor)
       
   220 				entitylen = offset to ';' character from the start of the entity reference.
       
   221 
       
   222 				the length of the entity preamble (&#x in this case) is known.
       
   223 
       
   224 			The algorithm assigns the required portion of the descriptor to a TLex16 object so that the 
       
   225 			value of the code can be read out.
       
   226 
       
   227 			The math for the assign is as follows:
       
   228 				start of number = offset + length of preamble
       
   229 				num of characters to take = entityLen - length of preamble
       
   230 
       
   231 			The entity is then deleted from the string, and the new code is inserted in the
       
   232 			correct location.
       
   233 				The deleted characters are 'entityLen' charcters from 'offset'.
       
   234 			
       
   235 			
       
   236 			e.g. for the string <a href="http://&#x77;ww.symbian.com">
       
   237 
       
   238 			offset = 16
       
   239 			entitylen = 5
       
   240 			string assigned to lexer = '77'
       
   241 			code = 0x77 (extracted from lexer)
       
   242 
       
   243 			descriptor after delete operation:
       
   244 
       
   245 				<a href="http://&ww.symbian.com">
       
   246 
       
   247 			array dereference fills in the descriptor such that the ampersand is replaced.
       
   248 				<a href="http://www.symbian.com">
       
   249 		*/
       
   250 		TInt entityLen = aTextToConvert.Length();
       
   251 
       
   252 		TLex16 element;
       
   253 		element.Assign(aTextToConvert.Mid(idLen, (entityLen - idLen)));
       
   254 
       
   255 		TUint16 code;
       
   256 		err = element.Val(code, elementRadix);
       
   257 
       
   258 		if(!err)
       
   259 			{
       
   260 			aTextToConvert.Delete(1, entityLen);
       
   261 			aTextToConvert[0] = code;
       
   262 			}
       
   263 		}
       
   264 	else
       
   265 		{
       
   266 		// Built in entities
       
   267 		RStringPool	pool;
       
   268 		CleanupClosePushL(pool);
       
   269 		pool.OpenL(XMLEntityReferences::Table);
       
   270 
       
   271 
       
   272 		for (TUint i=0; (i<XMLEntityReferences::Table.iCount) && (err != KErrNone); i++)
       
   273 			{
       
   274 			RStringF entity = pool.StringF(i, XMLEntityReferences::Table);
       
   275 			TBuf<KMaxBuiltinEntityLength> buf;
       
   276 			buf.Copy(entity.DesC());
       
   277 			TLex string(buf);
       
   278 
       
   279 			TPtrC token = string.NextToken(); 
       
   280 			TInt offset;
       
   281 			if((offset = aTextToConvert.Find(token)) != KErrNotFound)
       
   282 				{
       
   283 				aTextToConvert.Replace(offset, token.Length(), string.NextToken());
       
   284 				err = KErrNone;
       
   285 				}
       
   286 			}
       
   287 		CleanupStack::PopAndDestroy(); //close pool
       
   288 		}
       
   289 
       
   290 	return err;
       
   291 	}
       
   292 
       
   293 // End Of File