filehandling/htmltorichtextconverter/src/CHtmlToCrtConvCharsetConvert.cpp
changeset 0 2e3d3ce01487
equal deleted inserted replaced
-1:000000000000 0:2e3d3ce01487
       
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include <s32file.h>
       
    17 
       
    18 #include "CHtmlToCrtConvCharsetConvert.h"
       
    19 #include "CHtmlToCrtConvBuffer.h"
       
    20 #include "CHtmlToCrtConverter.h"
       
    21 #include "MHtmlToCrtConvResourceFile.h"
       
    22 #include <chtmltocrtconverter.rsg>
       
    23 
       
     24 const TInt KSampleBufferSize = 256; // capacity of the sample buffers and the length requested from GetSampleOfTextFromFileL
       
     25 const TInt KMetaTagCharsetValueBufferLength	= 40; // capacity of the buffer holding a parsed META charset value; longer values are cut off
       
     26 const TInt KMimimumConfidenceLevel = 50; // auto-detect results below this confidence (range 0-100) are discarded; name is spelled "Mimimum" (sic)
       
    27 
       
    28 CHtmlToCrtConvCharsetConvert* CHtmlToCrtConvCharsetConvert::NewL(CHtmlToCrtConvBuffer& aBuffer, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile, MConverterUiObserver* aObserver)
       
    29 	{
       
    30 	CHtmlToCrtConvCharsetConvert* self=new(ELeave) CHtmlToCrtConvCharsetConvert(aBuffer, aObserver, aFsSession, aCnvCharacterSetConverter, aResourceFile);
       
    31 	CleanupStack::PushL(self);
       
    32 	self->ConstructL();
       
    33 	CleanupStack::Pop(self);
       
    34 	return self;
       
    35 	}
       
    36 
       
    37 void CHtmlToCrtConvCharsetConvert::ConstructL()
       
    38 	{
       
    39 	iArrayOfCharacterSetsAvailable=CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableL(iFsSession);
       
    40 	}
       
    41 
       
    42 CHtmlToCrtConvCharsetConvert::CHtmlToCrtConvCharsetConvert(CHtmlToCrtConvBuffer& aBuffer, MConverterUiObserver* aObserver, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile)
       
    43 :iCnvCharacterSetConverter(aCnvCharacterSetConverter)
       
    44 ,iBuffer(aBuffer)
       
    45 ,iObserver(aObserver)
       
    46 ,iResourceFile(aResourceFile)
       
    47 ,iFsSession(aFsSession)
       
    48 	{
       
    49 	}
       
    50 
       
    51 CHtmlToCrtConvCharsetConvert::~CHtmlToCrtConvCharsetConvert()
       
    52 	{
       
    53 	delete iArrayOfCharacterSetsAvailable;
       
    54 	}
       
    55 
       
    56 //==================================================================================
       
    57 //GetCharSetFromPasswordL - uses MConverterUiObserver::QueryPasswordL
       
    58 //==================================================================================
       
    59 TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromPasswordL()
       
    60 	{
       
    61 	if(iObserver)
       
    62 		{
       
    63 		HBufC16* unicodeCharsetName=iObserver->QueryPasswordL(KHtmlToCrtRequestCharset);
       
    64 				
       
    65 		if(unicodeCharsetName)
       
    66 			{
       
    67 			//convert from unicode
       
    68 			HBufC8* charsetName=HBufC8::NewLC(unicodeCharsetName->Length());
       
    69 			ConvertFromUnicodeL(*unicodeCharsetName, *charsetName);		
       
    70 			//convert standard name of character set to TUint identifier
       
    71 			iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*(charsetName), iFsSession);
       
    72 			CleanupStack::PopAndDestroy(charsetName);
       
    73 			}
       
    74 		}
       
    75 	return iCharacterSetIdentifier;
       
    76 	}
       
    77 
       
    78 //==================================================================================
       
    79 //GetCharSetFromMetaTagL - searches for META tag, then extracts charset value
       
    80 //<META http-equiv="Content-Type" content="text/html; charset = us-ascii">
       
    81 //==================================================================================
       
    82 TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromMetaTagL()
       
    83 	{
       
    84 	HBufC8* temp=HBufC8::NewLC(KSampleBufferSize);
       
    85 	TPtr8 buffer(temp->Des());
       
    86 	TInt offset=0;
       
    87 	iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset);
       
    88 
       
    89 	_LIT8(KMeta, "<meta");
       
    90 	_LIT8(KClosingTagIndicator, ">");
       
    91 	_LIT8(KCharset, "charset");
       
    92 	_LIT8(KEquals, "=");
       
    93 
       
    94 	offset=FindAndDeleteString(buffer, KMeta);
       
    95 	if(offset>=0)//meta found
       
    96 		{
       
    97 		//find >
       
    98 		TInt offset=buffer.FindF(KClosingTagIndicator);		
       
    99 		if(offset>=0)//> found
       
   100 			{
       
   101 			//delete > onwards - Delete() adjusts length so as not to delete beyond end of buffer
       
   102 			buffer.Delete(offset, buffer.Length());
       
   103 
       
   104 			//now buffer only contains META tag
       
   105 			offset=FindAndDeleteString(buffer, KCharset);
       
   106 			if(offset>=0)//charset found
       
   107 				{
       
   108 				offset=FindAndDeleteString(buffer, KEquals);
       
   109 				if(offset>=0)//= found
       
   110 					{
       
   111 					ParseCharsetValue(buffer);
       
   112 					//ConvertStandardNameOfCharacterSetToIdentifierL returns the UID of a 
       
   113 					//character set for a given Internet-standard name
       
   114 					//zero is returned if none found
       
   115 					iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(buffer, iFsSession);
       
   116 					}
       
   117 				}
       
   118 			}
       
   119 		}
       
   120 	CleanupStack::PopAndDestroy(temp);
       
   121 	return iCharacterSetIdentifier;
       
   122 	}
       
   123 
       
   124 //==================================================================================
       
   125 //FindAndDeleteString - deletes up to and including string
       
   126 //==================================================================================
       
   127 TInt CHtmlToCrtConvCharsetConvert::FindAndDeleteString(TDes8& aBuffer, const TDesC8& aString)
       
   128 	{
       
   129 	TInt offset=aBuffer.FindF(aString);
       
   130 	TInt length=aString.Length();
       
   131 	
       
   132 	if(offset>=0)//string found
       
   133 		{
       
   134 		//delete up to and including string
       
   135 		ASSERT(offset+length <= aBuffer.Length());
       
   136 		aBuffer.Delete(0, offset+length);
       
   137 		}
       
   138 	return offset;
       
   139 	}
       
   140 //==================================================================================
       
   141 //ParseCharsetValue - charset value read until " or white space character seen
       
   142 //==================================================================================
       
   143 void CHtmlToCrtConvCharsetConvert::ParseCharsetValue(TDes8& aBuffer)
       
   144 	{
       
   145 	aBuffer.TrimLeft();
       
   146 	TInt length=aBuffer.Length();
       
   147 	if(length)
       
   148 		{
       
   149 		TChar character;
       
   150 		TBuf8<KMetaTagCharsetValueBufferLength> charsetBuffer;
       
   151 		TBool finishedReadingValue=EFalse;
       
   152 		TInt ii=0;
       
   153 		while(ii<length && !finishedReadingValue)
       
   154 			{
       
   155 			character=aBuffer[ii++];
       
   156 			if(character.IsSpace() || character=='"')
       
   157 				{
       
   158 				finishedReadingValue=ETrue;
       
   159 				}
       
   160 			else
       
   161 				{
       
   162 				if(charsetBuffer.Length()<KMetaTagCharsetValueBufferLength)
       
   163 					{
       
   164 					charsetBuffer.Append(character);
       
   165 					}
       
   166 				else
       
   167 					{
       
   168 					ii=length;
       
   169 					}
       
   170 				}
       
   171 			}
       
   172 		aBuffer.Copy(charsetBuffer);
       
   173 		}
       
   174 	}
       
   175 //==================================================================================
       
   176 //GetCharSetUsingAutoDetectL
       
   177 //==================================================================================
       
   178 TBool CHtmlToCrtConvCharsetConvert::GetCharSetUsingAutoDetectL()
       
   179 	{
       
   180 	HBufC8* temp=HBufC8::NewLC(KSampleBufferSize);
       
   181 	TPtr8 buffer(temp->Des());
       
   182 	TInt offset=256;
       
   183 	//it's likely that the start of the file will contain the header, so offset
       
   184 	//is used to read from a position within the file
       
   185 	iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset);
       
   186 
       
   187 	TInt confidence=0;
       
   188 
       
   189 	if(buffer.Length())
       
   190 		{
       
   191 		iCnvCharacterSetConverter.AutoDetectCharacterSetL(confidence, iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, buffer);
       
   192 		}
       
   193 
       
   194 	CleanupStack::PopAndDestroy(temp);
       
   195 
       
   196 	if(iCharacterSetIdentifier == KCharacterSetIdentifierAscii)
       
   197 		{
       
   198 		//In order to support Latin letters, the character set needs to 
       
   199 		//be widened from ASCII to Code Page 1252
       
   200 		iCharacterSetIdentifier = KCharacterSetIdentifierCodePage1252;
       
   201 		}
       
   202 		
       
   203 	//confidence in range 0-100
       
   204 	if(confidence<KMimimumConfidenceLevel)
       
   205 		{
       
   206 		//low level of confidence in the chosen character set, so reset
       
   207 		// - this method of determining character set not successful
       
   208 		iCharacterSetIdentifier=0;
       
   209 		return EFalse;
       
   210 		}
       
   211 	else
       
   212 		return ETrue;
       
   213 	}
       
   214 
       
   215 //=============================================================
       
   216 //PrepareForConvertToUnicodeL
       
   217 //=============================================================
       
   218 void CHtmlToCrtConvCharsetConvert::PrepareForConvertToUnicodeL()
       
   219 	{
       
   220 	if(!iCharacterSetIdentifier)
       
   221 		{
       
   222 		//get default character set for locale from resource file
       
   223 		HBufC* unicodeDefaultCharacterSet=iResourceFile.ReadResourceHBufCLC(R_CNV_DEFAULT_CHARACTER_SET);
       
   224 
       
   225 		//convert from unicode
       
   226 		HBufC8* defaultCharacterSet=HBufC8::NewLC(unicodeDefaultCharacterSet->Length());
       
   227 		ConvertFromUnicodeL(*unicodeDefaultCharacterSet, *defaultCharacterSet);
       
   228 		
       
   229 		//convert standard name of character set to TUint identifier
       
   230 		iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*(defaultCharacterSet), iFsSession);
       
   231 		CleanupStack::PopAndDestroy(2); //defaultCharacterSet, unicodeDefaultCharacterSet
       
   232 		}
       
   233 
       
   234 	iCnvCharacterSetConverter.PrepareToConvertToOrFromL(iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, iFsSession);
       
   235 	}
       
   236 //=============================================================
       
   237 //ConvertFromUnicodeL
       
   238 //=============================================================
       
   239 void CHtmlToCrtConvCharsetConvert::ConvertFromUnicodeL(const TDesC& aUnicode, HBufC8& aNarrow)
       
   240 	{
       
   241 	TPtr8 ptrNarrow(aNarrow.Des());	
       
   242 	iCnvCharacterSetConverter.PrepareToConvertToOrFromL(KCharacterSetIdentifierAscii, *iArrayOfCharacterSetsAvailable, iFsSession);
       
   243 	iCnvCharacterSetConverter.ConvertFromUnicode(ptrNarrow, aUnicode);
       
   244 	}