filehandling/htmltorichtextconverter/src/CHtmlToCrtConvCharsetConvert.cpp
changeset 0 2e3d3ce01487
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filehandling/htmltorichtextconverter/src/CHtmlToCrtConvCharsetConvert.cpp	Tue Feb 02 10:12:00 2010 +0200
@@ -0,0 +1,244 @@
+// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#include <s32file.h>
+
+#include "CHtmlToCrtConvCharsetConvert.h"
+#include "CHtmlToCrtConvBuffer.h"
+#include "CHtmlToCrtConverter.h"
+#include "MHtmlToCrtConvResourceFile.h"
+#include <chtmltocrtconverter.rsg>
+
+const TInt KSampleBufferSize = 256; //size of each sample of file text read when determining the charset
+const TInt KMetaTagCharsetValueBufferLength	= 40; //capacity of the buffer holding a charset value parsed from a META tag
+const TInt KMimimumConfidenceLevel = 50; //auto-detect confidence (range 0-100) below which the detected charset is rejected
+
+//==================================================================================
+//NewL - allocates the object and runs ConstructL() on it
+//leaves if the allocation fails or if ConstructL() leaves
+//==================================================================================
+CHtmlToCrtConvCharsetConvert* CHtmlToCrtConvCharsetConvert::NewL(CHtmlToCrtConvBuffer& aBuffer, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile, MConverterUiObserver* aObserver)
+	{
+	CHtmlToCrtConvCharsetConvert* converter=
+		new(ELeave) CHtmlToCrtConvCharsetConvert(aBuffer, aObserver, aFsSession, aCnvCharacterSetConverter, aResourceFile);
+
+	//keep the object on the cleanup stack for as long as ConstructL() may leave
+	CleanupStack::PushL(converter);
+	converter->ConstructL();
+	CleanupStack::Pop(converter);
+
+	return converter;
+	}
+
+//==================================================================================
+//ConstructL - second phase of construction
+//obtains the array of available character sets used by the later
+//auto-detect and prepare-to-convert calls
+//==================================================================================
+void CHtmlToCrtConvCharsetConvert::ConstructL()
+	{
+	iArrayOfCharacterSetsAvailable=
+		CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableL(iFsSession);
+	}
+
+//==================================================================================
+//Constructor - only stores the collaborators passed in by NewL()
+//note the parameter order differs from NewL(): aObserver comes second here
+//==================================================================================
+CHtmlToCrtConvCharsetConvert::CHtmlToCrtConvCharsetConvert(CHtmlToCrtConvBuffer& aBuffer, MConverterUiObserver* aObserver, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile)
+	: iCnvCharacterSetConverter(aCnvCharacterSetConverter)
+	, iBuffer(aBuffer)
+	, iObserver(aObserver)
+	, iResourceFile(aResourceFile)
+	, iFsSession(aFsSession)
+	{
+	//nothing further to do - the character set array is created in ConstructL()
+	}
+
+//==================================================================================
+//Destructor - releases the array of available character sets created in ConstructL()
+//==================================================================================
+CHtmlToCrtConvCharsetConvert::~CHtmlToCrtConvCharsetConvert()
+	{
+	//the only member deleted here; the other members are not released by this class
+	delete iArrayOfCharacterSetsAvailable;
+	}
+
+//==================================================================================
+//GetCharSetFromPasswordL - asks the observer for a character set name via
+//MConverterUiObserver::QueryPasswordL and converts it to a character set identifier
+//
+//Returns iCharacterSetIdentifier, i.e. non-zero if a character set is known.
+//The identifier is left unchanged if there is no observer or no name is supplied.
+//==================================================================================
+TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromPasswordL()
+	{
+	if(!iObserver)
+		{
+		//nobody to ask
+		return iCharacterSetIdentifier;
+		}
+
+	//NOTE(review): the returned buffer is never deleted in this function - if
+	//QueryPasswordL transfers ownership to the caller this is a leak; confirm
+	//against the observer's contract
+	HBufC16* requestedName=iObserver->QueryPasswordL(KHtmlToCrtRequestCharset);
+	if(!requestedName)
+		{
+		//observer did not supply a name
+		return iCharacterSetIdentifier;
+		}
+
+	//narrow the unicode name so that it can be looked up
+	HBufC8* narrowName=HBufC8::NewLC(requestedName->Length());
+	ConvertFromUnicodeL(*requestedName, *narrowName);
+
+	//map the standard name of the character set to its TUint identifier
+	iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*narrowName, iFsSession);
+	CleanupStack::PopAndDestroy(narrowName);
+
+	return iCharacterSetIdentifier;
+	}
+
+//==================================================================================
+//GetCharSetFromMetaTagL - searches the start of the file for a META tag that
+//carries a charset value, then converts that value to a character set identifier
+//<META http-equiv="Content-Type" content="text/html; charset = us-ascii">
+//
+//Every META tag that is complete within the sample is examined in turn, so a
+//charset declared in a later META tag is still found when an earlier META tag
+//(e.g. a "generator" or "description" tag) does not mention one.
+//
+//Returns iCharacterSetIdentifier, i.e. non-zero if a character set is known.
+//The identifier is left unchanged if no META tag with a charset value is found.
+//==================================================================================
+TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromMetaTagL()
+	{
+	_LIT8(KMeta, "<meta");
+	_LIT8(KClosingTagIndicator, ">");
+	_LIT8(KCharset, "charset");
+	_LIT8(KEquals, "=");
+
+	HBufC8* temp=HBufC8::NewLC(KSampleBufferSize);
+	TPtr8 buffer(temp->Des());
+	TInt sampleOffset=0;
+	iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, sampleOffset);
+
+	TBool searching=ETrue;
+	//each pass deletes up to and including the next "<meta"
+	while(searching && FindAndDeleteString(buffer, KMeta)>=0)
+		{
+		const TInt tagEnd=buffer.FindF(KClosingTagIndicator);
+		if(tagEnd<0)
+			{
+			//tag is not terminated within the sample, so nothing more can be examined
+			searching=EFalse;
+			}
+		else
+			{
+			//restrict the search to the contents of this META tag
+			const TPtrC8 tag(buffer.Left(tagEnd));
+			const TInt charsetPos=tag.FindF(KCharset);
+			TInt equalsPos=KErrNotFound;
+			if(charsetPos>=0)
+				{
+				//position of = relative to the end of "charset"
+				equalsPos=tag.Mid(charsetPos+KCharset().Length()).FindF(KEquals);
+				}
+
+			if(equalsPos>=0)
+				{
+				//delete > onwards - Delete() adjusts length so as not to delete beyond end of buffer
+				buffer.Delete(tagEnd, buffer.Length());
+				//delete up to and including the = which follows "charset"
+				buffer.Delete(0, charsetPos+KCharset().Length()+equalsPos+KEquals().Length());
+
+				ParseCharsetValue(buffer);
+				//ConvertStandardNameOfCharacterSetToIdentifierL returns the UID of a
+				//character set for a given Internet-standard name
+				//zero is returned if none found
+				iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(buffer, iFsSession);
+				searching=EFalse;
+				}
+			else
+				{
+				//no charset value in this tag - discard it and try the next META tag
+				buffer.Delete(0, tagEnd+1);
+				}
+			}
+		}
+
+	CleanupStack::PopAndDestroy(temp);
+	return iCharacterSetIdentifier;
+	}
+
+//==================================================================================
+//FindAndDeleteString - looks for aString in aBuffer using FindF() and, when it
+//is present, deletes everything up to and including it
+//
+//Returns the offset at which aString was found (before deletion), or the
+//negative value returned by FindF() when it was not found, in which case
+//aBuffer is not modified.
+//==================================================================================
+TInt CHtmlToCrtConvCharsetConvert::FindAndDeleteString(TDes8& aBuffer, const TDesC8& aString)
+	{
+	const TInt matchPos=aBuffer.FindF(aString);
+	if(matchPos<0)
+		{
+		//not present - leave the buffer alone
+		return matchPos;
+		}
+
+	//number of characters from the start of the buffer to the end of the match
+	const TInt deleteCount=matchPos+aString.Length();
+	ASSERT(deleteCount <= aBuffer.Length());
+	aBuffer.Delete(0, deleteCount);
+	return matchPos;
+	}
+//==================================================================================
+//ParseCharsetValue - reduces aBuffer to the charset value found at its start
+//
+//On entry aBuffer holds the text which follows "charset =". Leading white space
+//and one optional opening quote (" or ') are skipped, so that both
+//    charset=us-ascii   and   charset="us-ascii"
+//are understood. The value is then read until white space, a quote (" or '),
+//a semicolon or the end of the buffer is seen, or until
+//KMetaTagCharsetValueBufferLength characters have been read.
+//
+//On exit aBuffer contains only the value (possibly empty).
+//==================================================================================
+void CHtmlToCrtConvCharsetConvert::ParseCharsetValue(TDes8& aBuffer)
+	{
+	aBuffer.TrimLeft();
+
+	//a quoted value starts with a quote, which is not part of the charset name
+	if(aBuffer.Length() && (aBuffer[0]=='"' || aBuffer[0]=='\''))
+		{
+		aBuffer.Delete(0, 1);
+		aBuffer.TrimLeft();
+		}
+
+	const TInt length=aBuffer.Length();
+	if(length)
+		{
+		TBuf8<KMetaTagCharsetValueBufferLength> charsetBuffer;
+		TBool finishedReadingValue=EFalse;
+		TInt ii=0;
+		//stop at the first terminator, or when charsetBuffer is full
+		while(ii<length && !finishedReadingValue && charsetBuffer.Length()<KMetaTagCharsetValueBufferLength)
+			{
+			const TChar character(aBuffer[ii++]);
+			if(character.IsSpace() || character=='"' || character=='\'' || character==';')
+				{
+				finishedReadingValue=ETrue;
+				}
+			else
+				{
+				charsetBuffer.Append(character);
+				}
+			}
+		aBuffer.Copy(charsetBuffer);
+		}
+	}
+//==================================================================================
+//GetCharSetUsingAutoDetectL - samples the file and asks the character set
+//converter to auto-detect the character set of the sample
+//
+//The sample is normally taken from a position within the file, because the
+//start of the file is likely to contain the header. If no text is obtained
+//from that position the sample is taken from the start of the file instead,
+//so that auto-detection is still attempted.
+//
+//Returns ETrue if a character set was detected with a confidence of at least
+//KMimimumConfidenceLevel; otherwise iCharacterSetIdentifier is reset to zero
+//and EFalse is returned.
+//==================================================================================
+TBool CHtmlToCrtConvCharsetConvert::GetCharSetUsingAutoDetectL()
+	{
+	HBufC8* temp=HBufC8::NewLC(KSampleBufferSize);
+	TPtr8 buffer(temp->Des());
+	TInt offset=256;
+	//it's likely that the start of the file will contain the header, so offset
+	//is used to read from a position within the file
+	iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset);
+
+	if(!buffer.Length())
+		{
+		//nothing available at that position - fall back to the start of the file
+		offset=0;
+		iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset);
+		}
+
+	TInt confidence=0;
+
+	if(buffer.Length())
+		{
+		iCnvCharacterSetConverter.AutoDetectCharacterSetL(confidence, iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, buffer);
+		}
+
+	CleanupStack::PopAndDestroy(temp);
+
+	if(iCharacterSetIdentifier == KCharacterSetIdentifierAscii)
+		{
+		//In order to support Latin letters, the character set needs to
+		//be widened from ASCII to Code Page 1252
+		iCharacterSetIdentifier = KCharacterSetIdentifierCodePage1252;
+		}
+
+	//confidence in range 0-100
+	if(confidence<KMimimumConfidenceLevel)
+		{
+		//low level of confidence in the chosen character set, so reset
+		// - this method of determining character set not successful
+		iCharacterSetIdentifier=0;
+		return EFalse;
+		}
+
+	return ETrue;
+	}
+
+//=============================================================
+//PrepareForConvertToUnicodeL - prepares the character set converter
+//for the character set held in iCharacterSetIdentifier
+//
+//If no character set has been determined (identifier is zero) the
+//default character set name is read from the resource file first.
+//=============================================================
+void CHtmlToCrtConvCharsetConvert::PrepareForConvertToUnicodeL()
+	{
+	if(iCharacterSetIdentifier==0)
+		{
+		//default character set name, read from the resource file
+		HBufC* wideName=iResourceFile.ReadResourceHBufCLC(R_CNV_DEFAULT_CHARACTER_SET);
+
+		//narrow copy of the name for the identifier lookup
+		HBufC8* narrowName=HBufC8::NewLC(wideName->Length());
+		ConvertFromUnicodeL(*wideName, *narrowName);
+
+		//map the standard name of the character set to its TUint identifier
+		iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*narrowName, iFsSession);
+
+		CleanupStack::PopAndDestroy(2); //narrowName, wideName
+		}
+
+	//NOTE(review): the value returned by PrepareToConvertToOrFromL is ignored here
+	iCnvCharacterSetConverter.PrepareToConvertToOrFromL(iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, iFsSession);
+	}
+//=============================================================
+//ConvertFromUnicodeL - converts aUnicode to 8 bit text in aNarrow
+//
+//The shared character set converter is prepared for ASCII to do this,
+//so it must be prepared again before it is used for another character set.
+//NOTE(review): the values returned by PrepareToConvertToOrFromL and
+//ConvertFromUnicode are both ignored
+//=============================================================
+void CHtmlToCrtConvCharsetConvert::ConvertFromUnicodeL(const TDesC& aUnicode, HBufC8& aNarrow)
+	{
+	iCnvCharacterSetConverter.PrepareToConvertToOrFromL(KCharacterSetIdentifierAscii, *iArrayOfCharacterSetsAvailable, iFsSession);
+
+	//modifiable descriptor over the caller's heap buffer, used as the output
+	TPtr8 narrowDes(aNarrow.Des());
+	iCnvCharacterSetConverter.ConvertFromUnicode(narrowDes, aUnicode);
+	}