charconvfw/charconvplugins/src/plugins/shiftjis.cpp
changeset 0 1fb32624e06b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/charconvfw/charconvplugins/src/plugins/shiftjis.cpp	Tue Feb 02 02:02:46 2010 +0200
@@ -0,0 +1,236 @@
+/*
+* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+
+#include <e32std.h>
+#include <charconv.h>
+#include "shiftjis.h"
+#include <ecom/implementationproxy.h>
+#include <charactersetconverter.h>
+
+
+/**
+Shift-JIS character converter wrapper
+
+Plug-in class derived from CCharacterSetConverterPluginInterface. It declares
+the interface's four virtual functions (replacement sequence, conversion in
+both directions, and the auto-detection query) plus a static NewL() factory.
+The constructor is private, so NewL() is the only way for outside code to
+create an instance. The class declares no data members of its own.
+
+@internalTechnology 
+@released 9.1
+*/
+class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
+	{
+
+public:
+	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();	// byte sequence used in place of unconvertible Unicode characters
+
+	virtual TInt ConvertFromUnicode(	// Unicode -> Shift-JIS
+		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
+		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
+		TDes8& aForeign, 
+		const TDesC16& aUnicode, 
+		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
+
+	virtual TInt ConvertToUnicode(	// Shift-JIS -> Unicode
+		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
+		TDes16& aUnicode, 
+		const TDesC8& aForeign, 
+		TInt& aState, 
+		TInt& aNumberOfUnconvertibleCharacters, 
+		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
+
+	virtual TBool IsInThisCharacterSetL(	// auto-detection: scores how Shift-JIS-like aSample looks
+		TBool& aSetToTrue, 
+		TInt& aConfidenceLevel, 
+		const TDesC8& aSample);
+
+	static CShiftJisConverterImpl* NewL();	// factory; the function registered in the implementation proxy table
+	virtual ~CShiftJisConverterImpl();
+
+private:
+	CShiftJisConverterImpl();	// private: instances are created through NewL()
+
+	};
+
+/**
+Get the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
+
+This wrapper adds no processing of its own: the descriptor is obtained from
+CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters() and passed straight back.
+
+@return The descriptor returned by CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters().
+@internalTechnology 
+*/
+const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
+	{
+	const TDesC8& replacement = CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
+	return replacement;
+	}
+
+/**
+ Converts Unicode text to Shift-JIS by delegating to CnvShiftJis::ConvertFromUnicode().
+
+ Every argument is forwarded unchanged and in the same order, and the result
+ of the delegate is returned unmodified.
+
+  @param     aDefaultEndiannessOfForeignCharacters Forwarded to CnvShiftJis.
+  @param     aReplacementForUnconvertibleUnicodeCharacters Forwarded to CnvShiftJis.
+  @param     aForeign Output descriptor for the Shift-JIS text, forwarded to CnvShiftJis.
+  @param     aUnicode The Unicode source text, forwarded to CnvShiftJis.
+  @param     aIndicesOfUnconvertibleCharacters Forwarded to CnvShiftJis.
+  @return    The value returned by CnvShiftJis::ConvertFromUnicode().
+  @internalTechnology 
+*/
+TInt CShiftJisConverterImpl::ConvertFromUnicode(
+		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
+		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
+		TDes8& aForeign, 
+		const TDesC16& aUnicode, 
+		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
+	{
+	const TInt result = CnvShiftJis::ConvertFromUnicode(
+		aDefaultEndiannessOfForeignCharacters,
+		aReplacementForUnconvertibleUnicodeCharacters,
+		aForeign,
+		aUnicode,
+		aIndicesOfUnconvertibleCharacters);
+	return result;
+	}
+
+
+/**
+ Converts Shift-JIS encoded input text to Unicode by delegating to
+ CnvShiftJis::ConvertToUnicode().
+ 
+ The state argument required by the plug-in interface is not used here: it is
+ left unnamed, is never read or written, and is not passed to CnvShiftJis.
+ 
+  @released  9.1
+  @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
+             in the foreign character set. Forwarded to CnvShiftJis.
+  @param     aUnicode On return, contains the text converted into Unicode. Forwarded to CnvShiftJis.
+  @param     aForeign The non-Unicode source text to be converted. Forwarded to CnvShiftJis.
+  @param     (unnamed TInt&) The interface's aState argument; ignored by this implementation.
+  @param     aNumberOfUnconvertibleCharacters Forwarded to CnvShiftJis, which reports the count of
+             unconvertible characters through it.
+  @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter Forwarded to CnvShiftJis, which reports
+             through it the index of the first byte in the input text that could not be converted.
+  @return 	 The value returned by CnvShiftJis::ConvertToUnicode(), unmodified. Under the plug-in
+ 		     interface this is the number of unconverted bytes left at the end of the input
+ 		     descriptor, or an error value.
+  @internalTechnology 
+*/
+TInt CShiftJisConverterImpl::ConvertToUnicode(
+		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
+		TDes16& aUnicode, 
+		const TDesC8& aForeign, 
+		TInt& /*aState*/, 
+		TInt& aNumberOfUnconvertibleCharacters, 
+		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
+	{
+	const TInt result = CnvShiftJis::ConvertToUnicode(
+		aDefaultEndiannessOfForeignCharacters,
+		aUnicode,
+		aForeign,
+		aNumberOfUnconvertibleCharacters,
+		aIndexOfFirstByteOfFirstUnconvertibleCharacter);
+	return result;
+	}
+
+
+/**
+ This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
+ It scores how likely it is that the supplied sample is Shift-JIS text.
+
+ The sample is scanned once, left to right:
+  - A byte in 0x81-0x9f or 0xe0-0xef followed by a byte in 0x40-0x7e or
+    0x80-0xfc is counted as a double-byte character. The first such pair sets
+    the confidence to 60 (whenever the confidence is not positive), each
+    further pair adds 5, and the scan position moves on to the second byte.
+    Pairs inside 0x829f-0x82f1 or 0x8340-0x8396 are also counted as kana.
+  - The byte at the (possibly advanced) position is then compared with the
+    EUC-JP shift bytes: 0x8e followed by 0xa1-0xdf resets the confidence to 0
+    and skips one byte; 0x8f followed by two bytes in 0xa1-0xfe resets the
+    confidence to 0 and ends the scan.
+  - The scan also ends as soon as one of these tests needs a byte beyond the
+    end of the sample.
+
+ After the scan, if at least one pair was counted, the confidence is limited
+ to 0..100 and then reduced by Max(0, 30 - kanaPercentage), so samples with
+ few kana score lower. The final value is never negative.
+
+ @param aSetToTrue       Always set to ETrue.
+ @param aConfidenceLevel On return, the confidence score in the range 0..100.
+ @param aSample          The bytes to examine.
+ @return Always ETrue.
+ @internalTechnology 
+ */
+TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
+		TBool& aSetToTrue, 
+		TInt& aConfidenceLevel, 
+		const TDesC8& aSample)
+	{
+	const TInt length = aSample.Length();
+	TInt pairCount = 0;	// double-byte candidates accepted so far
+	TInt kanaCount = 0;	// how many of those lie in the kana ranges
+
+	aSetToTrue = ETrue;
+	aConfidenceLevel = 0;
+
+	TInt pos = 0;
+	while (pos < length)
+		{
+		// JIS X 0208:1997 lead byte: 0x81-0x9f or 0xe0-0xef
+		const TUint lead = aSample[pos];
+		const TBool isLeadByte =
+			((lead >= 0x81) && (lead <= 0x9f)) || ((lead >= 0xe0) && (lead <= 0xef));
+		if (isLeadByte)
+			{
+			if (pos + 1 >= length)
+				{
+				break;	// lead byte is the last byte of the sample
+				}
+			const TUint trail = aSample[pos + 1];
+			const TBool isTrailByte =
+				((trail >= 0x40) && (trail <= 0x7e)) || ((trail >= 0x80) && (trail <= 0xfc));
+			if (isTrailByte)
+				{
+				// first hit starts at 60, every further hit adds 5
+				if (aConfidenceLevel > 0)
+					{
+					aConfidenceLevel += 5;
+					}
+				else
+					{
+					aConfidenceLevel = 60;
+					}
+
+				const TUint code = (lead << 8) | trail;
+				const TBool inFirstKanaRange = (code >= 0x829f) && (code <= 0x82f1);
+				const TBool inSecondKanaRange = (code >= 0x8340) && (code <= 0x8396);
+				if (inFirstKanaRange || inSecondKanaRange)
+					{
+					++kanaCount;
+					}
+				++pairCount;
+				++pos;	// continue from the second byte of the pair
+				}
+			}
+
+		// EUC-JP single shift 2: 0x8e followed by 0xa1-0xdf.
+		// Note that pos may already have been advanced by the pair test above.
+		if (aSample[pos] == 0x8e)
+			{
+			const TInt next = pos + 1;
+			if (next >= length)
+				{
+				break;
+				}
+			const TUint follower = aSample[next];
+			if ((follower >= 0xa1) && (follower <= 0xdf))
+				{
+				// This could be EUC-JP format..
+				aConfidenceLevel = 0;
+				++pos;
+				}
+			}
+
+		// EUC-JP single shift 3: 0x8f followed by two bytes in 0xa1-0xfe.
+		if (aSample[pos] == 0x8f)
+			{
+			if (pos + 2 >= length)
+				{
+				break;	// fewer than two bytes remain after the 0x8f
+				}
+			const TUint second = aSample[pos + 1];
+			const TUint third = aSample[pos + 2];
+			const TBool secondInRange = (second >= 0xa1) && (second <= 0xfe);
+			const TBool thirdInRange = (third >= 0xa1) && (third <= 0xfe);
+			if (secondInRange && thirdInRange)
+				{
+				// This is definitely EUC-JP format.
+				aConfidenceLevel = 0;
+				break;
+				}
+			}
+
+		++pos;
+		}
+
+	if (pairCount)
+		{
+		// limit to 0..100 before applying the low-kana penalty
+		if (aConfidenceLevel > 100)
+			{
+			aConfidenceLevel = 100;
+			}
+		else if (aConfidenceLevel < 0)
+			{
+			aConfidenceLevel = 0;
+			}
+		const TInt kanaPercentage = kanaCount * 100 / pairCount;
+		aConfidenceLevel -= Max(0, (30 - kanaPercentage));
+		}
+
+	if (aConfidenceLevel < 0)
+		{
+		aConfidenceLevel = 0;
+		}
+	return ETrue;
+	}
+
+
+/**
+ Factory function: allocates a CShiftJisConverterImpl with new(ELeave) and
+ returns it. No further construction step is performed.
+
+ @return The newly allocated converter; the caller receives the raw pointer.
+ @internalTechnology 
+ */
+CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
+	{
+	return new(ELeave) CShiftJisConverterImpl();
+	}
+
+
+CShiftJisConverterImpl::~CShiftJisConverterImpl()	// Destructor.
+	{	// Empty body: this class declares no data members of its own, so nothing is released here.
+	}
+
+CShiftJisConverterImpl::CShiftJisConverterImpl()	// Default constructor; declared private, called only from NewL().
+	{	// Empty body: this class declares no data members of its own, so nothing is initialised here.
+	}
+
+const TImplementationProxy ImplementationTable[] = 	// proxy table: a single entry pairing an implementation UID with CShiftJisConverterImpl::NewL
+	{
+#ifdef KDDIAU_TEST
+		// Test build (KDDIAU_TEST defined): register under a special test UID
+		// which is requested explicitly from test code.
+		IMPLEMENTATION_PROXY_ENTRY(0x01000001,	CShiftJisConverterImpl::NewL)
+#else
+		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)	// UID used when KDDIAU_TEST is not defined
+#endif
+	};
+
+
+/**
+ Exported entry point that hands out this plug-in's implementation proxy table.
+
+ @param aTableCount On return, the number of entries in ImplementationTable.
+ @return Pointer to the first entry of ImplementationTable.
+ */
+EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
+	{
+	const TInt entryCount = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
+	aTableCount = entryCount;
+	return ImplementationTable;
+	}