charconvfw/charconvplugins/src/plugins/shiftjis.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <charconv.h>
       
    21 #include "shiftjis.h"
       
    22 #include <ecom/implementationproxy.h>
       
    23 #include <charactersetconverter.h>
       
    24 
       
    25 
       
    26 /**
       
    27 Shift-JIS character converter wrapper
       
    28 
       
    29 @internalTechnology 
       
    30 @released 9.1
       
    31 */
       
    32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
       
    33 	{
       
    34 
       
    35 public:
       
    36 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    37 
       
    38 	virtual TInt ConvertFromUnicode(
       
    39 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    40 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    41 		TDes8& aForeign, 
       
    42 		const TDesC16& aUnicode, 
       
    43 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    44 
       
    45 	virtual TInt ConvertToUnicode(
       
    46 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    47 		TDes16& aUnicode, 
       
    48 		const TDesC8& aForeign, 
       
    49 		TInt& aState, 
       
    50 		TInt& aNumberOfUnconvertibleCharacters, 
       
    51 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    52 
       
    53 	virtual TBool IsInThisCharacterSetL(
       
    54 		TBool& aSetToTrue, 
       
    55 		TInt& aConfidenceLevel, 
       
    56 		const TDesC8& aSample);
       
    57 
       
    58 	static CShiftJisConverterImpl* NewL();
       
    59 	virtual ~CShiftJisConverterImpl();
       
    60 
       
    61 private:
       
    62 	CShiftJisConverterImpl();
       
    63 
       
    64 	};
       
    65 
       
    66 /**
       
    67 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
       
    68 
       
    69 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
       
    70 @internalTechnology 
       
    71 */
       
    72 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
    73 	{
       
    74 	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
       
    75 	}
       
    76 
       
    77 TInt CShiftJisConverterImpl::ConvertFromUnicode(
       
    78 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    79 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    80 		TDes8& aForeign, 
       
    81 		const TDesC16& aUnicode, 
       
    82 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
    83 	{
       
    84 	return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
       
    85 	}
       
    86 
       
    87 
       
    88 /**
       
    89  Converts Shift-JIS encoded input text to Unicode
       
    90  
       
    91  NOTE: For debugging the selected character set is returned in the state.
       
    92  
       
    93   @released  9.1
       
    94   @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
       
    95              in the foreign character set.
       
    96   @param     aUnicode On return, contains the text converted into Unicode.
       
    97   @param     aForeign The non-Unicode source text to be converted.
       
    98   @param     aState Used to save state information across multiple calls
       
    99              to <code>ConvertToUnicode()</code>.
       
   100   @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
       
   101              converted.
       
   102   @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
       
   103              input text that could not be converted. A negative
       
   104              value indicates that all the characters were
       
   105              converted.
       
   106   @return 	 The number of unconverted bytes left at the end of the input descriptor 
       
   107  		     (e.g. because the output descriptor is not long enough to hold all the text), 
       
   108  		     or one of the error values defined in TError. 
       
   109   @internalTechnology 
       
   110 */
       
   111 TInt CShiftJisConverterImpl::ConvertToUnicode(
       
   112 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   113 		TDes16& aUnicode, 
       
   114 		const TDesC8& aForeign, 
       
   115 		TInt& /*aState*/, 
       
   116 		TInt& aNumberOfUnconvertibleCharacters, 
       
   117 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
   118 	{
       
   119 	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
   120 	}
       
   121 
       
   122 
       
   123 /**
       
   124  This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
       
   125  This method returns a value between 0 and 100, indicating how likely it 
       
   126  is that this is the correct converter, for the text supplied.  
       
   127  @internalTechnology 
       
   128  */
       
   129 TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
       
   130 		TBool& aSetToTrue, 
       
   131 		TInt& aConfidenceLevel, 
       
   132 		const TDesC8& aSample)
       
   133 	{
       
   134 	aSetToTrue=ETrue;
       
   135 	TInt sampleLength = aSample.Length();
       
   136 	aConfidenceLevel = 0;
       
   137 	TInt numberOfShiftJis=0;
       
   138 	TInt occurrence=0;
       
   139 	for (TInt i = 0; i < sampleLength; ++i)
       
   140 		{
       
   141 		// Check for JISX 0208:1997 Charset
       
   142 		// First Byte in range 0x81-0x9f, 0xe0-0xef
       
   143 		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
       
   144 			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
       
   145 			{
       
   146 			// check that the second byte is in range as well 
       
   147 			TInt increment1 = i+1;
       
   148 			if(increment1 >= sampleLength)
       
   149 				break;
       
   150 			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
       
   151 				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
       
   152 				{
       
   153 				// increase the confidence of this sample as ShiftJis
       
   154 				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
       
   155 	
       
   156 				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
       
   157 				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
       
   158 					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
       
   159 					occurrence++;
       
   160 				numberOfShiftJis++;
       
   161 				i++;
       
   162 				}
       
   163 			}
       
   164 		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
       
   165 		// eg EUC-JP's SS(Single shift) characters followed by the
       
   166 		if(aSample[i]==0x8e)
       
   167 			{
       
   168 			TInt increment1 = i+1;
       
   169 			if(increment1 >= sampleLength)
       
   170 				break;
       
   171 			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
       
   172 				{
       
   173 				// This could be EUC-JP format..
       
   174 				aConfidenceLevel=0;
       
   175 				i++;
       
   176 				}
       
   177 			}
       
   178 		if(aSample[i]==0x8f)
       
   179 			{
       
   180 			TInt increment1 = i+1;
       
   181 			TInt increment2 = i+2;
       
   182 			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
       
   183 				break;
       
   184 			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
       
   185 				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
       
   186 				{
       
   187 				// 	This is definitely EUC-JP format. 
       
   188 				aConfidenceLevel=0;
       
   189 				break;
       
   190 				}
       
   191 			}
       
   192 		} // for 
       
   193 
       
   194 	if(numberOfShiftJis)
       
   195 		{
       
   196 		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
       
   197 		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
       
   198 		}
       
   199 	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
       
   200 	return ETrue;
       
   201 	}
       
   202 
       
   203 
       
   204 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
       
   205 	{
       
   206 	CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
       
   207 	return self;
       
   208 	}
       
   209 
       
   210 
       
   211 CShiftJisConverterImpl::~CShiftJisConverterImpl()
       
   212 	{
       
   213 	}
       
   214 
       
   215 CShiftJisConverterImpl::CShiftJisConverterImpl()
       
   216 	{
       
   217 	}
       
   218 
       
   219 const TImplementationProxy ImplementationTable[] = 
       
   220 	{
       
   221 #ifdef KDDIAU_TEST
       
   222 		// for the test build use a special test UID which is called
       
   223 		//explicitly from test code
       
   224 		IMPLEMENTATION_PROXY_ENTRY(0x01000001,	CShiftJisConverterImpl::NewL)
       
   225 #else
       
   226 		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
       
   227 #endif
       
   228 	};
       
   229 
       
   230 
       
   231 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   232 	{
       
   233 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   234 
       
   235 	return ImplementationTable;
       
   236 	}