charconvfw/charconvplugins/src/plugins/SHIFTJIS_2.CPP
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:       
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <e32std.h>
       
    20 #include <charconv.h>
       
    21 #include "SHIFTJIS_2.H"
       
    22 #include <ecom/implementationproxy.h>
       
    23 #include "charactersetconverter.h"
       
    24 #include "featmgr/featmgr.h"
       
    25 
       
    26 /**
       
    27 Shift-JIS character converter wrapper
       
    28 
       
    29 @internalTechnology 
       
    30 @released 9.1
       
    31 */
       
    32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
       
    33 	{
       
    34 
       
    35 public:
       
    36 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    37 
       
    38 	virtual TInt ConvertFromUnicode(
       
    39 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    40 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    41 		TDes8& aForeign, 
       
    42 		const TDesC16& aUnicode, 
       
    43 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
       
    44 
       
    45 	virtual TInt ConvertToUnicode(
       
    46 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    47 		TDes16& aUnicode, 
       
    48 		const TDesC8& aForeign, 
       
    49 		TInt& aState, 
       
    50 		TInt& aNumberOfUnconvertibleCharacters, 
       
    51 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
    52 
       
    53 	virtual TBool IsInThisCharacterSetL(
       
    54 		TBool& aSetToTrue, 
       
    55 		TInt& aConfidenceLevel, 
       
    56 		const TDesC8& aSample);
       
    57 
       
    58 	static CShiftJisConverterImpl* NewL();
       
    59 	virtual ~CShiftJisConverterImpl();
       
    60 
       
    61 private:
       
    62 	CShiftJisConverterImpl();
       
    63 	void ConstructL();
       
    64 
       
    65 	};
       
    66 
       
    67 /**
       
    68 Get the the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
       
    69 
       
    70 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
       
    71 @internalTechnology 
       
    72 */
       
    73 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
       
    74 	{
       
    75 	return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
       
    76 	}
       
    77 
       
    78 TInt CShiftJisConverterImpl::ConvertFromUnicode(
       
    79 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    80 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    81 		TDes8& aForeign, 
       
    82 		const TDesC16& aUnicode, 
       
    83 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
    84 	{
       
    85 	return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
       
    86 	}
       
    87 
       
    88 
       
    89 /**
       
    90  Converts Shift-JIS encoded input text to Unicode
       
    91  
       
    92  NOTE: For debugging the selected character set is returned in the state.
       
    93  
       
    94   @released  9.1
       
    95   @param     aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
       
    96              in the foreign character set.
       
    97   @param     aUnicode On return, contains the text converted into Unicode.
       
    98   @param     aForeign The non-Unicode source text to be converted.
       
    99   @param     aState Used to save state information across multiple calls
       
   100              to <code>ConvertToUnicode()</code>.
       
   101   @param     aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
       
   102              converted.
       
   103   @param     aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first bytein the
       
   104              input text that could not be converted. A negative
       
   105              value indicates that all the characters were
       
   106              converted.
       
   107   @return 	 The number of unconverted bytes left at the end of the input descriptor 
       
   108  		     (e.g. because the output descriptor is not long enough to hold all the text), 
       
   109  		     or one of the error values defined in TError. 
       
   110   @internalTechnology 
       
   111 */
       
   112 TInt CShiftJisConverterImpl::ConvertToUnicode(
       
   113 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   114 		TDes16& aUnicode, 
       
   115 		const TDesC8& aForeign, 
       
   116 		TInt& /*aState*/, 
       
   117 		TInt& aNumberOfUnconvertibleCharacters, 
       
   118 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
   119 	{
       
   120 	return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
       
   121 	}
       
   122 
       
   123 
       
   124 /**
       
   125  This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL(). 
       
   126  This method returns a value between 0 and 100, indicating how likely it 
       
   127  is that this is the correct converter, for the text supplied.  
       
   128  @internalTechnology 
       
   129  */
       
   130 TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
       
   131 		TBool& aSetToTrue, 
       
   132 		TInt& aConfidenceLevel, 
       
   133 		const TDesC8& aSample)
       
   134 	{
       
   135 	aSetToTrue=ETrue;
       
   136 	TInt sampleLength = aSample.Length();
       
   137 	aConfidenceLevel = 0;
       
   138 	TInt numberOfShiftJis=0;
       
   139 	TInt occurrence=0;
       
   140 	for (TInt i = 0; i < sampleLength; ++i)
       
   141 		{
       
   142 		// Check for JISX 0208:1997 Charset
       
   143 		// First Byte in range 0x81-0x9f, 0xe0-0xef
       
   144 		if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
       
   145 			((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
       
   146 			{
       
   147 			// check that the second byte is in range as well 
       
   148 			TInt increment1 = i+1;
       
   149 			if(increment1 >= sampleLength)
       
   150 				break;
       
   151 			if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
       
   152 				((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
       
   153 				{
       
   154 				// increase the confidence of this sample as ShiftJis
       
   155 				aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
       
   156 	
       
   157 				TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
       
   158 				if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
       
   159 					(charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
       
   160 					occurrence++;
       
   161 				numberOfShiftJis++;
       
   162 				i++;
       
   163 				}
       
   164 			}
       
   165 		// Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
       
   166 		// eg EUC-JP's SS(Single shift) characters followed by the
       
   167 		if(aSample[i]==0x8e)
       
   168 			{
       
   169 			TInt increment1 = i+1;
       
   170 			if(increment1 >= sampleLength)
       
   171 				break;
       
   172 			if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
       
   173 				{
       
   174 				// This could be EUC-JP format..
       
   175 				aConfidenceLevel=0;
       
   176 				i++;
       
   177 				}
       
   178 			}
       
   179 		if(aSample[i]==0x8f)
       
   180 			{
       
   181 			TInt increment1 = i+1;
       
   182 			TInt increment2 = i+2;
       
   183 			if((increment1 >= sampleLength) || (increment2 >= sampleLength))
       
   184 				break;
       
   185 			if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) && 
       
   186 				((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
       
   187 				{
       
   188 				// 	This is definitely EUC-JP format. 
       
   189 				aConfidenceLevel=0;
       
   190 				break;
       
   191 				}
       
   192 			}
       
   193         // Check the half width Katakana
       
   194         if (aSample[i]>=0xa1 && aSample[i]<=0xdf)
       
   195             {
       
   196             // increase the confidence of this sample as ShiftJis
       
   197             aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75;
       
   198             occurrence++;
       
   199             numberOfShiftJis++;
       
   200             }
       
   201         else if (aSample[i]>=0xf0)
       
   202             {
       
   203             aConfidenceLevel=0;
       
   204             }
       
   205 		} // for 
       
   206 
       
   207 	if(numberOfShiftJis)
       
   208 		{
       
   209 		aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
       
   210 		aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
       
   211 		}
       
   212 	aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
       
   213 	return ETrue;
       
   214 	}
       
   215 
       
   216 
       
   217 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
       
   218 	{
       
   219 	CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
       
   220 	CleanupStack::PushL(self);
       
   221 	self->ConstructL();
       
   222 	CleanupStack::Pop(self);
       
   223 	return self;
       
   224 	}
       
   225 
       
   226 
       
   227 CShiftJisConverterImpl::~CShiftJisConverterImpl()
       
   228 	{
       
   229     FeatureManager::UnInitializeLib();	
       
   230 	}
       
   231 
       
   232 CShiftJisConverterImpl::CShiftJisConverterImpl()
       
   233 	{
       
   234 	}
       
   235 
       
   236 
       
   237 void CShiftJisConverterImpl::ConstructL()
       
   238 	{
       
   239     FeatureManager::InitializeLibL();	
       
   240 	}
       
   241 
       
   242 const TImplementationProxy ImplementationTable[] = 
       
   243 	{
       
   244 		IMPLEMENTATION_PROXY_ENTRY(0x10000FBD,	CShiftJisConverterImpl::NewL)
       
   245 	};
       
   246 
       
   247 
       
   248 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   249 	{
       
   250 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   251 
       
   252 	return ImplementationTable;
       
   253 	}