charconvfw/Charconvplugin/src/iso2022kr.cpp
changeset 0 1fb32624e06b
child 16 56cd22a7a1cb
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). 
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:  ISO2022kr conversion plugin
       
    15 *
       
    16 */
       
    17 
       
    18 // INCLUDES
       
    19 #include <e32std.h>
       
    20 #include <charconv.h>
       
    21 #include <convgeneratedcpp.h>
       
    22 #include <ecom/implementationproxy.h>
       
    23 #include "cp949table.h"
       
    24 #include "charactersetconverter.h"
       
    25 
       
    26 static const TUint KBitsForNonStandardStates = 0x03;
       
    27 static const TUint KShiftedToKSCState = 0x01;
       
    28 
       
    29 static const TUint KMaxSizeOfTmpBuffer = 1024;
       
    30 
       
    31 static const TUint8 KMaxAscii = 0x9f;
       
    32 
       
    33 _LIT8(KLit8EscapeSequence, "\x1b\x24\x43");
       
    34 
       
    35 #define SHIFT_IN_BYTE  0x0F
       
    36 #define SHIFT_OUT_BYTE 0x0E
       
    37 
       
    38 typedef enum
       
    39 {
       
    40     EISO2022Initialize,
       
    41     EISO2022Ascii,
       
    42     EISO2022KSC
       
    43 } TISO2022FromUniState;
       
    44 
       
    45 // New Interface class
       
    46 class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface
       
    47 {
       
    48     public:
       
    49         virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
       
    50 
       
    51         virtual TInt ConvertFromUnicode(
       
    52             CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    53             const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    54             TDes8& aForeign, 
       
    55             const TDesC16& aUnicode, 
       
    56             CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters );
       
    57 
       
    58         virtual TInt ConvertToUnicode(
       
    59             CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    60             TDes16& aUnicode, 
       
    61             const TDesC8& aForeign, 
       
    62             TInt& aState, 
       
    63             TInt& aNumberOfUnconvertibleCharacters, 
       
    64             TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter );
       
    65 
       
    66         virtual TBool IsInThisCharacterSetL(
       
    67             TBool& aSetToTrue, 
       
    68             TInt& aConfidenceLevel, 
       
    69             const TDesC8& );
       
    70 
       
    71         static CISO2022KRImplementation* NewL();
       
    72 
       
    73         virtual ~CISO2022KRImplementation();
       
    74     private:
       
    75         CISO2022KRImplementation();
       
    76 };
       
    77 
       
    78 // FUNCTION DEFINITIONS
       
    79 const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters()
       
    80 	{
       
    81 	return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters();
       
    82 	}
       
    83 
       
    84 TInt CISO2022KRImplementation::ConvertFromUnicode(
       
    85     CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
    86     const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
       
    87     TDes8& aForeign, 
       
    88     const TDesC16& aUnicode, 
       
    89     CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
       
    90 	{
       
    91     TInt ret;
       
    92     TInt currPos = 3;
       
    93     TUint outputConversionFlags = 0;
       
    94     TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
    95     TISO2022FromUniState currState = EISO2022Initialize;
       
    96     TUint8 shiftByte = 0;
       
    97     TPtr8 shiftBytePtr(NULL, 0);
       
    98 
       
    99     aForeign.SetLength(0);
       
   100 
       
   101     /* Start with escape sequence */
       
   102     aForeign.Append( KLit8EscapeSequence );
       
   103 
       
   104     ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(),
       
   105                                                            aDefaultEndiannessOfForeignCharacters,
       
   106                                                            aReplacementForUnconvertibleUnicodeCharacters,
       
   107                                                            aForeign,
       
   108                                                            aUnicode,
       
   109                                                            aIndicesOfUnconvertibleCharacters,
       
   110                                                            outputConversionFlags, 
       
   111                                                            inputConversionFlags );
       
   112     /* Append shift in and out bytes as needed */
       
   113     while( currPos < aForeign.Length() )
       
   114         {
       
   115         TUint8 *currChar = (TUint8 *)aForeign.Mid(currPos).Ptr();
       
   116         if( *currChar > KMaxAscii )
       
   117             { /* KSC character */
       
   118             if( currState != EISO2022KSC )
       
   119                 { /* Insert shift out byte */
       
   120                 shiftByte = SHIFT_OUT_BYTE;
       
   121                 currState = EISO2022KSC;
       
   122                 }
       
   123 
       
   124             /* Clear the 8th bit */
       
   125             *currChar = (*currChar & ~(0x80));
       
   126             }
       
   127         else
       
   128             { /* ASCII character */
       
   129             if( currState != EISO2022Ascii )
       
   130                 { /* Insert shift in byte */
       
   131                 shiftByte = SHIFT_IN_BYTE;
       
   132                 currState = EISO2022Ascii;
       
   133                 }
       
   134             }
       
   135 
       
   136         if( shiftByte )
       
   137             {
       
   138             if( (aForeign.Length() + 1) > aForeign.MaxLength() )
       
   139                 { /* Make room for shift byte */
       
   140                 if( aForeign[ (aForeign.Length() - 1) ] > KMaxAscii )
       
   141                     { /* Drop a dual byte KSC character */
       
   142                     aForeign.SetLength( aForeign.Length() - 2 );
       
   143                     }
       
   144                 else
       
   145                     { /* Drop a single byte ASCII character */
       
   146                     aForeign.SetLength( aForeign.Length() - 1 );
       
   147                     }
       
   148                     /* Increase unconverted amount */
       
   149                     ret++;
       
   150                 /* TBD, propably should try to fix aIndicesOfUnconvertibleCharacters
       
   151                         if possible */
       
   152                 }
       
   153                 shiftBytePtr.Set( &shiftByte, 1, 1 );
       
   154                 aForeign.Insert( currPos, shiftBytePtr );
       
   155                 currPos++;
       
   156                 shiftByte = 0;
       
   157             }
       
   158 
       
   159         /* Skip current character */
       
   160         currPos++;
       
   161         }
       
   162 
       
   163     return ret;
       
   164     }
       
   165 
       
   166 TInt CISO2022KRImplementation::ConvertToUnicode(
       
   167     CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
       
   168     TDes16& aUnicode, 
       
   169     const TDesC8& aForeign, 
       
   170     TInt& aState, 
       
   171     TInt& aNumberOfUnconvertibleCharacters, 
       
   172     TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
       
   173 	{
       
   174     TInt err;
       
   175     TInt ret = 0;
       
   176     TInt currPos = 0;
       
   177     TInt convPos = 0;
       
   178     TInt shiftInPos = KErrNotFound;
       
   179     TInt shiftOutPos = KErrNotFound;
       
   180     TInt shiftPos = KErrNotFound;
       
   181     TInt escPos = KErrNotFound;
       
   182     TPtrC8 currSegment;
       
   183     TPtrC8 convSegment;
       
   184     TBool changeState = EFalse;
       
   185 
       
   186     TUint outputConversionFlags = 0;
       
   187     TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
       
   188     TInt numberOfUnconvertibleCharacters = 0;
       
   189     TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0;
       
   190     aNumberOfUnconvertibleCharacters = 0;
       
   191 
       
   192     while( currPos < aForeign.Length() )
       
   193         {
       
   194 
       
   195         currSegment.Set( aForeign.Mid( currPos ) );
       
   196 
       
   197         /* First change state if needed */
       
   198         if( changeState )
       
   199             {
       
   200             changeState = EFalse;
       
   201             if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
       
   202                 { /* Switch back to default ASCII */
       
   203                 aState &= ~(KShiftedToKSCState);
       
   204                 }
       
   205             else
       
   206                 { /* Switch to KSC */
       
   207                 aState |= KShiftedToKSCState; 
       
   208                 }
       
   209             }
       
   210 
       
   211         /* Search for escape which should be skipped */
       
   212         escPos = currSegment.Find( KLit8EscapeSequence );
       
   213         
       
   214         /* Search for shift in byte */
       
   215         shiftInPos = currSegment.Locate( SHIFT_IN_BYTE );
       
   216 
       
   217         /* Search for shift out byte */
       
   218         shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE );
       
   219 
       
   220         /* Set shift pos according to found shift bytes */
       
   221         if( shiftInPos == KErrNotFound &&
       
   222             shiftOutPos == KErrNotFound )
       
   223             { /* Neither found */
       
   224             shiftPos = KErrNotFound;
       
   225             }
       
   226         else
       
   227             {
       
   228             if( (shiftInPos != KErrNotFound) &&
       
   229                 ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) )
       
   230                 { /* shift in is nearer or shift out not found */
       
   231                 shiftPos = shiftInPos;
       
   232                 /* Set state change if needed */
       
   233                 if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
       
   234                     {
       
   235                     changeState = ETrue;
       
   236                     }
       
   237                 }
       
   238             else
       
   239                 { /* shift out must be nearer or shift in not fouind */
       
   240                 shiftPos = shiftOutPos;
       
   241                 /* Set state change if needed */
       
   242                 if( (aState & KBitsForNonStandardStates) != KShiftedToKSCState )
       
   243                     {
       
   244                     changeState = ETrue;
       
   245                     }
       
   246                 }
       
   247             }
       
   248 
       
   249         if( shiftPos == KErrNotFound )
       
   250             { /* Shift byte not found, same coding for the rest of the data */
       
   251             if( escPos == KErrNotFound )
       
   252                 { /* No escape sequence either, just convert the rest */
       
   253                 convSegment.Set( currSegment );
       
   254                 }
       
   255             }
       
   256         else if( ((escPos != KErrNotFound) && (shiftPos < escPos)) ||
       
   257                  (escPos == KErrNotFound) )
       
   258             { /* Shift byte found and it comes before escape sequence or no escape
       
   259                  sequence was found, convert data preceeding the shift byte if shift
       
   260                  byte isn't the first character */
       
   261                 if( shiftPos == 0 )
       
   262                 { /* No data to convert preceeds the shift byte, just skip it and continue */
       
   263                     currPos += 1;
       
   264                     continue;
       
   265                 }
       
   266                 convSegment.Set( currSegment.Left( shiftPos ) );
       
   267                 /* Clear to prevent convert to escape sequence */
       
   268                 escPos = KErrNotFound;
       
   269             }
       
   270 
       
   271         if( escPos != KErrNotFound )
       
   272             { /* Escape sequence found before any shift bytes,
       
   273                  clear possible state change and convert data
       
   274                  preceeding the escape sequence if
       
   275                  escape sequence is not at the beginning */
       
   276             changeState = EFalse;
       
   277             if( escPos == 0 )
       
   278                 { /* No data to convert preceeds the escape sequence, just skip it continue */
       
   279                 currPos += KLit8EscapeSequence().Length();
       
   280                 continue;
       
   281                 }
       
   282             convSegment.Set( currSegment.Left( escPos ) );
       
   283             }
       
   284 
       
   285         if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
       
   286             { /* Convert KSC encoded */
       
   287             HBufC8 *tmpForeign = NULL;
       
   288 
       
   289             if( (convSegment.Length() & 0x1) )
       
   290                 { /* KSC should have even amount of bytes */
       
   291                 ret = CCnvCharacterSetConverter::EErrorIllFormedInput;
       
   292                 }
       
   293             else
       
   294                 {
       
   295                 convPos = 0;
       
   296                 while( convPos < convSegment.Length() )
       
   297                     {
       
   298                     TRAP( err, tmpForeign = HBufC8::NewL( KMaxSizeOfTmpBuffer ) );
       
   299                     if( err != KErrNone )
       
   300                         {
       
   301                         User::Panic( _L("ISO-2022-KR"), err );
       
   302                         }
       
   303 
       
   304                     if( convSegment.Length() < KMaxSizeOfTmpBuffer )
       
   305                         { /* Convert whole segment */
       
   306                         tmpForeign->Des().Copy( convSegment );
       
   307                         }
       
   308                     else
       
   309                         { /* Convert in chunks */
       
   310                         if( (convPos + KMaxSizeOfTmpBuffer) >= convSegment.Length() )
       
   311                             { /* Last chunk */
       
   312                             tmpForeign->Des().Copy( convSegment.Mid( convPos ) );
       
   313                             }
       
   314                         else
       
   315                             {
       
   316                             tmpForeign->Des().Copy( convSegment.Mid( convPos, KMaxSizeOfTmpBuffer ) );
       
   317                             }
       
   318                         }
       
   319 
       
   320                     TUint8 *chars = (TUint8 *)tmpForeign->Des().Ptr();
       
   321                     for( TInt i = 0 ; i < tmpForeign->Length() ; i++ )
       
   322                         { /* Set highest bit in characters */
       
   323                         chars[i] |= 0x80;
       
   324                         }
       
   325 
       
   326                     numberOfUnconvertibleCharacters = 0;
       
   327                     ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
       
   328                                                                          aDefaultEndiannessOfForeignCharacters,
       
   329                                                                          aUnicode, *tmpForeign,
       
   330                                                                          numberOfUnconvertibleCharacters,
       
   331                                                                          indexOfFirstByteOfFirstUnconvertibleCharacter,
       
   332                                                                          outputConversionFlags,
       
   333                                                                          inputConversionFlags );
       
   334                     if( numberOfUnconvertibleCharacters != 0 &&
       
   335                         aNumberOfUnconvertibleCharacters == 0 )
       
   336                         { /* First uncovertible found, set index relative to actual input buffer*/
       
   337                         aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter);
       
   338                         }
       
   339 
       
   340                     aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
       
   341 
       
   342                     if( ret < 0 )
       
   343                         { /* Some error, break the loop,
       
   344                              errors are handled later */
       
   345                         delete tmpForeign;
       
   346                         break;
       
   347                         }
       
   348 
       
   349                     if( ret > 0 )
       
   350                         { /* Not all were converted, fix return value
       
   351                              to be relative to convSegment and break the loop */
       
   352                         ret = (convSegment.Length() - convPos - tmpForeign->Length() + ret);
       
   353                         delete tmpForeign;
       
   354                         break;
       
   355                         }
       
   356 
       
   357                     convPos += tmpForeign->Length();
       
   358                     delete tmpForeign;
       
   359                     }
       
   360                 }
       
   361             }
       
   362         else
       
   363             { /* Convert ASCII encoded by default, KSC can be used without setting highest bit */
       
   364                 numberOfUnconvertibleCharacters = 0;
       
   365                 ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
       
   366                                                                      aDefaultEndiannessOfForeignCharacters,
       
   367                                                                      aUnicode, convSegment,
       
   368                                                                      numberOfUnconvertibleCharacters,
       
   369                                                                      indexOfFirstByteOfFirstUnconvertibleCharacter,
       
   370                                                                      outputConversionFlags,
       
   371                                                                      inputConversionFlags );
       
   372                 if( numberOfUnconvertibleCharacters != 0 &&
       
   373                     aNumberOfUnconvertibleCharacters == 0 )
       
   374                     { /* First uncovertible found, set index relative to actual input buffer*/
       
   375                     aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter;
       
   376                     }
       
   377                 aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
       
   378             }
       
   379 
       
   380         if( ret < 0 )
       
   381             { /* Error during conversion */
       
   382             return ret;
       
   383             }
       
   384         else if( ret > 0 )
       
   385             { /* Not all characters where converted, return
       
   386                  value indicating how many bytes in total are left unconverted */
       
   387             return (aForeign.Length() - currPos - convSegment.Length() + ret);
       
   388             }
       
   389 
       
   390         /* Increase to skip converted data */
       
   391         currPos += convSegment.Length();
       
   392         if( escPos != KErrNotFound )
       
   393             { /* Increase to skip escape sequence */
       
   394             currPos += KLit8EscapeSequence().Length();
       
   395             }
       
   396         else if( shiftPos != KErrNotFound )
       
   397             { /* Increase to skip shift byte */
       
   398             currPos += 1;
       
   399             }
       
   400 
       
   401         }
       
   402 
       
   403     return 0;
       
   404 	}
       
   405 
       
   406 
       
   407 TBool CISO2022KRImplementation::IsInThisCharacterSetL(
       
   408     TBool& aSetToTrue, 
       
   409     TInt& aConfidenceLevel, 
       
   410     const TDesC8& aBuf)
       
   411 	{
       
   412     aSetToTrue=ETrue;
       
   413     aConfidenceLevel=200;
       
   414     
       
   415     TUint8 ch(0);
       
   416     for (TInt i=0;i<aBuf.Length();i++)
       
   417         {
       
   418         ch=aBuf[i];
       
   419         if (ch<0x7F)
       
   420             {
       
   421             continue;
       
   422             }
       
   423         else if (0xa1<=ch&&ch<=0xfe)
       
   424             {
       
   425             i++;
       
   426             __ASSERT_DEBUG(i<aBuf.Length(),User::Panic(_L("IS2022KR"),__LINE__));
       
   427             }
       
   428         else
       
   429             {
       
   430             aConfidenceLevel=0;
       
   431             aSetToTrue=EFalse;
       
   432             break;
       
   433             }
       
   434         }    
       
   435 	return aSetToTrue;
       
   436 	}
       
   437 
       
   438 CISO2022KRImplementation* CISO2022KRImplementation::NewL()
       
   439     {
       
   440     CISO2022KRImplementation* self = new(ELeave) CISO2022KRImplementation;
       
   441     return self;
       
   442     }
       
   443 
       
   444 CISO2022KRImplementation::CISO2022KRImplementation()
       
   445     {
       
   446     //default constructor.. do nothing
       
   447     }
       
   448 
       
   449 CISO2022KRImplementation::~CISO2022KRImplementation()
       
   450     {
       
   451     //default destructor .. do nothing
       
   452     }
       
   453 
       
   454 // ECOM CREATION FUNCTION
       
   455 const TImplementationProxy ImplementationTable[] = 
       
   456     {
       
   457     // Note: This is the same UID as defined in old mmp-file
       
   458     // Used also in 12221212.rss ( implementation_uid )
       
   459     IMPLEMENTATION_PROXY_ENTRY( 0x20010101, CISO2022KRImplementation::NewL )
       
   460     };
       
   461 
       
   462 EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount )
       
   463     {
       
   464     aTableCount = sizeof( ImplementationTable ) / sizeof(TImplementationProxy);
       
   465     return ImplementationTable;
       
   466     }
       
   467