charconvfw/charconv_fw/src/charconv/unicodebig.cpp
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * Big-Endian converter
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 #include <e32std.h>
       
    21 #include <convdata.h>
       
    22 #include "unicode.h"
       
    23 
       
    24 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
       
    25 
       
    26 
       
    27 GLREF_D const SCnvConversionData unicodeConversionDataBig=
       
    28 	{
       
    29 	SCnvConversionData::EFixedBigEndian,
       
    30 		{
       
    31 		ARRAY_LENGTH(unicodeVariableByteDataRanges),
       
    32 		unicodeVariableByteDataRanges
       
    33 		},
       
    34 		{
       
    35 		ARRAY_LENGTH(unicodeTounicodeDataRanges),
       
    36 		unicodeTounicodeDataRanges
       
    37 		},
       
    38 		{
       
    39 		ARRAY_LENGTH(unicodeTounicodeDataRanges),
       
    40 		unicodeTounicodeDataRanges
       
    41 		},
       
    42 	NULL,
       
    43 	NULL
       
    44 	};
       
    45 
       
    46 GLREF_C void IsCharacterSetUnicodeBig(TInt& aConfidenceLevel, const TDesC8& aSample)
       
    47 	{
       
    48 	
       
    49 	TInt sampleLength = aSample.Length();
       
    50 	aConfidenceLevel =70;
       
    51 	if (sampleLength < 2)
       
    52 		return;
       
    53 
       
    54 	if (aSample[0]==0xfe)
       
    55 		{
       
    56 		// The first byte is a possible ByteOrderMark
       
    57 		// Try matching the next character 
       
    58 		if(aSample[1]==0xff)
       
    59 			{
       
    60 			// the byte order mark could be 0xFEFF or 0xFFFE depending on 
       
    61 			// endianness of the sample text.
       
    62 			aConfidenceLevel=100;
       
    63 			}
       
    64 		}
       
    65 
       
    66 	for (TInt i = 0; i < sampleLength-1; ++i)
       
    67 		{
       
    68 		if (aSample[i] == 0x0d)
       
    69  			{
       
    70  			if (aSample[i+1] == 0x0a)
       
    71  				{
       
    72  				// Reduce the confidence level
       
    73  				aConfidenceLevel-= 25;
       
    74  				}
       
    75  			}
       
    76 		}
       
    77 
       
    78 	// if not 100% confident already, check if most even bytes are 0
       
    79 	#define MAX_SAMPLE_LENGTH 2048
       
    80 	if ( aConfidenceLevel < 100 )
       
    81 		{	
       
    82 		TInt repeat=0;
       
    83 		// only check the first 2k if big sample
       
    84 		TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength);
       
    85 		
       
    86 		// start from 0 and check the even bytes
       
    87 		for (TInt i = 0; i < length-1; i+=2)
       
    88 			{
       
    89 			if (aSample[i] == 0x0) 
       
    90 				repeat ++;
       
    91 			}
       
    92 
       
    93 		// if more than 80% even bytes zero then this IS big Endian
       
    94 		if ( (repeat * 100) /  (length * 5) >= 8)
       
    95 			aConfidenceLevel  = 100;
       
    96 		}
       
    97 
       
    98 	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
       
    99 	}
       
   100