charconvfw/charconv_fw/src/charconv/unicodebig.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 02 Feb 2010 02:02:46 +0200
changeset 0 1fb32624e06b
permissions -rw-r--r--
Revision: 201003 Kit: 201005
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
* All rights reserved.
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
* This component and the accompanying materials are made available
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
* which accompanies this distribution, and is available
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
*
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
* Initial Contributors:
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
*
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
* Contributors:
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
*
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
* Description: 
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
* Big-Endian converter
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
*
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
*/
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
#include <e32std.h>
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
#include <convdata.h>
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
#include "unicode.h"
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
GLREF_D const SCnvConversionData unicodeConversionDataBig=
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
	{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
	SCnvConversionData::EFixedBigEndian,
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
		{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
		ARRAY_LENGTH(unicodeVariableByteDataRanges),
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
		unicodeVariableByteDataRanges
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
		},
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
		{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
		ARRAY_LENGTH(unicodeTounicodeDataRanges),
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
		unicodeTounicodeDataRanges
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
		},
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
		{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
		ARRAY_LENGTH(unicodeTounicodeDataRanges),
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
		unicodeTounicodeDataRanges
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
		},
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
	NULL,
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
	NULL
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
	};
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
GLREF_C void IsCharacterSetUnicodeBig(TInt& aConfidenceLevel, const TDesC8& aSample)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
	{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
	
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
	TInt sampleLength = aSample.Length();
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
	aConfidenceLevel =70;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
	if (sampleLength < 2)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
		return;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
	if (aSample[0]==0xfe)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
		{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
		// The first byte is a possible ByteOrderMark
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
		// Try matching the next character 
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
		if(aSample[1]==0xff)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
			{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
			// the byte order mark could be 0xFEFF or 0xFFFE depending on 
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
			// endianness of the sample text.
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
			aConfidenceLevel=100;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
			}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
		}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
	for (TInt i = 0; i < sampleLength-1; ++i)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
		{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
		if (aSample[i] == 0x0d)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
 			{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
 			if (aSample[i+1] == 0x0a)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
 				{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
 				// Reduce the confidence level
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
 				aConfidenceLevel-= 25;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
 				}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
 			}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
		}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
	// if not 100% confident already, check if most even bytes are 0
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
	#define MAX_SAMPLE_LENGTH 2048
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
	if ( aConfidenceLevel < 100 )
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
		{	
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
		TInt repeat=0;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
		// only check the first 2k if big sample
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
		TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength);
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
		
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
		// start from 0 and check the even bytes
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
		for (TInt i = 0; i < length-1; i+=2)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
			{
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
			if (aSample[i] == 0x0) 
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
				repeat ++;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
			}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
		// if more than 80% even bytes zero then this IS big Endian
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
		if ( (repeat * 100) /  (length * 5) >= 8)
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
			aConfidenceLevel  = 100;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
		}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    98
	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    99
	}
1fb32624e06b Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100