|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Big-Endian converter |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 #include <e32std.h> |
|
21 #include <convdata.h> |
|
22 #include "unicode.h" |
|
23 |
|
// Number of elements in a statically sized array: the total size of the array
// divided by the size of its first element. Only meaningful for true arrays;
// applied to a pointer it silently yields sizeof(pointer)/sizeof(element).
#define ARRAY_LENGTH(aArray) (sizeof(aArray) / sizeof((aArray)[0]))
|
25 |
|
26 |
|
27 GLREF_D const SCnvConversionData unicodeConversionDataBig= |
|
28 { |
|
29 SCnvConversionData::EFixedBigEndian, |
|
30 { |
|
31 ARRAY_LENGTH(unicodeVariableByteDataRanges), |
|
32 unicodeVariableByteDataRanges |
|
33 }, |
|
34 { |
|
35 ARRAY_LENGTH(unicodeTounicodeDataRanges), |
|
36 unicodeTounicodeDataRanges |
|
37 }, |
|
38 { |
|
39 ARRAY_LENGTH(unicodeTounicodeDataRanges), |
|
40 unicodeTounicodeDataRanges |
|
41 }, |
|
42 NULL, |
|
43 NULL |
|
44 }; |
|
45 |
|
46 GLREF_C void IsCharacterSetUnicodeBig(TInt& aConfidenceLevel, const TDesC8& aSample) |
|
47 { |
|
48 |
|
49 TInt sampleLength = aSample.Length(); |
|
50 aConfidenceLevel =70; |
|
51 if (sampleLength < 2) |
|
52 return; |
|
53 |
|
54 if (aSample[0]==0xfe) |
|
55 { |
|
56 // The first byte is a possible ByteOrderMark |
|
57 // Try matching the next character |
|
58 if(aSample[1]==0xff) |
|
59 { |
|
60 // the byte order mark could be 0xFEFF or 0xFFFE depending on |
|
61 // endianness of the sample text. |
|
62 aConfidenceLevel=100; |
|
63 } |
|
64 } |
|
65 |
|
66 for (TInt i = 0; i < sampleLength-1; ++i) |
|
67 { |
|
68 if (aSample[i] == 0x0d) |
|
69 { |
|
70 if (aSample[i+1] == 0x0a) |
|
71 { |
|
72 // Reduce the confidence level |
|
73 aConfidenceLevel-= 25; |
|
74 } |
|
75 } |
|
76 } |
|
77 |
|
78 // if not 100% confident already, check if most even bytes are 0 |
|
79 #define MAX_SAMPLE_LENGTH 2048 |
|
80 if ( aConfidenceLevel < 100 ) |
|
81 { |
|
82 TInt repeat=0; |
|
83 // only check the first 2k if big sample |
|
84 TInt length =( sampleLength > MAX_SAMPLE_LENGTH ? MAX_SAMPLE_LENGTH : sampleLength); |
|
85 |
|
86 // start from 0 and check the even bytes |
|
87 for (TInt i = 0; i < length-1; i+=2) |
|
88 { |
|
89 if (aSample[i] == 0x0) |
|
90 repeat ++; |
|
91 } |
|
92 |
|
93 // if more than 80% even bytes zero then this IS big Endian |
|
94 if ( (repeat * 100) / (length * 5) >= 8) |
|
95 aConfidenceLevel = 100; |
|
96 } |
|
97 |
|
98 aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0; |
|
99 } |
|
100 |