diff -r 000000000000 -r 1fb32624e06b charconvfw/charconv_fw/src/charconv/cp1252.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/charconvfw/charconv_fw/src/charconv/cp1252.cpp Tue Feb 02 02:02:46 2010 +0200 @@ -0,0 +1,369 @@ +/* +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include +#include + +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) + +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1[]= + { + 0x201a, + 0x0192, + 0x201e, + 0x2026, + 0x2020, + 0x2021, + 0x02c6, + 0x2030, + 0x0160, + 0x2039, + 0x0152 + }; + +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2[]= + { + 0x2018, + 0x2019, + 0x201c, + 0x201d, + 0x2022, + 0x2013, + 0x2014, + 0x02dc, + 0x2122, + 0x0161, + 0x203a, + 0x0153 + }; + +LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3[]= + { + 0x017e, + 0x0178 + }; + +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1[]= + { + { + 0x82, + 0x8c, + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1 + }, + { + 0x91, + 0x9c, + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2 + }, + { + 0x9e, + 0x9f, + keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3 + } + }; + +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToCodePage1252_1[]= + { + { + 0x0152, + 0x8c + }, + { + 0x0153, + 0x9c + }, + { + 0x0160, + 0x8a + }, + { + 0x0161, + 0x9a + }, + { + 0x0178, + 0x9f + }, + { + 0x017d, + 0x8e + }, + { + 0x017e, + 0x9e + }, + { + 0x0192, + 0x83 + }, + { + 0x02c6, + 0x88 + }, + { + 0x02dc, + 0x98 + }, + { + 0x2013, + 0x96 + }, + { + 0x2014, + 0x97 + }, + { + 0x2018, + 0x91 + }, + { + 0x2019, + 0x92 + }, + { + 0x201a, + 0x82 + }, + { + 0x201c, + 0x93 + }, + { + 0x201d, + 0x94 + }, + { + 0x201e, + 0x84 + }, + { + 0x2020, + 0x86 + }, + { + 0x2021, + 0x87 + }, + { + 0x2022, + 0x95 + }, + { + 0x2026, + 0x85 + }, + { + 0x2030, + 0x89 + }, + { + 0x2039, + 0x8b + }, + { + 0x203a, + 0x9b + }, + { + 0x20ac, + 0x80 + }, + { + 0x2122, + 0x99 + } + }; + +LOCAL_D const SCnvConversionData::SVariableByteData::SRange codePage1252VariableByteDataRanges[]= + { + { + 0x00, + 0xff, + 0, + 0 + } + }; + +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange codePage1252ToUnicodeDataRanges[]= + { + { + 0x00, + 0x7f, + SCnvConversionData::SOneDirectionData::SRange::EDirect, + 0, + 0, + { + 0, + 0 + } + }, + { + 0xa0, + 0xff, + SCnvConversionData::SOneDirectionData::SRange::EDirect, + 0, + 0, + { + 0, + 0 + } + }, + { + 0x80, + 0x80, + SCnvConversionData::SOneDirectionData::SRange::EOffset, + 0, + 0, + { + STATIC_CAST(TUint, 8236), + 0 + } + }, + { + 0x8e, + 0x8e, + SCnvConversionData::SOneDirectionData::SRange::EOffset, + 0, + 0, + { + STATIC_CAST(TUint, 239), + 0 + } + }, + { + 0x82, + 0x9f, + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16, + 0, + 0, + { + UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1) + } + } + }; + +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToCodePage1252DataRanges[]= + { + { + 0x0000, + 0x007f, + SCnvConversionData::SOneDirectionData::SRange::EDirect, + 1, + 0, + { + 0, + 0 + } + }, + { + 0x00a0, + 0x00ff, + SCnvConversionData::SOneDirectionData::SRange::EDirect, + 1, + 0, + { + 0, + 0 + } + }, + { + 0x0152, + 0x2122, + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, + 1, + 0, + { + UData_SKeyedTable1616(keyedTable1616_unicodeToCodePage1252_1) + } + } + }; + +GLREF_D const SCnvConversionData codePage1252ConversionData= + { + SCnvConversionData::EUnspecified, + { + ARRAY_LENGTH(codePage1252VariableByteDataRanges), + codePage1252VariableByteDataRanges + }, + { + ARRAY_LENGTH(codePage1252ToUnicodeDataRanges), + codePage1252ToUnicodeDataRanges + }, + { + ARRAY_LENGTH(unicodeToCodePage1252DataRanges), + unicodeToCodePage1252DataRanges + }, + NULL, + NULL + }; + +GLREF_C void IsCharacterSetCP1252(TInt& aConfidenceLevel, const TDesC8& aSample) + { + aConfidenceLevel = 60; + TInt sampleLength = aSample.Length(); + + for (TInt i=0; i= 0x80) && (aSample[i] <= 0x9f)) + { + if((aSample[i]==0x81)||(aSample[i]==0x8D)||(aSample[i]==0x8f)|| + (aSample[i]==0x90)||(aSample[i]==0x9d)) + { + // These code values are not supported by the Codepage CP1252 + aConfidenceLevel = 0; + break; + } + else + { + // problem: UTF8 uses the values 0x80-0x9f in more than 50% of it's multibyte representation + // so if the text was UTF8 .... the confidence here would hit the roof. Could check to make + // sure that this is not UTF8 + aConfidenceLevel+=1; + } + } + TInt increment1 = i+1; + TInt decrement1 = i-1; + // 0xf7 is the division symbol in CP1252. + // 0xd7 is the division symbol in CP1252.If char on either side of the division + // symbol is a number then the confidence that it's ISO88591 increases + if( decrement1>= 0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && increment1= sampleLength) + break; + if ( (aSample[decrement1] >= 0x30) && (aSample[decrement1] <= 0x39) && // char before is a number + (aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39) ) // char after is a number + { + aConfidenceLevel+=5; + } + } + // Can also use the currency symbol to increase confidence if the char after a + // currency symbol is numeric + if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && increment1= 0x30) && (aSample[increment1] <= 0x39)) + { + aConfidenceLevel+=5; + } + } + } // for loop + aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0; + }