charconvfw/charconv_fw/src/charconv/cp1252.cpp
changeset 0 1fb32624e06b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/charconvfw/charconv_fw/src/charconv/cp1252.cpp	Tue Feb 02 02:02:46 2010 +0200
@@ -0,0 +1,369 @@
+/*
+* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+
+#include <e32std.h>
+#include <convdata.h>
+
+#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
+
+LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1[]=
+	{
+	0x201a,
+	0x0192,
+	0x201e,
+	0x2026,
+	0x2020,
+	0x2021,
+	0x02c6,
+	0x2030,
+	0x0160,
+	0x2039,
+	0x0152
+	};
+
+LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2[]=
+	{
+	0x2018,
+	0x2019,
+	0x201c,
+	0x201d,
+	0x2022,
+	0x2013,
+	0x2014,
+	0x02dc,
+	0x2122,
+	0x0161,
+	0x203a,
+	0x0153
+	};
+
+LOCAL_D const TUint16 keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3[]=
+	{
+	0x017e,
+	0x0178
+	};
+
+LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable16OfIndexedTables16::SKeyedEntry keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1[]=
+	{
+		{
+		0x82,
+		0x8c,
+		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_1
+		},
+		{
+		0x91,
+		0x9c,
+		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_2
+		},
+		{
+		0x9e,
+		0x9f,
+		keyedTables16OfIndexedTables16_indexedEntries_codePage1252ToUnicode_3
+		}
+	};
+
+LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToCodePage1252_1[]=
+	{
+		{
+		0x0152,
+		0x8c
+		},
+		{
+		0x0153,
+		0x9c
+		},
+		{
+		0x0160,
+		0x8a
+		},
+		{
+		0x0161,
+		0x9a
+		},
+		{
+		0x0178,
+		0x9f
+		},
+		{
+		0x017d,
+		0x8e
+		},
+		{
+		0x017e,
+		0x9e
+		},
+		{
+		0x0192,
+		0x83
+		},
+		{
+		0x02c6,
+		0x88
+		},
+		{
+		0x02dc,
+		0x98
+		},
+		{
+		0x2013,
+		0x96
+		},
+		{
+		0x2014,
+		0x97
+		},
+		{
+		0x2018,
+		0x91
+		},
+		{
+		0x2019,
+		0x92
+		},
+		{
+		0x201a,
+		0x82
+		},
+		{
+		0x201c,
+		0x93
+		},
+		{
+		0x201d,
+		0x94
+		},
+		{
+		0x201e,
+		0x84
+		},
+		{
+		0x2020,
+		0x86
+		},
+		{
+		0x2021,
+		0x87
+		},
+		{
+		0x2022,
+		0x95
+		},
+		{
+		0x2026,
+		0x85
+		},
+		{
+		0x2030,
+		0x89
+		},
+		{
+		0x2039,
+		0x8b
+		},
+		{
+		0x203a,
+		0x9b
+		},
+		{
+		0x20ac,
+		0x80
+		},
+		{
+		0x2122,
+		0x99
+		}
+	};
+
+LOCAL_D const SCnvConversionData::SVariableByteData::SRange codePage1252VariableByteDataRanges[]=
+	{
+		{
+		0x00,
+		0xff,
+		0,
+		0
+		}
+	};
+
+LOCAL_D const SCnvConversionData::SOneDirectionData::SRange codePage1252ToUnicodeDataRanges[]=
+	{
+		{
+		0x00,
+		0x7f,
+		SCnvConversionData::SOneDirectionData::SRange::EDirect,
+		0,
+		0,
+			{
+			0,
+			0
+			}
+		},
+		{
+		0xa0,
+		0xff,
+		SCnvConversionData::SOneDirectionData::SRange::EDirect,
+		0,
+		0,
+			{
+			0,
+			0
+			}
+		},
+		{
+		0x80,
+		0x80,
+		SCnvConversionData::SOneDirectionData::SRange::EOffset,
+		0,
+		0,
+			{
+			STATIC_CAST(TUint, 8236),
+			0
+			}
+		},
+		{
+		0x8e,
+		0x8e,
+		SCnvConversionData::SOneDirectionData::SRange::EOffset,
+		0,
+		0,
+			{
+			STATIC_CAST(TUint, 239),
+			0
+			}
+		},
+		{
+		0x82,
+		0x9f,
+		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable16OfIndexedTables16,
+		0,
+		0,
+			{
+			UData_SKeyedTable16OfIndexedTables16(keyedTables16OfIndexedTables16_keyedEntries_codePage1252ToUnicode_1)
+			}
+		}
+	};
+
+LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToCodePage1252DataRanges[]=
+	{
+		{
+		0x0000,
+		0x007f,
+		SCnvConversionData::SOneDirectionData::SRange::EDirect,
+		1,
+		0,
+			{
+			0,
+			0
+			}
+		},
+		{
+		0x00a0,
+		0x00ff,
+		SCnvConversionData::SOneDirectionData::SRange::EDirect,
+		1,
+		0,
+			{
+			0,
+			0
+			}
+		},
+		{
+		0x0152,
+		0x2122,
+		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
+		1,
+		0,
+			{
+			UData_SKeyedTable1616(keyedTable1616_unicodeToCodePage1252_1)
+			}
+		}
+	};
+
+GLREF_D const SCnvConversionData codePage1252ConversionData=
+	{
+	SCnvConversionData::EUnspecified,
+		{
+		ARRAY_LENGTH(codePage1252VariableByteDataRanges),
+		codePage1252VariableByteDataRanges
+		},
+		{
+		ARRAY_LENGTH(codePage1252ToUnicodeDataRanges),
+		codePage1252ToUnicodeDataRanges
+		},
+		{
+		ARRAY_LENGTH(unicodeToCodePage1252DataRanges),
+		unicodeToCodePage1252DataRanges
+		},
+	NULL,
+	NULL
+	};
+
+GLREF_C void IsCharacterSetCP1252(TInt& aConfidenceLevel, const TDesC8& aSample)
+	{
+	aConfidenceLevel = 60;
+	TInt sampleLength = aSample.Length();
+
+	for (TInt i=0; i<sampleLength; ++i)
+		{
+		// CP1252 includes ASCII as well
+		// first check if the char is in the range 0x80 - 0x9f (controls codes in ISO88591)
+		// If it is in that range then the likelihood that it's CP1252 is a bit higher
+		if ((aSample[i] >= 0x80) && (aSample[i] <= 0x9f))
+			{
+			if((aSample[i]==0x81)||(aSample[i]==0x8D)||(aSample[i]==0x8f)||
+				(aSample[i]==0x90)||(aSample[i]==0x9d))
+				{
+				// These code values are not supported by the Codepage CP1252
+				aConfidenceLevel = 0;
+				break;
+				}
+			else
+				{
+				// problem: UTF8 uses the values 0x80-0x9f in more than 50% of it's multibyte representation
+				// so if the text was UTF8 .... the confidence here would hit the roof. Could check to make 
+				// sure that this is not UTF8
+				aConfidenceLevel+=1;
+				}
+			}
+		TInt increment1 = i+1;
+		TInt decrement1 = i-1;
+		// 0xf7 is the division symbol in CP1252.
+		// 0xd7 is the division symbol in CP1252.If char on either side of the division
+		// symbol is a number then the confidence that it's ISO88591 increases
+		if( decrement1>= 0 && ((aSample[i]==0xf7) || (aSample[i]==0xd7)) && increment1<sampleLength)
+			{
+			
+			if (increment1 >= sampleLength)
+				break;
+			if ( (aSample[decrement1] >= 0x30) && (aSample[decrement1] <= 0x39) &&  // char before is a number
+				 (aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39) )   // char after is a number
+				{
+				aConfidenceLevel+=5;
+				}
+			}
+		// Can also use the currency symbol to increase confidence if the char after a 
+		// currency symbol is numeric
+		if((aSample[i]>=0xa2) && (aSample[i] <= 0xa5) && increment1<sampleLength)
+			{
+			if ((aSample[increment1] >= 0x30) && (aSample[increment1] <= 0x39))
+				{
+				aConfidenceLevel+=5; 
+				}
+			}
+		} // for loop
+	aConfidenceLevel =(aConfidenceLevel >0)? ((aConfidenceLevel > 100)? 100: aConfidenceLevel): 0;
+	}