symport/e32/include/unicode.h
changeset 1 0a7b44b10206
child 2 806186ab5e14
equal deleted inserted replaced
0:c55016431358 1:0a7b44b10206
       
     1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Symbian Foundation License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\include\unicode.h
       
    15 // The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
       
    16 // character attribute retrieval functions. It also contains:
       
    17 // structures used to store and search the tables of character information:
       
    18 // when modifying these, please remember that they form part of tables that must be initialised as aggregates,
       
    19 // so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
       
    20 // rather than class to make that clear.
       
    21 // default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
       
    22 // constructors and conversion functions for converting between integers and TUnicode objects
       
    23 // functions to retrieve the categories and attributes
       
    24 // The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
       
    25 // 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
       
    26 // Version 2.0'.
       
    27 // Because the category constants must be available to users they are defined not here but in the TChar
       
    28 // class in e32std.h.
       
    29 // 
       
    30 //
       
    31 
       
    32 /**
       
    33  @file
       
    34  @internalTechnology
       
    35 */
       
    36 
       
    37 
       
    38 #ifndef __UNICODE_H__
       
    39 #define __UNICODE_H__ 1
       
    40 
       
    41 #include <e32cmn.h>
       
    42 
       
    43 /*
       
    44 A structure to contain the raw data about a Unicode character:
       
    45 it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
       
    46 */
       
    47 struct TUnicodeData
       
    48 	{
       
    49 	// bit values for iFlags
       
    50 	enum
       
    51 		{
       
    52 		EHasLowerCase = 1,			// adding the case offset gives the lower case form
       
    53 		EHasUpperCase = 2,			// subtracting the case offset gives the upper case form
       
    54 		EHasTitleCase = 4,			// a title case form exists that is distinct from the upper case form
       
    55 		EMirrored = 8,				// this character is replaced by a mirror-image in right-to-left text
       
    56 		ENumericFlags = 0x70,		// one of these flags is set if this number has a numeric value
       
    57 		ENonNumeric = 0x00,			// this character has no numeric value
       
    58 		ESmallNumeric = 0x10,		// numeric in the range 0..255 (see iDigitOffset)
       
    59 		EFiveHundred = 0x20,		// numeric with the value 500
       
    60 		EOneThousand = 0x30,		// numeric with the value 1000
       
    61 		EFiveThousand = 0x40,		// numeric with the value 5000
       
    62 		ETenThousand = 0x50,		// numeric with the value 10000
       
    63 		EHundredThousand = 0x60,	// numeric with the value 100000
       
    64 		EFraction = 0x70			// numeric with a fractional value
       
    65 		};
       
    66 
       
    67 	TUint8 iCategory;					// general category
       
    68 	TUint8 iBdCategory;					// bidirectional category
       
    69 	TUint8 iCombiningClass;				// combining class
       
    70 	TInt8 iDigitOffset;					// if this character has a small numeric value, the difference between the low
       
    71 										// 8 bits of the character code and the numeric value
       
    72 	TInt16 iCaseOffset;					// offset to other case; subtract to get upper case, add to get lower
       
    73 										// case (this makes it more likely that characters
       
    74 										// differing only by case have the same	data, making the table smaller)
       
    75 	TUint8 iFlags;						// flags: does this character have a lower case form, etc.
       
    76 	};
       
    77 
       
    78 /*
       
    79 A structure for a range of Unicode characters with the same raw data; must not have a
       
    80 constructor because an array of these in unitable.cpp is initialised as an aggregate.
       
    81 */
       
    82 struct TUnicodeDataRange
       
    83 	{
       
    84 	TUint16 iRangeStart;	// Unicode value of the start of the range of characters
       
    85 	TInt16 iIndex;			// index into an array of character information structures (-1 means data no available)
       
    86 	};
       
    87 
       
    88 /*
       
    89 A structure to hold a set of overriding character data
       
    90 */
       
    91 struct TUnicodeDataSet
       
    92 	{
       
    93 	const TUnicodeData *iData;			// array of character data structures
       
    94 	const TUnicodeDataRange *iRange;	// array of ranges referring to elements of iData
       
    95 	TInt iRanges;						// number of elements in the array of ranges
       
    96 	};
       
    97 
       
    98 // A structure to hold the standard character data
       
    99 struct TStandardUnicodeDataSet
       
   100 	{
       
   101 	const TUint16* iIndex1;				// first trie index: 4096 elements indexed by high 12 bits of Unicode value
       
   102 	const TUint16* iIndex2;				// second trie index, indexed by values in iIndex1
       
   103 	const TUnicodeData *iData;			// array of character data structures, indexed by values in iIndex2, offset
       
   104 										// by low 4 bits of Unicode value
       
   105 	};
       
   106 
       
   107 /*
       
   108 A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?)
       
   109 composition (e.g., create a character from a base character and an accent), and decomposition
       
   110 (e.g., remove the accent from this character if there is one).
       
   111 */
       
   112 class TUnicode
       
   113 	{
       
   114 	public:
       
   115 
       
   116 	// Constructors
       
   117 	TUnicode() { iCode = 0xFFFF; }
       
   118 	TUnicode(TUint c) : iCode(c) {}
       
   119 	operator TUint() const { return iCode; }
       
   120 
       
   121 	// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C)
       
   122 	void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
       
   123 	IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
       
   124 	TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
       
   125 	TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
       
   126 	IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
   127 	IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
   128 	TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
   129 	TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
       
   130 	TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
       
   131 	TChar::TCjkWidth GetCjkWidth() const;
       
   132 	IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
       
   133 	
       
   134 	// Utilities
       
   135 	static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);
       
   136 
       
   137 	private:
       
   138 	const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
       
   139 	const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
       
   140 	TUint GetLowerCase(const TUnicodeData& aData) const;
       
   141 	TUint GetUpperCase(const TUnicodeData& aData) const;
       
   142 	TUint GetTitleCase(const TUnicodeData& aData) const;
       
   143 	TInt GetNumericValue(const TUnicodeData& aData) const;
       
   144 
       
   145 	TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used.
       
   146 
       
   147 	public:
       
   148 #ifndef __KERNEL_MODE__
       
   149 	static const TUint16 FoldTable[256];		// fold table (strip accents, fold case) for the range 0..255
       
   150 	static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
       
   151 #else
       
   152 	static const TUint16* FoldTable;
       
   153 	static const TUint16* CjkWidthFoldTable;
       
   154 #endif
       
   155 	};
       
   156 
       
   157 // Declarations for tables held in unitable.cpp and used by unicode.cpp.
       
   158 #ifndef __KERNEL_MODE__
       
   159 extern const TStandardUnicodeDataSet TheStandardUnicodeDataSet;
       
   160 extern const TUint16 TheUnicodeCompositionBuffer[];
       
   161 extern const TInt TheUnicodeCompositions;
       
   162 #else
       
   163 extern const TStandardUnicodeDataSet* pStandardUnicodeDataSet;
       
   164 extern const TUint16* TheUnicodeCompositionBuffer;
       
   165 extern TInt TheUnicodeCompositions;
       
   166 #endif
       
   167 
       
   168 #endif // __UNICODE_H__