|
1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // e32\include\unicode.h |
|
15 // The TUnicode class contains a Unicode value. It is provided for convenience in implementing the |
|
16 // character attribute retrieval functions. It also contains: |
|
17 // structures used to store and search the tables of character information: |
|
18 // when modifying these, please remember that they form part of tables that must be initialised as aggregates, |
|
19 // so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct' |
|
20 // rather than class to make that clear. |
|
21 // default constructor that sets the stored Unicode value to 0xFFFF - an invalid character |
|
22 // constructors and conversion functions for converting between integers and TUnicode objects |
|
23 // functions to retrieve the categories and attributes |
|
24 // The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file |
|
25 // 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard, |
|
26 // Version 2.0'. |
|
27 // Because the category constants must be available to users they are defined not here but in the TChar |
|
28 // class in e32std.h. |
|
29 // |
|
30 // WARNING: This file contains some APIs which are internal and are subject |
|
31 // to change without notice. Such APIs should therefore not be used |
|
32 // outside the Kernel and Hardware Services package. |
|
33 // |
|
34 |
|
35 /** |
|
36 @file |
|
37 @internalTechnology |
|
38 */ |
|
39 |
|
40 |
|
41 #ifndef __UNICODE_H__ |
|
42 #define __UNICODE_H__ 1 |
|
43 |
|
44 #include <e32cmn.h> |
|
45 |
|
46 /* |
|
47 A structure to contain the raw data about a Unicode character: |
|
48 it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate. |
|
49 */ |
|
50 struct TUnicodeData |
|
51 { |
|
52 // bit values for iFlags |
|
53 enum |
|
54 { |
|
55 EHasLowerCase = 1, // adding the case offset gives the lower case form |
|
56 EHasUpperCase = 2, // subtracting the case offset gives the upper case form |
|
57 EHasTitleCase = 4, // a title case form exists that is distinct from the upper case form |
|
58 EMirrored = 8, // this character is replaced by a mirror-image in right-to-left text |
|
59 ENumericFlags = 0x70, // one of these flags is set if this number has a numeric value |
|
60 ENonNumeric = 0x00, // this character has no numeric value |
|
61 ESmallNumeric = 0x10, // numeric in the range 0..255 (see iDigitOffset) |
|
62 EFiveHundred = 0x20, // numeric with the value 500 |
|
63 EOneThousand = 0x30, // numeric with the value 1000 |
|
64 EFiveThousand = 0x40, // numeric with the value 5000 |
|
65 ETenThousand = 0x50, // numeric with the value 10000 |
|
66 EHundredThousand = 0x60, // numeric with the value 100000 |
|
67 EFraction = 0x70 // numeric with a fractional value |
|
68 }; |
|
69 |
|
70 TUint8 iCategory; // general category |
|
71 TUint8 iBdCategory; // bidirectional category |
|
72 TUint8 iCombiningClass; // combining class |
|
73 TInt8 iDigitOffset; // if this character has a small numeric value, the difference between the low |
|
74 // 8 bits of the character code and the numeric value |
|
75 TInt16 iCaseOffset; // offset to other case; subtract to get upper case, add to get lower |
|
76 // case (this makes it more likely that characters |
|
77 // differing only by case have the same data, making the table smaller) |
|
78 TUint8 iFlags; // flags: does this character have a lower case form, etc. |
|
79 }; |
|
80 |
|
81 /* |
|
82 A structure for Unicode plane information. |
|
83 An array of 17 elements should be defined in unitable.cpp, which is generated |
|
84 by the readtype tool. All characters in a plane are divided into blocks. All |
|
85 blocks in a plane have the same block size. Block size can be 2, 4, 8, etc. |
|
86 Any field in this structure can be calculated from any other field. Such |
|
87 'redundant' information is just for faster runtime speed. |
|
88 For example, if a plane has a block size of 16, which is 2 ^ 4, iCodesPerBlock |
|
89 will be 4. The mask for the block will be 0xFFF0, meaning the high 12 bits give the |
|
90 block index. The mask for the code point will be 0x000F, meaning the low 4 |
|
91 bits give the index within the block. |
|
92 */ |
|
93 struct TUnicodePlane |
|
94 { |
|
95 TUint8 iCodesPerBlock; // how many bits are used to represent code points (for example if there were 4096 blocks (12 bits), this would be 4 bits) |
|
96 TUint16 iMaskForBlock; // mask of 16 bits for blocks (for example 8 bits would be 0xff00) |
|
97 TUint16 iMaskForCodePoint; // mask of 16 bits for index in block (for example 8 bits would be 0x00ff) |
|
98 }; |
|
99 |
|
100 /* |
|
101 A structure for a range of Unicode characters with the same raw data; must not have a |
|
102 constructor because an array of these in unitable.cpp is initialised as an aggregate. |
|
103 |
|
104 @deprecated |
|
105 */ |
|
106 struct TUnicodeDataRange |
|
107 { |
|
108 TUint16 iRangeStart; // Unicode value of the start of the range of characters |
|
109 TInt16 iIndex; // index into an array of character information structures (-1 means data no available) |
|
110 }; |
|
111 |
|
112 /* |
|
113 A structure to hold a set of overriding character data |
|
114 */ |
|
115 struct TUnicodeDataSet |
|
116 { |
|
117 const TUnicodeData *iData; // array of character data structures |
|
118 const TUnicodeDataRange *iRange; // array of ranges referring to elements of iData |
|
119 TInt iRanges; // number of elements in the array of ranges |
|
120 }; |
|
121 |
|
122 // A structure to hold the standard character data |
|
123 struct TStandardUnicodeDataSet |
|
124 { |
|
125 const TUint16* iIndex1; // first trie index: 4096 elements indexed by high 12 bits of Unicode value |
|
126 const TUint16* iIndex2; // second trie index, indexed by values in iIndex1 |
|
127 const TUnicodeData *iData; // array of character data structures, indexed by values in iIndex2, offset |
|
128 // by low 4 bits of Unicode value |
|
129 }; |
|
130 |
|
131 /* |
|
132 A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?) |
|
133 composition (e.g., create a character from a base character and an accent), and decomposition |
|
134 (e.g., remove the accent from this character if there is one). |
|
135 */ |
|
136 class TUnicode |
|
137 { |
|
138 public: |
|
139 |
|
140 // Constructors |
|
141 TUnicode() { iCode = 0xFFFF; } |
|
142 TUnicode(TUint c) : iCode(c) {} |
|
143 operator TUint() const { return iCode; } |
|
144 |
|
145 // Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C) |
|
146 void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const; |
|
147 IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const; |
|
148 TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const; |
|
149 TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const; |
|
150 IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
151 IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
152 TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
153 TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const; |
|
154 TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const; |
|
155 TChar::TCjkWidth GetCjkWidth() const; |
|
156 IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const; |
|
157 |
|
158 // Utilities |
|
159 static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2); |
|
160 |
|
161 private: |
|
162 const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const; |
|
163 const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const; |
|
164 TUint GetLowerCase(const TUnicodeData& aData) const; |
|
165 TUint GetUpperCase(const TUnicodeData& aData) const; |
|
166 TUint GetTitleCase(const TUnicodeData& aData) const; |
|
167 TInt GetNumericValue(const TUnicodeData& aData) const; |
|
168 |
|
169 TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used. |
|
170 |
|
171 public: |
|
172 #ifndef __KERNEL_MODE__ |
|
173 static const TUint16 FoldTable[256]; // fold table (strip accents, fold case) for the range 0..255 |
|
174 static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF |
|
175 #else |
|
176 static const TUint16* FoldTable; |
|
177 static const TUint16* CjkWidthFoldTable; |
|
178 #endif |
|
179 }; |
|
180 |
|
181 #endif // __UNICODE_H__ |