|
1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Symbian Foundation License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // e32\include\unicode.h |
|
15 // The TUnicode class contains a Unicode value. It is provided for convenience in implementing the |
|
16 // character attribute retrieval functions. It also contains: |
|
17 // structures used to store and search the tables of character information: |
|
18 // when modifying these, please remember that they form part of tables that must be initialised as aggregates, |
|
19 // so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct' |
|
20 // rather than class to make that clear. |
|
21 // default constructor that sets the stored Unicode value to 0xFFFF - an invalid character |
|
22 // constructors and conversion functions for converting between integers and TUnicode objects |
|
23 // functions to retrieve the categories and attributes |
|
24 // The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file |
|
25 // 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard, |
|
26 // Version 2.0'. |
|
27 // Because the category constants must be available to users they are defined not here but in the TChar |
|
28 // class in e32std.h. |
|
29 // |
|
30 // |
|
31 |
|
32 /** |
|
33 @file |
|
34 @internalTechnology |
|
35 */ |
|
36 |
|
37 |
|
38 #ifndef __UNICODE_H__ |
|
39 #define __UNICODE_H__ 1 |
|
40 |
|
41 #include <e32cmn.h> |
|
42 |
|
43 /* |
|
44 A structure to contain the raw data about a Unicode character: |
|
45 it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate. |
|
46 */ |
|
47 struct TUnicodeData |
|
48 { |
|
49 // bit values for iFlags |
|
50 enum |
|
51 { |
|
52 EHasLowerCase = 1, // adding the case offset gives the lower case form |
|
53 EHasUpperCase = 2, // subtracting the case offset gives the upper case form |
|
54 EHasTitleCase = 4, // a title case form exists that is distinct from the upper case form |
|
55 EMirrored = 8, // this character is replaced by a mirror-image in right-to-left text |
|
56 ENumericFlags = 0x70, // one of these flags is set if this number has a numeric value |
|
57 ENonNumeric = 0x00, // this character has no numeric value |
|
58 ESmallNumeric = 0x10, // numeric in the range 0..255 (see iDigitOffset) |
|
59 EFiveHundred = 0x20, // numeric with the value 500 |
|
60 EOneThousand = 0x30, // numeric with the value 1000 |
|
61 EFiveThousand = 0x40, // numeric with the value 5000 |
|
62 ETenThousand = 0x50, // numeric with the value 10000 |
|
63 EHundredThousand = 0x60, // numeric with the value 100000 |
|
64 EFraction = 0x70 // numeric with a fractional value |
|
65 }; |
|
66 |
|
67 TUint8 iCategory; // general category |
|
68 TUint8 iBdCategory; // bidirectional category |
|
69 TUint8 iCombiningClass; // combining class |
|
70 TInt8 iDigitOffset; // if this character has a small numeric value, the difference between the low |
|
71 // 8 bits of the character code and the numeric value |
|
72 TInt16 iCaseOffset; // offset to other case; subtract to get upper case, add to get lower |
|
73 // case (this makes it more likely that characters |
|
74 // differing only by case have the same data, making the table smaller) |
|
75 TUint8 iFlags; // flags: does this character have a lower case form, etc. |
|
76 }; |
|
77 |
|
78 /* |
|
79 A structure for a range of Unicode characters with the same raw data; must not have a |
|
80 constructor because an array of these in unitable.cpp is initialised as an aggregate. |
|
81 */ |
|
82 struct TUnicodeDataRange |
|
83 { |
|
84 TUint16 iRangeStart; // Unicode value of the start of the range of characters |
|
85 TInt16 iIndex; // index into an array of character information structures (-1 means data no available) |
|
86 }; |
|
87 |
|
88 /* |
|
89 A structure to hold a set of overriding character data |
|
90 */ |
|
91 struct TUnicodeDataSet |
|
92 { |
|
93 const TUnicodeData *iData; // array of character data structures |
|
94 const TUnicodeDataRange *iRange; // array of ranges referring to elements of iData |
|
95 TInt iRanges; // number of elements in the array of ranges |
|
96 }; |
|
97 |
|
98 // A structure to hold the standard character data |
|
99 struct TStandardUnicodeDataSet |
|
100 { |
|
101 const TUint16* iIndex1; // first trie index: 4096 elements indexed by high 12 bits of Unicode value |
|
102 const TUint16* iIndex2; // second trie index, indexed by values in iIndex1 |
|
103 const TUnicodeData *iData; // array of character data structures, indexed by values in iIndex2, offset |
|
104 // by low 4 bits of Unicode value |
|
105 }; |
|
106 |
|
107 /* |
|
108 A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?) |
|
109 composition (e.g., create a character from a base character and an accent), and decomposition |
|
110 (e.g., remove the accent from this character if there is one). |
|
111 */ |
|
112 class TUnicode |
|
113 { |
|
114 public: |
|
115 |
|
116 // Constructors |
|
117 TUnicode() { iCode = 0xFFFF; } |
|
118 TUnicode(TUint c) : iCode(c) {} |
|
119 operator TUint() const { return iCode; } |
|
120 |
|
121 // Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C) |
|
122 void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const; |
|
123 IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const; |
|
124 TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const; |
|
125 TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const; |
|
126 IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
127 IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
128 TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const; |
|
129 TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const; |
|
130 TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const; |
|
131 TChar::TCjkWidth GetCjkWidth() const; |
|
132 IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const; |
|
133 |
|
134 // Utilities |
|
135 static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2); |
|
136 |
|
137 private: |
|
138 const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const; |
|
139 const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const; |
|
140 TUint GetLowerCase(const TUnicodeData& aData) const; |
|
141 TUint GetUpperCase(const TUnicodeData& aData) const; |
|
142 TUint GetTitleCase(const TUnicodeData& aData) const; |
|
143 TInt GetNumericValue(const TUnicodeData& aData) const; |
|
144 |
|
145 TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used. |
|
146 |
|
147 public: |
|
148 #ifndef __KERNEL_MODE__ |
|
149 static const TUint16 FoldTable[256]; // fold table (strip accents, fold case) for the range 0..255 |
|
150 static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF |
|
151 #else |
|
152 static const TUint16* FoldTable; |
|
153 static const TUint16* CjkWidthFoldTable; |
|
154 #endif |
|
155 }; |
|
156 |
|
157 // Declarations for tables held in unitable.cpp and used by unicode.cpp. |
|
158 #ifndef __KERNEL_MODE__ |
|
159 extern const TStandardUnicodeDataSet TheStandardUnicodeDataSet; |
|
160 extern const TUint16 TheUnicodeCompositionBuffer[]; |
|
161 extern const TInt TheUnicodeCompositions; |
|
162 #else |
|
163 extern const TStandardUnicodeDataSet* pStandardUnicodeDataSet; |
|
164 extern const TUint16* TheUnicodeCompositionBuffer; |
|
165 extern TInt TheUnicodeCompositions; |
|
166 #endif |
|
167 |
|
168 #endif // __UNICODE_H__ |