0
|
1 |
// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
|
|
2 |
// All rights reserved.
|
|
3 |
// This component and the accompanying materials are made available
|
|
4 |
// under the terms of the License "Eclipse Public License v1.0"
|
|
5 |
// which accompanies this distribution, and is available
|
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
|
7 |
//
|
|
8 |
// Initial Contributors:
|
|
9 |
// Nokia Corporation - initial contribution.
|
|
10 |
//
|
|
11 |
// Contributors:
|
|
12 |
//
|
|
13 |
// Description:
|
|
14 |
// e32\include\unicode.h
|
|
15 |
// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
|
|
16 |
// character attribute retrieval functions. It also contains:
|
|
17 |
// structures used to store and search the tables of character information:
|
|
18 |
// when modifying these, please remember that they form part of tables that must be initialised as aggregates,
|
|
19 |
// so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
|
|
20 |
// rather than class to make that clear.
|
|
21 |
// default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
|
|
22 |
// constructors and conversion functions for converting between integers and TUnicode objects
|
|
23 |
// functions to retrieve the categories and attributes
|
|
24 |
// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
|
|
25 |
// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
|
|
26 |
// Version 2.0'.
|
|
27 |
// Because the category constants must be available to users they are defined not here but in the TChar
|
|
28 |
// class in e32std.h.
|
|
29 |
//
|
|
30 |
// WARNING: This file contains some APIs which are internal and are subject
|
|
31 |
// to change without notice. Such APIs should therefore not be used
|
|
32 |
// outside the Kernel and Hardware Services package.
|
|
33 |
//
|
|
34 |
|
|
35 |
/**
|
|
36 |
@file
|
|
37 |
@internalTechnology
|
|
38 |
*/
|
|
39 |
|
|
40 |
|
|
41 |
#ifndef __UNICODE_H__
|
|
42 |
#define __UNICODE_H__ 1
|
|
43 |
|
|
44 |
#include <e32cmn.h>
|
|
45 |
|
|
46 |
/*
|
|
47 |
A structure to contain the raw data about a Unicode character:
|
|
48 |
it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
|
|
49 |
*/
|
|
50 |
struct TUnicodeData
|
|
51 |
{
|
|
52 |
// bit values for iFlags
|
|
53 |
enum
|
|
54 |
{
|
|
55 |
EHasLowerCase = 1, // adding the case offset gives the lower case form
|
|
56 |
EHasUpperCase = 2, // subtracting the case offset gives the upper case form
|
|
57 |
EHasTitleCase = 4, // a title case form exists that is distinct from the upper case form
|
|
58 |
EMirrored = 8, // this character is replaced by a mirror-image in right-to-left text
|
|
59 |
ENumericFlags = 0x70, // one of these flags is set if this number has a numeric value
|
|
60 |
ENonNumeric = 0x00, // this character has no numeric value
|
|
61 |
ESmallNumeric = 0x10, // numeric in the range 0..255 (see iDigitOffset)
|
|
62 |
EFiveHundred = 0x20, // numeric with the value 500
|
|
63 |
EOneThousand = 0x30, // numeric with the value 1000
|
|
64 |
EFiveThousand = 0x40, // numeric with the value 5000
|
|
65 |
ETenThousand = 0x50, // numeric with the value 10000
|
|
66 |
EHundredThousand = 0x60, // numeric with the value 100000
|
|
67 |
EFraction = 0x70 // numeric with a fractional value
|
|
68 |
};
|
|
69 |
|
|
70 |
TUint8 iCategory; // general category
|
|
71 |
TUint8 iBdCategory; // bidirectional category
|
|
72 |
TUint8 iCombiningClass; // combining class
|
|
73 |
TInt8 iDigitOffset; // if this character has a small numeric value, the difference between the low
|
|
74 |
// 8 bits of the character code and the numeric value
|
|
75 |
TInt16 iCaseOffset; // offset to other case; subtract to get upper case, add to get lower
|
|
76 |
// case (this makes it more likely that characters
|
|
77 |
// differing only by case have the same data, making the table smaller)
|
|
78 |
TUint8 iFlags; // flags: does this character have a lower case form, etc.
|
|
79 |
};
|
|
80 |
|
|
81 |
/*
|
|
82 |
A structure for Unicode plane information.
|
|
83 |
An array of 17 elements should be defined in unitable.cpp, which is generated
|
|
84 |
by the readtype tool. All characters in a plane are divided into blocks. All
|
|
85 |
blocks in a plane have the same block size. Block size can be 2, 4, 8, etc.
|
|
86 |
Any field in this structure can be calculated from any other field. Such
|
|
87 |
'redundant' information is just for faster runtime speed.
|
|
88 |
For example, a plane has block size of 16, which is 2 ^ 4. The code number
|
|
89 |
will be 4. The mask for block will be 0xFFF0, which means the high 12 bits indicate the
|
|
90 |
block index. The mask for code point will be 0x000F, which means the lower 4
|
|
91 |
bits indicate the index within the block.
|
|
92 |
*/
|
|
93 |
struct TUnicodePlane
|
|
94 |
{
|
|
95 |
TUint8 iCodesPerBlock; // how many bits are used to represent code points (for example if there were 4096 blocks (12 bits), this would be 4 bits)
|
|
96 |
TUint16 iMaskForBlock; // mask of 16 bits for blocks (for example 8 bits would be 0xff00)
|
|
97 |
TUint16 iMaskForCodePoint; // mask of 16 bits for index in block (for example 8 bits would be 0x00ff)
|
|
98 |
};
|
|
99 |
|
|
100 |
/*
|
|
101 |
A structure for a range of Unicode characters with the same raw data; must not have a
|
|
102 |
constructor because an array of these in unitable.cpp is initialised as an aggregate.
|
|
103 |
|
|
104 |
@deprecated
|
|
105 |
*/
|
|
106 |
struct TUnicodeDataRange
|
|
107 |
{
|
|
108 |
TUint16 iRangeStart; // Unicode value of the start of the range of characters
|
|
109 |
TInt16 iIndex; // index into an array of character information structures (-1 means data no available)
|
|
110 |
};
|
|
111 |
|
|
112 |
/*
|
|
113 |
A structure to hold a set of overriding character data
|
|
114 |
*/
|
|
115 |
struct TUnicodeDataSet
|
|
116 |
{
|
|
117 |
const TUnicodeData *iData; // array of character data structures
|
|
118 |
const TUnicodeDataRange *iRange; // array of ranges referring to elements of iData
|
|
119 |
TInt iRanges; // number of elements in the array of ranges
|
|
120 |
};
|
|
121 |
|
|
122 |
// A structure to hold the standard character data
|
|
123 |
struct TStandardUnicodeDataSet
|
|
124 |
{
|
|
125 |
const TUint16* iIndex1; // first trie index: 4096 elements indexed by high 12 bits of Unicode value
|
|
126 |
const TUint16* iIndex2; // second trie index, indexed by values in iIndex1
|
|
127 |
const TUnicodeData *iData; // array of character data structures, indexed by values in iIndex2, offset
|
|
128 |
// by low 4 bits of Unicode value
|
|
129 |
};
|
|
130 |
|
|
131 |
/*
|
|
132 |
A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?)
|
|
133 |
composition (e.g., create a character from a base character and an accent), and decomposition
|
|
134 |
(e.g., remove the accent from this character if there is one).
|
|
135 |
*/
|
|
136 |
class TUnicode
|
|
137 |
{
|
|
138 |
public:
|
|
139 |
|
|
140 |
// Constructors
|
|
141 |
TUnicode() { iCode = 0xFFFF; }
|
|
142 |
TUnicode(TUint c) : iCode(c) {}
|
|
143 |
operator TUint() const { return iCode; }
|
|
144 |
|
|
145 |
// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C)
|
|
146 |
void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
|
|
147 |
IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
148 |
TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
149 |
TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
150 |
IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
151 |
IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
152 |
TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
153 |
TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
154 |
TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
155 |
TChar::TCjkWidth GetCjkWidth() const;
|
|
156 |
IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
|
|
157 |
|
|
158 |
// Utilities
|
|
159 |
static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);
|
|
160 |
|
|
161 |
private:
|
|
162 |
const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
|
|
163 |
const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
|
|
164 |
TUint GetLowerCase(const TUnicodeData& aData) const;
|
|
165 |
TUint GetUpperCase(const TUnicodeData& aData) const;
|
|
166 |
TUint GetTitleCase(const TUnicodeData& aData) const;
|
|
167 |
TInt GetNumericValue(const TUnicodeData& aData) const;
|
|
168 |
|
|
169 |
TUint iCode; // not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used.
|
|
170 |
|
|
171 |
public:
|
|
172 |
#ifndef __KERNEL_MODE__
|
|
173 |
static const TUint16 FoldTable[256]; // fold table (strip accents, fold case) for the range 0..255
|
|
174 |
static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
|
|
175 |
#else
|
|
176 |
static const TUint16* FoldTable;
|
|
177 |
static const TUint16* CjkWidthFoldTable;
|
|
178 |
#endif
|
|
179 |
};
|
|
180 |
|
|
181 |
#endif // __UNICODE_H__
|