kernel/eka/include/unicode.h
changeset 0 a41df078684a
equal deleted inserted replaced
-1:000000000000 0:a41df078684a
       
     1 // Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\include\unicode.h
       
    15 // The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
       
    16 // character attribute retrieval functions. It also contains:
       
    17 // structures used to store and search the tables of character information:
       
    18 // when modifying these, please remember that they form part of tables that must be initialised as aggregates,
       
    19 // so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
       
    20 // rather than class to make that clear.
       
    21 // default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
       
    22 // constructors and conversion functions for converting between integers and TUnicode objects
       
    23 // functions to retrieve the categories and attributes
       
    24 // The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
       
    25 // 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
       
    26 // Version 2.0'.
       
    27 // Because the category constants must be available to users they are defined not here but in the TChar
       
    28 // class in e32std.h.
       
    29 // 
       
    30 // WARNING: This file contains some APIs which are internal and are subject
       
    31 //          to change without notice. Such APIs should therefore not be used
       
    32 //          outside the Kernel and Hardware Services package.
       
    33 //
       
    34 
       
    35 /**
       
    36  @file
       
    37  @internalTechnology
       
    38 */
       
    39 
       
    40 
       
    41 #ifndef __UNICODE_H__
       
    42 #define __UNICODE_H__ 1
       
    43 
       
    44 #include <e32cmn.h>
       
    45 
       
     46 /*
       
     47 Raw attribute data for a single Unicode character, as stored in the character tables.
       
     48 It must not have a constructor (or anything else that stops it being an aggregate) because an array of these in unitable.cpp is initialised as an aggregate.
       
     49 */
       
     50 struct TUnicodeData
       
     51 	{
       
     52 	// Bit values for iFlags: bits 0..3 are independent flags; bits 4..6 (mask ENumericFlags) hold a single numeric-kind code.
       
     53 	enum
       
     54 		{
       
     55 		EHasLowerCase = 1,			// adding iCaseOffset gives the lower case form
       
     56 		EHasUpperCase = 2,			// subtracting iCaseOffset gives the upper case form
       
     57 		EHasTitleCase = 4,			// a title case form exists that is distinct from the upper case form
       
     58 		EMirrored = 8,				// this character is replaced by a mirror-image in right-to-left text
       
     59 		ENumericFlags = 0x70,		// mask for the numeric-kind codes below; a non-zero value under this mask means the character has a numeric value
       
     60 		ENonNumeric = 0x00,			// this character has no numeric value
       
     61 		ESmallNumeric = 0x10,		// numeric in the range 0..255 (see iDigitOffset)
       
     62 		EFiveHundred = 0x20,		// numeric with the value 500
       
     63 		EOneThousand = 0x30,		// numeric with the value 1000
       
     64 		EFiveThousand = 0x40,		// numeric with the value 5000
       
     65 		ETenThousand = 0x50,		// numeric with the value 10000
       
     66 		EHundredThousand = 0x60,	// numeric with the value 100000
       
     67 		EFraction = 0x70			// numeric with a fractional value
       
     68 		};
       
     69 
       
     70 	TUint8 iCategory;					// general category
       
     71 	TUint8 iBdCategory;					// bidirectional category
       
     72 	TUint8 iCombiningClass;				// combining class
       
     73 	TInt8 iDigitOffset;					// if this character has a small numeric value (ESmallNumeric), the difference between the low
       
     74 										// 8 bits of the character code and the numeric value
       
     75 	TInt16 iCaseOffset;					// offset to the other case: subtract to get upper case, add to get lower
       
     76 										// case (storing an offset makes it more likely that characters
       
     77 										// differing only by case have the same data, making the table smaller)
       
     78 	TUint8 iFlags;						// combination of the bit values in the enum above: does this character have a lower case form, etc.
       
     79 	};
       
    80 
       
     81 /*
       
     82 A structure for Unicode plane information.
       
     83 An array of 17 elements should be defined in unitable.cpp, which is generated
       
     84 by the readtype tool. All characters in a plane are divided into blocks, and all
       
     85 blocks in a plane have the same block size. Block size can be 2, 4, 8, etc.
       
     86 Any field in this structure can be calculated from any other field; such
       
     87 'redundant' information is stored only for faster run-time speed.
       
     88 For example, if a plane has a block size of 16, which is 2 ^ 4, the code number
       
     89 will be 4. The mask for the block will be 0xFFF0, which means the high 12 bits give the
       
     90 block index. The mask for the code point will be 0x000F, which means the low 4
       
     91 bits give the index within the block.
       
     92 */
       
     93 struct TUnicodePlane
       
     94 	{
       
     95 	TUint8 iCodesPerBlock;			// number of bits used to represent code points within a block, i.e. log2 of the block size rather than the block size itself (for example if there were 4096 blocks (12 bits), this would be 4 bits)
       
     96 	TUint16 iMaskForBlock;			// 16-bit mask selecting the block-index bits (for example an 8-bit block index would give 0xff00)
       
     97 	TUint16 iMaskForCodePoint;		// 16-bit mask selecting the index-within-block bits (for example an 8-bit index would give 0x00ff)
       
     98 	};
       
    99 
       
    100 /*
       
    101 A structure describing a range of Unicode characters that share the same raw data; it must not have a
       
    102 constructor because an array of these in unitable.cpp is initialised as an aggregate.
       
    103 
       
    104 @deprecated
       
    105 */
       
    106 struct TUnicodeDataRange
       
    107 	{
       
    108 	TUint16 iRangeStart;	// Unicode value of the first character in the range
       
    109 	TInt16 iIndex;			// index into an array of character information structures (-1 means data not available)
       
    110 	};
       
   111 
       
    112 /*
       
    113 A structure to hold a set of overriding character data: an array of ranges, each referring to an entry in an array of character data.
       
    114 */
       
    115 struct TUnicodeDataSet
       
    116 	{
       
    117 	const TUnicodeData *iData;			// array of character data structures
       
    118 	const TUnicodeDataRange *iRange;	// array of ranges; each range's iIndex refers to an element of iData
       
    119 	TInt iRanges;						// number of elements in the array of ranges (iRange)
       
    120 	};
       
   121 
       
    122 // A structure to hold the standard character data, reached through a two-level trie index (iIndex1, then iIndex2, then iData)
       
    123 struct TStandardUnicodeDataSet
       
    124 	{
       
    125 	const TUint16* iIndex1;				// first trie index: 4096 elements indexed by the high 12 bits of the Unicode value
       
    126 	const TUint16* iIndex2;				// second trie index, indexed by the values found in iIndex1
       
    127 	const TUnicodeData *iData;			// array of character data structures, indexed by the values found in iIndex2, offset
       
    128 										// by the low 4 bits of the Unicode value
       
    129 	};
       
   130 
       
    131 /*
       
    132 A class that holds a single Unicode character value and provides functions for characterisation (e.g., is this character lowercase?),
       
    133 composition (e.g., create a character from a base character and an accent), and decomposition
       
    134 (e.g., remove the accent from this character if there is one).
       
    135 */
       
    136 class TUnicode
       
    137 	{
       
    138 	public:
       
    139 
       
    140 	// Constructors and conversions between integers and TUnicode objects
       
    141 	TUnicode() { iCode = 0xFFFF; }	// default: 0xFFFF, an invalid character
       
    142 	TUnicode(TUint c) : iCode(c) {}	// implicit conversion from an integer character code
       
    143 	operator TUint() const { return iCode; }	// implicit conversion back to the stored character code
       
    144 
       
    145 	// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C). NOTE(review): aOverridingDataSet presumably supplies data that takes precedence over the standard tables - confirm against the implementation.
       
    146 	void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
       
    147 	IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
       
    148 	TChar::TBdCategory GetBdCategory(const TUnicodeDataSet *aOverridingDataSet) const;
       
    149 	TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
       
    150 	IMPORT_C TUint GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
    151 	IMPORT_C TUint GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
    152 	TUint GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
       
    153 	TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
       
    154 	TInt GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
       
    155 	TChar::TCjkWidth GetCjkWidth() const;
       
    156 	IMPORT_C TUint Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
       
    157 	
       
    158 	// Utilities: Compare takes two strings of 16-bit units, each given as a pointer plus a length (the ordering rules are defined by the implementation, which is not visible here)
       
    159 	static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);
       
    160 
       
    161 	private:	// data lookup helpers; the overloads taking a TUnicodeData& presumably work on data that has already been looked up - TODO confirm
       
    162 	const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
       
    163 	const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
       
    164 	TUint GetLowerCase(const TUnicodeData& aData) const;
       
    165 	TUint GetUpperCase(const TUnicodeData& aData) const;
       
    166 	TUint GetTitleCase(const TUnicodeData& aData) const;
       
    167 	TInt GetNumericValue(const TUnicodeData& aData) const;
       
    168 
       
    169 	TUint iCode; // the stored character code; not TUint16 because values in the extended range from 0x10000 to 0xFFFFF may be used. NOTE(review): with 17 planes the range would extend to 0x10FFFF - confirm the intended upper bound.
       
    170 
       
    171 	public:
       
    172 #ifndef __KERNEL_MODE__
       
    173 	static const TUint16 FoldTable[256];		// fold table (strip accents, fold case) for the range 0..255
       
    174 	static const TUint16 CjkWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
       
    175 #else
       
    176 	static const TUint16* FoldTable;	// kernel mode: the fold table is declared as a pointer rather than an in-class array
       
    177 	static const TUint16* CjkWidthFoldTable;	// kernel mode: the width fold table is declared as a pointer rather than an in-class array
       
    178 #endif
       
    179 	};
       
   180 
       
   181 #endif // __UNICODE_H__