diff -r 000000000000 -r 83f4b4db085c toolsandutils/e32tools/readtype/readtype.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolsandutils/e32tools/readtype/readtype.cpp Tue Feb 02 01:39:43 2010 +0200 @@ -0,0 +1,868 @@ +// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). +// All rights reserved. +// This component and the accompanying materials are made available +// under the terms of the License "Eclipse Public License v1.0" +// which accompanies this distribution, and is available +// at the URL "http://www.eclipse.org/legal/epl-v10.html". +// +// Initial Contributors: +// Nokia Corporation - initial contribution. +// +// Contributors: +// +// Description: +// Reads a Unicode character type data file (such as UnicodeData-3.0.0.txt or a file containing locale-specific overrides) +// and writes C++ definitions of tables containing the information. +// Usage: readtype { }. +// : either the standard Unicode character data file (e.g., UnicodeData-3.0.0.txt) or a file containing +// overriding information for a certain locale, in the same format as the standard file, but with ranges for which +// there is no data given in the form: +// 0041;;;;;;;;;;;;;; +// 006A;;;;;;;;;;;;;; +// FFFF;;;;;;;;;;;;;; +// (in this example, these entries show that there is no overriding data for the character 0041 and range +// 006A..FFFF inclusive). +// Both single entries with no data and ranges with no data must have nothing in the third field (category). +// : the C++ source file to be output: this file becomes \e32\unicode\unitable.cpp, or an overriding +// file in \e32\lsrc; there are none of these yet. +// : a an optional name to be inserted into identifiers in the output file: omit this for the standard +// data set; use names like 'Turkish', 'Japanese', etc., for locales. +// +// + + +#include +#include +#include + +#ifndef _UNICODE +#define _UNICODE +#endif + +#include + +// don't use unicode.h::TUnicodeDataRange, since its for 16-bit, and deprecated +struct TUnicodeDataRange32 // Only used inside this cpp. + { + TUint32 iRangeStart; // Unicode value of the start of the range of characters + TInt16 iIndex; // index into an array of character information structures (-1 means data no available) + }; + +const int PlaneCount = 17; +TUnicodePlane ThePlanesInReadType[PlaneCount]; + +// Tables to convert names used in the data file to categories defined in TChar. +struct CatInfo + { + const char* iName; + TChar::TCategory iCat; + }; + +static const CatInfo TheCatInfo[] = + { + { "Lu", TChar::ELuCategory }, + { "Ll", TChar::ELlCategory }, + { "Lt", TChar::ELtCategory }, + { "Lo", TChar::ELoCategory }, + { "Lm", TChar::ELmCategory }, + { "Mn", TChar::EMnCategory }, + { "Mc", TChar::EMcCategory }, + { "Me", TChar::EMeCategory }, + { "Nd", TChar::ENdCategory }, + { "Nl", TChar::ENlCategory }, + { "No", TChar::ENoCategory }, + { "Pc", TChar::EPcCategory }, + { "Pd", TChar::EPdCategory }, + { "Ps", TChar::EPsCategory }, + { "Pe", TChar::EPeCategory }, + { "Pi", TChar::EPiCategory }, + { "Pf", TChar::EPfCategory }, + { "Po", TChar::EPoCategory }, + { "Sm", TChar::ESmCategory }, + { "Sc", TChar::EScCategory }, + { "Sk", TChar::ESkCategory }, + { "So", TChar::ESoCategory }, + { "Zs", TChar::EZsCategory }, + { "Zl", TChar::EZlCategory }, + { "Zp", TChar::EZpCategory }, + { "Cc", TChar::ECcCategory }, + { "Cf", TChar::ECfCategory }, + { "Cs", TChar::ECsCategory }, + { "Co", TChar::ECoCategory }, + { "Cn", TChar::ECnCategory } + }; +const int TheCategories = sizeof(TheCatInfo) / sizeof(TheCatInfo[0]); + +struct BdCatInfo + { + const char* iName; + TChar::TBdCategory iBdCat; + }; + +static const BdCatInfo TheBdCatInfo[] = + { + { "L", TChar::ELeftToRight }, + { "LRE", TChar::ELeftToRightEmbedding }, + { "LRO", TChar::ELeftToRightOverride }, + { "R", TChar::ERightToLeft }, + { "AL", TChar::ERightToLeftArabic }, + { "RLE", TChar::ERightToLeftEmbedding }, + { "RLO", TChar::ERightToLeftOverride }, + { "PDF", TChar::EPopDirectionalFormat }, + { "EN", TChar::EEuropeanNumber }, + { "ES", TChar::EEuropeanNumberSeparator }, + { "ET", TChar::EEuropeanNumberTerminator }, + { "AN", TChar::EArabicNumber }, + { "CS", TChar::ECommonNumberSeparator }, + { "NSM", TChar::ENonSpacingMark }, + { "BN", TChar::EBoundaryNeutral }, + { "B", TChar::EParagraphSeparator }, + { "S", TChar::ESegmentSeparator }, + { "WS", TChar::EWhitespace }, + { "ON", TChar::EOtherNeutral }, + }; +const int TheBdCategories = sizeof(TheBdCatInfo) / sizeof(TheBdCatInfo[0]); + +// Class derived from TUnicodeData to provide constructor etc. +class Data: public TUnicodeData + { + public: + Data(); + TBool operator==(const Data& c) const; + TBool operator!=(const Data& c) const { return !(*this == c); } + void Write(); + }; + +// The character information table. +const int MaxDatas = 1000; +Data TheData[MaxDatas]; +int Datas = 0; + +// The range table, containing indices to the character information table. +const int MaxRanges = 4000; +TUnicodeDataRange32 TheRange[MaxRanges]; +int Ranges = 0; + +// The exhaustive index table, containing indices from every 16-bit value to the character information table. +int TheIndex[0x110000]; + +// The special tables for characters in the range 0..255. +TUint16 LowerCaseTable[256]; +TUint16 FoldTable[256]; + +// The special table for characters in the range 0xFF00..0xFFFF +TUint16 CjkWidthFoldTable[256]; + +/* +The composition table. The compositions are stored as a word made up from the composition tag (high byte) and +the number of components (low byte), the Unicode value of the composed character, then the Unicode values of +the components. + +Two tables are created containing the indices of compositions. One of these is sorted by +composed character, one by decomposition. This enables quick conversions to be made in both directions. +*/ +const int MaxCompositionWords = 14000; +TUint32 CompositionBuffer[MaxCompositionWords]; +int CompositionWords = 0; +const int MaxCompositions = 8000; +TInt16 Compose[MaxCompositions]; // composition buffer indices, sorted by composed character +TInt16 Decompose[MaxCompositions]; // composition buffer indices, sorted by decomposition +int Compositions = 0; +int trie_data[0x110000]; // used to build the trie + +FILE *input_file; +FILE *output_file; +const char *input_filename; +const char *output_filename; + +// Convert a hex string to an integer. +static int hex(const char *s) + { + int x = 0; + while (*s) + { + int n = *s; + if (n >= '0' && n <= '9') + n -= '0'; + else if (n >= 'A' && n <= 'F') + n -= 'A' - 10; + else if (n >= 'a' && n <= 'f') + n -= 'a' - 10; + else + break; + x = x * 16 + n; + + s++; + } + return x; + } + +static TChar::TCategory Category(const char* aName,bool aWarn) + { + for (int i = 0; i < TheCategories; i++) + if (!strcmp(aName,TheCatInfo[i].iName)) + return TheCatInfo[i].iCat; + if (aWarn) + fprintf(stderr,"unknown category %s\n",aName); + return (TChar::TCategory)(-1); + } + +static TChar::TBdCategory BdCategory(const char* aName,bool aWarn) + { + for (int i = 0; i < TheBdCategories; i++) + if (!strcmp(aName,TheBdCatInfo[i].iName)) + return TheBdCatInfo[i].iBdCat; + if (aWarn) + fprintf(stderr,"unknown bidirectional category %s\n",aName); + return (TChar::TBdCategory)(-1); + } + +// Write an aggregate initialiser for a Data object to the output file. +void Data::Write() + { + fprintf(output_file,"{ %d, %d, %d, %d, %d, %d }", + (int)iCategory, + (int)iBdCategory, + (int)iCombiningClass, + (int)iDigitOffset, + (int)iCaseOffset, + (int)iFlags); + } + +/* +Add a new entry to the range table. If the category is the illegal value -1 store -1 as the +index; this feature is used when creating character data for specific locales, which mostly +consists of ranges for which the data is held in the main table, and is marked in this way +as unspecified in the locale table. +*/ +void add_range(Data& info,TInt code) + { + // Get an index to the character info; add a new entry if necessary. + int index = -1; + if (info.iCategory != TChar::TCategory(0xFF)) + { + for (int i = 0; i < Datas && index == -1; i++) + if (TheData[i] == info) + index = i; + if (index == -1) + { + if (Datas >= MaxDatas) + { + fprintf(stderr,"too many Datas: > %d\n",MaxDatas); + exit(1); + } + TheData[index = Datas++] = info; + } + } + + // Add the entry to the range table. + if (Ranges >= MaxRanges) + { + fprintf(stderr,"too many Ranges: > %d, when processing U+%x\n", MaxRanges, code); + exit(1); + } + TheRange[Ranges].iRangeStart = code; + TheRange[Ranges].iIndex = (TInt16)index; + Ranges++; + } + +// Write a table of "entries" integers each of "entry_size" bytes. +int write_table(const void *table,const char *name, + int entries,int input_entry_size,int output_entry_size, + int entry_signed,int entries_per_row,int write_array_size) + { + const char *type = entry_signed ? "TInt" : "TUint"; + const int bits = output_entry_size * 8; + + /* + There is a choice here whether or not the number of entries in the array is written: + either [] or [] is written. The latter method is used where the header + says [] so that compilers like GCC don't moan about type mismatches. + */ + if (entries == 0) + { + // In case that given plane has no character. + fprintf(output_file,"const %s%d * const %s = NULL;\n",type,bits,name); + return 0; + } + if (write_array_size) + fprintf(output_file,"const %s%d %s[%d] = \n\t{",type,bits,name,entries); + else + fprintf(output_file,"const %s%d %s[] = \n\t{ // %d entries",type,bits,name,entries); + + const unsigned char *p = (const unsigned char *)table; + for (int i = 0; i < entries; i++, p += input_entry_size) + { + if (i % entries_per_row == 0) + fprintf(output_file,"\n\t"); + if (output_entry_size == 1) + fprintf(output_file,"0x%02x",(int)(*p)); + else if (output_entry_size == 2) + fprintf(output_file,"0x%04x",(int)(*((TUint16 *)p))); + else if (output_entry_size == 4) + fprintf(output_file,"0x%08x",(int)(*((TUint32 *)p))); + else + { + fprintf(stderr,"illegal output entry size: %d\n",output_entry_size); + exit(1); + } + if (i < entries - 1) + fputc(',',output_file); + // comment for easy read + //if ((i+1) % entries_per_row == 0) + // fprintf(output_file, "\t// U+%X-U+%X (%d-%d)", i+1-entries_per_row, i, i+1-entries_per_row, i); + } + fprintf(output_file,"\n\t};\n"); + + return entries * output_entry_size; + } + +/* +Create and write a trie representing the data in 'aTheIndex' +The trie is of two levels, the first level indexed by the high 'aBlockBits' bits of the +character code, the second by the low bits. There is one wrinkle; if the index value, which is 16 bits, +has its top bit set, it is not an index but the actual data value for all entries in that block. + +Thus the way to get the value for a code is: + +int index = trie_index[code >> aBlockBits]; +if (index & 0x8000) + value = index & ~0x8000; +else + value = aTrieData[code & (1 << (16 - aBlockBits))]; + +The data size in bytes is returned. +The argument 'aWrite' determines whether the data is written or not. +The arguments 'aTrie1Name' and 'aTrie2Name' are used as variable names in generated unitable.cpp. +*/ +int write_trie(int aOutputEntrySize,int aBlockBits,bool aWrite, int *aTheIndex, int *aTrieData, char *aTrie1Name, char *aTrie2Name) + { + int n = 0; // number of entries used in trie_data + + int block_size = 1 << aBlockBits; + int blocks = 1 << (16 - aBlockBits); + + int* trie_index = new int[blocks]; + int* block = new int[block_size]; + + for (int block_index = 0; block_index < blocks; block_index++) + { + // Write the data for the current block. + int block_start = block_index * block_size; + bool all_the_same = true; + for (int code = 0; code < block_size; code++) + { + block[code] = aTheIndex[block_start + code]; + if (block[code] != block[0]) + all_the_same = false; + } + + // Try to find a match for it. + int insert_at; + if (all_the_same) + trie_index[block_index] = block[0] | 0x8000; + else + { + for (insert_at = 0; insert_at < n; insert_at++) + { + int entries = n - insert_at; + if (entries > block_size) + entries = block_size; + int bytes = entries * sizeof(int); + if (memcmp(block,aTrieData + insert_at,bytes) == 0) + break; + } + + memcpy(aTrieData + insert_at,block,block_size * sizeof(int)); + if (insert_at + block_size > n) + n = insert_at + block_size; + trie_index[block_index] = insert_at; + } + } + + if (aWrite) + { + write_table(trie_index,aTrie1Name,blocks,4,2,false,16,true); + write_table(aTrieData,aTrie2Name,n,4,aOutputEntrySize,false,32,true); + } + + delete [] trie_index; + delete [] block; + + return blocks * 2 + n * aOutputEntrySize; + } + +// Write the best possible 2-level trie for all planes, trying block sizes of 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 and 8192 +// @return Data size in bytes. +int write_trie() + { + int byteCount = 0; + for (int plane=0; plane\n\n"); + + // Export two variables for unicode.cpp. + fprintf(output_file, "\n"); + fprintf(output_file, "// Declarations for tables held in unitable.cpp and used by unicode.cpp.\n"); + fprintf(output_file, "extern const TStandardUnicodeDataSet TheStandardUnicodeDataSet[];\n"); + fprintf(output_file, "extern const TUnicodePlane ThePlanes[17];\n\n\n"); + + // Write the trie data. + data_bytes += write_trie(); + + // Write the character information table. + fprintf(output_file,"static const TUnicodeData TheUnicodeData[] =\n\t{ // %d entries\n", Datas); + int i; + for (i = 0; i < Datas; i++) + { + fputc('\t',output_file); + TheData[i].Write(); + if (i < Datas - 1) + fputc(',',output_file); + fprintf(output_file, "\t// 0x%X (%d)", i, i); + fputc('\n',output_file); + } + fprintf(output_file,"\t};\n\n"); + data_bytes += Datas * sizeof(Data); + + // write plane properties + fprintf(output_file, "const TUnicodePlane ThePlanes[%d] =\n\t{\n", PlaneCount); + int plane; + for (plane=0; plane<=16; plane++) + { + fprintf(output_file, "\t{%d, 0x%04X, 0x%04X }", + ThePlanesInReadType[plane].iCodesPerBlock, ThePlanesInReadType[plane].iMaskForBlock, ThePlanesInReadType[plane].iMaskForCodePoint); + if (plane < 16) + fprintf(output_file, ",\n"); + } + fprintf(output_file, "\n\t};\n\n"); + data_bytes += 5*PlaneCount; + + // Write a data structure referring to the trie data. + fprintf(output_file,"const TStandardUnicodeDataSet TheStandardUnicodeDataSet[] =\n\t{ // %d entries\n", PlaneCount); + for (plane=0; plane<=16; plane++) + { + fprintf(output_file,"\t{ ThePlane%02dTrieIndex1, ThePlane%02dTrieIndex2, TheUnicodeData }", plane, plane); + if (plane < 16) + fprintf(output_file, ",\n"); + } + fprintf(output_file, "\n\t};\n\n"); + data_bytes += 12*PlaneCount; + + // Convert the fold table to lower case. + for (i = 0; i < 256; i++) + FoldTable[i] = LowerCaseTable[FoldTable[i]]; + + // Make 00A0 (non-break space) fold to space. + FoldTable[0xA0] = 0x20; + + // Make unassigned characters in the CJK width fold table fold to themselves. + for (i = 0; i < 256; i++) + if (CjkWidthFoldTable[i] == 0) + CjkWidthFoldTable[i] = (TUint16)(0xFF00 + i); + + // Write the special tables + data_bytes += write_table(FoldTable,"TUnicode::FoldTable",256,2,2,false,16,true); + data_bytes += write_table(CjkWidthFoldTable,"TUnicode::CjkWidthFoldTable",256,2,2,false,16,true); + + // Write the number of data bytes at the end of the file. + fprintf(output_file,"\n// The tables and structures contain %d bytes of data.\n",data_bytes); + } + +int main(int argc,char **argv) + { + if (argc < 2) + { + fputs("usage: readtype ",stderr); + exit(1); + } + + input_filename = argv[1]; + output_filename = argv[2]; + + // Locale support in previous version is deprecated. + + input_file = fopen(input_filename,"r"); + if (!input_file) + { + fprintf(stderr,"cannot open input file %s\n",input_filename); + exit(1); + } + output_file = fopen(output_filename,"w"); + if (!output_file) + { + fprintf(stderr,"cannot open output file %s\n",output_filename); + exit(1); + } + + Data range_info; // attributes of the current range + Data unassigned_info; // attributes used for unassigned characters; the default constructor + // sets the category to Cn, bidirectional category to L, everything else to 0. + TBool first = true; + + char line[1024]; + const int Fields = 15; + char *field[Fields]; + TInt prev_code = 0; + while (fgets(line,sizeof(line),input_file)) + { + // Strip trailing newline if any. + int length = strlen(line); + if (length && line[length - 1] == '\n') + line[length - 1] = 0; + + // Parse into fields. + int n = 1; + field[0] = line; + for (char *p = line; *p; p++) + if (*p == ';' && n < Fields) + { + *p = 0; + field[n++] = p + 1; + } + + // Ignore the line if there is only one field. + if (n == 1) + continue; + + // Extract fields of interest. + + // Field 0: Unicode value in hexadecimal. + int code = hex(field[0]); + + // Field 2: Category. + Data cur_info; + cur_info.iCategory = (TUint8)Category(field[2], true); + + // Field 3: Combining class. + cur_info.iCombiningClass = (TUint8)atoi(field[3]); + + // Field 4: Bidirectional category. + cur_info.iBdCategory = (TUint8)BdCategory(field[4], true); + + // Prepare to determine the folded version (converted to lower case, stripped of accents). + int folded_code = code; + + // Field 5: Character decomposition. + if (field[5][0]) + { + int components = 0; + const int MaxComponents = 18; // FDFA; ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM has 18 components! + TUint32 component[MaxComponents]; + + // Extract the tag if any. + char *p = field[5]; + const char *tag = NULL; + if (field[5][0] == '<') + { + tag = ++p; + while (*p && *p != '>') + p++; + if (!*p) + { + fprintf(stderr,"syntax error: missing > on the line for code %x\n",code); + exit(1); + } + *p++ = 0; + } + + // Read the components. + while (*p) + { + while (*p == ' ') + p++; + if (components >= MaxComponents) + { + fprintf(stderr,"decomposition of %x has too many components: increase MaxComponents\n",code); + exit(1); + } + component[components++] = hex(p); + while (*p && *p != ' ') + p++; + } + + // Store the composition if it has a null tag and is therefore canonical. + if (tag == NULL) + { + // Put its index into the tables. + if (Compositions >= MaxCompositions) + { + fprintf(stderr,"too many compositions (at code %x): increase MaxCompositions\n",code); + exit(1); + } + if (CompositionWords >= 65535) + { + fprintf(stderr, "too many compositions (at code %x): need 32 bit!?\n", code); + exit(1); + } + Compose[Compositions] = Decompose[Compositions] = (TInt16)CompositionWords; + Compositions++; + + // Put it into the composition buffer. + if (CompositionWords + 2 + components >= MaxCompositionWords) + { + fprintf(stderr,"too many compositions (at code %x): increase MaxCompositionWords\n",code); + exit(1); + } + CompositionBuffer[CompositionWords++] = code; + CompositionBuffer[CompositionWords++] = components; + for (int i = 0; i < components; i++) + CompositionBuffer[CompositionWords++] = component[i]; + } + + // Store the code used in the ordinary and CJK fold tables. + if (components > 0) + { + if (code < 256) + { + if (tag == NULL) + folded_code = component[0]; + } + else if (code >= 0xFF00 && code <= 0xFFEE) // tag will always be or + folded_code = component[0]; + } + } + + // Field 8. Numeric value. + if (field[8][0]) + { + if (field[8][1] == '/' || field[8][2] == '/') // fractions + cur_info.iFlags |= TUnicodeData::EFraction; + else + { + int value = atoi(field[8]); + if (value >= 0 && value <= 255) + { + cur_info.iDigitOffset = (TUint8)((value - (code & 255)) & 255); + cur_info.iFlags |= TUnicodeData::ESmallNumeric; + } + else if (value == 500) + cur_info.iFlags |= TUnicodeData::EFiveHundred; + else if (value == 1000) + cur_info.iFlags |= TUnicodeData::EOneThousand; + else if (value == 5000) + cur_info.iFlags |= TUnicodeData::EFiveThousand; + else if (value == 10000) + cur_info.iFlags |= TUnicodeData::ETenThousand; + else if (value == 100000) + cur_info.iFlags |= TUnicodeData::EHundredThousand; + else + fprintf(stderr,"Warning: U+%X has a large numeric property with unrepresentable value %d. Ignored.\n",code,value); + } + } + + // Field 9: Mirrored property. + if (field[9][0] == 'Y') + cur_info.iFlags |= TUnicodeData::EMirrored; + + // Fields 12, 13, 14: Case variants. + int uc = code, lc = code, tc = code; + if (field[12][0]) + { + uc = hex(field[12]); + int uc_offset = uc - code; + if (abs(uc_offset) > 32767) + { + fprintf(stderr, "Warning: offset to upper case is too large: code %X, upper case %X, offset %X. Ignored!\n", code, uc, uc_offset); + } + else + { + cur_info.iFlags |= TUnicodeData::EHasUpperCase; + cur_info.iCaseOffset = (TInt16)(-uc_offset); + if (code<0x10000 && uc>0x10000 || code>0x10000 && uc<0x10000) + fprintf(stderr, "Info: %X and its upper case %X locate at different planes.\n"); + } + } + if (field[13][0]) + { + lc = hex(field[13]); + int lc_offset = lc - code; + if (abs(lc_offset) > 32767) + { + fprintf(stderr, "Warning: offset to lower case is too large: code %X, lower case %X, offset %X. Ignored!\n", code, lc, lc_offset); + } + else + { + cur_info.iFlags |= TUnicodeData::EHasLowerCase; + cur_info.iCaseOffset = (TInt16)lc_offset; + if (code<0x10000 && lc>0x10000 || code>0x10000 && lc<0x10000) + fprintf(stderr, "Info: %X and its lower case %X locate at different planes.\n"); + } + } + if (field[14][0]) + tc = hex(field[14]); + if (tc != lc && tc != uc) + cur_info.iFlags |= TUnicodeData::EHasTitleCase; + + // If this code is < 256 fill in the entries in the special tables. + if (code < 256) + { + LowerCaseTable[code] = (TUint16)lc; + FoldTable[code] = (TUint16)folded_code; + } + + // If the code is >= 0xFF00 fill in the entry in the CJK width folding table. + else if (code >= 0xFF00 && code <= 0xFFFF) + CjkWidthFoldTable[code & 0xFF] = (TUint16)folded_code; + + /* + If there was a gap between this code and the previous one, write an 'unassigned' range, + unless this character is actually the end of a range not fully listed (like the CJK ideographs + from 4E00 to 9FA5 inclusive), in which case the character name will end in ' Last>'. + */ + if (code - prev_code > 1) + { + TBool last_in_range = false; + int name_length = strlen(field[1]); + if (name_length >= 6 && !strcmp(field[1] + name_length - 6," Last>")) + last_in_range = TRUE; + if (!last_in_range) + { + add_range(unassigned_info,prev_code + 1); + range_info = unassigned_info; + } + } + + // Write the range. + if (first || cur_info != range_info) + { + add_range(cur_info,code); + range_info = cur_info; + } + + first = false; + prev_code = code; + } + + /* + If there was a gap at the end of the encoding (there is at present; FFFE and FFFF are not Unicode characters) + write an 'unassigned' range. + */ + if (prev_code < 0xFFFF) + add_range(unassigned_info,prev_code + 1); + + // Write an array of indices from Unicode character values to character data sets. + for (int i = 0; i < Ranges; i++) + { + TUint32 end = i < Ranges - 1 ? TheRange[i + 1].iRangeStart : 0x110000; + for (TUint32 j = TheRange[i].iRangeStart; j < end; j++) + TheIndex[j] = TheRange[i].iIndex; + } + + // Write the output file. + write_output(); + printf("\nDone.\n"); + + return 0; + } + +Data::Data() + { + iCategory = TChar::ECnCategory; + iBdCategory = TChar::ELeftToRight; + iCombiningClass = 0; + iDigitOffset = 0; + iCaseOffset = 0; + iFlags = 0; + } + +TBool Data::operator==(const Data& c) const + { + return iCategory == c.iCategory && + iBdCategory == c.iBdCategory && + iCombiningClass == c.iCombiningClass && + iDigitOffset == c.iDigitOffset && + iCaseOffset == c.iCaseOffset && + iFlags == c.iFlags; + } + +/* +This function is copied from unicode.cpp: having it here saves me having to link in unicode.cpp and +unitable.cpp, which is probably the file we're trying to write! +*/ +TInt TUnicode::Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2) + { + for (TInt i = 0; i < aLength1 || i < aLength2; i++, aString1++, aString2++) + { + TInt x = i < aLength1 ? *aString1 : -1; + TInt y = i < aLength2 ? *aString2 : -1; + if (x != y) + return x - y; + } + return 0; + }