--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kernel/eka/include/collate.h Mon Oct 19 15:55:17 2009 +0100
@@ -0,0 +1,370 @@
+// Copyright (c) 1996-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// e32\include\collate.h
+// Definitions needed for Unicode collation.
+// Collation is the comparison of two Unicode strings to produce an ordering
+// that may be used in a dictionary or other list.
+// Collation is implemented using the Standard Unicode Collation algorithm. There
+// are four levels of comparison:
+// primary: basic character identity
+// secondary: accents and diacritics
+// tertiary: upper and lower case, and other minor attributes
+// quaternary: Unicode character value
+// Punctuation is normally ignored but can optionally be taken into account.
+// Strings are fully expanded using the standard Unicode canonical expansions before
+// they are compared. Thai and Lao vowels are swapped with the following character
+// if any.
+// EUSER contains the 'basic collation method'. This method assigns the standard Unicode collation key values
+// to the characters in the WGL4 repertoire, plus commonly used control characters and fixed-width spaces, plus
+// the CJK ideograms (for which the keys can be generated algorithmically). Other characters are collated after
+// all the characters for which keys are defined, and ordered by their Unicode values.
+// Locales can supply any number of other collation methods. They will usually supply a 'tailoring' of the standard
+// method. This is done by using the standard table as the main key table (signalled by placing NULL in
+// TCollationMethod::iMainTable) and specifying an override table (TCollationMethod::iOverrideTable).
+// Locale-specific collation data resides in ELOCL.
+//
+// WARNING: This file contains some APIs which are internal and are subject
+// to change without notice. Such APIs should therefore not be used
+// outside the Kernel and Hardware Services package.
+//
+
+#ifndef __COLLATE_H__
+#define __COLLATE_H__
+
+#ifdef __KERNEL_MODE__
+#include <e32cmn.h>
+#else
+#include <e32std.h>
+#endif
+
+//This material is used in the Unicode build only.
+#ifdef _UNICODE
+
+/**
+Collation key table: the packed collation keys plus the index arrays used to look them up.
+@publishedPartner
+@released
+*/
+struct TCollationKeyTable
+ {
+public:
+ /**
+ Masks for the various parts of each 32-bit element of the iKey array.
+ */
+ enum
+ {
+ ELevel0Mask = 0xFFFF0000, // bits 16-31: primary key - basic character identity
+ ELevel1Mask = 0x0000FF00, // bits 8-15: secondary key - accents and diacritics
+ ELevel2Mask = 0x000000FC, // bits 2-7: tertiary key - case, etc.
+ EIgnoreFlag = 0x2, // bit 1: if set, this key is normally ignored
+ EStopFlag = 0x1 // bit 0: if set, this key is the last in a sequence representing a Unicode value or values
+ };
+
+ /**
+ An array containing all of the keys and strings of keys concatenated
+ together. Each key has EStopFlag set only if it is the last key in its
+ string. Each key contains the keys for levels 0, 1 and 2, and a flag
+ EIgnoreFlag if the key is usually ignored (for punctuation & spaces
+ etc.).
+ */
+ const TUint32* iKey;
+ /**
+ An array of indices into the iKey array. Each element has its high 16
+ bits indicating a Unicode value and its low 16 bits indicating an index
+ into the iKey array at which its key starts. For surrogate pairs, the high
+ surrogate code is in bits 16-31 of iIndex[i], and the low surrogate code is
+ in bits 16-31 of iIndex[i+1]. These two elements are combined to represent
+ a surrogate pair. The elements are sorted by Unicode value.
+ */
+ const TUint32* iIndex;
+ /**
+ The size of the iIndex array.
+ */
+ TInt iIndices;
+ /**
+ Concatenated Unicode strings. Each is a string that is to be converted
+ to keys differently from how it would be if each letter were converted
+ independently. An example is "ch" in Spanish, which sorts as though it
+ were a single letter. Each Unicode string is preceded by a 16-bit value
+ indicating the string's length (in 16-bit units). The end of the string is
+ not delimited. A surrogate pair is represented by two adjacent 16-bit values.
+ */
+ const TUint16* iString;
+ /**
+ An array of elements mapping elements of iString to elements of iIndex.
+ Each element has its high 16 bits indicating the index of the start of
+ an element of iString, and its low 16 bits indicating the corresponding
+ element in iIndex. This array is sorted on the string index.
+ */
+ const TUint32* iStringIndex;
+ /**
+ The size of the iStringIndex array.
+ */
+ TInt iStringIndices;
+ };
+
+/**
+Defines a collation method.
+
+Collation means sorting pieces of text. It needs to take into account characters,
+accents and case; spaces and punctuation are usually ignored. It differs from
+ordinary methods of sorting in that it is locale-dependent - different
+languages use different ordering methods. Additionally, multiple collation
+methods may exist within the same locale.
+
+A collation method provides the collation keys and other data needed to customise
+collation; the Mem and TDesC16 collation functions (e.g. Mem::CompareC())
+perform the collation. Note that these functions use the standard collation
+method for the current locale - you only need to specify an object of class
+TCollationMethod to customise this collation scheme. Collation methods can
+be retrieved using member functions of the Mem class. Each one has a unique
+identifier.
+
+A collation method specifies a main table of collation keys, and optionally
+an overriding table that contains keys for which the values in the main table
+are overridden. A collation key table (TCollationKeyTable) is the set of collation
+keys: primary (basic character identity), secondary (accents and diacritics)
+and tertiary (case). The quaternary key is the Unicode character value itself.
+
+The simplest way to customise a collation method is to create a local copy
+of the standard collation method and change it. For example, you could use
+the standard method, but not ignore punctuation and spaces:
+
+@code
+TCollationMethod m = *Mem::CollationMethodByIndex(0); // get the standard method
+m.iFlags |= TCollationMethod::EIgnoreNone; // don't ignore punctuation and spaces
+@endcode
+
+@publishedPartner
+@released
+*/
+struct TCollationMethod
+ {
+ public:
+ /**
+ The UID of this collation method.
+ */
+ TUint iId;
+
+ /**
+ The main collation key table; if NULL, use the standard table.
+ */
+ const TCollationKeyTable* iMainTable;
+
+ /**
+ If non-NULL, tailoring for collation keys.
+ */
+ const TCollationKeyTable* iOverrideTable;
+ enum // Bit flags stored in iFlags; each value is a distinct bit so they can be ORed together.
+ {
+ /**
+ Don't ignore any keys (punctuation, etc. is normally ignored).
+ */
+ EIgnoreNone = 1,
+
+ /**
+ Reverse the normal order for characters differing only in case.
+ */
+ ESwapCase = 2,
+
+ /**
+ Compare secondary keys which represent accents in reverse
+ order (from right to left); this is needed for French when comparing
+ words that differ only in accents.
+ */
+ EAccentsBackwards = 4,
+
+ /**
+ Reverse the normal order for characters differing only in whether they
+ are katakana or hiragana.
+ */
+ ESwapKana = 8,
+
+ /**
+ Fold all characters to lower case before extracting keys; needed for
+ comparison of filenames, for which case is ignored but other
+ tertiary (level-2) distinctions are not.
+ */
+ EFoldCase = 16,
+
+ /** Flag to indicate a collation method for matching purposes.
+ This flag is only needed if we wish to specify a particular collation method
+ to be used for matching purposes.
+ */
+ EMatchingTable = 32,
+
+ /** Ignore the check for adjacent combining characters. A combining
+ character effectively changes the character it combines with to something
+ else and so a match doesn't occur. Setting this flag will allow character
+ matching regardless of any combining characters.
+ */
+ EIgnoreCombining = 64
+ };
+
+ /**
+ Flags: a bitwise OR of the flag values defined in this struct.
+
+ @see TCollationMethod::EIgnoreNone
+ @see TCollationMethod::ESwapCase
+ @see TCollationMethod::EAccentsBackwards
+ @see TCollationMethod::ESwapKana
+ @see TCollationMethod::EFoldCase, TCollationMethod::EMatchingTable, TCollationMethod::EIgnoreCombining
+ */
+ TUint iFlags;
+ };
+
+/**
+A collation data set provides any collation methods needed by a locale.
+@publishedPartner
+@released
+*/
+struct TCollationDataSet
+ {
+ public:
+ const TCollationMethod* iMethod; // NOTE(review): presumably the first element of an array of collation methods - confirm
+ TInt iMethods; // NOTE(review): presumably the number of entries reachable through iMethod - confirm
+ };
+
+// Collation method IDs
+
+/**
+Identifier (TCollationMethod::iId value) of the basic collation method.
+@internalTechnology
+@released
+*/
+const TUint KUidBasicCollationMethod = 0x10004F4E;
+
+/**
+Identifier (TCollationMethod::iId value) of the standard Unicode collation method.
+@internalTechnology
+@released
+*/
+const TUint KUidStandardUnicodeCollationMethod = 0x10004E96;
+
+#ifndef __KERNEL_MODE__
+
+//Forward declarations
+class TUTF32Iterator;
+struct LCharSet;
+
+/**
+Provides low-level collation functions.
+@internalComponent
+@released
+*/
+class TCollate
+ {
+public:
+ /**
+ Construct a TCollate object based on the collation method specified
+ within aCharSet, if any. If there is none, or aCharSet is null, the
+ standard collation method will be used. aMask and aFlags provide a
+ method for overriding the flags in the collation method: Each flag set
+ to 1 in aMask is a flag that will be overridden and set to the
+ corresponding flag value in aFlags. Ownership of aCharSet is not passed.
+ */
+ TCollate(const LCharSet* aCharSet,TUint aMask = 0,TUint aFlags = 0xFFFFFFFF);
+ /**
+ Construct a TCollate object based on an already constructed
+ TCollationMethod specified in aMethod. Ownership is not passed.
+ */
+ TCollate(const TCollationMethod& aMethod);
+ /** Outcome of a comparison: negative values mean the left string orders first, positive values the right, zero means identical. */
+ enum TComparisonResult
+ {
+ ELeftComparesLessAndIsNotPrefix = -2,
+ ELeftIsPrefixOfRight = -1,
+ EStringsIdentical = 0,
+ ERightIsPrefixOfLeft = 1,
+ ERightComparesLessAndIsNotPrefix = 2
+ };
+
+ /**
+ Compare the string beginning at aString1 of length aLength1 against the
+ string beginning at aString2 of length aLength2.
+ aMaxLevel determines the tightness of the collation. At level 0, only
+ character identities are distinguished. At level 1 accents are
+ distinguished as well. At level 2 case is distinguished as well. At
+ level 3 all valid different Unicode characters are considered different.
+ */
+ TComparisonResult Compare(const TUint16* aString1,TInt aLength1,
+ const TUint16* aString2,TInt aLength2,
+ TInt aMaxLevel = 3) const;
+ /**
+ Find the string beginning at aString2 of length aLength2 in the string
+ beginning at aString1 of length aLength1. aMaxLevel determines
+ the tightness of the collation, see Compare for details.
+ */
+ TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
+ TInt aMaxLevel,TUint aString2WildChar = 0) const;
+ /** Overload of Find with an extra aLengthFound out-parameter. NOTE(review): presumably receives the length of the match in aString1 - confirm. */
+ TInt Find(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2,
+ TInt &aLengthFound,TInt aMaxLevel,TUint aString2WildChar = 0) const;
+
+ /**
+ Test if the string beginning at aSearchTerm of length aSearchTermLength
+ matches the string beginning at aCandidate of length aCandidateLength.
+ aMaxLevel determines the tightness of the collation, see
+ Compare for details. The search term may have wild card characters as
+ specified by aWildChar (for matching a single grapheme - i.e. character
+ and any characters that combine with it, such as accents) and
+ aWildSequenceChar (for matching any sequence of whole graphemes). The
+ return value is KErrNotFound iff the search term does not match the
+ candidate string exactly. To find a match within the candidate string,
+ the search term must begin and end with a wild sequence character. If
+ the search term does match the candidate string, 0 will be returned,
+ unless the first character of the search term is a wild sequence
+ character in which case the value returned will be the index into
+ aCandidate at which the first non-wild sequence character matched.
+ aWildSequenceChar must be a valid (non-surrogate) Unicode character
+ below FFFE. NOTE(review): aEscapeChar is not described here - confirm its semantics.
+ */
+ TInt Match(const TUint16 *aCandidate, TInt aCandidateLength,
+ const TUint16 *aSearchTerm,TInt aSearchTermLength,
+ TInt aMaxLevel, TUint aWildChar = '?', TUint aWildSequenceChar = '*', TUint aEscapeChar = 0) const;
+
+private:
+ /**
+ Compare values output from the iterators. After the comparison, if
+ ERightIsPrefixOfLeft or EStringsIdentical is returned, then aLeft and
+ aRight will be pointing at the next key (at aMaxLevel) after the match.
+ If right is shown to be a prefix of left, this means that it has been
+ checked at all requested levels. If it is reported that the right is a
+ prefix of the left, then this will mean also that there are no unmatched
+ combining characters on the left.
+ */
+ TComparisonResult CompareKeySequences(TUTF32Iterator& aLeft, TUTF32Iterator& aRight,
+ TInt aMaxLevel, TInt aRightStringWildChar, TInt aEscapeChar) const;
+ /**
+ Finds search term inside candidate string. Returns KErrNotFound if there
+ is no match, returns the offset into the candidate string at which the
+ search term was found (note that this is the offset from the start of
+ the iteration, not from where the iteration was when the function was
+ called). If a string was found, the search term iterator is left
+ pointing at the end of the search term, and the candidate iterator is
+ left pointing just after the matched keys. NOTE(review): this comment used to describe
+ an "aMatchPos" output, but the parameter is aLengthFound - presumably the matched length; confirm.
+ */
+ TInt FindKeySequence(TUTF32Iterator& aCandidate, TUTF32Iterator& aSearchTerm,
+ TInt aMaxLevel, TInt aWildChar, TInt aEscapeChar, TInt& aLengthFound) const;
+
+private:
+ TCollationMethod iMethod;
+ };
+
+#endif // __KERNEL_MODE__
+
+#endif // _UNICODE
+
+#endif // __COLLATE_H__