symport/e32/euser/unicode/compareimp.h
changeset 1 0a7b44b10206
child 2 806186ab5e14
equal deleted inserted replaced
0:c55016431358 1:0a7b44b10206
       
     1 // Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Symbian Foundation License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // Folding and decomposition implementation
       
    15 // 
       
    16 //
       
    17 
       
    18 #ifndef __COMPAREIMP_H__
       
    19 #define __COMPAREIMP_H__
       
    20 
       
    21 #include <e32std.h>
       
    22 #include <unicode.h>
       
    23 
       
     24 //Forward declarations
       
        // TUTF32Iterator is defined further down in this header. It is declared
        // here first because the global function prototypes that follow take it
        // by reference before the class definition has been seen.
     25 class TUTF32Iterator;
       
    26 
       
    27 //////////////////////////////////////////////////////////////////////////////////////////////
       
    28 // Global functions
       
    29 //////////////////////////////////////////////////////////////////////////////////////////////
       
     30 
       
        // Prototypes only: none of the functions below has a body in this header.
        // NOTE(review): their exact matching/ordering semantics are defined in the
        // implementation files, which are not visible from here.
     31 TChar UTF16ToChar(const TText16* a);
       
     32 TBool IsBaseCharacter(TChar);
       
     33 
       
        // The two "section" matchers take both iterators by non-const reference.
        // NOTE(review): presumably they advance the iterators -- confirm.
     34 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm);
       
     35 
       
     36 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm);
       
     37 
       
        // The two "string" matchers describe the candidate and the search term as
        // start/end pairs of TText16 pointers.
        // NOTE(review): the end pointers are presumably one-past-the-end -- confirm.
     38 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
     39                         const TText16* aSearchTermStart, const TText16* aSearchTermEnd);
       
     40 
       
     41 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
     42                              const TText16* aSearchTermStart, const TText16* aSearchTermEnd);
       
     43 
       
     44 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm);
       
     45 
       
        // Unlike the matchers above, CompareFolded takes its iterators by const reference.
     46 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight);
       
     47 
       
        // aCombined is an output parameter (non-const reference); the TInt return
        // value's meaning is not documented in this header.
     48 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined);
       
     49 
       
        // aResult is an output parameter (non-const reference to a TPtrC16).
     50 TBool DecomposeChar(TChar aCh, TPtrC16& aResult);
       
     51 
       
        // Declared inline, so a definition must be visible to every caller.
        // NOTE(review): presumably provided by compareimp.inl, which this header
        // includes just before its closing include guard -- confirm.
     52 inline void SkipCombiningCharacters(TUTF32Iterator& aUTF32It);
       
    53 
       
     54 /**
       
     55 Converts UTF16 into UTF32, ignoring non-characters and
       
     56 unpaired surrogates and combining paired surrogates.
        Only declarations appear in the class body. Members marked inline are
        defined elsewhere (presumably in compareimp.inl -- confirm); Next() and
        LocateFoldedBaseCharacter() are not declared inline.
       
     57 @internalComponent
       
     58 */
       
     59 class TUTF32Iterator
       
     60 	{
       
     61 public:
       
        	// Single-enumerator tag type, used as the unnamed third parameter of
        	// the last constructor overload below.
     62 	enum TStartsWithValidCharacter { EStartsWithValidCharacter };
       
     63 	inline TUTF32Iterator();
       
        	// explicit: a bare TText16 pointer does not convert implicitly to an iterator.
     64 	inline explicit TUTF32Iterator(const TText16* aSingleton);
       
     65 	inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd);
       
        	// NOTE(review): the tag presumably tells the constructor that aStart
        	// already points at a valid character -- confirm against the definition.
     66 	inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd, TStartsWithValidCharacter);
       
     67 
       
     68 	inline TUTF32Iterator CurrentAsIterator() const;
       
     69 	inline TBool AtEnd() const;
       
     70 	void Next();
       
     71 	inline TChar Current() const;
       
        	// Non-const member. NOTE(review): presumably moves the iterator while
        	// searching for aChar -- confirm.
     72 	TBool LocateFoldedBaseCharacter(TChar aChar);
       
     73 	inline const TText16* CurrentPosition() const;
       
     74 	inline TInt Length() const;
       
     75 	inline TInt operator[](TInt) const;
       
     76 	inline void SetStart(const TText16*);
       
     77 private:
       
        	// State: two pointers into TText16 text and one TChar.
        	// NOTE(review): presumably the bounds of the text still to be read and
        	// the decoded current character -- confirm against the implementation.
     78 	const TText16* iStart;
       
     79 	const TText16* iEnd;
       
     80 	TChar iCurrent;
       
     81 	};
       
    82 
       
    83 //////////////////////////////////////////////////////////////////////////////////////////////
       
    84 // FOLDING
       
    85 //////////////////////////////////////////////////////////////////////////////////////////////
       
    86 
       
     87 /**
        Iterator constructed from, or Set() to, a TUTF32Iterator.
        NOTE(review): judging by its name and section banner it presumably yields
        the folded decomposition of the underlying text -- confirm against the
        implementation, which is not part of this header.
       
     88 @internalComponent
       
     89 */
       
     90 class TFoldedDecompIterator
       
     91 	{
       
     92 public:
       
     93 	inline TFoldedDecompIterator();
       
     94 	explicit TFoldedDecompIterator(const TUTF32Iterator&);
       
     95 	inline void Set(const TUTF32Iterator&);
       
     96 	TBool AtEnd() const;
       
     97 	TBool AtEndOrWildcard() const;
       
        	// Both Enter...FoldedSequence() members are non-const and return TBool.
     98 	TBool EnterFoldedSequence();
       
     99 	TBool StrictEnterFoldedSequence();
       
    100 	inline TBool IsInFoldedSequence() const;
       
    101 	TBool CurrentIsBaseFoldedFromCombiner() const;
       
    102 	TChar Current() const;
       
        	// Two Match overloads: one against a single TChar, one against another
        	// iterator that is passed by non-const reference. Both are non-const.
     103 	TBool Match(TChar aCode);
       
    104 	TBool Match(TFoldedDecompIterator& aThat);
       
    105 	void Next();
       
    106 	inline TUTF32Iterator BaseIterator() const;
       
    107 private:
       
        	// NOTE(review): presumably iOriginal walks the input text and iFolded
        	// walks the folded form of its current character -- confirm.
    108 	TUTF32Iterator iOriginal;
       
    109 	TUTF32Iterator iFolded;
       
    110 	};
       
   111 
       
    112 /**
       
    113 Sorts sequences of combining characters with non-zero combining classes into
       
    114 order of their combining classes.
        Only declarations appear here; the sorting itself is implemented outside
        this header.
       
    115 @internalComponent
       
    116 */
       
    117 class TFoldedSortedDecompIterator
       
    118 	{
       
    119 public:
       
    120 	inline TFoldedSortedDecompIterator();
       
        	// Two Set overloads: one takes a TFoldedDecompIterator by non-const
        	// reference and returns a TInt, the other takes no arguments.
        	// NOTE(review): the meaning of the TInt result is not stated here -- confirm.
    121 	TInt Set(TFoldedDecompIterator &aBase);
       
    122 	void Set();
       
    123 	TBool AtEnd() const;
       
    124 	TChar Current() const;
       
    125 	void Next();
       
    126 
       
    127 private:
       
    128 	TFoldedDecompIterator iStart; // Starting code.	
       
    129 	TInt iLength; // Length in decomposed codes.
       
    130 	TFoldedDecompIterator iCurrent; // Current code.
       
    131 	TInt iCurrentCount; // Number of decomposed codes iCurrent is past iStart
       
    132 	TInt iCurrentClass; // Current class being searched for.
       
    133 	TInt iRemaining; // Number of Next()s left
       
    134 	};
       
   135 
       
    136 /**
       
    137 Iterator that outputs canonically decomposed folded strings.
       
    138 This is much slower than using the matching functions, so should only
       
    139 be used where an ordering is required.
        Holds a TFoldedDecompIterator (iBase) and a TFoldedSortedDecompIterator
        (iSorted) as its only data members.
       
    140 @internalComponent
       
    141 */
       
    142 class TFoldedCanonicalIterator
       
    143 	{
       
    144 public:
       
        	// Not marked explicit, unlike the other single-argument constructors in
        	// this header, so a TUTF32Iterator converts implicitly to this type.
    145 	TFoldedCanonicalIterator(const TUTF32Iterator&);
       
    146 	TBool AtEnd() const;
       
    147 	TChar Current() const;
       
        	// Unlike the Next() of the other iterators in this header, this one
        	// takes a pointer to a TUnicodeDataSet.
        	// NOTE(review): whether a null pointer is acceptable is not visible here.
    148 	void Next(const TUnicodeDataSet* aCharDataSet);
       
    149 private:
       
    150 	TFoldedDecompIterator iBase;
       
    151 	TFoldedSortedDecompIterator iSorted;
       
    152 	};
       
   153 
       
   154 
       
   155 //////////////////////////////////////////////////////////////////////////////////////////////
       
   156 // COLLATION
       
   157 //////////////////////////////////////////////////////////////////////////////////////////////
       
   158 
       
    159 /**
        Iterator constructed from, or Set() to, a TUTF32Iterator.
        NOTE(review): judging by its name it presumably yields the decomposition
        of each character of the underlying text, without folding -- confirm
        against the implementation, which is not part of this header.
       
    160 @internalComponent
       
    161 */
       
    162 class TDecompositionIterator
       
    163 	{
       
    164 public:
       
    165 	inline TDecompositionIterator();
       
    166 	void Set(const TUTF32Iterator&);
       
    167 	explicit TDecompositionIterator(const TUTF32Iterator&);
       
    168 	TBool AtEnd() const;
       
    169 	TChar Current() const;
       
    170 	void Next();
       
        	// Returns a pointer into TText16 text.
        	// NOTE(review): presumably the position in the original (undecomposed)
        	// string -- confirm.
    171 	const TText16* CurrentPosition() const;
       
    172 private:
       
        	// NOTE(review): presumably iBase walks the input and iDecomposition
        	// walks the decomposition of its current character -- confirm.
    173 	TUTF32Iterator iBase;
       
    174 	TUTF32Iterator iDecomposition;
       
    175 	};
       
   176 
       
    177 /**
        Iterator built on a TDecompositionIterator (iBase), with a second
        TDecompositionIterator (iCurrent) that looks for characters to be sorted.
        NOTE(review): presumably outputs the canonically ordered decomposition of
        its input -- confirm against the implementation.
       
    178 @internalComponent
       
    179 */
       
    180 class TCanonicalDecompositionIterator
       
    181 	{
       
    182 public:
       
    183 	inline TCanonicalDecompositionIterator();
       
    184 	void Set(const TUTF32Iterator&);
       
    185 	TBool AtEnd() const;
       
    186 	TChar Current() const;
       
    187 	void Next();
       
        	// NOTE(review): the name suggests the result is only meaningful when
        	// the iterator sits on a character boundary of the original string;
        	// the value returned otherwise is not visible here -- confirm.
    188 	const TText16* CurrentPositionIfAtCharacter() const;
       
        	// Accessor counterpart of the iInOpenSequence member described below
        	// (NOTE(review): presumably returns it directly -- confirm).
    189 	TBool IsInOpenSequence() const;
       
    190 private:
       
    191 	TDecompositionIterator iBase;
       
    192 	// iBase.CurrentPosition() before the last move
       
    193 	const TText16* iLastPosition;
       
    194 	// If iCurrent is active, iCurrentCombiningClass
       
    195 	// is nonzero, and represents the combining class
       
    196 	// of the character it points to.
       
    197 	TInt iCurrentCombiningClass;
       
    198 	// contains true if more characters added to the end may change
       
    199 	// the characters currently being output
       
    200 	TBool iInOpenSequence;
       
    201 	// Iterator that looks for characters to be sorted.
       
    202 	TDecompositionIterator iCurrent;
       
    203 	};
       
   204 
       
    205 /**
       
    206 Iterator that gives the canonically decomposed form of
       
    207 its input, and allows a limited amount of look-ahead (i.e.
       
    208 peeking further into the decomposition without moving
       
    209 the iterator)
        No constructor is declared for this class.
        NOTE(review): iCacheStart and iCacheSize are presumably initialised by
        Set() -- confirm that Set() is always called before any other member.
       
    210 @internalComponent
       
    211 */
       
    212 class TCanonicalDecompositionIteratorCached
       
    213 	{
       
    214 public:
       
    215 	void Set(const TUTF32Iterator&);
       
    216 	TBool AtEnd() const;
       
    217 	// Advance aOffset characters.
       
    218 	void Next(TInt aOffset);
       
    219 	// Get the character at the position of the iterator plus aOffset steps.
       
    220 	// Returns FFFF if we are looking too far ahead.
       
        	// Not const, unlike AtEnd() and CurrentPositionIfAtCharacter().
        	// NOTE(review): presumably because it fills the look-ahead cache -- confirm.
    221 	TChar Get(TInt aOffset);
       
    222 	// If the current position in the original string is representable
       
    223 	// as a pointer into it and we know what it is, return it.
       
    224 	const TText16* CurrentPositionIfAtCharacter() const;
       
    225 private:
       
    226 	// KMaxLookAhead must be a power of 2
       
        	// NOTE(review): the power-of-2 requirement suggests the implementation
        	// wraps cache indices with a bit mask -- confirm before changing the value.
    227 	enum { KMaxLookAhead = 8 };
       
    228 	TCanonicalDecompositionIterator iBase;
       
        	// One cache entry: a character paired with a TText16 pointer.
        	// NOTE(review): iPos is presumably the source position of iChar -- confirm.
    229 	struct TCache
       
    230 		{
       
    231 		TChar iChar;
       
    232 		const TText16* iPos;
       
    233 		};
       
        	// The array has KMaxLookAhead + 1 entries, i.e. one more than the
        	// look-ahead constant.
    234 	TCache iCache[KMaxLookAhead + 1];
       
    235 	TInt iCacheStart;
       
    236 	TInt iCacheSize;
       
    237 	};
       
   238 
       
   239 #include "compareimp.inl"
       
   240 
       
   241 #endif //__COMPAREIMP_H__