|
1 // Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of the License "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Folding and decomposition implementation |
|
15 // |
|
16 // |
|
17 |
|
18 #ifndef __COMPAREIMP_H__ |
|
19 #define __COMPAREIMP_H__ |
|
20 |
|
21 #include <e32std.h> |
|
22 #include <unicode.h> |
|
23 |
|
24 //Forward declarations |
|
25 class TUTF32Iterator; |
|
26 |
|
27 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
28 // Global functions |
|
29 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
30 |
|
31 TChar UTF16ToChar(const TText16* a); |
|
32 TBool IsBaseCharacter(TChar); |
|
33 |
|
34 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
35 |
|
36 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
37 |
|
38 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
39 const TText16* aSearchTermStart, const TText16* aSearchTermEnd); |
|
40 |
|
41 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
42 const TText16* aSearchTermStart, const TText16* aSearchTermEnd); |
|
43 |
|
44 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
45 |
|
46 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight); |
|
47 |
|
48 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined); |
|
49 |
|
50 TBool DecomposeChar(TChar aCh, TPtrC16& aResult); |
|
51 |
|
52 inline void SkipCombiningCharacters(TUTF32Iterator& aUTF32It); |
|
53 |
|
54 /** |
|
55 Converts UTF16 into UTF32, ignoring non-characters and |
|
56 unpaired surrogates and combining paired surrogates. |
|
57 @internalComponent |
|
58 */ |
|
59 class TUTF32Iterator |
|
60 { |
|
61 public: |
|
62 enum TStartsWithValidCharacter { EStartsWithValidCharacter }; |
|
63 inline TUTF32Iterator(); |
|
64 inline explicit TUTF32Iterator(const TText16* aSingleton); |
|
65 inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd); |
|
66 inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd, TStartsWithValidCharacter); |
|
67 |
|
68 inline TUTF32Iterator CurrentAsIterator() const; |
|
69 inline TBool AtEnd() const; |
|
70 void Next(); |
|
71 inline TChar Current() const; |
|
72 TBool LocateFoldedBaseCharacter(TChar aChar); |
|
73 inline const TText16* CurrentPosition() const; |
|
74 inline TInt Length() const; |
|
75 inline TInt operator[](TInt) const; |
|
76 inline void SetStart(const TText16*); |
|
77 private: |
|
78 const TText16* iStart; |
|
79 const TText16* iEnd; |
|
80 TChar iCurrent; |
|
81 }; |
|
82 |
|
83 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
84 // FOLDING |
|
85 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
86 |
|
87 /** |
|
88 @internalComponent |
|
89 */ |
|
90 class TFoldedDecompIterator |
|
91 { |
|
92 public: |
|
93 inline TFoldedDecompIterator(); |
|
94 explicit TFoldedDecompIterator(const TUTF32Iterator&); |
|
95 inline void Set(const TUTF32Iterator&); |
|
96 TBool AtEnd() const; |
|
97 TBool AtEndOrWildcard() const; |
|
98 TBool EnterFoldedSequence(); |
|
99 TBool StrictEnterFoldedSequence(); |
|
100 inline TBool IsInFoldedSequence() const; |
|
101 TBool CurrentIsBaseFoldedFromCombiner() const; |
|
102 TChar Current() const; |
|
103 TBool Match(TChar aCode); |
|
104 TBool Match(TFoldedDecompIterator& aThat); |
|
105 void Next(); |
|
106 inline TUTF32Iterator BaseIterator() const; |
|
107 private: |
|
108 TUTF32Iterator iOriginal; |
|
109 TUTF32Iterator iFolded; |
|
110 }; |
|
111 |
|
112 /** |
|
113 Sorts sequences of combining characters with non-zero combining classes into |
|
114 order of their combining classes. |
|
115 @internalComponent |
|
116 */ |
|
117 class TFoldedSortedDecompIterator |
|
118 { |
|
119 public: |
|
120 inline TFoldedSortedDecompIterator(); |
|
121 TInt Set(TFoldedDecompIterator &aBase); |
|
122 void Set(); |
|
123 TBool AtEnd() const; |
|
124 TChar Current() const; |
|
125 void Next(); |
|
126 |
|
127 private: |
|
128 TFoldedDecompIterator iStart; // Starting code. |
|
129 TInt iLength; // Length in decomposed codes. |
|
130 TFoldedDecompIterator iCurrent; // Current code. |
|
131 TInt iCurrentCount; // Number of decomposed codes iCurrent is past iStart |
|
132 TInt iCurrentClass; // Current class being searched for. |
|
133 TInt iRemaining; // Number of Next()s left |
|
134 }; |
|
135 |
|
136 /** |
|
137 Iterator that outputs canonically decomposed folded strings. |
|
138 This is much slower than using the matching functions, so should only |
|
139 be used where an ordering is required. |
|
140 @internalComponent |
|
141 */ |
|
142 class TFoldedCanonicalIterator |
|
143 { |
|
144 public: |
|
145 TFoldedCanonicalIterator(const TUTF32Iterator&); |
|
146 TBool AtEnd() const; |
|
147 TChar Current() const; |
|
148 void Next(const TUnicodeDataSet* aCharDataSet); |
|
149 private: |
|
150 TFoldedDecompIterator iBase; |
|
151 TFoldedSortedDecompIterator iSorted; |
|
152 }; |
|
153 |
|
154 |
|
155 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
156 // COLLATION |
|
157 ////////////////////////////////////////////////////////////////////////////////////////////// |
|
158 |
|
159 /** |
|
160 @internalComponent |
|
161 */ |
|
162 class TDecompositionIterator |
|
163 { |
|
164 public: |
|
165 inline TDecompositionIterator(); |
|
166 void Set(const TUTF32Iterator&); |
|
167 explicit TDecompositionIterator(const TUTF32Iterator&); |
|
168 TBool AtEnd() const; |
|
169 TChar Current() const; |
|
170 void Next(); |
|
171 const TText16* CurrentPosition() const; |
|
172 private: |
|
173 TUTF32Iterator iBase; |
|
174 TUTF32Iterator iDecomposition; |
|
175 }; |
|
176 |
|
177 /** |
|
178 @internalComponent |
|
179 */ |
|
180 class TCanonicalDecompositionIterator |
|
181 { |
|
182 public: |
|
183 inline TCanonicalDecompositionIterator(); |
|
184 void Set(const TUTF32Iterator&); |
|
185 TBool AtEnd() const; |
|
186 TChar Current() const; |
|
187 void Next(); |
|
188 const TText16* CurrentPositionIfAtCharacter() const; |
|
189 TBool IsInOpenSequence() const; |
|
190 private: |
|
191 TDecompositionIterator iBase; |
|
192 // iBase.CurrentPosition() before the last move |
|
193 const TText16* iLastPosition; |
|
194 // If iCurrent is active, iCurrentCombiningClass |
|
195 // is nonzero, and represents the combining class |
|
196 // of the character it points to. |
|
197 TInt iCurrentCombiningClass; |
|
198 // contains true if more characters added to the end may change |
|
199 // the characters currently being output |
|
200 TBool iInOpenSequence; |
|
201 // Iterator that looks for characters to be sorted. |
|
202 TDecompositionIterator iCurrent; |
|
203 }; |
|
204 |
|
205 /** |
|
206 Iterator that gives the canonically decomposed form of |
|
207 its input, and allows a limited amount of look-ahead (i.e. |
|
208 peeking further into the decomposition without moving |
|
209 the iterator) |
|
210 @internalComponent |
|
211 */ |
|
212 class TCanonicalDecompositionIteratorCached |
|
213 { |
|
214 public: |
|
215 void Set(const TUTF32Iterator&); |
|
216 TBool AtEnd() const; |
|
217 // Advance aOffset characters. |
|
218 void Next(TInt aOffset); |
|
219 // Get the character at the position of the iterator plus aOffset steps. |
|
220 // Returns FFFF if we are looking too far ahead. |
|
221 TChar Get(TInt aOffset); |
|
222 // If the current position in the original string is representable |
|
223 // as a pointer into it and we know what it is, return it. |
|
224 const TText16* CurrentPositionIfAtCharacter() const; |
|
225 private: |
|
226 // KMaxLookAhead must be a power of 2 |
|
227 enum { KMaxLookAhead = 8 }; |
|
228 TCanonicalDecompositionIterator iBase; |
|
229 struct TCache |
|
230 { |
|
231 TChar iChar; |
|
232 const TText16* iPos; |
|
233 }; |
|
234 TCache iCache[KMaxLookAhead + 1]; |
|
235 TInt iCacheStart; |
|
236 TInt iCacheSize; |
|
237 }; |
|
238 |
|
239 #include "CompareImp.inl" |
|
240 |
|
241 #endif //__COMPAREIMP_H__ |