author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Mon, 04 Oct 2010 02:35:35 +0300 | |
changeset 281 | 13fbfa31d2ba |
parent 0 | a41df078684a |
permissions | -rw-r--r-- |
281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
1 |
// Copyright (c) 2002-2010 Nokia Corporation and/or its subsidiary(-ies). |
0 | 2 |
// All rights reserved. |
3 |
// This component and the accompanying materials are made available |
|
4 |
// under the terms of the License "Eclipse Public License v1.0" |
|
5 |
// which accompanies this distribution, and is available |
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 |
// |
|
8 |
// Initial Contributors: |
|
9 |
// Nokia Corporation - initial contribution. |
|
10 |
// |
|
11 |
// Contributors: |
|
12 |
// |
|
13 |
// Description: |
|
14 |
// Folding and decomposition implementation |
|
15 |
// |
|
16 |
// |
|
17 |
||
18 |
#ifndef __COMPAREIMP_H__ |
|
19 |
#define __COMPAREIMP_H__ |
|
20 |
||
21 |
#include <e32std.h> |
|
22 |
#include <unicode.h> |
|
23 |
||
24 |
//Forward declarations |
|
25 |
class TUTF32Iterator; |
|
26 |
||
27 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
28 |
// Global functions |
|
29 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
30 |
||
281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
31 |
// Free-function declarations only; no bodies appear in this header.
// NOTE(review): the two functions declared inline here are presumably defined
// in CompareImp.inl, which this header includes near its end - confirm.
inline TChar UTF16ToChar(const TText16* a); |
0 | 32 |
TBool IsBaseCharacter(TChar); |
33 |
||
34 |
// The iterator-based matchers below take both iterators by non-const
// reference. NOTE(review): callers should presumably expect the iterators to
// be advanced by the call - confirm against the implementation.
TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
35 |
||
36 |
TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
37 |
||
38 |
// Pointer-pair variant: candidate and search term are each passed as a
// start/end pair of TText16 pointers.
// NOTE(review): presumably half-open [start, end) ranges - confirm.
TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
39 |
const TText16* aSearchTermStart, const TText16* aSearchTermEnd); |
|
40 |
||
41 |
// Same parameter list as MatchStringFolded but returns TInt instead of TBool.
// NOTE(review): presumably an offset or a negative error code - confirm.
TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd, |
|
42 |
const TText16* aSearchTermStart, const TText16* aSearchTermEnd); |
|
43 |
||
44 |
TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm); |
|
45 |
||
46 |
// Takes both iterators by const reference, unlike the matchers above.
TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight); |
|
47 |
||
48 |
// aCombined is a non-const reference, presumably an output parameter.
// NOTE(review): the meaning of the TInt result is not visible here - confirm.
TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined); |
|
49 |
||
50 |
// aResult is a non-const reference, presumably receiving the decomposition of
// aCh. NOTE(review): the TBool presumably reports whether one exists - confirm.
TBool DecomposeChar(TChar aCh, TPtrC16& aResult); |
|
51 |
||
52 |
inline void SkipCombiningCharacters(TUTF32Iterator& aUTF32It); |
|
53 |
||
54 |
/** |
|
55 |
Converts UTF16 into UTF32, ignoring non-characters and |
|
56 |
unpaired surrogates and combining paired surrogates. |
|
57 |
@internalComponent |
|
58 |
*/ |
|
59 |
class TUTF32Iterator |
|
60 |
{ |
|
61 |
public: |
|
62 |
// Single-value tag type whose only use in this header is to select the
// three-argument constructor overload below.
// NOTE(review): presumably tells that constructor the range already begins on
// a valid character - confirm in CompareImp.inl.
enum TStartsWithValidCharacter { EStartsWithValidCharacter }; |
|
63 |
inline TUTF32Iterator(); |
|
64 |
// explicit: a bare TText16 pointer does not convert implicitly to an iterator.
inline explicit TUTF32Iterator(const TText16* aSingleton); |
|
65 |
inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd); |
|
66 |
inline TUTF32Iterator(const TText16* aStart, const TText16* aEnd, TStartsWithValidCharacter); |
|
67 |
||
68 |
inline TUTF32Iterator CurrentAsIterator() const; |
|
69 |
inline TBool AtEnd() const; |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
70 |
inline void Next(); |
0 | 71 |
inline TChar Current() const; |
72 |
// The only member function of this class that is not declared inline.
TBool LocateFoldedBaseCharacter(TChar aChar); |
|
73 |
inline const TText16* CurrentPosition() const; |
|
74 |
inline TInt Length() const; |
|
75 |
inline TInt operator[](TInt) const; |
|
76 |
inline void SetStart(const TText16*); |
|
77 |
private: |
|
78 |
// NOTE(review): iStart/iEnd presumably bound the not-yet-consumed UTF-16
// input, and iCurrent presumably caches the UTF-32 character decoded at the
// current position - confirm in CompareImp.inl.
const TText16* iStart; |
|
79 |
const TText16* iEnd; |
|
80 |
TChar iCurrent; |
|
81 |
}; |
|
82 |
||
83 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
84 |
// FOLDING |
|
85 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
86 |
||
87 |
/** |
|
88 |
@internalComponent |
|
89 |
*/ |
|
90 |
// Holds two TUTF32Iterator members, iOriginal and iFolded.
// NOTE(review): presumably iOriginal walks the source text while iFolded walks
// the folded/decomposed expansion of the current character - confirm in the
// implementation file.
class TFoldedDecompIterator |
|
91 |
{ |
|
92 |
public: |
|
93 |
inline TFoldedDecompIterator(); |
|
94 |
explicit TFoldedDecompIterator(const TUTF32Iterator&); |
|
95 |
inline void Set(const TUTF32Iterator&); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
96 |
inline TBool AtEnd() const; |
0 | 97 |
TBool AtEndOrWildcard() const; |
98 |
// Both "enter" functions are non-const and return TBool.
// NOTE(review): how the strict variant differs is not visible here - confirm.
TBool EnterFoldedSequence(); |
|
99 |
TBool StrictEnterFoldedSequence(); |
|
100 |
inline TBool IsInFoldedSequence() const; |
|
101 |
TBool CurrentIsBaseFoldedFromCombiner() const; |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
102 |
inline TChar Current() const; |
0 | 103 |
// Two Match overloads: against a single character code, or against another
// iterator taken by non-const reference.
// NOTE(review): both are non-const, so presumably a match advances the
// iterator(s) - confirm.
TBool Match(TChar aCode); |
104 |
TBool Match(TFoldedDecompIterator& aThat); |
|
105 |
void Next(); |
|
106 |
inline TUTF32Iterator BaseIterator() const; |
|
107 |
private: |
|
108 |
TUTF32Iterator iOriginal; |
|
109 |
TUTF32Iterator iFolded; |
|
110 |
}; |
|
111 |
||
112 |
/** |
|
113 |
Sorts sequences of combining characters with non-zero combining classes into |
|
114 |
order of their combining classes. |
|
115 |
@internalComponent |
|
116 |
*/ |
|
117 |
class TFoldedSortedDecompIterator |
|
118 |
{ |
|
119 |
public: |
|
120 |
inline TFoldedSortedDecompIterator(); |
|
121 |
// Two Set overloads: one takes the base iterator by non-const reference and
// returns a TInt; the argument-less one returns nothing.
// NOTE(review): the meaning of the TInt, and what Set() with no argument does
// (presumably a reset), are not visible in this header - confirm.
TInt Set(TFoldedDecompIterator &aBase); |
|
122 |
void Set(); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
123 |
inline TBool AtEnd() const; |
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
124 |
inline TChar Current() const; |
0 | 125 |
void Next(); |
126 |
||
127 |
private: |
|
128 |
// State is two TFoldedDecompIterator positions (iStart, iCurrent) plus four
// integer counters; each field's role is given by its trailing comment.
TFoldedDecompIterator iStart; // Starting code. |
|
129 |
TInt iLength; // Length in decomposed codes. |
|
130 |
TFoldedDecompIterator iCurrent; // Current code. |
|
131 |
TInt iCurrentCount; // Number of decomposed codes iCurrent is past iStart |
|
132 |
TInt iCurrentClass; // Current class being searched for. |
|
133 |
TInt iRemaining; // Number of Next()s left |
|
134 |
}; |
|
135 |
||
136 |
/** |
|
137 |
Iterator that outputs canonically decomposed folded strings. |
|
138 |
This is much slower than using the matching functions, so should only |
|
139 |
be used where an ordering is required. |
|
140 |
@internalComponent |
|
141 |
*/ |
|
142 |
class TFoldedCanonicalIterator |
|
143 |
{ |
|
144 |
public: |
|
145 |
// Not declared explicit (unlike the single-argument constructors of
// TFoldedDecompIterator and TDecompositionIterator), so a TUTF32Iterator
// converts implicitly to this type. No default constructor is declared.
TFoldedCanonicalIterator(const TUTF32Iterator&); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
146 |
inline TBool AtEnd() const; |
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
147 |
inline TChar Current() const; |
0 | 148 |
// Unlike Next() on the other iterators in this header, this one takes a
// TUnicodeDataSet pointer.
// NOTE(review): whether a null pointer is accepted is not visible here - confirm.
void Next(const TUnicodeDataSet* aCharDataSet); |
149 |
private: |
|
150 |
// NOTE(review): presumably iSorted re-orders the combining sequences that
// iBase produces - confirm in the implementation file.
TFoldedDecompIterator iBase; |
|
151 |
TFoldedSortedDecompIterator iSorted; |
|
152 |
}; |
|
153 |
||
154 |
||
155 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
156 |
// COLLATION |
|
157 |
////////////////////////////////////////////////////////////////////////////////////////////// |
|
158 |
||
159 |
/** |
|
160 |
@internalComponent |
|
161 |
*/ |
|
162 |
// Declared in the COLLATION section; holds two TUTF32Iterator members,
// iBase and iDecomposition.
// NOTE(review): presumably iBase walks the input while iDecomposition walks
// the decomposition of the current input character - confirm.
class TDecompositionIterator |
|
163 |
{ |
|
164 |
public: |
|
165 |
inline TDecompositionIterator(); |
|
166 |
// Set() and the explicit constructor below take the same argument type.
// NOTE(review): presumably constructing from an iterator is equivalent to
// default construction followed by Set() - confirm.
void Set(const TUTF32Iterator&); |
|
167 |
explicit TDecompositionIterator(const TUTF32Iterator&); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
168 |
inline TBool AtEnd() const; |
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
169 |
inline TChar Current() const; |
0 | 170 |
void Next(); |
171 |
// Returns a pointer into TText16 data.
// NOTE(review): presumably the position within the original UTF-16 input,
// not within a decomposition - confirm.
const TText16* CurrentPosition() const; |
|
172 |
private: |
|
173 |
TUTF32Iterator iBase; |
|
174 |
TUTF32Iterator iDecomposition; |
|
175 |
}; |
|
176 |
||
177 |
/** |
|
178 |
@internalComponent |
|
179 |
*/ |
|
180 |
// Built on two TDecompositionIterator members (iBase and iCurrent) plus
// combining-class bookkeeping; see the comments on the private members.
// Only a default constructor is declared, so input is supplied through Set().
class TCanonicalDecompositionIterator |
|
181 |
{ |
|
182 |
public: |
|
183 |
inline TCanonicalDecompositionIterator(); |
|
184 |
void Set(const TUTF32Iterator&); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
185 |
inline TBool AtEnd() const; |
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
186 |
inline TChar Current() const; |
0 | 187 |
void Next(); |
188 |
// NOTE(review): presumably returns a null pointer when the iterator is not
// positioned on a character boundary of the original text - confirm.
const TText16* CurrentPositionIfAtCharacter() const; |
|
189 |
// NOTE(review): presumably an accessor for iInOpenSequence, whose meaning is
// documented on that member - confirm.
TBool IsInOpenSequence() const; |
|
190 |
private: |
|
191 |
TDecompositionIterator iBase; |
|
192 |
// iBase.CurrentPosition() before the last move |
|
193 |
const TText16* iLastPosition; |
|
194 |
// If iCurrent is active, iCurrentCombiningClass |
|
195 |
// is nonzero, and represents the combining class |
|
196 |
// of the character it points to. |
|
197 |
TInt iCurrentCombiningClass; |
|
198 |
// contains true if more characters added to the end may change |
|
199 |
// the characters currently being output |
|
200 |
TBool iInOpenSequence; |
|
201 |
// Iterator that looks for characters to be sorted. |
|
202 |
TDecompositionIterator iCurrent; |
|
203 |
}; |
|
204 |
||
205 |
/** |
|
206 |
Iterator that gives the canonically decomposed form of |
|
207 |
its input, and allows a limited amount of look-ahead (i.e. |
|
208 |
peeking further into the decomposition without moving |
|
209 |
the iterator) |
|
210 |
@internalComponent |
|
211 |
*/ |
|
212 |
class TCanonicalDecompositionIteratorCached |
|
213 |
{ |
|
214 |
public: |
|
215 |
// No constructor is declared for this class.
// NOTE(review): callers presumably must call Set() before any other member so
// that iCacheStart and iCacheSize hold defined values - confirm.
void Set(const TUTF32Iterator&); |

281
13fbfa31d2ba
Revision: 201039
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
216 |
inline TBool AtEnd() const; |
0 | 217 |
// Advance aOffset characters. |
218 |
void Next(TInt aOffset); |
|
219 |
// Get the character at the position of the iterator plus aOffset steps. |
|
220 |
// Returns FFFF if we are looking too far ahead. |
|
221 |
// Get() is non-const, unlike AtEnd() and CurrentPositionIfAtCharacter().
// NOTE(review): presumably it fills the look-ahead cache on demand - confirm.
TChar Get(TInt aOffset); |
|
222 |
// If the current position in the original string is representable |
|
223 |
// as a pointer into it and we know what it is, return it. |
|
224 |
const TText16* CurrentPositionIfAtCharacter() const; |
|
225 |
private: |
|
226 |
// KMaxLookAhead must be a power of 2 |
|
227 |
enum { KMaxLookAhead = 8 }; |
|
228 |
TCanonicalDecompositionIterator iBase; |
|
229 |
// One cache slot: a character paired with a pointer into TText16 data.
// NOTE(review): iPos is presumably the character's position in the original
// string - confirm.
struct TCache |
|
230 |
{ |
|
231 |
TChar iChar; |
|
232 |
const TText16* iPos; |
|
233 |
}; |
|
234 |
// KMaxLookAhead + 1 = 9 slots.
// NOTE(review): the power-of-2 requirement above suggests index masking, yet
// the array holds one slot more than KMaxLookAhead - confirm how indices wrap
// in the implementation before changing either value.
TCache iCache[KMaxLookAhead + 1]; |
|
235 |
// NOTE(review): presumably the index of the first valid slot and the number
// of valid slots in iCache - confirm.
TInt iCacheStart; |
|
236 |
TInt iCacheSize; |
|
237 |
}; |
|
238 |
||
239 |
#include "CompareImp.inl" |
|
240 |
||
241 |
#endif //__COMPAREIMP_H__ |