45
|
1 |
// Copyright (c) 2003-2010 Nokia Corporation and/or its subsidiary(-ies).
|
|
2 |
// All rights reserved.
|
|
3 |
// This component and the accompanying materials are made available
|
|
4 |
// under the terms of "Eclipse Public License v1.0"
|
|
5 |
// which accompanies this distribution, and is available
|
|
6 |
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
|
7 |
//
|
|
8 |
// Initial Contributors:
|
|
9 |
// Nokia Corporation - initial contribution.
|
|
10 |
//
|
|
11 |
// Contributors:
|
|
12 |
//
|
|
13 |
// Description:
|
|
14 |
//
|
|
15 |
|
|
16 |
|
|
17 |
#include <e32std.h>
|
|
18 |
#include <e32svr.h>
|
|
19 |
#include "LineBreak.h"
|
|
20 |
#include "LineBreakImp.h"
|
|
21 |
#include "GlyphSel.h"
|
|
22 |
|
|
23 |
const TText16 KThaiCodePageStart = 0x0E00;
|
|
24 |
const TText16 KThaiCodePageEnd = 0x0E5C;
|
|
25 |
const TUint KNumThaiCharacters = KThaiCodePageEnd - KThaiCodePageStart;
|
|
26 |
|
|
27 |
/**
|
|
28 |
Encapsulates the rules for line breaking within Thai character sequences.
|
|
29 |
@internalComponent
|
|
30 |
*/
|
|
31 |
class ThaiLinebreakRules
|
|
32 |
{
|
|
33 |
public:
|
|
34 |
enum TCharClassification
|
|
35 |
{
|
|
36 |
EOutOfRange,
|
|
37 |
|
|
38 |
EConsOAng,
|
|
39 |
EConsYoYak,
|
|
40 |
EConsHoHip,
|
|
41 |
EConsWoWean,
|
|
42 |
EConsDigraph,
|
|
43 |
EConsOther,
|
|
44 |
|
|
45 |
EPostVowelA,
|
|
46 |
EPostVowelAA,
|
|
47 |
EPostVowelOther,
|
|
48 |
EPreVowel,
|
|
49 |
|
|
50 |
EDepMaiHanAkat,
|
|
51 |
EDepSaraI,
|
|
52 |
EDepOther,
|
|
53 |
|
|
54 |
// marker for end
|
|
55 |
EMaxClassification
|
|
56 |
};
|
|
57 |
enum
|
|
58 |
{
|
|
59 |
KOutOfRangeFlag = 1 << EOutOfRange,
|
|
60 |
KConsOAngFlag = 1 << EConsOAng,
|
|
61 |
KConsYoYakFlag = 1 << EConsYoYak,
|
|
62 |
KConsHoHipFlag = 1 << EConsHoHip,
|
|
63 |
KConsWoWeanFlag = 1 << EConsWoWean,
|
|
64 |
KConsDigraphFlag = 1 << EConsDigraph,
|
|
65 |
KConsOtherFlag = 1 << EConsOther,
|
|
66 |
KPostVowelAFlag = 1 << EPostVowelA,
|
|
67 |
KPostVowelAAFlag = 1 << EPostVowelAA,
|
|
68 |
KPostVowelOtherFlag = 1 << EPostVowelOther,
|
|
69 |
KPreVowelFlag = 1 << EPreVowel,
|
|
70 |
KDepMaiHanAkatFlag = 1 << EDepMaiHanAkat,
|
|
71 |
KDepSaraIFlag = 1 << EDepSaraI,
|
|
72 |
KDepOtherFlag = 1 << EDepOther,
|
|
73 |
|
|
74 |
KSpecialDepFlags = KDepMaiHanAkatFlag | KDepSaraIFlag,
|
|
75 |
KPostVowelFlags = KPostVowelAFlag | KPostVowelAAFlag | KPostVowelOtherFlag,
|
|
76 |
KConsFlags = KConsOtherFlag | KConsDigraphFlag | KConsWoWeanFlag
|
|
77 |
| KConsHoHipFlag | KConsYoYakFlag | KConsOAngFlag,
|
|
78 |
KAllFlags = KOutOfRangeFlag | KConsOAngFlag | KConsYoYakFlag
|
|
79 |
| KConsHoHipFlag | KConsWoWeanFlag | KConsDigraphFlag
|
|
80 |
| KConsOtherFlag | KPostVowelAFlag | KPostVowelAAFlag
|
|
81 |
| KPostVowelOtherFlag | KPreVowelFlag | KDepMaiHanAkatFlag
|
|
82 |
| KDepSaraIFlag | KDepOtherFlag
|
|
83 |
};
|
|
84 |
|
|
85 |
/** Returns the Thai linebreaking class of the character. */
|
|
86 |
static TCharClassification Class(TInt aChar);
|
|
87 |
/** Returns true if aChar is a combining character according to aBreaker. */
|
|
88 |
static TBool IsCombiningChar(TInt aChar, const MLineBreaker& aBreaker);
|
|
89 |
/** Returns the Thai linebreaking class of the character at (*aChar),
|
|
90 |
searching backwards for the base character if it is foreign and of type CM.
|
|
91 |
@param aChar The position of the character within the string.
|
|
92 |
@param aStart The start of the string.
|
|
93 |
@param aBreaker The line breaker to query for line breaking class.
|
|
94 |
*/
|
|
95 |
static TCharClassification DerivedClass(const TText* aChar,
|
|
96 |
const TText* aStart, const MLineBreaker& aBreaker);
|
|
97 |
/**
|
|
98 |
Gets the line break rule for the previous and current character pair
|
|
99 |
@param aPrevClass Class of the previous character.
|
|
100 |
@param aClass Class of the current character.
|
|
101 |
@return the rule code corresponding to the input pair.
|
|
102 |
*/
|
|
103 |
static TBool BreakAllowedBetween(
|
|
104 |
TCharClassification aPrevClass, TCharClassification aClass);
|
|
105 |
/** Returns whether a line break is allowed before the SA character at
|
|
106 |
(*aChar).
|
|
107 |
@param aChar The position of the character within the string.
|
|
108 |
@param aStart The start of the string.
|
|
109 |
@param aBreaker The line breaker to query for line breaking class.
|
|
110 |
*/
|
|
111 |
static TBool BreakAllowedAt(const TText* aChar,
|
|
112 |
const TText* aStart, const MLineBreaker& aBreaker);
|
|
113 |
|
|
114 |
/**
|
|
115 |
Classification of each character
|
|
116 |
*/
|
|
117 |
static const TUint8 KCharClassifications[KNumThaiCharacters];
|
|
118 |
|
|
119 |
/**
|
|
120 |
Rules table of prev to next character
|
|
121 |
*/
|
|
122 |
static const TUint32 KRules[EMaxClassification];
|
|
123 |
};
|
|
124 |
|
|
125 |
// Classification of every code point in [KThaiCodePageStart, KThaiCodePageEnd),
// indexed by (code point - KThaiCodePageStart).
const TUint8 ThaiLinebreakRules::KCharClassifications[KNumThaiCharacters] =
{
    // U+0E00 .. U+0E0F
    EOutOfRange,        // U+0E00
    EConsOther,         // U+0E01
    EConsOther,         // U+0E02
    EConsOther,         // U+0E03
    EConsOther,         // U+0E04
    EConsOther,         // U+0E05
    EConsOther,         // U+0E06
    EConsDigraph,       // U+0E07
    EConsOther,         // U+0E08
    EConsOther,         // U+0E09
    EConsOther,         // U+0E0A
    EConsOther,         // U+0E0B
    EConsOther,         // U+0E0C
    EConsOther,         // U+0E0D
    EConsOther,         // U+0E0E
    EConsOther,         // U+0E0F

    // U+0E10 .. U+0E1F
    EConsOther,         // U+0E10
    EConsOther,         // U+0E11
    EConsOther,         // U+0E12
    EConsOther,         // U+0E13
    EConsOther,         // U+0E14
    EConsOther,         // U+0E15
    EConsOther,         // U+0E16
    EConsOther,         // U+0E17
    EConsOther,         // U+0E18
    EConsDigraph,       // U+0E19
    EConsOther,         // U+0E1A
    EConsOther,         // U+0E1B
    EConsOther,         // U+0E1C
    EConsOther,         // U+0E1D
    EConsOther,         // U+0E1E
    EConsOther,         // U+0E1F

    // U+0E20 .. U+0E2F
    EConsOther,         // U+0E20
    EConsDigraph,       // U+0E21
    EConsYoYak,         // U+0E22
    EConsDigraph,       // U+0E23
    EConsOther,         // U+0E24
    EConsDigraph,       // U+0E25
    EConsOther,         // U+0E26
    EConsWoWean,        // U+0E27
    EConsOther,         // U+0E28
    EConsOther,         // U+0E29
    EConsOther,         // U+0E2A
    EConsHoHip,         // U+0E2B
    EConsOther,         // U+0E2C
    EConsOAng,          // U+0E2D
    EConsOther,         // U+0E2E
    EOutOfRange,        // U+0E2F

    // U+0E30 .. U+0E3F
    EPostVowelA,        // U+0E30
    EDepMaiHanAkat,     // U+0E31
    EPostVowelAA,       // U+0E32
    EPostVowelOther,    // U+0E33
    EDepSaraI,          // U+0E34
    EDepOther,          // U+0E35
    EDepOther,          // U+0E36
    EDepOther,          // U+0E37
    EDepOther,          // U+0E38
    EDepOther,          // U+0E39
    EDepOther,          // U+0E3A
    EOutOfRange,        // U+0E3B
    EOutOfRange,        // U+0E3C
    EOutOfRange,        // U+0E3D
    EOutOfRange,        // U+0E3E
    EOutOfRange,        // U+0E3F

    // U+0E40 .. U+0E4F
    EPreVowel,          // U+0E40
    EPreVowel,          // U+0E41
    EPreVowel,          // U+0E42
    EPreVowel,          // U+0E43
    EPreVowel,          // U+0E44
    EPostVowelOther,    // U+0E45
    EOutOfRange,        // U+0E46
    EDepOther,          // U+0E47
    EDepOther,          // U+0E48
    EDepOther,          // U+0E49
    EDepOther,          // U+0E4A
    EDepOther,          // U+0E4B
    EDepOther,          // U+0E4C
    EDepOther,          // U+0E4D
    EDepOther,          // U+0E4E
    EOutOfRange,        // U+0E4F

    // U+0E50 .. U+0E5B
    EOutOfRange,        // U+0E50
    EOutOfRange,        // U+0E51
    EOutOfRange,        // U+0E52
    EOutOfRange,        // U+0E53
    EOutOfRange,        // U+0E54
    EOutOfRange,        // U+0E55
    EOutOfRange,        // U+0E56
    EOutOfRange,        // U+0E57
    EOutOfRange,        // U+0E58
    EOutOfRange,        // U+0E59
    EOutOfRange,        // U+0E5A
    EOutOfRange         // U+0E5B
};
|
|
229 |
|
|
230 |
// Base mask used by the KRules rows whose previous character is a consonant:
// the classes of the next character before which a break is allowed.
const TUint32 KNormalBreaksBeforeCons =
    ThaiLinebreakRules::KOutOfRangeFlag
    | ThaiLinebreakRules::KConsHoHipFlag
    | ThaiLinebreakRules::KConsDigraphFlag
    | ThaiLinebreakRules::KConsOtherFlag
    | ThaiLinebreakRules::KPreVowelFlag;

// Base mask used by the KRules rows whose previous character is a post-vowel:
// the classes of the next character before which a break is allowed.
const TUint32 KNormalBreaksBeforePostVowel =
    ThaiLinebreakRules::KOutOfRangeFlag
    | ThaiLinebreakRules::KConsFlags
    | ThaiLinebreakRules::KPostVowelFlags
    | ThaiLinebreakRules::KPreVowelFlag;
|
|
241 |
// One row per class of the previous character, in TCharClassification order.
// A set bit (1 << nextClass) means a break is allowed between a character of
// the row's class and a following character of class nextClass.
const TUint32 ThaiLinebreakRules::KRules[EMaxClassification] =
{
    // Previous: EOutOfRange
    KAllFlags & ~KOutOfRangeFlag,

    // Previous: EConsOAng
    KNormalBreaksBeforeCons,

    // Previous: EConsYoYak
    KNormalBreaksBeforeCons,

    // Previous: EConsHoHip
    KNormalBreaksBeforeCons & ~KConsDigraphFlag,

    // Previous: EConsWoWean
    KNormalBreaksBeforeCons & ~(KConsDigraphFlag | KConsHoHipFlag),

    // Previous: EConsDigraph
    KNormalBreaksBeforeCons,

    // Previous: EConsOther
    KNormalBreaksBeforeCons,

    // Previous: EPostVowelA
    KNormalBreaksBeforePostVowel,

    // Previous: EPostVowelAA
    KNormalBreaksBeforePostVowel & ~KPostVowelAFlag,

    // Previous: EPostVowelOther
    KNormalBreaksBeforePostVowel,

    // Previous: EPreVowel
    KOutOfRangeFlag | KPostVowelFlags | KPreVowelFlag,

    // Previous: EDepMaiHanAkat
    KOutOfRangeFlag | KPostVowelAFlag | KPostVowelAAFlag
        | KPreVowelFlag | KSpecialDepFlags,

    // Previous: EDepSaraI
    KOutOfRangeFlag | KPostVowelAFlag | KPostVowelAAFlag
        | KPreVowelFlag | KSpecialDepFlags,

    // Previous: EDepOther
    KOutOfRangeFlag | KConsHoHipFlag | KConsWoWeanFlag
        | KConsDigraphFlag | KConsOtherFlag | KPostVowelAFlag
        | KPostVowelAAFlag | KPreVowelFlag | KSpecialDepFlags
};
|
|
276 |
|
|
277 |
ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::Class(TInt a)
{
    // Anything outside [KThaiCodePageStart, KThaiCodePageEnd) has no table
    // entry and is reported as EOutOfRange.
    if (a < KThaiCodePageStart || KThaiCodePageEnd <= a)
        return EOutOfRange;

    // The table is indexed by offset from the first covered code point.
    return static_cast<TCharClassification>(
        KCharClassifications[a - KThaiCodePageStart]);
}
|
|
284 |
|
|
285 |
TBool ThaiLinebreakRules::IsCombiningChar(TInt aChar,
    const MLineBreaker& aBreaker)
{
    // LineBreakClass() fills in two output arguments that are not needed here.
    TUint unusedOut1;
    TUint unusedOut2;
    return MLineBreaker::ECmLineBreakClass
        == aBreaker.LineBreakClass(aChar, unusedOut1, unusedOut2);
}
|
|
291 |
|
|
292 |
ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::DerivedClass(
    const TText* aChar, const TText* aStart, const MLineBreaker& aBreaker)
{
    TCharClassification derived = Class(*aChar);
    for (;;)
    {
        // A character with a table classification needs no further search.
        if (derived != EOutOfRange)
            break;
        // Never step back past the start of the string.
        if (aChar == aStart)
            break;
        // Only combining characters take their class from what precedes them.
        if (!IsCombiningChar(*aChar, aBreaker))
            break;

        --aChar;
        derived = Class(*aChar);
    }
    return derived;
}
|
|
304 |
|
|
305 |
TBool ThaiLinebreakRules::BreakAllowedBetween(
    ThaiLinebreakRules::TCharClassification aClass,
    ThaiLinebreakRules::TCharClassification aNextClass)
{
    // Row for the earlier character: mask of following classes before which
    // a break is allowed.
    const TUint32 allowedFollowers = KRules[aClass];
    // Bit that represents the class of the later character.
    const TUint32 followerBit = 1 << aNextClass;
    // Non-zero when that bit is set in the row.
    return allowedFollowers & followerBit;
}
|
|
311 |
|
|
312 |
TBool ThaiLinebreakRules::BreakAllowedAt(const TText* aChar,
    const TText* aStart, const MLineBreaker& aBreaker)
{
    // There must be at least one character before the candidate position.
    __ASSERT_DEBUG(aStart < aChar, User::Invariant());

    const TCharClassification current = Class(*aChar);

    // Never break directly before an unclassified combining character.
    const TBool unclassifiedCombining =
        current == EOutOfRange && IsCombiningChar(*aChar, aBreaker);
    if (unclassifiedCombining)
        return EFalse;

    // Otherwise consult the rules table using the (derived) class of the
    // preceding character.
    const TCharClassification previous =
        DerivedClass(aChar - 1, aStart, aBreaker);
    return BreakAllowedBetween(previous, current);
}
|
|
321 |
|
|
322 |
/**
|
|
323 |
Returns whether a line break is possible within a run of characters all having
|
|
324 |
the class ESaLineBreakClass (Complex content). Languages with Unicode
|
|
325 |
characters having such a class include: Thai, Lao, Myanmar and Khmer. This
|
|
326 |
default implementation of the GetLineBreakInContext() method only supports the
|
|
327 |
Thai script. Breaks are determined in Thai based on a simple understanding of
|
|
328 |
syllable boundaries. When characters from the other unsupported Sa class
|
|
329 |
languages are found the method exits with EFalse.
|
|
330 |
@param aText
|
|
331 |
The text to be searched, which is a contiguous run of characters of class SA
|
|
332 |
(or CM attached to SA). The break position may be restricted further by
|
|
333 |
aMinBreakPos and aMaxBreakPos, but more text is provided for extra context
|
|
334 |
should it be needed.
|
|
335 |
@param aMinBreakPos
|
|
336 |
The start of the text to be considered for line breaks.
|
|
337 |
@param aMaxBreakPos
|
|
338 |
The end of the text to be considered for line breaks.
|
|
339 |
@param aForwards
|
|
340 |
ETrue if aBreakPos is to be set with the first legal break position,
|
|
341 |
EFalse if aBreakPos is to be set with the last legal break position.
|
|
342 |
@param aBreakPos
|
|
343 |
If break position found on exit its value is >= Min and <= Max
|
|
344 |
positions supplied.
|
|
345 |
@return TBool
|
|
346 |
ETrue if and only if a legal break was found, EFalse otherwise.
|
|
347 |
@publishedAll
|
|
348 |
@released
|
|
349 |
*/
|
|
350 |
EXPORT_C TBool MLineBreaker::GetLineBreakInContext(const TDesC16& aText,
    TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards,
    TInt& aBreakPos) const
{
    __ASSERT_DEBUG(0 <= aMinBreakPos && aMaxBreakPos <= aText.Length(),
        Panic(ELineBreakPanic_InvalidInputParam));

    const TInt textLength = aText.Length();
    const TInt lastIndex = textLength - 1;

    // Clamp the candidate range to [1, textLength - 1]; an empty range after
    // clamping means there is nothing to search.
    if (aMinBreakPos < 1)
        aMinBreakPos = 1;
    if (aMaxBreakPos > lastIndex)
        aMaxBreakPos = lastIndex;
    if (aMinBreakPos > aMaxBreakPos)
        return EFalse;

    const TText16* const chars = aText.Ptr();

    // A leading zero width space yields a break at the minimum position; a
    // trailing one rules out any break in this run.
    if (chars[0] == KZeroWidthSpace)
    {
        aBreakPos = aMinBreakPos;
        return ETrue;
    }
    if (chars[lastIndex] == KZeroWidthSpace)
        return EFalse;

    // Walk the candidates in the requested direction and report the first
    // position at which the Thai rules allow a break.
    const TInt step = aForwards ? 1 : -1;
    const TInt stop = (aForwards ? aMaxBreakPos : aMinBreakPos) + step;
    TInt pos = aForwards ? aMinBreakPos : aMaxBreakPos;
    while (pos != stop)
    {
        if (ThaiLinebreakRules::BreakAllowedAt(chars + pos, chars, *this))
        {
            aBreakPos = pos;
            return ETrue;
        }
        pos += step;
    }
    return EFalse;
}
|