|
1 // Copyright (c) 2006-2010 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // This file was last edited on 09/04/2007. Please read the 'Unicode Character Data and Line Break data Update History.doc' file |
|
15 // for the latest updates. |
|
16 // |
|
17 // |
|
18 |
|
19 #include "LineBreak.h" |
|
20 #include "LineBreakImp.h" |
|
21 #include <e32std.h> |
|
22 #include <e32svr.h> |
|
23 |
|
24 _LIT(KLinebreakPanicCategory,"LineBreak"); |
|
25 |
|
26 GLDEF_C void Panic(TInt aError) |
|
27 { |
|
28 User::Panic(KLinebreakPanicCategory,aError); |
|
29 } |
|
30 |
|
31 const TUint KOpFlag = 1 << MLineBreaker::EOpLineBreakClass; |
|
32 const TUint KClFlag = 1 << MLineBreaker::EClLineBreakClass; |
|
33 const TUint KNsFlag = 1 << MLineBreaker::ENsLineBreakClass; |
|
34 const TUint KExFlag = 1 << MLineBreaker::EExLineBreakClass; |
|
35 const TUint KSyFlag = 1 << MLineBreaker::ESyLineBreakClass; |
|
36 const TUint KIsFlag = 1 << MLineBreaker::EIsLineBreakClass; |
|
37 const TUint KPrFlag = 1 << MLineBreaker::EPrLineBreakClass; |
|
38 const TUint KPoFlag = 1 << MLineBreaker::EPoLineBreakClass; |
|
39 const TUint KNuFlag = 1 << MLineBreaker::ENuLineBreakClass; |
|
40 const TUint KAlFlag = 1 << MLineBreaker::EAlLineBreakClass; |
|
41 const TUint KIdFlag = 1 << MLineBreaker::EIdLineBreakClass; |
|
42 const TUint KInFlag = 1 << MLineBreaker::EInLineBreakClass; |
|
43 const TUint KBbFlag = 1 << MLineBreaker::EBbLineBreakClass; |
|
44 const TUint KB2Flag = 1 << MLineBreaker::EB2LineBreakClass; |
|
45 const TUint KZwFlag = 1 << MLineBreaker::EZwLineBreakClass; |
|
46 const TUint KCmFlag = 1 << MLineBreaker::ECmLineBreakClass; |
|
47 const TUint KAiFlag = 1 << MLineBreaker::EAiLineBreakClass; |
|
48 |
|
49 const TUint KAiAlFlags = KAiFlag | KAlFlag; |
|
50 |
|
51 const TUint KAllBreaks = 0xFFFFFFFF; |
|
52 const TUint KUsualForbidden = KClFlag | KExFlag | KSyFlag | KIsFlag | KZwFlag; |
|
53 const TUint KUsualAllowed = KOpFlag | KPrFlag | KPoFlag | KNuFlag | KAiAlFlags | KIdFlag | KInFlag | KBbFlag | KB2Flag; |
|
54 |
|
55 const TUint KAlAllowed = KOpFlag | KPrFlag | KPoFlag | KIdFlag | KBbFlag | KB2Flag; |
|
56 |
|
57 // A table of break rules, indexed by the class of the character before the possible break. |
|
58 static const TLineBreakRule TheLineBreakRuleTable[MLineBreaker::ELineBreakClasses] = |
|
59 { |
|
60 { KAllBreaks - KCmFlag,0 }, // Op |
|
61 { KUsualForbidden | KNsFlag,KUsualAllowed - KPoFlag }, // Cl |
|
62 { KUsualForbidden | KOpFlag,0 }, // Qu |
|
63 { KUsualForbidden, 0 }, // Gl |
|
64 { KUsualForbidden, KUsualAllowed }, // Ns |
|
65 { KUsualForbidden, KUsualAllowed }, // Ex |
|
66 { KUsualForbidden, KUsualAllowed - KNuFlag }, // Sy |
|
67 { KUsualForbidden, KUsualAllowed - KNuFlag }, // Is |
|
68 { KUsualForbidden, KPrFlag | KPoFlag | KInFlag | KBbFlag | KB2Flag }, // Pr |
|
69 { KUsualForbidden, KUsualAllowed }, // Po |
|
70 { KUsualForbidden, KOpFlag | KPrFlag | KIdFlag | KBbFlag | KB2Flag }, // Nu |
|
71 { KUsualForbidden, KAlAllowed }, // Al |
|
72 { KUsualForbidden, KUsualAllowed - KPoFlag - KInFlag }, // Id |
|
73 { KUsualForbidden, KUsualAllowed - KInFlag }, // In |
|
74 { KUsualForbidden, KUsualAllowed }, // Hy |
|
75 { KUsualForbidden, KUsualAllowed }, // Ba |
|
76 { KUsualForbidden, 0 }, // Bb |
|
77 { KUsualForbidden | KB2Flag, KUsualAllowed }, // B2 |
|
78 { KZwFlag, KAllBreaks - KZwFlag - KCmFlag}, // Zw |
|
79 { KUsualForbidden, KOpFlag | KPrFlag | KPoFlag | KIdFlag | KBbFlag | KB2Flag }, // Cm |
|
80 { 0, 0, }, // Bk |
|
81 { 0, 0, }, // Cr |
|
82 { 0, 0, }, // Lf |
|
83 { 0, 0, }, // Sg |
|
84 { 0, 0, }, // Cb |
|
85 { KAllBreaks, 0, }, // Sp |
|
86 { 0, 0, }, // Sa |
|
87 { KUsualForbidden, KAlAllowed, }, // Ai |
|
88 { 0, 0, } // Xx |
|
89 }; |
|
90 |
|
/**
A sorted table of ranges of characters, with their line breaking classes.
A range includes all codes for iStart <= code < iEnd.
All of these items have fixed ranges. Please do not split any item into
several items, and do not combine consecutive items even if they have the
same property, because such changes will cause a mismatch with the customer
definition.
*/
|
98 static const TLineBreakRange TheLineBreakRangeTable[] = |
|
99 { |
|
100 { 0x0000, 0x0009, MLineBreaker::ECmLineBreakClass }, //<control> |
|
101 { 0x0009, 0x000a, MLineBreaker::EBaLineBreakClass }, //<control> |
|
102 { 0x000a, 0x000b, MLineBreaker::ELfLineBreakClass }, //<control> |
|
103 { 0x000b, 0x000c, MLineBreaker::ECmLineBreakClass }, //<control> |
|
104 { 0x000c, 0x000d, MLineBreaker::EBkLineBreakClass }, //<control> |
|
105 { 0x000d, 0x000e, MLineBreaker::ECrLineBreakClass }, //<control> |
|
106 { 0x000e, 0x0020, MLineBreaker::ECmLineBreakClass }, //<control> |
|
107 { 0x0020, 0x0021, MLineBreaker::ESpLineBreakClass }, //space |
|
108 { 0x0021, 0x0022, MLineBreaker::EExLineBreakClass }, //exclamation mark |
|
109 { 0x0022, 0x0023, MLineBreaker::EQuLineBreakClass }, //quotation mark |
|
110 { 0x0023, 0x0024, MLineBreaker::EAlLineBreakClass }, //number sign |
|
111 { 0x0024, 0x0025, MLineBreaker::EPrLineBreakClass }, //dollar sign |
|
112 { 0x0025, 0x0026, MLineBreaker::EPoLineBreakClass }, //percent sign |
|
113 { 0x0026, 0x0027, MLineBreaker::EAlLineBreakClass }, //ampersand |
|
114 { 0x0027, 0x0028, MLineBreaker::EQuLineBreakClass }, //apostrophe |
|
115 { 0x0028, 0x0029, MLineBreaker::EOpLineBreakClass }, //left parenthesis |
|
116 { 0x0029, 0x002a, MLineBreaker::EClLineBreakClass }, //right parenthesis |
|
117 { 0x002a, 0x002b, MLineBreaker::EAlLineBreakClass }, //asterisk |
|
118 { 0x002b, 0x002c, MLineBreaker::EPrLineBreakClass }, //plus sign |
|
119 { 0x002c, 0x002d, MLineBreaker::EIsLineBreakClass }, //comma |
|
120 { 0x002d, 0x002e, MLineBreaker::EHyLineBreakClass }, //hyphen-minus |
|
121 { 0x002e, 0x002f, MLineBreaker::EIsLineBreakClass }, //full stop |
|
122 { 0x002f, 0x0030, MLineBreaker::ESyLineBreakClass }, //solidus |
|
123 { 0x0030, 0x003a, MLineBreaker::ENuLineBreakClass }, //digit zero |
|
124 { 0x003a, 0x003c, MLineBreaker::EIsLineBreakClass }, //colon |
|
125 { 0x003c, 0x003f, MLineBreaker::EAlLineBreakClass }, //less-than sign |
|
126 { 0x003f, 0x0040, MLineBreaker::EExLineBreakClass }, //question mark |
|
127 { 0x0040, 0x005b, MLineBreaker::EAlLineBreakClass }, //commercial at |
|
128 { 0x005b, 0x005c, MLineBreaker::EOpLineBreakClass }, //left square bracket |
|
129 { 0x005c, 0x005d, MLineBreaker::EPrLineBreakClass }, //reverse solidus |
|
130 { 0x005d, 0x005e, MLineBreaker::EClLineBreakClass }, //right square bracket |
|
131 { 0x005e, 0x005f, MLineBreaker::EAlLineBreakClass }, //circumflex accent |
|
132 { 0x005f, 0x0060, MLineBreaker::EAlLineBreakClass }, //low line (underscore) |
|
133 { 0x0060, 0x007b, MLineBreaker::EAlLineBreakClass }, //grave accent |
|
134 { 0x007b, 0x007c, MLineBreaker::EOpLineBreakClass }, //left curly bracket |
|
135 { 0x007c, 0x007d, MLineBreaker::EBaLineBreakClass }, //vertical line |
|
136 { 0x007d, 0x007e, MLineBreaker::EClLineBreakClass }, //right curly bracket |
|
137 { 0x007e, 0x007f, MLineBreaker::EAlLineBreakClass }, //tilde |
|
138 { 0x007f, 0x00a0, MLineBreaker::ECmLineBreakClass }, //<control> |
|
139 { 0x00a0, 0x00a1, MLineBreaker::EGlLineBreakClass }, //no-break space |
|
140 { 0x00a1, 0x00a2, MLineBreaker::EAiLineBreakClass }, //inverted exclamation mark |
|
141 { 0x00a2, 0x00a3, MLineBreaker::EPoLineBreakClass }, //cent sign |
|
142 { 0x00a3, 0x00a6, MLineBreaker::EPrLineBreakClass }, //pound sign |
|
143 { 0x00a6, 0x00a7, MLineBreaker::EAlLineBreakClass }, //broken bar |
|
144 { 0x00a7, 0x00a9, MLineBreaker::EAiLineBreakClass }, //section sign |
|
145 { 0x00a9, 0x00aa, MLineBreaker::EAlLineBreakClass }, //copyright sign |
|
146 { 0x00aa, 0x00ab, MLineBreaker::EAiLineBreakClass }, //feminine ordinal indicator |
|
147 { 0x00ab, 0x00ac, MLineBreaker::EQuLineBreakClass }, //left-pointing double angle quotation mark |
|
148 { 0x00ac, 0x00ad, MLineBreaker::EAlLineBreakClass }, //not sign |
|
149 { 0x00ad, 0x00ae, MLineBreaker::EBaLineBreakClass }, //soft hyphen |
|
150 { 0x00ae, 0x00b0, MLineBreaker::EAlLineBreakClass }, //registered sign |
|
151 { 0x00b0, 0x00b1, MLineBreaker::EPoLineBreakClass }, //degree sign |
|
152 { 0x00b1, 0x00b2, MLineBreaker::EPrLineBreakClass }, //plus-minus sign |
|
153 { 0x00b2, 0x00b4, MLineBreaker::EAiLineBreakClass }, //superscript two |
|
154 { 0x00b4, 0x00b5, MLineBreaker::EBaLineBreakClass }, //acute accent |
|
155 { 0x00b5, 0x00b6, MLineBreaker::EAlLineBreakClass }, //micro sign |
|
156 { 0x00b6, 0x00bb, MLineBreaker::EAiLineBreakClass }, //pilcrow sign |
|
157 { 0x00bb, 0x00bc, MLineBreaker::EQuLineBreakClass }, //right-pointing double angle quotation mark |
|
158 { 0x00bc, 0x00c0, MLineBreaker::EAiLineBreakClass }, //vulgar fraction one quarter |
|
159 { 0x00c0, 0x00c6, MLineBreaker::EAlLineBreakClass }, //latin capital letter a with grave |
|
160 { 0x00c6, 0x00c7, MLineBreaker::EAiLineBreakClass }, //latin capital letter ae |
|
161 { 0x00c7, 0x00d0, MLineBreaker::EAlLineBreakClass }, //latin capital letter c with cedilla |
|
162 { 0x00d0, 0x00d1, MLineBreaker::EAiLineBreakClass }, //latin capital letter eth |
|
163 { 0x00d1, 0x00d7, MLineBreaker::EAlLineBreakClass }, //latin capital letter n with tilde |
|
164 { 0x00d7, 0x00d9, MLineBreaker::EAiLineBreakClass }, //multiplication sign |
|
165 { 0x00d9, 0x00de, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with grave |
|
166 { 0x00de, 0x00e2, MLineBreaker::EAiLineBreakClass }, //latin capital letter thorn |
|
167 { 0x00e2, 0x00e6, MLineBreaker::EAlLineBreakClass }, //latin small letter a with circumflex |
|
168 { 0x00e6, 0x00e7, MLineBreaker::EAiLineBreakClass }, //latin small letter ae |
|
169 { 0x00e7, 0x00e8, MLineBreaker::EAlLineBreakClass }, //latin small letter c with cedilla |
|
170 { 0x00e8, 0x00eb, MLineBreaker::EAiLineBreakClass }, //latin small letter e with grave |
|
171 { 0x00eb, 0x00ec, MLineBreaker::EAlLineBreakClass }, //latin small letter e with diaeresis |
|
172 { 0x00ec, 0x00ee, MLineBreaker::EAiLineBreakClass }, //latin small letter i with grave |
|
173 { 0x00ee, 0x00f0, MLineBreaker::EAlLineBreakClass }, //latin small letter i with circumflex |
|
174 { 0x00f0, 0x00f1, MLineBreaker::EAiLineBreakClass }, //latin small letter eth |
|
175 { 0x00f1, 0x00f2, MLineBreaker::EAlLineBreakClass }, //latin small letter n with tilde |
|
176 { 0x00f2, 0x00f4, MLineBreaker::EAiLineBreakClass }, //latin small letter o with grave |
|
177 { 0x00f4, 0x00f7, MLineBreaker::EAlLineBreakClass }, //latin small letter o with circumflex |
|
178 { 0x00f7, 0x00fb, MLineBreaker::EAiLineBreakClass }, //division sign |
|
179 { 0x00fb, 0x00fc, MLineBreaker::EAlLineBreakClass }, //latin small letter u with circumflex |
|
180 { 0x00fc, 0x00fd, MLineBreaker::EAiLineBreakClass }, //latin small letter u with diaeresis |
|
181 { 0x00fd, 0x00fe, MLineBreaker::EAlLineBreakClass }, //latin small letter y with acute |
|
182 { 0x00fe, 0x00ff, MLineBreaker::EAiLineBreakClass }, //latin small letter thorn |
|
183 { 0x00ff, 0x0101, MLineBreaker::EAlLineBreakClass }, //latin small letter y with diaeresis |
|
184 { 0x0101, 0x0102, MLineBreaker::EAiLineBreakClass }, //latin small letter a with macron |
|
185 { 0x0102, 0x0111, MLineBreaker::EAlLineBreakClass }, //latin capital letter a with breve |
|
186 { 0x0111, 0x0112, MLineBreaker::EAiLineBreakClass }, //latin small letter d with stroke |
|
187 { 0x0112, 0x0113, MLineBreaker::EAlLineBreakClass }, //latin capital letter e with macron |
|
188 { 0x0113, 0x0114, MLineBreaker::EAiLineBreakClass }, //latin small letter e with macron |
|
189 { 0x0114, 0x011b, MLineBreaker::EAlLineBreakClass }, //latin capital letter e with breve |
|
190 { 0x011b, 0x011c, MLineBreaker::EAiLineBreakClass }, //latin small letter e with caron |
|
191 { 0x011c, 0x0126, MLineBreaker::EAlLineBreakClass }, //latin capital letter g with circumflex |
|
192 { 0x0126, 0x0128, MLineBreaker::EAiLineBreakClass }, //latin capital letter h with stroke |
|
193 { 0x0128, 0x012b, MLineBreaker::EAlLineBreakClass }, //latin capital letter i with tilde |
|
194 { 0x012b, 0x012c, MLineBreaker::EAiLineBreakClass }, //latin small letter i with macron |
|
195 { 0x012c, 0x0131, MLineBreaker::EAlLineBreakClass }, //latin capital letter i with breve |
|
196 { 0x0131, 0x0134, MLineBreaker::EAiLineBreakClass }, //latin small letter dotless i |
|
197 { 0x0134, 0x0138, MLineBreaker::EAlLineBreakClass }, //latin capital letter j with circumflex |
|
198 { 0x0138, 0x0139, MLineBreaker::EAiLineBreakClass }, //latin small letter kra |
|
199 { 0x0139, 0x013f, MLineBreaker::EAlLineBreakClass }, //latin capital letter l with acute |
|
200 { 0x013f, 0x0143, MLineBreaker::EAiLineBreakClass }, //latin capital letter l with middle dot |
|
201 { 0x0143, 0x0144, MLineBreaker::EAlLineBreakClass }, //latin capital letter n with acute |
|
202 { 0x0144, 0x0145, MLineBreaker::EAiLineBreakClass }, //latin small letter n with acute |
|
203 { 0x0145, 0x0148, MLineBreaker::EAlLineBreakClass }, //latin capital letter n with cedilla |
|
204 { 0x0148, 0x014b, MLineBreaker::EAiLineBreakClass }, //latin small letter n with caron |
|
205 { 0x014b, 0x014d, MLineBreaker::EAlLineBreakClass }, //latin small letter eng |
|
206 { 0x014d, 0x014e, MLineBreaker::EAiLineBreakClass }, //latin small letter o with macron |
|
207 { 0x014e, 0x0152, MLineBreaker::EAlLineBreakClass }, //latin capital letter o with breve |
|
208 { 0x0152, 0x0154, MLineBreaker::EAiLineBreakClass }, //latin capital ligature oe |
|
209 { 0x0154, 0x0166, MLineBreaker::EAlLineBreakClass }, //latin capital letter r with acute |
|
210 { 0x0166, 0x0168, MLineBreaker::EAiLineBreakClass }, //latin capital letter t with stroke |
|
211 { 0x0168, 0x016b, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with tilde |
|
212 { 0x016b, 0x016c, MLineBreaker::EAiLineBreakClass }, //latin small letter u with macron |
|
213 { 0x016c, 0x01ce, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with breve |
|
214 { 0x01ce, 0x01cf, MLineBreaker::EAiLineBreakClass }, //latin small letter a with caron |
|
215 { 0x01cf, 0x01d0, MLineBreaker::EAlLineBreakClass }, //latin capital letter i with caron |
|
216 { 0x01d0, 0x01d1, MLineBreaker::EAiLineBreakClass }, //latin small letter i with caron |
|
217 { 0x01d1, 0x01d2, MLineBreaker::EAlLineBreakClass }, //latin capital letter o with caron |
|
218 { 0x01d2, 0x01d3, MLineBreaker::EAiLineBreakClass }, //latin small letter o with caron |
|
219 { 0x01d3, 0x01d4, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with caron |
|
220 { 0x01d4, 0x01d5, MLineBreaker::EAiLineBreakClass }, //latin small letter u with caron |
|
221 { 0x01d5, 0x01d6, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with diaeresis and macron |
|
222 { 0x01d6, 0x01d7, MLineBreaker::EAiLineBreakClass }, //latin small letter u with diaeresis and macron |
|
223 { 0x01d7, 0x01d8, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with diaeresis and acute |
|
224 { 0x01d8, 0x01d9, MLineBreaker::EAiLineBreakClass }, //latin small letter u with diaeresis and acute |
|
225 { 0x01d9, 0x01da, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with diaeresis and caron |
|
226 { 0x01da, 0x01db, MLineBreaker::EAiLineBreakClass }, //latin small letter u with diaeresis and caron |
|
227 { 0x01db, 0x01dc, MLineBreaker::EAlLineBreakClass }, //latin capital letter u with diaeresis and grave |
|
228 { 0x01dc, 0x01dd, MLineBreaker::EAiLineBreakClass }, //latin small letter u with diaeresis and grave |
|
229 { 0x01dd, 0x0250, MLineBreaker::EAlLineBreakClass }, //latin small letter turned e |
|
230 { 0x0250, 0x0251, MLineBreaker::EAlLineBreakClass }, //latin small letter turned a |
|
231 { 0x0251, 0x0252, MLineBreaker::EAiLineBreakClass }, //latin small letter alpha |
|
232 { 0x0252, 0x0261, MLineBreaker::EAlLineBreakClass }, //latin small letter turned alpha |
|
233 { 0x0261, 0x0262, MLineBreaker::EAiLineBreakClass }, //latin small letter script g |
|
234 { 0x0262, 0x02c7, MLineBreaker::EAlLineBreakClass }, //latin letter small capital g |
|
235 { 0x02c7, 0x02c8, MLineBreaker::EAiLineBreakClass }, //caron |
|
236 { 0x02c8, 0x02c9, MLineBreaker::EBbLineBreakClass }, //modifier letter vertical line |
|
237 { 0x02c9, 0x02cc, MLineBreaker::EAiLineBreakClass }, //modifier letter macron |
|
238 { 0x02cc, 0x02cd, MLineBreaker::EBbLineBreakClass }, //modifier letter low vertical line |
|
239 { 0x02cd, 0x02ce, MLineBreaker::EAiLineBreakClass }, //modifier letter low macron |
|
240 { 0x02ce, 0x02d0, MLineBreaker::EAlLineBreakClass }, //modifier letter low grave accent |
|
241 { 0x02d0, 0x02d1, MLineBreaker::EAiLineBreakClass }, //modifier letter triangular colon |
|
242 { 0x02d1, 0x02d8, MLineBreaker::EAlLineBreakClass }, //modifier letter half triangular colon |
|
243 { 0x02d8, 0x02dc, MLineBreaker::EAiLineBreakClass }, //breve |
|
244 { 0x02dc, 0x02dd, MLineBreaker::EAlLineBreakClass }, //small tilde |
|
245 { 0x02dd, 0x02de, MLineBreaker::EAiLineBreakClass }, //double acute accent |
|
246 { 0x02de, 0x0300, MLineBreaker::EAlLineBreakClass }, //modifier letter rhotic hook |
|
247 { 0x0300, 0x034f, MLineBreaker::ECmLineBreakClass }, //combining grave accent |
|
248 { 0x034f, 0x0350, MLineBreaker::EGlLineBreakClass }, //COMBINING GRAPHEME JOINER |
|
249 { 0x0350, 0x035c, MLineBreaker::ECmLineBreakClass }, //COMBINING RIGHT ARROWHEAD ABOVE |
|
250 { 0x035c, 0x0360, MLineBreaker::EGlLineBreakClass }, //COMBINING DOUBLE BREVE BELOW |
|
251 { 0x0360, 0x0370, MLineBreaker::ECmLineBreakClass }, //combining double tilde |
|
252 { 0x0374, 0x0376, MLineBreaker::EAlLineBreakClass }, //greek numeral sign |
|
253 { 0x037a, 0x037b, MLineBreaker::EAlLineBreakClass }, //greek ypogegrammeni |
|
254 { 0x037b, 0x037e, MLineBreaker::EAlLineBreakClass }, //GREEK SMALL REVERSED LUNATE SIGMA SYMBOL |
|
255 { 0x037e, 0x037f, MLineBreaker::EAlLineBreakClass }, //greek question mark |
|
256 { 0x0384, 0x038b, MLineBreaker::EAlLineBreakClass }, //greek tonos |
|
257 { 0x038c, 0x038d, MLineBreaker::EAlLineBreakClass }, //greek capital letter omicron with tonos |
|
258 { 0x038e, 0x0391, MLineBreaker::EAlLineBreakClass }, //greek capital letter upsilon with tonos |
|
259 { 0x0391, 0x03a2, MLineBreaker::EAiLineBreakClass }, //greek capital letter alpha |
|
260 { 0x03a3, 0x03aa, MLineBreaker::EAiLineBreakClass }, //greek capital letter sigma |
|
261 { 0x03aa, 0x03b1, MLineBreaker::EAlLineBreakClass }, //greek capital letter iota with dialytika |
|
262 { 0x03b1, 0x03c2, MLineBreaker::EAiLineBreakClass }, //greek small letter alpha |
|
263 { 0x03c2, 0x03c3, MLineBreaker::EAlLineBreakClass }, //greek small letter final sigma |
|
264 { 0x03c3, 0x03ca, MLineBreaker::EAiLineBreakClass }, //greek small letter sigma |
|
265 { 0x03ca, 0x03cf, MLineBreaker::EAlLineBreakClass }, //greek small letter iota with dialytika |
|
266 { 0x03d0, 0x0400, MLineBreaker::EAlLineBreakClass }, //greek beta symbol |
|
267 { 0x0400, 0x0401, MLineBreaker::EAlLineBreakClass }, //cyrillic capital letter ie with grave |
|
268 { 0x0401, 0x0402, MLineBreaker::EAiLineBreakClass }, //cyrillic capital letter io |
|
269 { 0x0402, 0x0410, MLineBreaker::EAlLineBreakClass }, //cyrillic capital letter dje |
|
270 { 0x0410, 0x0450, MLineBreaker::EAiLineBreakClass }, //cyrillic capital letter a |
|
271 { 0x0450, 0x0451, MLineBreaker::EAlLineBreakClass }, //cyrillic small letter ie with grave |
|
272 { 0x0451, 0x0452, MLineBreaker::EAiLineBreakClass }, //cyrillic small letter io |
|
273 { 0x0452, 0x0483, MLineBreaker::EAlLineBreakClass }, //cyrillic small letter dje |
|
274 { 0x0483, 0x0487, MLineBreaker::ECmLineBreakClass }, //combining cyrillic titlo |
|
275 { 0x0488, 0x048a, MLineBreaker::ECmLineBreakClass }, //combining cyrillic hundred thousands sign |
|
276 { 0x048a, 0x0514, MLineBreaker::EAlLineBreakClass }, //CYRILLIC CAPITAL LETTER SHORT I WITH TAIL |
|
277 { 0x0531, 0x0557, MLineBreaker::EAlLineBreakClass }, //armenian capital letter ayb |
|
278 { 0x0559, 0x0560, MLineBreaker::EAlLineBreakClass }, //armenian modifier letter left half ring |
|
279 { 0x0561, 0x0588, MLineBreaker::EAlLineBreakClass }, //armenian small letter ayb |
|
280 { 0x0589, 0x058a, MLineBreaker::EIsLineBreakClass }, //armenian full stop |
|
281 { 0x058a, 0x058b, MLineBreaker::EBaLineBreakClass }, //armenian hyphen |
|
282 { 0x0591, 0x05be, MLineBreaker::ECmLineBreakClass }, //hebrew accent etnahta |
|
283 { 0x05be, 0x05bf, MLineBreaker::EAlLineBreakClass }, //hebrew punctuation maqaf |
|
284 { 0x05bf, 0x05c0, MLineBreaker::ECmLineBreakClass }, //hebrew point rafe |
|
285 { 0x05c0, 0x05c1, MLineBreaker::EAlLineBreakClass }, //hebrew punctuation paseq |
|
286 { 0x05c1, 0x05c3, MLineBreaker::ECmLineBreakClass }, //hebrew point shin dot |
|
287 { 0x05c3, 0x05c4, MLineBreaker::EAlLineBreakClass }, //hebrew punctuation sof pasuq |
|
288 { 0x05c4, 0x05c5, MLineBreaker::ECmLineBreakClass }, //hebrew mark upper dot |
|
289 { 0x05c5, 0x05c6, MLineBreaker::ECmLineBreakClass }, //HEBREW MARK LOWER DOT |
|
290 { 0x05c6, 0x05c7, MLineBreaker::EExLineBreakClass }, //HEBREW PUNCTUATION NUN HAFUKHA |
|
291 { 0x05c7, 0x05c8, MLineBreaker::ECmLineBreakClass }, //HEBREW POINT QAMATS QATAN |
|
292 { 0x05d0, 0x05eb, MLineBreaker::EAlLineBreakClass }, //hebrew letter alef |
|
293 { 0x05f0, 0x05f5, MLineBreaker::EAlLineBreakClass }, //hebrew ligature yiddish double vav |
|
294 { 0x0600, 0x0604, MLineBreaker::EAlLineBreakClass }, //ARABIC NUMBER SIGN |
|
295 { 0x060b, 0x060c, MLineBreaker::EPoLineBreakClass }, //AFGHANI SIGN |
|
296 { 0x060c, 0x060d, MLineBreaker::EExLineBreakClass }, //arabic comma |
|
297 { 0x060d, 0x060e, MLineBreaker::EIsLineBreakClass }, //ARABIC DATE SEPARATOR |
|
298 { 0x060e, 0x0610, MLineBreaker::EAlLineBreakClass }, //ARABIC POETIC VERSE SIGN |
|
299 { 0x0610, 0x0616, MLineBreaker::ECmLineBreakClass }, //ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM |
|
300 { 0x061b, 0x061c, MLineBreaker::EExLineBreakClass }, //arabic semicolon |
|
301 { 0x061e, 0x061f, MLineBreaker::EExLineBreakClass }, //ARABIC TRIPLE DOT PUNCTUATION MARK |
|
302 { 0x061f, 0x0620, MLineBreaker::EExLineBreakClass }, //arabic question mark |
|
303 { 0x0621, 0x063b, MLineBreaker::EAlLineBreakClass }, //arabic letter hamza |
|
304 { 0x0640, 0x064b, MLineBreaker::EAlLineBreakClass }, //arabic tatweel |
|
305 { 0x064b, 0x065f, MLineBreaker::ECmLineBreakClass }, //arabic fathatan |
|
306 { 0x0660, 0x066a, MLineBreaker::ENuLineBreakClass }, //arabic-indic digit zero |
|
307 { 0x066a, 0x066b, MLineBreaker::EExLineBreakClass }, //arabic percent sign |
|
308 { 0x066b, 0x066d, MLineBreaker::ENuLineBreakClass }, //arabic decimal & thousands separator |
|
309 { 0x066d, 0x066e, MLineBreaker::EAlLineBreakClass }, //arabic five pointed star |
|
310 { 0x066e, 0x0670, MLineBreaker::EAlLineBreakClass }, //arabic five pointed star |
|
311 { 0x0670, 0x0671, MLineBreaker::ECmLineBreakClass }, //arabic letter superscript alef |
|
312 { 0x0671, 0x06d4, MLineBreaker::EAlLineBreakClass }, //arabic letter alef wasla |
|
313 { 0x06d4, 0x06d5, MLineBreaker::EExLineBreakClass }, //arabic full stop |
|
314 { 0x06d5, 0x06d6, MLineBreaker::EAlLineBreakClass }, //arabic letter ae |
|
315 { 0x06d6, 0x06e5, MLineBreaker::ECmLineBreakClass }, //arabic small high ligature sad with lam with alef maksura |
|
316 { 0x06e5, 0x06e7, MLineBreaker::EAlLineBreakClass }, //arabic small waw |
|
317 { 0x06e7, 0x06e9, MLineBreaker::ECmLineBreakClass }, //arabic small high yeh |
|
318 { 0x06e9, 0x06ea, MLineBreaker::EAlLineBreakClass }, //arabic place of sajdah |
|
319 { 0x06ea, 0x06ee, MLineBreaker::ECmLineBreakClass }, //arabic empty centre low stop |
|
320 { 0x06ee, 0x06f0, MLineBreaker::EAlLineBreakClass }, //ARABIC LETTER DAL WITH INVERTED V |
|
321 { 0x06f0, 0x06fa, MLineBreaker::ENuLineBreakClass }, //extended arabic-indic digit zero |
|
322 { 0x06fa, 0x070e, MLineBreaker::EAlLineBreakClass }, //arabic letter sheen with dot below |
|
323 { 0x070f, 0x0710, MLineBreaker::ECmLineBreakClass }, //syriac abbreviation mark |
|
324 { 0x0710, 0x0711, MLineBreaker::EAlLineBreakClass }, //syriac letter alaph |
|
325 { 0x0711, 0x0712, MLineBreaker::ECmLineBreakClass }, //syriac letter superscript alaph |
|
326 { 0x0712, 0x0730, MLineBreaker::EAlLineBreakClass }, //syriac letter beth |
|
327 { 0x0730, 0x074b, MLineBreaker::ECmLineBreakClass }, //syriac pthaha above |
|
328 { 0x074d, 0x076e, MLineBreaker::EAlLineBreakClass }, //SYRIAC LETTER SOGDIAN ZHAIN |
|
329 { 0x0780, 0x07a6, MLineBreaker::EAlLineBreakClass }, //thaana letter haa |
|
330 { 0x07a6, 0x07b1, MLineBreaker::ECmLineBreakClass }, //thaana abafili |
|
331 { 0x07b1, 0x07b2, MLineBreaker::EAlLineBreakClass }, //THAANA LETTER NAA |
|
332 { 0x07c0, 0x07ca, MLineBreaker::ENuLineBreakClass }, //NKO DIGIT ZERO |
|
333 { 0x07ca, 0x07eb, MLineBreaker::EAlLineBreakClass }, //NKO LETTER A |
|
334 { 0x07eb, 0x07f4, MLineBreaker::ECmLineBreakClass }, //NKO COMBINING SHORT HIGH TONE |
|
335 { 0x07f4, 0x07f8, MLineBreaker::EAlLineBreakClass }, //NKO HIGH TONE APOSTROPHE |
|
336 { 0x07f8, 0x07f9, MLineBreaker::EIsLineBreakClass }, //NKO COMMA |
|
337 { 0x07f9, 0x07fa, MLineBreaker::EExLineBreakClass }, //NKO EXCLAMATION MARK |
|
338 { 0x07fa, 0x07fb, MLineBreaker::EAlLineBreakClass }, //NKO LAJANYALAN |
|
339 { 0x0901, 0x0904, MLineBreaker::ECmLineBreakClass }, //devanagari sign candrabindu |
|
340 { 0x0904, 0x093a, MLineBreaker::EAlLineBreakClass }, //devanagari letter short a: CR1080 |
|
341 { 0x093c, 0x093d, MLineBreaker::ECmLineBreakClass }, //devanagari sign nukta |
|
342 { 0x093d, 0x093e, MLineBreaker::EAlLineBreakClass }, //devanagari sign avagraha |
|
343 { 0x093e, 0x094e, MLineBreaker::ECmLineBreakClass }, //devanagari vowel sign aa |
|
344 { 0x0950, 0x0951, MLineBreaker::EAlLineBreakClass }, //devanagari om |
|
345 { 0x0951, 0x0955, MLineBreaker::ECmLineBreakClass }, //devanagari stress sign udatta |
|
346 { 0x0958, 0x0962, MLineBreaker::EAlLineBreakClass }, //devanagari letter qa |
|
347 { 0x0962, 0x0964, MLineBreaker::ECmLineBreakClass }, //devanagari vowel sign vocalic l |
|
348 { 0x0964, 0x0966, MLineBreaker::EAlLineBreakClass }, //devanagari danda |
|
349 { 0x0966, 0x0970, MLineBreaker::ENuLineBreakClass }, //devanagari digit zero |
|
350 { 0x0970, 0x0971, MLineBreaker::EAlLineBreakClass }, //devanagari abbreviation sign |
|
351 { 0x097b, 0x0980, MLineBreaker::EAlLineBreakClass }, //devanagari letter gga: CR1080 |
|
352 { 0x0981, 0x0984, MLineBreaker::ECmLineBreakClass }, //bengali sign candrabindu |
|
353 { 0x0985, 0x098d, MLineBreaker::EAlLineBreakClass }, //bengali letter a |
|
354 { 0x098f, 0x0991, MLineBreaker::EAlLineBreakClass }, //bengali letter e |
|
355 { 0x0993, 0x09a9, MLineBreaker::EAlLineBreakClass }, //bengali letter o |
|
356 { 0x09aa, 0x09b1, MLineBreaker::EAlLineBreakClass }, //bengali letter pa |
|
357 { 0x09b2, 0x09b3, MLineBreaker::EAlLineBreakClass }, //bengali letter la |
|
358 { 0x09b6, 0x09ba, MLineBreaker::EAlLineBreakClass }, //bengali letter sha |
|
359 { 0x09bc, 0x09bd, MLineBreaker::ECmLineBreakClass }, //bengali sign nukta |
|
360 { 0x09bd, 0x09be, MLineBreaker::EAlLineBreakClass }, //bengali sign avagraha: CR1080 |
|
361 { 0x09be, 0x09c5, MLineBreaker::ECmLineBreakClass }, //bengali vowel sign aa |
|
362 { 0x09c7, 0x09c9, MLineBreaker::ECmLineBreakClass }, //bengali vowel sign e |
|
363 { 0x09cb, 0x09ce, MLineBreaker::ECmLineBreakClass }, //bengali vowel sign o |
|
364 { 0x09ce, 0x09cf, MLineBreaker::EAlLineBreakClass }, //bengali letter khanda ta: CR1080 |
|
365 { 0x09d7, 0x09d8, MLineBreaker::ECmLineBreakClass }, //bengali au length mark |
|
366 { 0x09dc, 0x09de, MLineBreaker::EAlLineBreakClass }, //bengali letter rra |
|
367 { 0x09df, 0x09e2, MLineBreaker::EAlLineBreakClass }, //bengali letter yya |
|
368 { 0x09e2, 0x09e4, MLineBreaker::ECmLineBreakClass }, //bengali vowel sign vocalic l |
|
369 { 0x09e6, 0x09f0, MLineBreaker::ENuLineBreakClass }, //bengali digit zero |
|
370 { 0x09f0, 0x09f2, MLineBreaker::EAlLineBreakClass }, //bengali letter ra with middle diagonal |
|
371 { 0x09f2, 0x09f4, MLineBreaker::EPrLineBreakClass }, //bengali rupee mark |
|
372 { 0x09f4, 0x09fb, MLineBreaker::EAlLineBreakClass }, //bengali currency numerator one |
|
373 { 0x0a01, 0x0a04, MLineBreaker::ECmLineBreakClass }, //gurmukhi sign adak bindi: CR1080 |
|
374 { 0x0a05, 0x0a0b, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter a |
|
375 { 0x0a0f, 0x0a11, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter ee |
|
376 { 0x0a13, 0x0a29, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter oo |
|
377 { 0x0a2a, 0x0a31, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter pa |
|
378 { 0x0a32, 0x0a34, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter la |
|
379 { 0x0a35, 0x0a37, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter va |
|
380 { 0x0a38, 0x0a3a, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter sa |
|
381 { 0x0a3c, 0x0a3d, MLineBreaker::ECmLineBreakClass }, //gurmukhi sign nukta |
|
382 { 0x0a3e, 0x0a43, MLineBreaker::ECmLineBreakClass }, //gurmukhi vowel sign aa |
|
383 { 0x0a47, 0x0a49, MLineBreaker::ECmLineBreakClass }, //gurmukhi vowel sign ee |
|
384 { 0x0a4b, 0x0a4e, MLineBreaker::ECmLineBreakClass }, //gurmukhi vowel sign oo |
|
385 { 0x0a59, 0x0a5d, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter khha |
|
386 { 0x0a5e, 0x0a5f, MLineBreaker::EAlLineBreakClass }, //gurmukhi letter fa |
|
387 { 0x0a66, 0x0a70, MLineBreaker::ENuLineBreakClass }, //gurmukhi digit zero |
|
388 { 0x0a70, 0x0a72, MLineBreaker::ECmLineBreakClass }, //gurmukhi tippi |
|
389 { 0x0a72, 0x0a75, MLineBreaker::EAlLineBreakClass }, //gurmukhi iri |
|
390 { 0x0a81, 0x0a84, MLineBreaker::ECmLineBreakClass }, //gujarati sign candrabindu |
|
391 { 0x0a85, 0x0a8d, MLineBreaker::EAlLineBreakClass }, //gujarati letter a: CR1080 |
|
392 { 0x0a8d, 0x0a8e, MLineBreaker::EAlLineBreakClass }, //gujarati vowel candra e |
|
393 { 0x0a8f, 0x0a92, MLineBreaker::EAlLineBreakClass }, //gujarati letter e |
|
394 { 0x0a93, 0x0aa9, MLineBreaker::EAlLineBreakClass }, //gujarati letter o |
|
395 { 0x0aaa, 0x0ab1, MLineBreaker::EAlLineBreakClass }, //gujarati letter pa |
|
396 { 0x0ab2, 0x0ab4, MLineBreaker::EAlLineBreakClass }, //gujarati letter la |
|
397 { 0x0ab5, 0x0aba, MLineBreaker::EAlLineBreakClass }, //gujarati letter va |
|
398 { 0x0abc, 0x0abd, MLineBreaker::ECmLineBreakClass }, //gujarati sign nukta |
|
399 { 0x0abd, 0x0abe, MLineBreaker::EAlLineBreakClass }, //gujarati sign avagraha |
|
400 { 0x0abe, 0x0ac6, MLineBreaker::ECmLineBreakClass }, //gujarati vowel sign aa |
|
401 { 0x0ac7, 0x0aca, MLineBreaker::ECmLineBreakClass }, //gujarati vowel sign e |
|
402 { 0x0acb, 0x0ace, MLineBreaker::ECmLineBreakClass }, //gujarati vowel sign o |
|
403 { 0x0ad0, 0x0ad1, MLineBreaker::EAlLineBreakClass }, //gujarati om |
|
404 { 0x0ae0, 0x0ae2, MLineBreaker::EAlLineBreakClass }, //gujarati letter vocalic rr: CR1080 |
|
405 { 0x0ae2, 0x0ae4, MLineBreaker::ECmLineBreakClass }, //gujarati vowel sign vocalic l: CR1080 |
|
406 { 0x0ae6, 0x0af0, MLineBreaker::ENuLineBreakClass }, //gujarati digit zero |
|
407 { 0x0af1, 0x0af2, MLineBreaker::EPrLineBreakClass }, //gujarati rupee sign: CR1080 |
|
408 { 0x0b01, 0x0b04, MLineBreaker::ECmLineBreakClass }, //oriya sign candrabindu |
|
409 { 0x0b05, 0x0b0d, MLineBreaker::EAlLineBreakClass }, //oriya letter a |
|
410 { 0x0b0f, 0x0b11, MLineBreaker::EAlLineBreakClass }, //oriya letter e |
|
411 { 0x0b13, 0x0b29, MLineBreaker::EAlLineBreakClass }, //oriya letter o |
|
412 { 0x0b2a, 0x0b31, MLineBreaker::EAlLineBreakClass }, //oriya letter pa |
|
413 { 0x0b32, 0x0b34, MLineBreaker::EAlLineBreakClass }, //oriya letter la |
|
414 { 0x0b35, 0x0b36, MLineBreaker::EAlLineBreakClass }, //oriya letter va: CR1080 |
|
415 { 0x0b36, 0x0b3a, MLineBreaker::EAlLineBreakClass }, //oriya letter sha |
|
416 { 0x0b3c, 0x0b3d, MLineBreaker::ECmLineBreakClass }, //oriya sign nukta |
|
417 { 0x0b3d, 0x0b3e, MLineBreaker::EAlLineBreakClass }, //oriya sign avagraha |
|
418 { 0x0b3e, 0x0b44, MLineBreaker::ECmLineBreakClass }, //oriya vowel sign aa |
|
419 { 0x0b47, 0x0b49, MLineBreaker::ECmLineBreakClass }, //oriya vowel sign e |
|
420 { 0x0b4b, 0x0b4e, MLineBreaker::ECmLineBreakClass }, //oriya vowel sign o |
|
421 { 0x0b56, 0x0b58, MLineBreaker::ECmLineBreakClass }, //oriya ai length mark |
|
422 { 0x0b5c, 0x0b5e, MLineBreaker::EAlLineBreakClass }, //oriya letter rra |
|
423 { 0x0b5f, 0x0b62, MLineBreaker::EAlLineBreakClass }, //oriya letter yya |
|
424 { 0x0b66, 0x0b70, MLineBreaker::ENuLineBreakClass }, //oriya digit zero |
|
425 { 0x0b70, 0x0b71, MLineBreaker::EAlLineBreakClass }, //oriya isshar |
|
426 { 0x0b71, 0x0b72, MLineBreaker::EAlLineBreakClass }, //oriya letter wa: CR1080 |
|
427 { 0x0b82, 0x0b84, MLineBreaker::ECmLineBreakClass }, //tamil sign anusvara |
|
428 { 0x0b85, 0x0b8b, MLineBreaker::EAlLineBreakClass }, //tamil letter a |
|
429 { 0x0b8e, 0x0b91, MLineBreaker::EAlLineBreakClass }, //tamil letter e |
|
430 { 0x0b92, 0x0b96, MLineBreaker::EAlLineBreakClass }, //tamil letter o |
|
431 { 0x0b99, 0x0b9b, MLineBreaker::EAlLineBreakClass }, //tamil letter nga |
|
432 { 0x0b9c, 0x0b9d, MLineBreaker::EAlLineBreakClass }, //tamil letter ja |
|
433 { 0x0b9e, 0x0ba0, MLineBreaker::EAlLineBreakClass }, //tamil letter nya |
|
434 { 0x0ba3, 0x0ba5, MLineBreaker::EAlLineBreakClass }, //tamil letter nna |
|
435 { 0x0ba8, 0x0bab, MLineBreaker::EAlLineBreakClass }, //tamil letter na |
|
436 { 0x0bae, 0x0bba, MLineBreaker::EAlLineBreakClass }, //tamil letter ma: CR1080 |
|
437 { 0x0bbe, 0x0bc3, MLineBreaker::ECmLineBreakClass }, //tamil vowel sign aa |
|
438 { 0x0bc6, 0x0bc9, MLineBreaker::ECmLineBreakClass }, //tamil vowel sign e |
|
439 { 0x0bca, 0x0bce, MLineBreaker::ECmLineBreakClass }, //tamil vowel sign o |
|
440 { 0x0bd7, 0x0bd8, MLineBreaker::ECmLineBreakClass }, //tamil au length mark |
|
441 { 0x0be6, 0x0bf0, MLineBreaker::ENuLineBreakClass }, //tamil digit one: CR1080 |
|
442 { 0x0bf0, 0x0bf9, MLineBreaker::EAlLineBreakClass }, //tamil number ten: CR1080 |
|
443 { 0x0bf9, 0x0bfa, MLineBreaker::EPrLineBreakClass }, //tamil rupee sign: CR1080 |
|
444 { 0x0bfa, 0x0bfb, MLineBreaker::EAlLineBreakClass }, //tamil number sign: CR1080 |
|
445 { 0x0c01, 0x0c04, MLineBreaker::ECmLineBreakClass }, //telugu sign candrabindu |
|
446 { 0x0c05, 0x0c0d, MLineBreaker::EAlLineBreakClass }, //telugu letter a |
|
447 { 0x0c0e, 0x0c11, MLineBreaker::EAlLineBreakClass }, //telugu letter e |
|
448 { 0x0c12, 0x0c29, MLineBreaker::EAlLineBreakClass }, //telugu letter o |
|
449 { 0x0c2a, 0x0c34, MLineBreaker::EAlLineBreakClass }, //telugu letter pa |
|
450 { 0x0c35, 0x0c3a, MLineBreaker::EAlLineBreakClass }, //telugu letter va |
|
451 { 0x0c3e, 0x0c45, MLineBreaker::ECmLineBreakClass }, //telugu vowel sign aa |
|
452 { 0x0c46, 0x0c49, MLineBreaker::ECmLineBreakClass }, //telugu vowel sign e |
|
453 { 0x0c4a, 0x0c4e, MLineBreaker::ECmLineBreakClass }, //telugu vowel sign o |
|
454 { 0x0c55, 0x0c57, MLineBreaker::ECmLineBreakClass }, //telugu length mark |
|
455 { 0x0c60, 0x0c62, MLineBreaker::EAlLineBreakClass }, //telugu letter vocalic rr |
|
456 { 0x0c66, 0x0c70, MLineBreaker::ENuLineBreakClass }, //telugu digit zero |
|
457 { 0x0c82, 0x0c84, MLineBreaker::ECmLineBreakClass }, //kannada sign anusvara |
|
458 { 0x0c85, 0x0c8d, MLineBreaker::EAlLineBreakClass }, //kannada letter a |
|
459 { 0x0c8e, 0x0c91, MLineBreaker::EAlLineBreakClass }, //kannada letter e |
|
460 { 0x0c92, 0x0ca9, MLineBreaker::EAlLineBreakClass }, //kannada letter o |
|
461 { 0x0caa, 0x0cb4, MLineBreaker::EAlLineBreakClass }, //kannada letter pa |
|
462 { 0x0cb5, 0x0cba, MLineBreaker::EAlLineBreakClass }, //kannada letter va |
|
463 { 0x0cbc, 0x0cbd, MLineBreaker::ECmLineBreakClass }, //kannada sign nukta: CR1080 |
|
464 { 0x0cbd, 0x0cbe, MLineBreaker::EAlLineBreakClass }, //kannada sign avagraha: CR1080 |
|
465 { 0x0cbe, 0x0cc5, MLineBreaker::ECmLineBreakClass }, //kannada vowel sign aa |
|
466 { 0x0cc6, 0x0cc9, MLineBreaker::ECmLineBreakClass }, //kannada vowel sign e |
|
467 { 0x0cca, 0x0cce, MLineBreaker::ECmLineBreakClass }, //kannada vowel sign o |
|
468 { 0x0cd5, 0x0cd7, MLineBreaker::ECmLineBreakClass }, //kannada length mark |
|
469 { 0x0cde, 0x0cdf, MLineBreaker::EAlLineBreakClass }, //kannada letter fa |
|
470 { 0x0ce0, 0x0ce2, MLineBreaker::EAlLineBreakClass }, //kannada letter vocalic rr |
|
471 { 0x0ce2, 0x0ce4, MLineBreaker::ECmLineBreakClass }, //kannada vowel sign vocalic l: CR1080 |
|
472 { 0x0ce6, 0x0cf0, MLineBreaker::ENuLineBreakClass }, //kannada digit zero |
|
473 { 0x0cf1, 0x0cf3, MLineBreaker::EAlLineBreakClass }, //kannada sign jihvamuliya: CR1080 |
|
474 { 0x0d02, 0x0d04, MLineBreaker::ECmLineBreakClass }, //malayalam sign anusvara |
|
475 { 0x0d05, 0x0d0d, MLineBreaker::EAlLineBreakClass }, //malayalam letter a |
|
476 { 0x0d0e, 0x0d11, MLineBreaker::EAlLineBreakClass }, //malayalam letter e |
|
477 { 0x0d12, 0x0d29, MLineBreaker::EAlLineBreakClass }, //malayalam letter o |
|
478 { 0x0d2a, 0x0d3a, MLineBreaker::EAlLineBreakClass }, //malayalam letter pa |
|
479 { 0x0d3e, 0x0d44, MLineBreaker::ECmLineBreakClass }, //malayalam vowel sign aa |
|
480 { 0x0d46, 0x0d49, MLineBreaker::ECmLineBreakClass }, //malayalam vowel sign e |
|
481 { 0x0d4a, 0x0d4e, MLineBreaker::ECmLineBreakClass }, //malayalam vowel sign o |
|
482 { 0x0d57, 0x0d58, MLineBreaker::ECmLineBreakClass }, //malayalam au length mark |
|
483 { 0x0d60, 0x0d62, MLineBreaker::EAlLineBreakClass }, //malayalam letter vocalic rr |
|
484 { 0x0d66, 0x0d70, MLineBreaker::ENuLineBreakClass }, //malayalam digit zero |
|
485 { 0x0d82, 0x0d84, MLineBreaker::ECmLineBreakClass }, //sinhala sign anusvaraya |
|
486 { 0x0d85, 0x0d97, MLineBreaker::EAlLineBreakClass }, //sinhala letter ayanna |
|
487 { 0x0d9a, 0x0db2, MLineBreaker::EAlLineBreakClass }, //sinhala letter alpapraana kayanna |
|
488 { 0x0db3, 0x0dbc, MLineBreaker::EAlLineBreakClass }, //sinhala letter sanyaka dayanna |
|
489 { 0x0dbd, 0x0dbe, MLineBreaker::EAlLineBreakClass }, //sinhala letter dantaja layanna |
|
490 { 0x0dc0, 0x0dc7, MLineBreaker::EAlLineBreakClass }, //sinhala letter vayanna |
|
491 { 0x0dca, 0x0dcb, MLineBreaker::ECmLineBreakClass }, //sinhala sign al-lakuna |
|
492 { 0x0dcf, 0x0dd5, MLineBreaker::ECmLineBreakClass }, //sinhala vowel sign aela-pilla |
|
493 { 0x0dd6, 0x0dd7, MLineBreaker::ECmLineBreakClass }, //sinhala vowel sign diga paa-pilla |
|
494 { 0x0dd8, 0x0de0, MLineBreaker::ECmLineBreakClass }, //sinhala vowel sign gaetta-pilla |
|
495 { 0x0df2, 0x0df4, MLineBreaker::ECmLineBreakClass }, //sinhala vowel sign diga gaetta-pilla |
|
496 { 0x0df4, 0x0df5, MLineBreaker::EAlLineBreakClass }, //sinhala punctuation kunddaliya |
|
497 { 0x0e01, 0x0e31, MLineBreaker::ESaLineBreakClass }, //thai character ko kai |
|
498 { 0x0e31, 0x0e32, MLineBreaker::ECmLineBreakClass }, //thai character mai han-akat |
|
499 { 0x0e32, 0x0e34, MLineBreaker::ESaLineBreakClass }, //thai character sara aa |
|
500 { 0x0e34, 0x0e3b, MLineBreaker::ECmLineBreakClass }, //thai character sara i |
|
501 { 0x0e3f, 0x0e40, MLineBreaker::EPrLineBreakClass }, //thai currency symbol baht |
|
502 { 0x0e40, 0x0e47, MLineBreaker::ESaLineBreakClass }, //thai character sara e |
|
503 { 0x0e47, 0x0e4f, MLineBreaker::ECmLineBreakClass }, //thai character maitaikhu |
|
504 { 0x0e4f, 0x0e50, MLineBreaker::EAlLineBreakClass }, //thai character fongman |
|
505 { 0x0e50, 0x0e5a, MLineBreaker::ENuLineBreakClass }, //thai digit zero |
|
506 { 0x0e5a, 0x0e5c, MLineBreaker::ENsLineBreakClass }, //thai character angkhankhu |
|
507 { 0x0e81, 0x0e83, MLineBreaker::ESaLineBreakClass }, //lao letter ko |
|
508 { 0x0e84, 0x0e85, MLineBreaker::ESaLineBreakClass }, //lao letter kho tam |
|
509 { 0x0e87, 0x0e89, MLineBreaker::ESaLineBreakClass }, //lao letter ngo |
|
510 { 0x0e8a, 0x0e8b, MLineBreaker::ESaLineBreakClass }, //lao letter so tam |
|
511 { 0x0e8d, 0x0e8e, MLineBreaker::ESaLineBreakClass }, //lao letter nyo |
|
512 { 0x0e94, 0x0e98, MLineBreaker::ESaLineBreakClass }, //lao letter do |
|
513 { 0x0e99, 0x0ea0, MLineBreaker::ESaLineBreakClass }, //lao letter no |
|
514 { 0x0ea1, 0x0ea4, MLineBreaker::ESaLineBreakClass }, //lao letter mo |
|
515 { 0x0ea5, 0x0ea6, MLineBreaker::ESaLineBreakClass }, //lao letter lo loot |
|
516 { 0x0ea7, 0x0ea8, MLineBreaker::ESaLineBreakClass }, //lao letter wo |
|
517 { 0x0eaa, 0x0eac, MLineBreaker::ESaLineBreakClass }, //lao letter so sung |
|
518 { 0x0ead, 0x0eb1, MLineBreaker::ESaLineBreakClass }, //lao letter o |
|
519 { 0x0eb1, 0x0eb2, MLineBreaker::ECmLineBreakClass }, //lao vowel sign mai kan |
|
520 { 0x0eb2, 0x0eb4, MLineBreaker::ESaLineBreakClass }, //lao vowel sign aa |
|
521 { 0x0eb4, 0x0eba, MLineBreaker::ECmLineBreakClass }, //lao vowel sign i |
|
522 { 0x0ebb, 0x0ebd, MLineBreaker::ECmLineBreakClass }, //lao vowel sign mai kon |
|
523 { 0x0ebd, 0x0ebe, MLineBreaker::ESaLineBreakClass }, //lao semivowel sign nyo |
|
524 { 0x0ec0, 0x0ec5, MLineBreaker::ESaLineBreakClass }, //lao vowel sign e |
|
525 { 0x0ec6, 0x0ec7, MLineBreaker::ESaLineBreakClass }, //lao ko la |
|
526 { 0x0ec8, 0x0ece, MLineBreaker::ECmLineBreakClass }, //lao tone mai ek |
|
527 { 0x0ed0, 0x0eda, MLineBreaker::ENuLineBreakClass }, //lao digit zero |
|
528 { 0x0edc, 0x0ede, MLineBreaker::ESaLineBreakClass }, //lao ho no |
|
529 { 0x0f00, 0x0f0b, MLineBreaker::EAlLineBreakClass }, //tibetan syllable om |
|
530 { 0x0f0b, 0x0f0c, MLineBreaker::EBaLineBreakClass }, //tibetan mark intersyllabic tsheg |
|
531 { 0x0f0c, 0x0f0d, MLineBreaker::EGlLineBreakClass }, //tibetan mark delimiter tsheg bstar |
|
532 { 0x0f0d, 0x0f18, MLineBreaker::EAlLineBreakClass }, //tibetan mark shad |
|
533 { 0x0f18, 0x0f1a, MLineBreaker::ECmLineBreakClass }, //tibetan astrological sign -khyud pa |
|
534 { 0x0f1a, 0x0f20, MLineBreaker::EAlLineBreakClass }, //tibetan sign rdel dkar gcig |
|
535 { 0x0f20, 0x0f2a, MLineBreaker::ENuLineBreakClass }, //tibetan digit zero |
|
536 { 0x0f2a, 0x0f35, MLineBreaker::EAlLineBreakClass }, //tibetan digit half one |
|
537 { 0x0f35, 0x0f36, MLineBreaker::ECmLineBreakClass }, //tibetan mark ngas bzung nyi zla |
|
538 { 0x0f36, 0x0f37, MLineBreaker::EAlLineBreakClass }, //tibetan mark caret -dzud rtags bzhi mig can |
|
539 { 0x0f37, 0x0f38, MLineBreaker::ECmLineBreakClass }, //tibetan mark ngas bzung sgor rtags |
|
540 { 0x0f38, 0x0f39, MLineBreaker::EAlLineBreakClass }, //tibetan mark che mgo |
|
541 { 0x0f39, 0x0f3a, MLineBreaker::ECmLineBreakClass }, //tibetan mark tsa -phru |
|
542 { 0x0f3a, 0x0f3b, MLineBreaker::EOpLineBreakClass }, //tibetan mark gug rtags gyon |
|
543 { 0x0f3b, 0x0f3c, MLineBreaker::EClLineBreakClass }, //tibetan mark gug rtags gyas |
|
544 { 0x0f3c, 0x0f3d, MLineBreaker::EOpLineBreakClass }, //tibetan mark ang khang gyon |
|
545 { 0x0f3d, 0x0f3e, MLineBreaker::EClLineBreakClass }, //tibetan mark ang khang gyas |
|
546 { 0x0f3e, 0x0f40, MLineBreaker::ECmLineBreakClass }, //tibetan sign yar tshes |
|
547 { 0x0f40, 0x0f48, MLineBreaker::EAlLineBreakClass }, //tibetan letter ka |
|
548 { 0x0f49, 0x0f6b, MLineBreaker::EAlLineBreakClass }, //tibetan letter nya |
|
549 { 0x0f71, 0x0f85, MLineBreaker::ECmLineBreakClass }, //tibetan vowel sign aa |
|
550 { 0x0f85, 0x0f86, MLineBreaker::EAlLineBreakClass }, //tibetan mark paluta |
|
551 { 0x0f86, 0x0f88, MLineBreaker::ECmLineBreakClass }, //tibetan sign lci rtags |
|
552 { 0x0f88, 0x0f8c, MLineBreaker::EAlLineBreakClass }, //tibetan sign lce tsa can |
|
553 { 0x0f90, 0x0f98, MLineBreaker::ECmLineBreakClass }, //tibetan subjoined letter ka |
|
554 { 0x0f99, 0x0fbd, MLineBreaker::ECmLineBreakClass }, //tibetan subjoined letter nya |
|
555 { 0x0fbe, 0x0fc6, MLineBreaker::EAlLineBreakClass }, //tibetan ku ru kha |
|
556 { 0x0fc6, 0x0fc7, MLineBreaker::ECmLineBreakClass }, //tibetan symbol padma gdan |
|
557 { 0x0fc7, 0x0fcd, MLineBreaker::EAlLineBreakClass }, //tibetan symbol rdo rje rgya gram |
|
558 { 0x0fcf, 0x0fd0, MLineBreaker::EAlLineBreakClass }, //tibetan sign rdel nag gsum |
|
559 { 0x0fd0, 0x0fd2, MLineBreaker::EBbLineBreakClass }, //TIBETAN MARK BSKA- SHOG GI MGO RGYAN |
|
560 { 0x1000, 0x1022, MLineBreaker::ESaLineBreakClass }, //myanmar letter ka |
|
561 { 0x1023, 0x1028, MLineBreaker::ESaLineBreakClass }, //myanmar letter i |
|
562 { 0x1029, 0x102b, MLineBreaker::ESaLineBreakClass }, //myanmar letter o |
|
563 { 0x102c, 0x1033, MLineBreaker::ECmLineBreakClass }, //myanmar vowel sign aa |
|
564 { 0x1036, 0x103a, MLineBreaker::ECmLineBreakClass }, //myanmar sign anusvara |
|
565 { 0x1040, 0x104a, MLineBreaker::ENuLineBreakClass }, //myanmar digit zero |
|
566 { 0x104a, 0x1050, MLineBreaker::EAlLineBreakClass }, //myanmar sign little section |
|
567 { 0x1050, 0x1056, MLineBreaker::ESaLineBreakClass }, //myanmar letter sha |
|
568 { 0x1056, 0x105a, MLineBreaker::ECmLineBreakClass }, //myanmar vowel sign vocalic r |
|
569 { 0x10a0, 0x10c6, MLineBreaker::EAlLineBreakClass }, //georgian capital letter an |
|
570 { 0x10d0, 0x10fb, MLineBreaker::EAlLineBreakClass }, //georgian letter an |
|
571 { 0x10fb, 0x10fc, MLineBreaker::EAlLineBreakClass }, //georgian paragraph separator |
|
572 { 0x10fc, 0x10fd, MLineBreaker::EAlLineBreakClass }, //MODIFIER LETTER GEORGIAN NAR |
|
573 { 0x1100, 0x115a, MLineBreaker::EIdLineBreakClass }, //hangul choseong kiyeok |
|
574 { 0x115f, 0x1160, MLineBreaker::EIdLineBreakClass }, //hangul choseong filler |
|
575 { 0x1160, 0x11a3, MLineBreaker::ECmLineBreakClass }, //hangul jungseong filler |
|
576 { 0x11a8, 0x11fa, MLineBreaker::ECmLineBreakClass }, //hangul jongseong kiyeok |
|
577 { 0x1200, 0x1248, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable ha |
|
578 { 0x1248, 0x1249, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable qwa |
|
579 { 0x124a, 0x124e, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable qwi |
|
580 { 0x1250, 0x1257, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable qha |
|
581 { 0x1258, 0x1259, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable qhwa |
|
582 { 0x125a, 0x125e, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable qhwi |
|
583 { 0x1260, 0x1288, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable ba |
|
584 { 0x1288, 0x1289, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable xwa |
|
585 { 0x128a, 0x128e, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable xwi |
|
586 { 0x1290, 0x12b0, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable na |
|
587 { 0x12b0, 0x12b1, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable kwa |
|
588 { 0x12b2, 0x12b6, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable kwi |
|
589 { 0x12b8, 0x12bf, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable kxa |
|
590 { 0x12c0, 0x12c1, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable kxwa |
|
591 { 0x12c2, 0x12c6, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable kxwi |
|
592 { 0x12c8, 0x12d7, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable wa |
|
593 { 0x12d8, 0x1310, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable za |
|
594 { 0x1310, 0x1311, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable gwa |
|
595 { 0x1312, 0x1316, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable gwi |
|
596 { 0x1318, 0x135b, MLineBreaker::EAlLineBreakClass }, //ethiopic syllable gga |
|
597 { 0x135f, 0x1360, MLineBreaker::ECmLineBreakClass }, //ETHIOPIC COMBINING GEMINATION MARK |
|
598 { 0x1360, 0x1361, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SECTION MARK |
|
599 { 0x1361, 0x1362, MLineBreaker::EBaLineBreakClass }, //ethiopic wordspace |
|
600 { 0x1362, 0x1369, MLineBreaker::EAlLineBreakClass }, //ethiopic full stop |
|
601 { 0x1369, 0x1372, MLineBreaker::ENuLineBreakClass }, //ethiopic digit one |
|
602 { 0x1372, 0x137d, MLineBreaker::EAlLineBreakClass }, //ethiopic number ten |
|
603 { 0x1380, 0x139a, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE SEBATBEIT MWA |
|
604 { 0x13a0, 0x13f5, MLineBreaker::EAlLineBreakClass }, //cherokee letter a |
|
605 { 0x1401, 0x1677, MLineBreaker::EAlLineBreakClass }, //canadian syllabics e |
|
606 { 0x1680, 0x1681, MLineBreaker::EBaLineBreakClass }, //ogham space mark |
|
607 { 0x1681, 0x169b, MLineBreaker::EAlLineBreakClass }, //ogham letter beith |
|
608 { 0x169b, 0x169c, MLineBreaker::EOpLineBreakClass }, //ogham feather mark |
|
609 { 0x169c, 0x169d, MLineBreaker::EClLineBreakClass }, //ogham reversed feather mark |
|
610 { 0x16a0, 0x16f1, MLineBreaker::EAlLineBreakClass }, //runic letter fehu feoh fe f |
|
611 { 0x1700, 0x170d, MLineBreaker::EAlLineBreakClass }, //TAGALOG LETTER A |
|
612 { 0x170e, 0x1712, MLineBreaker::EAlLineBreakClass }, //TAGALOG LETTER LA |
|
613 { 0x1712, 0x1715, MLineBreaker::ECmLineBreakClass }, //TAGALOG VOWEL SIGN I |
|
614 { 0x1720, 0x1732, MLineBreaker::EAlLineBreakClass }, //HANUNOO LETTER A |
|
615 { 0x1732, 0x1735, MLineBreaker::ECmLineBreakClass }, //HANUNOO VOWEL SIGN I |
|
616 { 0x1735, 0x1737, MLineBreaker::EBaLineBreakClass }, //PHILIPPINE SINGLE PUNCTUATION |
|
617 { 0x1740, 0x1752, MLineBreaker::EAlLineBreakClass }, //BUHID LETTER A |
|
618 { 0x1752, 0x1754, MLineBreaker::ECmLineBreakClass }, //BUHID VOWEL SIGN I |
|
619 { 0x1760, 0x176d, MLineBreaker::EAlLineBreakClass }, //TAGBANWA LETTER A |
|
620 { 0x176e, 0x1771, MLineBreaker::EAlLineBreakClass }, //TAGBANWA LETTER LA |
|
621 { 0x1772, 0x1774, MLineBreaker::ECmLineBreakClass }, //TAGBANWA VOWEL SIGN I |
|
622 { 0x1780, 0x17b4, MLineBreaker::ESaLineBreakClass }, //khmer letter ka |
|
623 { 0x17b4, 0x17d4, MLineBreaker::ECmLineBreakClass }, //khmer vowel inherent aq |
|
624 { 0x17d4, 0x17d5, MLineBreaker::ENsLineBreakClass }, //khmer sign khan |
|
625 { 0x17d5, 0x17d6, MLineBreaker::EBaLineBreakClass }, //khmer sign bariyoosan |
|
626 { 0x17d6, 0x17db, MLineBreaker::ENsLineBreakClass }, //khmer sign camnuc pii kuuh |
|
627 { 0x17db, 0x17dc, MLineBreaker::EPrLineBreakClass }, //khmer currency symbol riel |
|
628 { 0x17dc, 0x17dd, MLineBreaker::EAlLineBreakClass }, //khmer sign avakrahasanya |
|
629 { 0x17dd, 0x17de, MLineBreaker::ESaLineBreakClass }, //KHMER SIGN ATTHACAN |
|
630 { 0x17e0, 0x17ea, MLineBreaker::ENuLineBreakClass }, //khmer digit zero |
|
631 { 0x17f0, 0x17fa, MLineBreaker::EAlLineBreakClass }, //KHMER SYMBOL LEK ATTAK SON |
|
632 { 0x1800, 0x1806, MLineBreaker::EAlLineBreakClass }, //mongolian birga |
|
633 { 0x1806, 0x1807, MLineBreaker::EBbLineBreakClass }, //mongolian todo soft hyphen |
|
634 { 0x1807, 0x180b, MLineBreaker::EAlLineBreakClass }, //mongolian sibe syllable boundary marker |
|
635 { 0x180b, 0x180f, MLineBreaker::ECmLineBreakClass }, //mongolian free variation selector one |
|
636 { 0x1810, 0x181a, MLineBreaker::ENuLineBreakClass }, //mongolian digit zero |
|
637 { 0x1820, 0x1878, MLineBreaker::EAlLineBreakClass }, //mongolian letter a |
|
638 { 0x1880, 0x18a9, MLineBreaker::EAlLineBreakClass }, //mongolian letter ali gali anusvara one |
|
639 { 0x18a9, 0x18aa, MLineBreaker::ECmLineBreakClass }, //mongolian letter ali gali dagalga |
|
640 { 0x1900, 0x191d, MLineBreaker::EAlLineBreakClass }, //LIMBU VOWEL-CARRIER LETTER |
|
641 { 0x1920, 0x192c, MLineBreaker::ECmLineBreakClass }, //LIMBU VOWEL SIGN A |
|
642 { 0x1930, 0x193c, MLineBreaker::ECmLineBreakClass }, //LIMBU SMALL LETTER KA |
|
643 { 0x1940, 0x1941, MLineBreaker::EAlLineBreakClass }, //LIMBU SIGN LOO |
|
644 { 0x1944, 0x1946, MLineBreaker::EExLineBreakClass }, //LIMBU EXCLAMATION MARK |
|
645 { 0x1946, 0x1950, MLineBreaker::ENuLineBreakClass }, //LIMBU DIGIT ZERO |
|
646 { 0x1950, 0x196e, MLineBreaker::ESaLineBreakClass }, //TAI LE LETTER KA |
|
647 { 0x1970, 0x1975, MLineBreaker::ESaLineBreakClass }, //TAI LE LETTER TONE-2 |
|
648 { 0x1980, 0x19aa, MLineBreaker::ESaLineBreakClass }, //NEW TAI LUE LETTER HIGH QA |
|
649 { 0x19b0, 0x19ca, MLineBreaker::ESaLineBreakClass }, //NEW TAI LUE VOWEL SIGN VOWEL SHORTENER |
|
650 { 0x19d0, 0x19da, MLineBreaker::ENuLineBreakClass }, //NEW TAI LUE DIGIT ZERO |
|
651 { 0x19de, 0x19e0, MLineBreaker::ESaLineBreakClass }, //NEW TAI LUE SIGN LAE |
|
652 { 0x19e0, 0x1a17, MLineBreaker::EAlLineBreakClass }, //KHMER SYMBOL PATHAMASAT |
|
653 { 0x1a17, 0x1a1c, MLineBreaker::ECmLineBreakClass }, //BUGINESE VOWEL SIGN I |
|
654 { 0x1a1e, 0x1a1f, MLineBreaker::EBaLineBreakClass }, //BUGINESE PALLAWA |
|
655 { 0x1a1f, 0x1a20, MLineBreaker::EAlLineBreakClass }, //BUGINESE END OF SECTION |
|
656 { 0x1b00, 0x1b05, MLineBreaker::ECmLineBreakClass }, //BALINESE SIGN ULU RICEM |
|
657 { 0x1b05, 0x1b34, MLineBreaker::EAlLineBreakClass }, //BALINESE LETTER AKARA |
|
658 { 0x1b34, 0x1b45, MLineBreaker::ECmLineBreakClass }, //BALINESE SIGN REREKAN |
|
659 { 0x1b45, 0x1b4c, MLineBreaker::EAlLineBreakClass }, //BALINESE LETTER KAF SASAK |
|
660 { 0x1b50, 0x1b5a, MLineBreaker::ENuLineBreakClass }, //BALINESE DIGIT ZERO |
|
661 { 0x1b5a, 0x1b61, MLineBreaker::EBaLineBreakClass }, //BALINESE PANTI |
|
662 { 0x1b61, 0x1b6b, MLineBreaker::EAlLineBreakClass }, //BALINESE MUSICAL SYMBOL DONG |
|
663 { 0x1b6b, 0x1b74, MLineBreaker::ECmLineBreakClass }, //BALINESE MUSICAL SYMBOL COMBINING TEGEH |
|
664 { 0x1b74, 0x1b7d, MLineBreaker::EAlLineBreakClass }, //BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG |
|
665 { 0x1d00, 0x1dc0, MLineBreaker::EAlLineBreakClass }, //LATIN LETTER SMALL CAPITAL A |
|
666 { 0x1dc0, 0x1dcb, MLineBreaker::ECmLineBreakClass }, //COMBINING DOTTED GRAVE ACCENT |
|
667 { 0x1dfe, 0x1e00, MLineBreaker::ECmLineBreakClass }, //COMBINING LEFT ARROWHEAD ABOVE |
|
668 { 0x1e00, 0x1e9c, MLineBreaker::EAlLineBreakClass }, //latin capital letter a with ring below |
|
669 { 0x1ea0, 0x1efa, MLineBreaker::EAlLineBreakClass }, //latin capital letter a with dot below |
|
670 { 0x1f00, 0x1f16, MLineBreaker::EAlLineBreakClass }, //greek small letter alpha with psili |
|
671 { 0x1f18, 0x1f1e, MLineBreaker::EAlLineBreakClass }, //greek capital letter epsilon with psili |
|
672 { 0x1f20, 0x1f46, MLineBreaker::EAlLineBreakClass }, //greek small letter eta with psili |
|
673 { 0x1f48, 0x1f4e, MLineBreaker::EAlLineBreakClass }, //greek capital letter omicron with psili |
|
674 { 0x1f50, 0x1f58, MLineBreaker::EAlLineBreakClass }, //greek small letter upsilon with psili |
|
675 { 0x1f59, 0x1f5a, MLineBreaker::EAlLineBreakClass }, //greek capital letter upsilon with dasia |
|
676 { 0x1f5b, 0x1f5c, MLineBreaker::EAlLineBreakClass }, //greek capital letter upsilon with dasia and varia |
|
677 { 0x1f5d, 0x1f5e, MLineBreaker::EAlLineBreakClass }, //greek capital letter upsilon with dasia and oxia |
|
678 { 0x1f5f, 0x1f7e, MLineBreaker::EAlLineBreakClass }, //greek capital letter upsilon with dasia and perispomeni |
|
679 { 0x1f80, 0x1fb5, MLineBreaker::EAlLineBreakClass }, //greek small letter alpha with psili and ypogegrammeni |
|
680 { 0x1fb6, 0x1fc5, MLineBreaker::EAlLineBreakClass }, //greek small letter alpha with perispomeni |
|
681 { 0x1fc6, 0x1fd4, MLineBreaker::EAlLineBreakClass }, //greek small letter eta with perispomeni |
|
682 { 0x1fd6, 0x1fdc, MLineBreaker::EAlLineBreakClass }, //greek small letter iota with perispomeni |
|
683 { 0x1fdd, 0x1ff0, MLineBreaker::EAlLineBreakClass }, //greek dasia and varia |
|
684 { 0x1ff2, 0x1ff5, MLineBreaker::EAlLineBreakClass }, //greek small letter omega with varia and ypogegrammeni |
|
685 { 0x1ff6, 0x1fff, MLineBreaker::EAlLineBreakClass }, //greek small letter omega with perispomeni |
|
686 { 0x2000, 0x2007, MLineBreaker::EBaLineBreakClass }, //en quad |
|
687 { 0x2007, 0x2008, MLineBreaker::EGlLineBreakClass }, //figure space |
|
688 { 0x2008, 0x200b, MLineBreaker::EBaLineBreakClass }, //punctuation space |
|
689 { 0x200b, 0x200c, MLineBreaker::EZwLineBreakClass }, //zero width space |
|
690 { 0x200c, 0x2010, MLineBreaker::ECmLineBreakClass }, //zero width non-joiner |
|
691 { 0x2010, 0x2011, MLineBreaker::EBaLineBreakClass }, //hyphen |
|
692 { 0x2011, 0x2012, MLineBreaker::EGlLineBreakClass }, //non-breaking hyphen |
|
693 { 0x2012, 0x2014, MLineBreaker::EBaLineBreakClass }, //figure dash |
|
694 { 0x2014, 0x2015, MLineBreaker::EB2LineBreakClass }, //em dash |
|
695 { 0x2015, 0x2016, MLineBreaker::EAlLineBreakClass }, //horizontal bar |
|
696 { 0x2016, 0x2017, MLineBreaker::EAiLineBreakClass }, //double vertical line |
|
697 { 0x2017, 0x2018, MLineBreaker::EAlLineBreakClass }, //double low line |
|
698 { 0x2018, 0x201a, MLineBreaker::EQuLineBreakClass }, //left single quotation mark |
|
699 { 0x201a, 0x201b, MLineBreaker::EOpLineBreakClass }, //single low-9 quotation mark |
|
700 { 0x201b, 0x201e, MLineBreaker::EQuLineBreakClass }, //single high-reversed-9 quotation mark |
|
701 { 0x201e, 0x201f, MLineBreaker::EOpLineBreakClass }, //double low-9 quotation mark |
|
702 { 0x201f, 0x2020, MLineBreaker::EQuLineBreakClass }, //double high-reversed-9 quotation mark |
|
703 { 0x2020, 0x2022, MLineBreaker::EAiLineBreakClass }, //dagger |
|
704 { 0x2022, 0x2024, MLineBreaker::EAlLineBreakClass }, //bullet |
|
705 { 0x2024, 0x2027, MLineBreaker::EInLineBreakClass }, //one dot leader |
|
706 { 0x2027, 0x2028, MLineBreaker::EBaLineBreakClass }, //hyphenation point |
|
707 { 0x2028, 0x202a, MLineBreaker::EBkLineBreakClass }, //line separator |
|
708 { 0x202a, 0x202f, MLineBreaker::ECmLineBreakClass }, //left-to-right embedding |
|
709 { 0x202f, 0x2030, MLineBreaker::EGlLineBreakClass }, //narrow no-break space |
|
710 { 0x2030, 0x2038, MLineBreaker::EPoLineBreakClass }, //per mille sign |
|
711 { 0x2038, 0x2039, MLineBreaker::EAlLineBreakClass }, //caret |
|
712 { 0x2039, 0x203b, MLineBreaker::EQuLineBreakClass }, //single left-pointing angle quotation mark |
|
713 { 0x203b, 0x203c, MLineBreaker::EAiLineBreakClass }, //reference mark |
|
714 { 0x203c, 0x203d, MLineBreaker::ENsLineBreakClass }, //double exclamation mark |
|
715 { 0x203d, 0x2044, MLineBreaker::EAlLineBreakClass }, //interrobang |
|
716 { 0x2044, 0x2045, MLineBreaker::ENsLineBreakClass }, //fraction slash |
|
717 { 0x2045, 0x2046, MLineBreaker::EOpLineBreakClass }, //left square bracket with quill |
|
718 { 0x2046, 0x2047, MLineBreaker::EClLineBreakClass }, //right square bracket with quill |
|
719 { 0x2047, 0x2048, MLineBreaker::ENsLineBreakClass }, //DOUBLE QUESTION MARK |
|
720 { 0x2048, 0x2056, MLineBreaker::EAlLineBreakClass }, //question exclamation mark |
|
721 { 0x2056, 0x2057, MLineBreaker::EBaLineBreakClass }, //THREE DOT PUNCTUATION |
|
722 { 0x2057, 0x2058, MLineBreaker::EAlLineBreakClass }, //QUADRUPLE PRIME |
|
723 { 0x2058, 0x205c, MLineBreaker::EBaLineBreakClass }, //FOUR DOT PUNCTUATION |
|
724 { 0x205c, 0x205d, MLineBreaker::EAlLineBreakClass }, //DOTTED CROSS |
|
725 { 0x205d, 0x2060, MLineBreaker::EBaLineBreakClass }, //TRICOLON |
|
726 { 0x2060, 0x2061, MLineBreaker::EGlLineBreakClass }, //WORD JOINER |
|
727 { 0x2061, 0x2064, MLineBreaker::EAlLineBreakClass }, //FUNCTION APPLICATION |
|
728 { 0x206a, 0x2070, MLineBreaker::ECmLineBreakClass }, //inhibit symmetric swapping |
|
729 { 0x2070, 0x2071, MLineBreaker::EAlLineBreakClass }, //superscript zero |
|
730 { 0x2071, 0x2072, MLineBreaker::EAlLineBreakClass }, //SUPERSCRIPT LATIN SMALL LETTER I |
|
731 { 0x2074, 0x2075, MLineBreaker::EAiLineBreakClass }, //superscript four |
|
732 { 0x2075, 0x207d, MLineBreaker::EAlLineBreakClass }, //superscript five |
|
733 { 0x207d, 0x207e, MLineBreaker::EOpLineBreakClass }, //superscript left parenthesis |
|
734 { 0x207e, 0x207f, MLineBreaker::EClLineBreakClass }, //superscript right parenthesis |
|
735 { 0x207f, 0x2080, MLineBreaker::EAiLineBreakClass }, //superscript latin small letter n |
|
736 { 0x2080, 0x2081, MLineBreaker::EAlLineBreakClass }, //subscript zero |
|
737 { 0x2081, 0x2085, MLineBreaker::EAiLineBreakClass }, //subscript one |
|
738 { 0x2085, 0x208d, MLineBreaker::EAlLineBreakClass }, //subscript five |
|
739 { 0x208d, 0x208e, MLineBreaker::EOpLineBreakClass }, //subscript left parenthesis |
|
740 { 0x208e, 0x208f, MLineBreaker::EClLineBreakClass }, //subscript right parenthesis |
|
741 { 0x2090, 0x2095, MLineBreaker::EAlLineBreakClass }, //LATIN SUBSCRIPT SMALL LETTER A |
|
742 { 0x20a0, 0x20a7, MLineBreaker::EPrLineBreakClass }, //euro-currency sign |
|
743 { 0x20a7, 0x20a8, MLineBreaker::EPoLineBreakClass }, //peseta sign |
|
744 { 0x20a8, 0x20b6, MLineBreaker::EPrLineBreakClass }, //rupee sign |
|
745 { 0x20d0, 0x20f0, MLineBreaker::ECmLineBreakClass }, //combining left harpoon above |
|
746 { 0x2100, 0x2103, MLineBreaker::EAlLineBreakClass }, //account of |
|
747 { 0x2103, 0x2104, MLineBreaker::EPoLineBreakClass }, //degree celsius |
|
748 { 0x2104, 0x2105, MLineBreaker::EAlLineBreakClass }, //centre line symbol |
|
749 { 0x2105, 0x2106, MLineBreaker::EAiLineBreakClass }, //care of |
|
750 { 0x2106, 0x2109, MLineBreaker::EAlLineBreakClass }, //cada una |
|
751 { 0x2109, 0x210a, MLineBreaker::EPoLineBreakClass }, //degree fahrenheit |
|
752 { 0x210a, 0x2113, MLineBreaker::EAlLineBreakClass }, //script small g |
|
753 { 0x2113, 0x2114, MLineBreaker::EAiLineBreakClass }, //script small l |
|
754 { 0x2114, 0x2116, MLineBreaker::EAlLineBreakClass }, //l b bar symbol |
|
755 { 0x2116, 0x2117, MLineBreaker::EPrLineBreakClass }, //numero sign |
|
756 { 0x2117, 0x2121, MLineBreaker::EAlLineBreakClass }, //sound recording copyright |
|
757 { 0x2121, 0x2123, MLineBreaker::EAiLineBreakClass }, //telephone sign |
|
758 { 0x2123, 0x2126, MLineBreaker::EAlLineBreakClass }, //versicle |
|
759 { 0x2126, 0x2127, MLineBreaker::EPoLineBreakClass }, //ohm sign |
|
760 { 0x2127, 0x212b, MLineBreaker::EAlLineBreakClass }, //inverted ohm sign |
|
761 { 0x212b, 0x212c, MLineBreaker::EAiLineBreakClass }, //angstrom sign |
|
762 { 0x212c, 0x214f, MLineBreaker::EAlLineBreakClass }, //script capital b |
|
763 { 0x2153, 0x2154, MLineBreaker::EAlLineBreakClass }, //vulgar fraction one third |
|
764 { 0x2154, 0x2156, MLineBreaker::EAiLineBreakClass }, //vulgar fraction two thirds |
|
765 { 0x2156, 0x215b, MLineBreaker::EAlLineBreakClass }, //vulgar fraction two fifths |
|
766 { 0x215b, 0x215c, MLineBreaker::EAiLineBreakClass }, //vulgar fraction one eighth |
|
767 { 0x215c, 0x215e, MLineBreaker::EAlLineBreakClass }, //vulgar fraction three eighths |
|
768 { 0x215e, 0x215f, MLineBreaker::EAiLineBreakClass }, //vulgar fraction seven eighths |
|
769 { 0x215f, 0x2160, MLineBreaker::EAlLineBreakClass }, //fraction numerator one |
|
770 { 0x2160, 0x216c, MLineBreaker::EAiLineBreakClass }, //roman numeral one |
|
771 { 0x216c, 0x2170, MLineBreaker::EAlLineBreakClass }, //roman numeral fifty |
|
772 { 0x2170, 0x217a, MLineBreaker::EAiLineBreakClass }, //small roman numeral one |
|
773 { 0x217a, 0x2185, MLineBreaker::EAlLineBreakClass }, //small roman numeral eleven |
|
774 { 0x2190, 0x219a, MLineBreaker::EAiLineBreakClass }, //leftwards arrow |
|
775 { 0x219a, 0x21d2, MLineBreaker::EAlLineBreakClass }, //leftwards arrow with stroke |
|
776 { 0x21d2, 0x21d3, MLineBreaker::EAiLineBreakClass }, //rightwards double arrow |
|
777 { 0x21d3, 0x21d4, MLineBreaker::EAlLineBreakClass }, //downwards double arrow |
|
778 { 0x21d4, 0x21d5, MLineBreaker::EAiLineBreakClass }, //left right double arrow |
|
779 { 0x21d5, 0x2200, MLineBreaker::EAlLineBreakClass }, //up down double arrow |
|
780 { 0x2200, 0x2201, MLineBreaker::EAiLineBreakClass }, //for all |
|
781 { 0x2201, 0x2202, MLineBreaker::EAlLineBreakClass }, //complement |
|
782 { 0x2202, 0x2204, MLineBreaker::EAiLineBreakClass }, //partial differential |
|
783 { 0x2204, 0x2207, MLineBreaker::EAlLineBreakClass }, //there does not exist |
|
784 { 0x2207, 0x2209, MLineBreaker::EAiLineBreakClass }, //nabla |
|
785 { 0x2209, 0x220b, MLineBreaker::EAlLineBreakClass }, //not an element of |
|
786 { 0x220b, 0x220c, MLineBreaker::EAiLineBreakClass }, //contains as member |
|
787 { 0x220c, 0x220f, MLineBreaker::EAlLineBreakClass }, //does not contain as member |
|
788 { 0x220f, 0x2210, MLineBreaker::EAiLineBreakClass }, //n-ary product |
|
789 { 0x2210, 0x2211, MLineBreaker::EAlLineBreakClass }, //n-ary coproduct |
|
790 { 0x2211, 0x2212, MLineBreaker::EAiLineBreakClass }, //n-ary summation |
|
791 { 0x2212, 0x2214, MLineBreaker::EPrLineBreakClass }, //minus sign |
|
792 { 0x2214, 0x2215, MLineBreaker::EAlLineBreakClass }, //dot plus |
|
793 { 0x2215, 0x2216, MLineBreaker::EAiLineBreakClass }, //division slash |
|
794 { 0x2216, 0x221a, MLineBreaker::EAlLineBreakClass }, //set minus |
|
795 { 0x221a, 0x221b, MLineBreaker::EAiLineBreakClass }, //square root |
|
796 { 0x221b, 0x221d, MLineBreaker::EAlLineBreakClass }, //cube root |
|
797 { 0x221d, 0x2221, MLineBreaker::EAiLineBreakClass }, //proportional to |
|
798 { 0x2221, 0x2223, MLineBreaker::EAlLineBreakClass }, //measured angle |
|
799 { 0x2223, 0x2224, MLineBreaker::EAiLineBreakClass }, //divides |
|
800 { 0x2224, 0x2225, MLineBreaker::EAlLineBreakClass }, //does not divide |
|
801 { 0x2225, 0x2226, MLineBreaker::EAiLineBreakClass }, //parallel to |
|
802 { 0x2226, 0x2227, MLineBreaker::EAlLineBreakClass }, //not parallel to |
|
803 { 0x2227, 0x222d, MLineBreaker::EAiLineBreakClass }, //logical and |
|
804 { 0x222d, 0x222e, MLineBreaker::EAlLineBreakClass }, //triple integral |
|
805 { 0x222e, 0x222f, MLineBreaker::EAiLineBreakClass }, //contour integral |
|
806 { 0x222f, 0x2234, MLineBreaker::EAlLineBreakClass }, //surface integral |
|
807 { 0x2234, 0x2238, MLineBreaker::EAiLineBreakClass }, //therefore |
|
808 { 0x2238, 0x223c, MLineBreaker::EAlLineBreakClass }, //dot minus |
|
809 { 0x223c, 0x223e, MLineBreaker::EAiLineBreakClass }, //tilde operator |
|
810 { 0x223e, 0x2248, MLineBreaker::EAlLineBreakClass }, //inverted lazy s |
|
811 { 0x2248, 0x2249, MLineBreaker::EAiLineBreakClass }, //almost equal to |
|
812 { 0x2249, 0x224c, MLineBreaker::EAlLineBreakClass }, //not almost equal to |
|
813 { 0x224c, 0x224d, MLineBreaker::EAiLineBreakClass }, //all equal to |
|
814 { 0x224d, 0x2252, MLineBreaker::EAlLineBreakClass }, //equivalent to |
|
815 { 0x2252, 0x2253, MLineBreaker::EAiLineBreakClass }, //approximately equal to or the image of |
|
816 { 0x2253, 0x2260, MLineBreaker::EAlLineBreakClass }, //image of or approximately equal to |
|
817 { 0x2260, 0x2262, MLineBreaker::EAiLineBreakClass }, //not equal to |
|
818 { 0x2262, 0x2264, MLineBreaker::EAlLineBreakClass }, //not identical to |
|
819 { 0x2264, 0x2268, MLineBreaker::EAiLineBreakClass }, //less-than or equal to |
|
820 { 0x2268, 0x226a, MLineBreaker::EAlLineBreakClass }, //less-than but not equal to |
|
821 { 0x226a, 0x226c, MLineBreaker::EAiLineBreakClass }, //much less-than |
|
822 { 0x226c, 0x226e, MLineBreaker::EAlLineBreakClass }, //between |
|
823 { 0x226e, 0x2270, MLineBreaker::EAiLineBreakClass }, //not less-than |
|
824 { 0x2270, 0x2282, MLineBreaker::EAlLineBreakClass }, //neither less-than nor equal to |
|
825 { 0x2282, 0x2284, MLineBreaker::EAiLineBreakClass }, //subset of |
|
826 { 0x2284, 0x2286, MLineBreaker::EAlLineBreakClass }, //not a subset of |
|
827 { 0x2286, 0x2288, MLineBreaker::EAiLineBreakClass }, //subset of or equal to |
|
828 { 0x2288, 0x2295, MLineBreaker::EAlLineBreakClass }, //neither a subset of nor equal to |
|
829 { 0x2295, 0x2296, MLineBreaker::EAiLineBreakClass }, //circled plus |
|
830 { 0x2296, 0x2299, MLineBreaker::EAlLineBreakClass }, //circled minus |
|
831 { 0x2299, 0x229a, MLineBreaker::EAiLineBreakClass }, //circled dot operator |
|
832 { 0x229a, 0x22a5, MLineBreaker::EAlLineBreakClass }, //circled ring operator |
|
833 { 0x22a5, 0x22a6, MLineBreaker::EAiLineBreakClass }, //up tack |
|
834 { 0x22a6, 0x22bf, MLineBreaker::EAlLineBreakClass }, //assertion |
|
835 { 0x22bf, 0x22c0, MLineBreaker::EAiLineBreakClass }, //right triangle |
|
836 { 0x22c0, 0x2312, MLineBreaker::EAlLineBreakClass }, //n-ary logical and |
|
837 { 0x2312, 0x2313, MLineBreaker::EAiLineBreakClass }, //arc |
|
838 { 0x2313, 0x2329, MLineBreaker::EAlLineBreakClass }, //segment |
|
839 { 0x2329, 0x232a, MLineBreaker::EOpLineBreakClass }, //left-pointing angle bracket |
|
840 { 0x232a, 0x232b, MLineBreaker::EClLineBreakClass }, //right-pointing angle bracket |
|
841 { 0x232b, 0x23e8, MLineBreaker::EAlLineBreakClass }, //erase to the left |
|
842 { 0x2400, 0x2427, MLineBreaker::EAlLineBreakClass }, //symbol for null |
|
843 { 0x2440, 0x244b, MLineBreaker::EAlLineBreakClass }, //ocr hook |
|
844 { 0x2460, 0x24c0, MLineBreaker::EAiLineBreakClass }, //circled digit one |
|
845 { 0x24c0, 0x24d0, MLineBreaker::EAlLineBreakClass }, //circled latin capital letter k |
|
846 { 0x24d0, 0x24ea, MLineBreaker::EAiLineBreakClass }, //circled latin small letter a |
|
847 { 0x24ea, 0x24eb, MLineBreaker::EAlLineBreakClass }, //circled digit zero |
|
848 { 0x24eb, 0x24ff, MLineBreaker::EAiLineBreakClass }, //NEGATIVE CIRCLED NUMBER ELEVEN |
|
849 { 0x24ff, 0x2500, MLineBreaker::EAlLineBreakClass }, //NEGATIVE CIRCLED DIGIT ZERO |
|
850 { 0x2500, 0x254c, MLineBreaker::EAiLineBreakClass }, //box drawings light horizontal |
|
851 { 0x254c, 0x2550, MLineBreaker::EAlLineBreakClass }, //box drawings light double dash horizontal |
|
852 { 0x2550, 0x2575, MLineBreaker::EAiLineBreakClass }, //box drawings double horizontal |
|
853 { 0x2575, 0x2580, MLineBreaker::EAlLineBreakClass }, //box drawings light up |
|
854 { 0x2580, 0x2590, MLineBreaker::EAiLineBreakClass }, //upper half block |
|
855 { 0x2590, 0x2592, MLineBreaker::EAlLineBreakClass }, //right half block |
|
856 { 0x2592, 0x2596, MLineBreaker::EAiLineBreakClass }, //medium shade |
|
857 { 0x2596, 0x25a0, MLineBreaker::EAlLineBreakClass }, //QUADRANT LOWER LEFT |
|
858 { 0x25a0, 0x25a2, MLineBreaker::EAiLineBreakClass }, //black square |
|
859 { 0x25a2, 0x25a3, MLineBreaker::EAlLineBreakClass }, //white square with rounded corners |
|
860 { 0x25a3, 0x25aa, MLineBreaker::EAiLineBreakClass }, //white square containing black small square |
|
861 { 0x25aa, 0x25b2, MLineBreaker::EAlLineBreakClass }, //black small square |
|
862 { 0x25b2, 0x25b4, MLineBreaker::EAiLineBreakClass }, //black up-pointing triangle |
|
863 { 0x25b4, 0x25b6, MLineBreaker::EAlLineBreakClass }, //black up-pointing small triangle |
|
864 { 0x25b6, 0x25b8, MLineBreaker::EAiLineBreakClass }, //black right-pointing triangle |
|
865 { 0x25b8, 0x25bc, MLineBreaker::EAlLineBreakClass }, //black right-pointing small triangle |
|
866 { 0x25bc, 0x25be, MLineBreaker::EAiLineBreakClass }, //black down-pointing triangle |
|
867 { 0x25be, 0x25c0, MLineBreaker::EAlLineBreakClass }, //black down-pointing small triangle |
|
868 { 0x25c0, 0x25c2, MLineBreaker::EAiLineBreakClass }, //black left-pointing triangle |
|
869 { 0x25c2, 0x25c6, MLineBreaker::EAlLineBreakClass }, //black left-pointing small triangle |
|
870 { 0x25c6, 0x25c8, MLineBreaker::EAiLineBreakClass }, //black diamond |
|
871 { 0x25c8, 0x25cb, MLineBreaker::EAlLineBreakClass }, //white diamond containing black small diamond |
|
872 { 0x25cb, 0x25cc, MLineBreaker::EAiLineBreakClass }, //white circle |
|
873 { 0x25cc, 0x25ce, MLineBreaker::EAlLineBreakClass }, //dotted circle |
|
874 { 0x25ce, 0x25d2, MLineBreaker::EAiLineBreakClass }, //bullseye |
|
875 { 0x25d2, 0x25e2, MLineBreaker::EAlLineBreakClass }, //circle with lower half black |
|
876 { 0x25e2, 0x25e6, MLineBreaker::EAiLineBreakClass }, //black lower right triangle |
|
877 { 0x25e6, 0x25ef, MLineBreaker::EAlLineBreakClass }, //white bullet |
|
878 { 0x25ef, 0x25f0, MLineBreaker::EAiLineBreakClass }, //large circle |
|
879 { 0x25f0, 0x2605, MLineBreaker::EAlLineBreakClass }, //white square with upper left quadrant |
|
880 { 0x2605, 0x2607, MLineBreaker::EAiLineBreakClass }, //black star |
|
881 { 0x2607, 0x2609, MLineBreaker::EAlLineBreakClass }, //lightning |
|
882 { 0x2609, 0x260a, MLineBreaker::EAiLineBreakClass }, //sun |
|
883 { 0x260a, 0x260e, MLineBreaker::EAlLineBreakClass }, //ascending node |
|
884 { 0x260e, 0x2610, MLineBreaker::EAiLineBreakClass }, //black telephone |
|
885 { 0x2610, 0x2614, MLineBreaker::EAlLineBreakClass }, //ballot box |
|
886 { 0x2614, 0x2618, MLineBreaker::EAiLineBreakClass }, //UMBRELLA WITH RAIN DROPS |
|
887 { 0x2618, 0x261c, MLineBreaker::EAlLineBreakClass }, //SHAMROCK |
|
888 { 0x261c, 0x261d, MLineBreaker::EAiLineBreakClass }, //white left pointing index |
|
889 { 0x261d, 0x261e, MLineBreaker::EAlLineBreakClass }, //white up pointing index |
|
890 { 0x261e, 0x261f, MLineBreaker::EAiLineBreakClass }, //white right pointing index |
|
891 { 0x261f, 0x2640, MLineBreaker::EAlLineBreakClass }, //white down pointing index |
|
892 { 0x2640, 0x2641, MLineBreaker::EAiLineBreakClass }, //female sign |
|
893 { 0x2641, 0x2642, MLineBreaker::EAlLineBreakClass }, //earth |
|
894 { 0x2642, 0x2643, MLineBreaker::EAiLineBreakClass }, //male sign |
|
895 { 0x2643, 0x2660, MLineBreaker::EAlLineBreakClass }, //jupiter |
|
896 { 0x2660, 0x2662, MLineBreaker::EAiLineBreakClass }, //black spade suit |
|
897 { 0x2662, 0x2663, MLineBreaker::EAlLineBreakClass }, //white diamond suit |
|
898 { 0x2663, 0x2666, MLineBreaker::EAiLineBreakClass }, //black club suit |
|
899 { 0x2666, 0x2667, MLineBreaker::EAlLineBreakClass }, //black diamond suit |
|
900 { 0x2667, 0x266b, MLineBreaker::EAiLineBreakClass }, //white club suit |
|
901 { 0x266b, 0x266c, MLineBreaker::EAlLineBreakClass }, //beamed eighth notes |
|
902 { 0x266c, 0x266e, MLineBreaker::EAiLineBreakClass }, //beamed sixteenth notes |
|
903 { 0x266e, 0x266f, MLineBreaker::EAlLineBreakClass }, //music natural sign |
|
904 { 0x266f, 0x2670, MLineBreaker::EAiLineBreakClass }, //music sharp sign |
|
905 { 0x2670, 0x269d, MLineBreaker::EAlLineBreakClass }, //west syriac cross |
|
906 { 0x26a0, 0x26b3, MLineBreaker::EAlLineBreakClass }, //WARNING SIGN |
|
907 { 0x2701, 0x2705, MLineBreaker::EAlLineBreakClass }, //upper blade scissors |
|
908 { 0x2706, 0x270a, MLineBreaker::EAlLineBreakClass }, //telephone location sign |
|
909 { 0x270c, 0x2728, MLineBreaker::EAlLineBreakClass }, //victory hand |
|
910 { 0x2729, 0x274c, MLineBreaker::EAlLineBreakClass }, //stress outlined white star |
|
911 { 0x274d, 0x274e, MLineBreaker::EAlLineBreakClass }, //shadowed white circle |
|
912 { 0x274f, 0x2753, MLineBreaker::EAlLineBreakClass }, //lower right drop-shadowed white square |
|
913 { 0x2756, 0x2757, MLineBreaker::EAlLineBreakClass }, //black diamond minus white x |
|
914 { 0x2758, 0x275f, MLineBreaker::EAlLineBreakClass }, //light vertical bar |
|
915 { 0x2761, 0x2768, MLineBreaker::EAlLineBreakClass }, //curved stem paragraph sign ornament |
|
916 { 0x2768, 0x2769, MLineBreaker::EOpLineBreakClass }, //MEDIUM LEFT PARENTHESIS ORNAMENT |
|
917 { 0x2769, 0x276a, MLineBreaker::EClLineBreakClass }, //MEDIUM RIGHT PARENTHESIS ORNAMENT |
|
918 { 0x276a, 0x276b, MLineBreaker::EOpLineBreakClass }, //MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT |
|
919 { 0x276b, 0x276c, MLineBreaker::EClLineBreakClass }, //MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT |
|
920 { 0x276c, 0x276d, MLineBreaker::EOpLineBreakClass }, //MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT |
|
921 { 0x276d, 0x276e, MLineBreaker::EClLineBreakClass }, //MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT |
|
922 { 0x276e, 0x276f, MLineBreaker::EOpLineBreakClass }, //HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT |
|
923 { 0x276f, 0x2770, MLineBreaker::EClLineBreakClass }, //HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT |
|
924 { 0x2770, 0x2771, MLineBreaker::EOpLineBreakClass }, //HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT |
|
925 { 0x2771, 0x2772, MLineBreaker::EClLineBreakClass }, //HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT |
|
926 { 0x2772, 0x2773, MLineBreaker::EOpLineBreakClass }, //LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT |
|
927 { 0x2773, 0x2774, MLineBreaker::EClLineBreakClass }, //LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT |
|
928 { 0x2774, 0x2775, MLineBreaker::EOpLineBreakClass }, //MEDIUM LEFT CURLY BRACKET ORNAMENT |
|
929 { 0x2775, 0x2776, MLineBreaker::EClLineBreakClass }, //MEDIUM RIGHT CURLY BRACKET ORNAMENT |
|
930 { 0x2776, 0x2795, MLineBreaker::EAlLineBreakClass }, //dingbat negative circled digit one |
|
931 { 0x2798, 0x27b0, MLineBreaker::EAlLineBreakClass }, //heavy south east arrow |
|
932 { 0x27b1, 0x27bf, MLineBreaker::EAlLineBreakClass }, //notched upper right-shadowed white rightwards arrow |
|
933 { 0x27c0, 0x27c5, MLineBreaker::EAlLineBreakClass }, //THREE DIMENSIONAL ANGLE |
|
934 { 0x27c5, 0x27c6, MLineBreaker::EOpLineBreakClass }, //LEFT S-SHAPED BAG DELIMITER |
|
935 { 0x27c6, 0x27c7, MLineBreaker::EClLineBreakClass }, //RIGHT S-SHAPED BAG DELIMITER |
|
936 { 0x27c7, 0x27cb, MLineBreaker::EAlLineBreakClass }, //OR WITH DOT INSIDE |
|
937 { 0x27d0, 0x27e6, MLineBreaker::EAlLineBreakClass }, //WHITE DIAMOND WITH CENTRED DOT |
|
938 { 0x27e6, 0x27e7, MLineBreaker::EOpLineBreakClass }, //MATHEMATICAL LEFT WHITE SQUARE BRACKET |
|
939 { 0x27e7, 0x27e8, MLineBreaker::EClLineBreakClass }, //MATHEMATICAL RIGHT WHITE SQUARE BRACKET |
|
940 { 0x27e8, 0x27e9, MLineBreaker::EOpLineBreakClass }, //MATHEMATICAL LEFT ANGLE BRACKET |
|
941 { 0x27e9, 0x27ea, MLineBreaker::EClLineBreakClass }, //MATHEMATICAL RIGHT ANGLE BRACKET |
|
942 { 0x27ea, 0x27eb, MLineBreaker::EOpLineBreakClass }, //MATHEMATICAL LEFT DOUBLE ANGLE BRACKET |
|
943 { 0x27eb, 0x27ec, MLineBreaker::EClLineBreakClass }, //MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET |
|
944 { 0x27f0, 0x2983, MLineBreaker::EAlLineBreakClass }, //UPWARDS QUADRUPLE ARROW |
|
945 { 0x2983, 0x2984, MLineBreaker::EOpLineBreakClass }, //LEFT WHITE CURLY BRACKET |
|
946 { 0x2984, 0x2985, MLineBreaker::EClLineBreakClass }, //RIGHT WHITE CURLY BRACKET |
|
947 { 0x2985, 0x2986, MLineBreaker::EOpLineBreakClass }, //LEFT WHITE PARENTHESIS |
|
948 { 0x2986, 0x2987, MLineBreaker::EClLineBreakClass }, //RIGHT WHITE PARENTHESIS |
|
949 { 0x2987, 0x2988, MLineBreaker::EOpLineBreakClass }, //Z NOTATION LEFT IMAGE BRACKET |
|
950 { 0x2988, 0x2989, MLineBreaker::EClLineBreakClass }, //Z NOTATION RIGHT IMAGE BRACKET |
|
951 { 0x2989, 0x298a, MLineBreaker::EOpLineBreakClass }, //Z NOTATION LEFT BINDING BRACKET |
|
952 { 0x298a, 0x298b, MLineBreaker::EClLineBreakClass }, //Z NOTATION RIGHT BINDING BRACKET |
|
953 { 0x298b, 0x298c, MLineBreaker::EOpLineBreakClass }, //LEFT SQUARE BRACKET WITH UNDERBAR |
|
954 { 0x298c, 0x298d, MLineBreaker::EClLineBreakClass }, //RIGHT SQUARE BRACKET WITH UNDERBAR |
|
955 { 0x298d, 0x298e, MLineBreaker::EOpLineBreakClass }, //LEFT SQUARE BRACKET WITH TICK IN TOP CORNER |
|
956 { 0x298e, 0x298f, MLineBreaker::EClLineBreakClass }, //RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER |
|
957 { 0x298f, 0x2990, MLineBreaker::EOpLineBreakClass }, //LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER |
|
958 { 0x2990, 0x2991, MLineBreaker::EClLineBreakClass }, //RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER |
|
959 { 0x2991, 0x2992, MLineBreaker::EOpLineBreakClass }, //LEFT ANGLE BRACKET WITH DOT |
|
960 { 0x2992, 0x2993, MLineBreaker::EClLineBreakClass }, //RIGHT ANGLE BRACKET WITH DOT |
|
961 { 0x2993, 0x2994, MLineBreaker::EOpLineBreakClass }, //LEFT ARC LESS-THAN BRACKET |
|
962 { 0x2994, 0x2995, MLineBreaker::EClLineBreakClass }, //RIGHT ARC GREATER-THAN BRACKET |
|
963 { 0x2995, 0x2996, MLineBreaker::EOpLineBreakClass }, //DOUBLE LEFT ARC GREATER-THAN BRACKET |
|
964 { 0x2996, 0x2997, MLineBreaker::EClLineBreakClass }, //DOUBLE RIGHT ARC LESS-THAN BRACKET |
|
965 { 0x2997, 0x2998, MLineBreaker::EOpLineBreakClass }, //LEFT BLACK TORTOISE SHELL BRACKET |
|
966 { 0x2998, 0x2999, MLineBreaker::EClLineBreakClass }, //RIGHT BLACK TORTOISE SHELL BRACKET |
|
967 { 0x2999, 0x29d8, MLineBreaker::EAlLineBreakClass }, //DOTTED FENCE |
|
968 { 0x29d8, 0x29d9, MLineBreaker::EOpLineBreakClass }, //LEFT WIGGLY FENCE |
|
969 { 0x29d9, 0x29da, MLineBreaker::EClLineBreakClass }, //RIGHT WIGGLY FENCE |
|
970 { 0x29da, 0x29db, MLineBreaker::EOpLineBreakClass }, //LEFT DOUBLE WIGGLY FENCE |
|
971 { 0x29db, 0x29dc, MLineBreaker::EClLineBreakClass }, //RIGHT DOUBLE WIGGLY FENCE |
|
972 { 0x29dc, 0x29fc, MLineBreaker::EAlLineBreakClass }, //INCOMPLETE INFINITY |
|
973 { 0x29fc, 0x29fd, MLineBreaker::EOpLineBreakClass }, //LEFT-POINTING CURVED ANGLE BRACKET |
|
974 { 0x29fd, 0x29fe, MLineBreaker::EClLineBreakClass }, //RIGHT-POINTING CURVED ANGLE BRACKET |
|
975 { 0x29fe, 0x2b1b, MLineBreaker::EAlLineBreakClass }, //TINY |
|
976 { 0x2b20, 0x2b24, MLineBreaker::EAlLineBreakClass }, //WHITE PENTAGON |
|
977 { 0x2c00, 0x2c2f, MLineBreaker::EAlLineBreakClass }, //GLAGOLITIC CAPITAL LETTER AZU |
|
978 { 0x2c30, 0x2c5f, MLineBreaker::EAlLineBreakClass }, //GLAGOLITIC SMALL LETTER AZU |
|
979 { 0x2c60, 0x2c6d, MLineBreaker::EAlLineBreakClass }, //LATIN CAPITAL LETTER L WITH DOUBLE BAR |
|
980 { 0x2c74, 0x2c78, MLineBreaker::EAlLineBreakClass }, //LATIN SMALL LETTER V WITH CURL |
|
981 { 0x2c80, 0x2ceb, MLineBreaker::EAlLineBreakClass }, //COPTIC CAPITAL LETTER ALFA |
|
982 { 0x2cf9, 0x2cfd, MLineBreaker::EBaLineBreakClass }, //COPTIC OLD NUBIAN FULL STOP |
|
983 { 0x2cfd, 0x2cfe, MLineBreaker::EAlLineBreakClass }, //COPTIC FRACTION ONE HALF |
|
984 { 0x2cfe, 0x2d00, MLineBreaker::EBaLineBreakClass }, //COPTIC FULL STOP |
|
985 { 0x2d00, 0x2d26, MLineBreaker::EAlLineBreakClass }, //GEORGIAN SMALL LETTER AN |
|
986 { 0x2d30, 0x2d66, MLineBreaker::EAlLineBreakClass }, //TIFINAGH LETTER YA |
|
987 { 0x2d6f, 0x2d70, MLineBreaker::EAlLineBreakClass }, //TIFINAGH MODIFIER LETTER LABIALIZATION MARK |
|
988 { 0x2d80, 0x2d97, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE LOA |
|
989 { 0x2da0, 0x2da7, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE SSA |
|
990 { 0x2da8, 0x2daf, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE CCA |
|
991 { 0x2db0, 0x2db7, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE ZZA |
|
992 { 0x2db8, 0x2dbf, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE CCHA |
|
993 { 0x2dc0, 0x2dc7, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE QYA |
|
994 { 0x2dc8, 0x2dcf, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE KYA |
|
995 { 0x2dd0, 0x2dd7, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE XYA |
|
996 { 0x2dd8, 0x2ddf, MLineBreaker::EAlLineBreakClass }, //ETHIOPIC SYLLABLE GYA |
|
997 { 0x2e00, 0x2e0e, MLineBreaker::EQuLineBreakClass }, //RIGHT ANGLE SUBSTITUTION MARKER |
|
998 { 0x2e0e, 0x2e16, MLineBreaker::EBaLineBreakClass }, //EDITORIAL CORONIS |
|
999 { 0x2e16, 0x2e17, MLineBreaker::EAlLineBreakClass }, //DOTTED RIGHT-POINTING ANGLE |
|
1000 { 0x2e17, 0x2e18, MLineBreaker::EBaLineBreakClass }, //DOUBLE OBLIQUE HYPHEN |
|
1001 { 0x2e1c, 0x2e1e, MLineBreaker::EQuLineBreakClass }, //LEFT LOW PARAPHRASE BRACKET |
|
1002 { 0x2e80, 0x2e9a, MLineBreaker::EIdLineBreakClass }, //cjk radical repeat |
|
1003 { 0x2e9b, 0x2ef4, MLineBreaker::EIdLineBreakClass }, //cjk radical choke |
|
1004 { 0x2f00, 0x2fd6, MLineBreaker::EIdLineBreakClass }, //kangxi radical one |
|
1005 { 0x2ff0, 0x2ffc, MLineBreaker::EIdLineBreakClass }, //ideographic description character left to right |
|
1006 { 0x3000, 0x3001, MLineBreaker::EIdLineBreakClass }, //ideographic space |
|
1007 { 0x3001, 0x3003, MLineBreaker::EClLineBreakClass }, //ideographic comma |
|
1008 { 0x3003, 0x3005, MLineBreaker::EIdLineBreakClass }, //ditto mark |
|
1009 { 0x3005, 0x3006, MLineBreaker::ENsLineBreakClass }, //ideographic iteration mark |
|
1010 { 0x3006, 0x3008, MLineBreaker::EIdLineBreakClass }, //ideographic closing mark |
|
1011 { 0x3008, 0x3009, MLineBreaker::EOpLineBreakClass }, //left angle bracket |
|
1012 { 0x3009, 0x300a, MLineBreaker::EClLineBreakClass }, //right angle bracket |
|
1013 { 0x300a, 0x300b, MLineBreaker::EOpLineBreakClass }, //left double angle bracket |
|
1014 { 0x300b, 0x300c, MLineBreaker::EClLineBreakClass }, //right double angle bracket |
|
1015 { 0x300c, 0x300d, MLineBreaker::EOpLineBreakClass }, //left corner bracket |
|
1016 { 0x300d, 0x300e, MLineBreaker::EClLineBreakClass }, //right corner bracket |
|
1017 { 0x300e, 0x300f, MLineBreaker::EOpLineBreakClass }, //left white corner bracket |
|
1018 { 0x300f, 0x3010, MLineBreaker::EClLineBreakClass }, //right white corner bracket |
|
1019 { 0x3010, 0x3011, MLineBreaker::EOpLineBreakClass }, //left black lenticular bracket |
|
1020 { 0x3011, 0x3012, MLineBreaker::EClLineBreakClass }, //right black lenticular bracket |
|
1021 { 0x3012, 0x3014, MLineBreaker::EIdLineBreakClass }, //postal mark |
|
1022 { 0x3014, 0x3015, MLineBreaker::EOpLineBreakClass }, //left tortoise shell bracket |
|
1023 { 0x3015, 0x3016, MLineBreaker::EClLineBreakClass }, //right tortoise shell bracket |
|
1024 { 0x3016, 0x3017, MLineBreaker::EOpLineBreakClass }, //left white lenticular bracket |
|
1025 { 0x3017, 0x3018, MLineBreaker::EClLineBreakClass }, //right white lenticular bracket |
|
1026 { 0x3018, 0x3019, MLineBreaker::EOpLineBreakClass }, //left white tortoise shell bracket |
|
1027 { 0x3019, 0x301a, MLineBreaker::EClLineBreakClass }, //right white tortoise shell bracket |
|
1028 { 0x301a, 0x301b, MLineBreaker::EOpLineBreakClass }, //left white square bracket |
|
1029 { 0x301b, 0x301c, MLineBreaker::EClLineBreakClass }, //right white square bracket |
|
1030 { 0x301c, 0x301d, MLineBreaker::ENsLineBreakClass }, //wave dash |
|
1031 { 0x301d, 0x301e, MLineBreaker::EOpLineBreakClass }, //reversed double prime quotation mark |
|
1032 { 0x301e, 0x3020, MLineBreaker::EClLineBreakClass }, //double prime quotation mark |
|
1033 { 0x3020, 0x302a, MLineBreaker::EIdLineBreakClass }, //postal mark face |
|
1034 { 0x302a, 0x3030, MLineBreaker::ECmLineBreakClass }, //ideographic level tone mark |
|
1035 { 0x3030, 0x303b, MLineBreaker::EIdLineBreakClass }, //wavy dash |
|
1036 { 0x303b, 0x303d, MLineBreaker::ENsLineBreakClass }, //VERTICAL IDEOGRAPHIC ITERATION MARK |
|
1037 { 0x303d, 0x3040, MLineBreaker::EIdLineBreakClass }, //PART ALTERNATION MARK |
|
1038 { 0x3041, 0x3042, MLineBreaker::ENsLineBreakClass }, //hiragana letter small a |
|
1039 { 0x3042, 0x3043, MLineBreaker::EIdLineBreakClass }, //hiragana letter a |
|
1040 { 0x3043, 0x3044, MLineBreaker::ENsLineBreakClass }, //hiragana letter small i |
|
1041 { 0x3044, 0x3045, MLineBreaker::EIdLineBreakClass }, //hiragana letter i |
|
1042 { 0x3045, 0x3046, MLineBreaker::ENsLineBreakClass }, //hiragana letter small u |
|
1043 { 0x3046, 0x3047, MLineBreaker::EIdLineBreakClass }, //hiragana letter u |
|
1044 { 0x3047, 0x3048, MLineBreaker::ENsLineBreakClass }, //hiragana letter small e |
|
1045 { 0x3048, 0x3049, MLineBreaker::EIdLineBreakClass }, //hiragana letter e |
|
1046 { 0x3049, 0x304a, MLineBreaker::ENsLineBreakClass }, //hiragana letter small o |
|
1047 { 0x304a, 0x3063, MLineBreaker::EIdLineBreakClass }, //hiragana letter o |
|
1048 { 0x3063, 0x3064, MLineBreaker::ENsLineBreakClass }, //hiragana letter small tu |
|
1049 { 0x3064, 0x3083, MLineBreaker::EIdLineBreakClass }, //hiragana letter tu |
|
1050 { 0x3083, 0x3084, MLineBreaker::ENsLineBreakClass }, //hiragana letter small ya |
|
1051 { 0x3084, 0x3085, MLineBreaker::EIdLineBreakClass }, //hiragana letter ya |
|
1052 { 0x3085, 0x3086, MLineBreaker::ENsLineBreakClass }, //hiragana letter small yu |
|
1053 { 0x3086, 0x3087, MLineBreaker::EIdLineBreakClass }, //hiragana letter yu |
|
1054 { 0x3087, 0x3088, MLineBreaker::ENsLineBreakClass }, //hiragana letter small yo |
|
1055 { 0x3088, 0x308e, MLineBreaker::EIdLineBreakClass }, //hiragana letter yo |
|
1056 { 0x308e, 0x308f, MLineBreaker::ENsLineBreakClass }, //hiragana letter small wa |
|
1057 { 0x308f, 0x3095, MLineBreaker::EIdLineBreakClass }, //hiragana letter wa |
|
1058 { 0x3095, 0x3097, MLineBreaker::ENsLineBreakClass }, //HIRAGANA LETTER SMALL KA |
|
1059 { 0x3099, 0x309b, MLineBreaker::ECmLineBreakClass }, //combining katakana-hiragana voiced sound mark |
|
1060 { 0x309b, 0x309f, MLineBreaker::ENsLineBreakClass }, //katakana-hiragana voiced sound mark |
|
1061 { 0x309f, 0x30a0, MLineBreaker::EIdLineBreakClass }, //HIRAGANA DIGRAPH YORI |
|
1062 { 0x30a0, 0x30a1, MLineBreaker::ENsLineBreakClass }, //KATAKANA-HIRAGANA DOUBLE HYPHEN |
|
1063 { 0x30a1, 0x30a2, MLineBreaker::ENsLineBreakClass }, //katakana letter small a |
|
1064 { 0x30a2, 0x30a3, MLineBreaker::EIdLineBreakClass }, //katakana letter a |
|
1065 { 0x30a3, 0x30a4, MLineBreaker::ENsLineBreakClass }, //katakana letter small i |
|
1066 { 0x30a4, 0x30a5, MLineBreaker::EIdLineBreakClass }, //katakana letter i |
|
1067 { 0x30a5, 0x30a6, MLineBreaker::ENsLineBreakClass }, //katakana letter small u |
|
1068 { 0x30a6, 0x30a7, MLineBreaker::EIdLineBreakClass }, //katakana letter u |
|
1069 { 0x30a7, 0x30a8, MLineBreaker::ENsLineBreakClass }, //katakana letter small e |
|
1070 { 0x30a8, 0x30a9, MLineBreaker::EIdLineBreakClass }, //katakana letter e |
|
1071 { 0x30a9, 0x30aa, MLineBreaker::ENsLineBreakClass }, //katakana letter small o |
|
1072 { 0x30aa, 0x30c3, MLineBreaker::EIdLineBreakClass }, //katakana letter o |
|
1073 { 0x30c3, 0x30c4, MLineBreaker::ENsLineBreakClass }, //katakana letter small tu |
|
1074 { 0x30c4, 0x30e3, MLineBreaker::EIdLineBreakClass }, //katakana letter tu |
|
1075 { 0x30e3, 0x30e4, MLineBreaker::ENsLineBreakClass }, //katakana letter small ya |
|
1076 { 0x30e4, 0x30e5, MLineBreaker::EIdLineBreakClass }, //katakana letter ya |
|
1077 { 0x30e5, 0x30e6, MLineBreaker::ENsLineBreakClass }, //katakana letter small yu |
|
1078 { 0x30e6, 0x30e7, MLineBreaker::EIdLineBreakClass }, //katakana letter yu |
|
1079 { 0x30e7, 0x30e8, MLineBreaker::ENsLineBreakClass }, //katakana letter small yo |
|
1080 { 0x30e8, 0x30ee, MLineBreaker::EIdLineBreakClass }, //katakana letter yo |
|
1081 { 0x30ee, 0x30ef, MLineBreaker::ENsLineBreakClass }, //katakana letter small wa |
|
1082 { 0x30ef, 0x30f5, MLineBreaker::EIdLineBreakClass }, //katakana letter wa |
|
1083 { 0x30f5, 0x30f7, MLineBreaker::ENsLineBreakClass }, //katakana letter small ka |
|
1084 { 0x30f7, 0x30fb, MLineBreaker::EIdLineBreakClass }, //katakana letter va |
|
1085 { 0x30fb, 0x30fc, MLineBreaker::ENsLineBreakClass }, //katakana middle dot |
|
1086 { 0x30fc, 0x30fd, MLineBreaker::EIdLineBreakClass }, //katakana-hiragana prolonged sound mark |
|
1087 { 0x30fd, 0x30fe, MLineBreaker::ENsLineBreakClass }, //katakana iteration mark |
|
1088 { 0x30fe, 0x30ff, MLineBreaker::EIdLineBreakClass }, //katakana voiced iteration mark |
|
1089 { 0x30ff, 0x3100, MLineBreaker::EIdLineBreakClass }, //KATAKANA DIGRAPH KOTO |
|
1090 { 0x3105, 0x312d, MLineBreaker::EIdLineBreakClass }, //bopomofo letter b |
|
1091 { 0x3131, 0x318f, MLineBreaker::EIdLineBreakClass }, //hangul letter kiyeok |
|
1092 { 0x3190, 0x31b8, MLineBreaker::EIdLineBreakClass }, //ideographic annotation linking mark |
|
1093 { 0x31c0, 0x31d0, MLineBreaker::EIdLineBreakClass }, //CJK STROKE T |
|
1094 { 0x31f0, 0x3200, MLineBreaker::ENsLineBreakClass }, //KATAKANA LETTER SMALL KU |
|
1095 { 0x3200, 0x321f, MLineBreaker::EIdLineBreakClass }, //parenthesized hangul kiyeok |
|
1096 { 0x3220, 0x3244, MLineBreaker::EIdLineBreakClass }, //parenthesized ideograph one |
|
1097 { 0x3250, 0x32ff, MLineBreaker::EIdLineBreakClass }, //PARTNERSHIP SIGN |
|
1098 { 0x3300, 0x4db6, MLineBreaker::EIdLineBreakClass }, //square apaato |
|
1099 { 0x4dc0, 0x4e00, MLineBreaker::EAlLineBreakClass }, //HEXAGRAM FOR THE CREATIVE HEAVEN |
|
1100 { 0x4e00, 0x9fbb, MLineBreaker::EIdLineBreakClass }, //<cjk ideograph, |
|
1101 { 0xa000, 0xa48d, MLineBreaker::EIdLineBreakClass }, //yi syllable it |
|
1102 { 0xa490, 0xa4c6, MLineBreaker::EIdLineBreakClass }, //yi radical qot |
|
1103 { 0xa4c6, 0xa4c7, MLineBreaker::EIdLineBreakClass }, //yi radical ke |
|
1104 { 0xa700, 0xa71b, MLineBreaker::EAlLineBreakClass }, //MODIFIER LETTER CHINESE TONE YIN PING |
|
1105 { 0xa720, 0xa722, MLineBreaker::EAlLineBreakClass }, //MODIFIER LETTER STRESS AND HIGH TONE |
|
1106 { 0xa800, 0xa802, MLineBreaker::EAlLineBreakClass }, //SYLOTI NAGRI LETTER A |
|
1107 { 0xa802, 0xa803, MLineBreaker::ECmLineBreakClass }, //SYLOTI NAGRI SIGN DVISVARA |
|
1108 { 0xa803, 0xa806, MLineBreaker::EAlLineBreakClass }, //SYLOTI NAGRI LETTER U |
|
1109 { 0xa806, 0xa807, MLineBreaker::ECmLineBreakClass }, //SYLOTI NAGRI SIGN HASANTA |
|
1110 { 0xa807, 0xa80b, MLineBreaker::EAlLineBreakClass }, //SYLOTI NAGRI LETTER KO |
|
1111 { 0xa80b, 0xa80c, MLineBreaker::ECmLineBreakClass }, //SYLOTI NAGRI SIGN ANUSVARA |
|
1112 { 0xa80c, 0xa823, MLineBreaker::EAlLineBreakClass }, //SYLOTI NAGRI LETTER CO |
|
1113 { 0xa823, 0xa828, MLineBreaker::ECmLineBreakClass }, //SYLOTI NAGRI VOWEL SIGN A |
|
1114 { 0xa828, 0xa82c, MLineBreaker::EAlLineBreakClass }, //SYLOTI NAGRI POETRY MARK-1 |
|
1115 { 0xa840, 0xa874, MLineBreaker::EAlLineBreakClass }, //PHAGS-PA LETTER KA |
|
1116 { 0xa874, 0xa876, MLineBreaker::EBbLineBreakClass }, //PHAGS-PA SINGLE HEAD MARK |
|
1117 { 0xa876, 0xa878, MLineBreaker::EExLineBreakClass }, //PHAGS-PA MARK SHAD |
|
1118 { 0xac00, 0xd7a4, MLineBreaker::EIdLineBreakClass }, //<hangul syllable, |
|
1119 { 0xd800, 0xe000, MLineBreaker::ESgLineBreakClass }, //<non private use high surrogate, |
|
1120 { 0xe000, 0xf900, MLineBreaker::EAiLineBreakClass }, //<private use, |
|
1121 { 0xf900, 0xfa2e, MLineBreaker::EIdLineBreakClass }, //cjk compatibility ideograph-f900 |
|
1122 { 0xfa30, 0xfa6b, MLineBreaker::EIdLineBreakClass }, //CJK COMPATIBILITY IDEOGRAPH-FA30 |
|
1123 { 0xfa70, 0xfada, MLineBreaker::EIdLineBreakClass }, //CJK COMPATIBILITY IDEOGRAPH-FA70 |
|
1124 { 0xfb00, 0xfb07, MLineBreaker::EAlLineBreakClass }, //latin small ligature ff |
|
1125 { 0xfb13, 0xfb18, MLineBreaker::EAlLineBreakClass }, //armenian small ligature men now |
|
1126 { 0xfb1d, 0xfb1e, MLineBreaker::EAlLineBreakClass }, //hebrew letter yod with hiriq |
|
1127 { 0xfb1e, 0xfb1f, MLineBreaker::ECmLineBreakClass }, //hebrew point judeo-spanish varika |
|
1128 { 0xfb1f, 0xfb37, MLineBreaker::EAlLineBreakClass }, //hebrew ligature yiddish yod yod patah |
|
1129 { 0xfb38, 0xfb3d, MLineBreaker::EAlLineBreakClass }, //hebrew letter tet with dagesh |
|
1130 { 0xfb3e, 0xfb3f, MLineBreaker::EAlLineBreakClass }, //hebrew letter mem with dagesh |
|
1131 { 0xfb40, 0xfb42, MLineBreaker::EAlLineBreakClass }, //hebrew letter nun with dagesh |
|
1132 { 0xfb43, 0xfb45, MLineBreaker::EAlLineBreakClass }, //hebrew letter final pe with dagesh |
|
1133 { 0xfb46, 0xfbb2, MLineBreaker::EAlLineBreakClass }, //hebrew letter tsadi with dagesh |
|
1134 { 0xfbd3, 0xfd3e, MLineBreaker::EAlLineBreakClass }, //arabic letter ng isolated form |
|
1135 { 0xfd3e, 0xfd3f, MLineBreaker::EOpLineBreakClass }, //ornate left parenthesis |
|
1136 { 0xfd3f, 0xfd40, MLineBreaker::EClLineBreakClass }, //ornate right parenthesis |
|
1137 { 0xfd50, 0xfd90, MLineBreaker::EAlLineBreakClass }, //arabic ligature teh with jeem with meem initial form |
|
1138 { 0xfd92, 0xfdc8, MLineBreaker::EAlLineBreakClass }, //arabic ligature meem with jeem with khah initial form |
|
1139 { 0xfdf0, 0xfdfc, MLineBreaker::EAlLineBreakClass }, //arabic ligature salla used as koranic stop sign isolated form |
|
1140 { 0xfdfc, 0xfdfd, MLineBreaker::EPoLineBreakClass }, //RIAL SIGN |
|
1141 { 0xfdfd, 0xfdfe, MLineBreaker::EAlLineBreakClass }, //ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM |
|
1142 { 0xfe00, 0xfe10, MLineBreaker::ECmLineBreakClass }, //VARIATION SELECTOR-1 |
|
1143 { 0xfe10, 0xfe11, MLineBreaker::EIsLineBreakClass }, //PRESENTATION FORM FOR VERTICAL COMMA |
|
1144 { 0xfe11, 0xfe13, MLineBreaker::EClLineBreakClass }, //PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA |
|
1145 { 0xfe13, 0xfe15, MLineBreaker::EIsLineBreakClass }, //PRESENTATION FORM FOR VERTICAL COLON |
|
1146 { 0xfe15, 0xfe17, MLineBreaker::EExLineBreakClass }, //PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK |
|
1147 { 0xfe17, 0xfe18, MLineBreaker::EOpLineBreakClass }, //PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET |
|
1148 { 0xfe18, 0xfe19, MLineBreaker::EClLineBreakClass }, //PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET |
|
1149 { 0xfe19, 0xfe1a, MLineBreaker::EInLineBreakClass }, //PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS |
|
1150 { 0xfe20, 0xfe24, MLineBreaker::ECmLineBreakClass }, //combining ligature left half |
|
1151 { 0xfe30, 0xfe35, MLineBreaker::EIdLineBreakClass }, //presentation form for vertical two dot leader |
|
1152 { 0xfe35, 0xfe36, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left parenthesis |
|
1153 { 0xfe36, 0xfe37, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right parenthesis |
|
1154 { 0xfe37, 0xfe38, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left curly bracket |
|
1155 { 0xfe38, 0xfe39, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right curly bracket |
|
1156 { 0xfe39, 0xfe3a, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left tortoise shell bracket |
|
1157 { 0xfe3a, 0xfe3b, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right tortoise shell bracket |
|
1158 { 0xfe3b, 0xfe3c, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left black lenticular bracket |
|
1159 { 0xfe3c, 0xfe3d, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right black lenticular bracket |
|
1160 { 0xfe3d, 0xfe3e, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left double angle bracket |
|
1161 { 0xfe3e, 0xfe3f, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right double angle bracket |
|
1162 { 0xfe3f, 0xfe40, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left angle bracket |
|
1163 { 0xfe40, 0xfe41, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right angle bracket |
|
1164 { 0xfe41, 0xfe42, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left corner bracket |
|
1165 { 0xfe42, 0xfe43, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right corner bracket |
|
1166 { 0xfe43, 0xfe44, MLineBreaker::EOpLineBreakClass }, //presentation form for vertical left white corner bracket |
|
1167 { 0xfe44, 0xfe45, MLineBreaker::EClLineBreakClass }, //presentation form for vertical right white corner bracket |
|
1168 { 0xfe45, 0xfe47, MLineBreaker::EIdLineBreakClass }, //SESAME DOT |
|
1169 { 0xfe47, 0xfe48, MLineBreaker::EOpLineBreakClass }, //PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET |
|
1170 { 0xfe48, 0xfe49, MLineBreaker::EClLineBreakClass }, //PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET |
|
1171 { 0xfe49, 0xfe50, MLineBreaker::EIdLineBreakClass }, //dashed overline |
|
1172 { 0xfe50, 0xfe51, MLineBreaker::EClLineBreakClass }, //small comma |
|
1173 { 0xfe51, 0xfe52, MLineBreaker::EIdLineBreakClass }, //small ideographic comma |
|
1174 { 0xfe52, 0xfe53, MLineBreaker::EClLineBreakClass }, //small full stop |
|
1175 { 0xfe54, 0xfe56, MLineBreaker::ENsLineBreakClass }, //small semicolon |
|
1176 { 0xfe56, 0xfe58, MLineBreaker::EExLineBreakClass }, //small question mark |
|
1177 { 0xfe58, 0xfe59, MLineBreaker::EIdLineBreakClass }, //small em dash |
|
1178 { 0xfe59, 0xfe5a, MLineBreaker::EOpLineBreakClass }, //small left parenthesis |
|
1179 { 0xfe5a, 0xfe5b, MLineBreaker::EClLineBreakClass }, //small right parenthesis |
|
1180 { 0xfe5b, 0xfe5c, MLineBreaker::EOpLineBreakClass }, //small left curly bracket |
|
1181 { 0xfe5c, 0xfe5d, MLineBreaker::EClLineBreakClass }, //small right curly bracket |
|
1182 { 0xfe5d, 0xfe5e, MLineBreaker::EOpLineBreakClass }, //small left tortoise shell bracket |
|
1183 { 0xfe5e, 0xfe5f, MLineBreaker::EClLineBreakClass }, //small right tortoise shell bracket |
|
1184 { 0xfe5f, 0xfe67, MLineBreaker::EIdLineBreakClass }, //small number sign |
|
1185 { 0xfe68, 0xfe69, MLineBreaker::EIdLineBreakClass }, //small reverse solidus |
|
1186 { 0xfe69, 0xfe6a, MLineBreaker::EPrLineBreakClass }, //small dollar sign |
|
1187 { 0xfe6a, 0xfe6b, MLineBreaker::EPoLineBreakClass }, //small percent sign |
|
1188 { 0xfe6b, 0xfe6c, MLineBreaker::EAlLineBreakClass }, //small commercial at |
|
1189 { 0xfe70, 0xfe74, MLineBreaker::EAlLineBreakClass }, //arabic fathatan isolated form |
|
1190 { 0xfe74, 0xfe75, MLineBreaker::EAlLineBreakClass }, //arabic kasratan isolated form |
|
1191 { 0xfe76, 0xfefd, MLineBreaker::EAlLineBreakClass }, //arabic fatha isolated form |
|
1192 { 0xfeff, 0xff00, MLineBreaker::EGlLineBreakClass }, //zero width no-break space |
|
1193 { 0xff01, 0xff02, MLineBreaker::EExLineBreakClass }, //fullwidth exclamation mark |
|
1194 { 0xff02, 0xff04, MLineBreaker::EIdLineBreakClass }, //fullwidth quotation mark |
|
1195 { 0xff04, 0xff05, MLineBreaker::EPrLineBreakClass }, //fullwidth dollar sign |
|
1196 { 0xff05, 0xff06, MLineBreaker::EPoLineBreakClass }, //fullwidth percent sign |
|
1197 { 0xff06, 0xff08, MLineBreaker::EIdLineBreakClass }, //fullwidth ampersand |
|
1198 { 0xff08, 0xff09, MLineBreaker::EOpLineBreakClass }, //fullwidth left parenthesis |
|
1199 { 0xff09, 0xff0a, MLineBreaker::EClLineBreakClass }, //fullwidth right parenthesis |
|
1200 { 0xff0a, 0xff0c, MLineBreaker::EIdLineBreakClass }, //fullwidth asterisk |
|
1201 { 0xff0c, 0xff0d, MLineBreaker::EClLineBreakClass }, //fullwidth comma |
|
1202 { 0xff0d, 0xff0e, MLineBreaker::EIdLineBreakClass }, //fullwidth hyphen-minus |
|
1203 { 0xff0e, 0xff0f, MLineBreaker::EClLineBreakClass }, //fullwidth full stop |
|
1204 { 0xff0f, 0xff1a, MLineBreaker::EIdLineBreakClass }, //fullwidth solidus |
|
1205 { 0xff1a, 0xff1c, MLineBreaker::ENsLineBreakClass }, //fullwidth colon |
|
1206 { 0xff1c, 0xff1f, MLineBreaker::EIdLineBreakClass }, //fullwidth less-than sign |
|
1207 { 0xff1f, 0xff20, MLineBreaker::EExLineBreakClass }, //fullwidth question mark |
|
1208 { 0xff20, 0xff3b, MLineBreaker::EIdLineBreakClass }, //fullwidth commercial at |
|
1209 { 0xff3b, 0xff3c, MLineBreaker::EOpLineBreakClass }, //fullwidth left square bracket |
|
1210 { 0xff3c, 0xff3d, MLineBreaker::EIdLineBreakClass }, //fullwidth reverse solidus |
|
1211 { 0xff3d, 0xff3e, MLineBreaker::EClLineBreakClass }, //fullwidth right square bracket |
|
1212 { 0xff3e, 0xff5b, MLineBreaker::EIdLineBreakClass }, //fullwidth circumflex accent |
|
1213 { 0xff5b, 0xff5c, MLineBreaker::EOpLineBreakClass }, //fullwidth left curly bracket |
|
1214 { 0xff5c, 0xff5d, MLineBreaker::EIdLineBreakClass }, //fullwidth vertical line |
|
1215 { 0xff5d, 0xff5e, MLineBreaker::EClLineBreakClass }, //fullwidth right curly bracket |
|
1216 { 0xff5e, 0xff5f, MLineBreaker::EIdLineBreakClass }, //fullwidth tilde |
|
1217 { 0xff5f, 0xff60, MLineBreaker::EOpLineBreakClass }, //FULLWIDTH LEFT WHITE PARENTHESIS |
|
1218 { 0xff60, 0xff61, MLineBreaker::EClLineBreakClass }, //FULLWIDTH RIGHT WHITE PARENTHESIS |
|
1219 { 0xff61, 0xff62, MLineBreaker::EClLineBreakClass }, //halfwidth ideographic full stop |
|
1220 { 0xff62, 0xff63, MLineBreaker::EOpLineBreakClass }, //halfwidth left corner bracket |
|
1221 { 0xff63, 0xff65, MLineBreaker::EClLineBreakClass }, //halfwidth right corner bracket |
|
1222 { 0xff65, 0xff66, MLineBreaker::ENsLineBreakClass }, //halfwidth katakana middle dot |
|
1223 { 0xff66, 0xff67, MLineBreaker::EAlLineBreakClass }, //halfwidth katakana letter wo |
|
1224 { 0xff67, 0xff71, MLineBreaker::ENsLineBreakClass }, //halfwidth katakana letter small a |
|
1225 { 0xff71, 0xff9e, MLineBreaker::EAlLineBreakClass }, //halfwidth katakana letter a |
|
1226 { 0xff9e, 0xffa0, MLineBreaker::ENsLineBreakClass }, //halfwidth katakana voiced sound mark |
|
1227 { 0xffa0, 0xffbf, MLineBreaker::EAlLineBreakClass }, //halfwidth hangul filler |
|
1228 { 0xffc2, 0xffc8, MLineBreaker::EAlLineBreakClass }, //halfwidth hangul letter a |
|
1229 { 0xffca, 0xffd0, MLineBreaker::EAlLineBreakClass }, //halfwidth hangul letter yeo |
|
1230 { 0xffd2, 0xffd8, MLineBreaker::EAlLineBreakClass }, //halfwidth hangul letter yo |
|
1231 { 0xffda, 0xffdd, MLineBreaker::EAlLineBreakClass }, //halfwidth hangul letter eu |
|
1232 { 0xffe0, 0xffe1, MLineBreaker::EPoLineBreakClass }, //fullwidth cent sign |
|
1233 { 0xffe1, 0xffe2, MLineBreaker::EPrLineBreakClass }, //fullwidth pound sign |
|
1234 { 0xffe2, 0xffe5, MLineBreaker::EIdLineBreakClass }, //fullwidth not sign |
|
1235 { 0xffe5, 0xffe7, MLineBreaker::EPrLineBreakClass }, //fullwidth yen sign |
|
1236 { 0xffe8, 0xffef, MLineBreaker::EAlLineBreakClass }, //halfwidth forms light vertical |
|
1237 { 0xfff9, 0xfffc, MLineBreaker::ECmLineBreakClass }, //interlinear annotation anchor |
|
1238 { 0xfffc, 0xfffd, MLineBreaker::ECbLineBreakClass }, //object replacement character |
|
1239 { 0xfffd, 0xfffe, MLineBreaker::EAiLineBreakClass }, //replacement character |
|
1240 { 0x10000, 0x1000c, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYLLABLE B008 A |
|
1241 { 0x1000d, 0x10027, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYLLABLE B036 JO |
|
1242 { 0x10028, 0x1003b, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYLLABLE B060 RA |
|
1243 { 0x1003c, 0x1003e, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYLLABLE B017 ZA |
|
1244 { 0x1003f, 0x1004e, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYLLABLE B020 ZO |
|
1245 { 0x10050, 0x1005e, MLineBreaker::EAlLineBreakClass }, //LINEAR B SYMBOL B018 |
|
1246 { 0x10080, 0x100fb, MLineBreaker::EAlLineBreakClass }, //LINEAR B IDEOGRAM B100 MAN |
|
1247 { 0x10100, 0x10103, MLineBreaker::EBaLineBreakClass }, //AEGEAN WORD SEPARATOR LINE |
|
1248 { 0x10107, 0x10134, MLineBreaker::EAlLineBreakClass }, //AEGEAN NUMBER ONE |
|
1249 { 0x10137, 0x1018b, MLineBreaker::EAlLineBreakClass }, //AEGEAN WEIGHT BASE UNIT |
|
1250 { 0x10300, 0x1031f, MLineBreaker::EAlLineBreakClass }, //OLD ITALIC LETTER A |
|
1251 { 0x10320, 0x10324, MLineBreaker::EAlLineBreakClass }, //OLD ITALIC NUMERAL ONE |
|
1252 { 0x10330, 0x1034b, MLineBreaker::EAlLineBreakClass }, //GOTHIC LETTER AHSA |
|
1253 { 0x10380, 0x1039e, MLineBreaker::EAlLineBreakClass }, //UGARITIC LETTER ALPA |
|
1254 { 0x1039f, 0x103a0, MLineBreaker::EBaLineBreakClass }, //UGARITIC WORD DIVIDER |
|
1255 { 0x103a0, 0x103c4, MLineBreaker::EAlLineBreakClass }, //OLD PERSIAN SIGN A |
|
1256 { 0x103c8, 0x103d0, MLineBreaker::EAlLineBreakClass }, //OLD PERSIAN SIGN AURAMAZDAA |
|
1257 { 0x103d0, 0x103d1, MLineBreaker::EBaLineBreakClass }, //OLD PERSIAN WORD DIVIDER |
|
1258 { 0x103d1, 0x103d6, MLineBreaker::EAlLineBreakClass }, //OLD PERSIAN NUMBER ONE |
|
1259 { 0x10400, 0x1049e, MLineBreaker::EAlLineBreakClass }, //DESERET CAPITAL LETTER LONG I |
|
1260 { 0x104a0, 0x104aa, MLineBreaker::ENuLineBreakClass }, //OSMANYA DIGIT ZERO |
|
1261 { 0x10800, 0x10806, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE A |
|
1262 { 0x10808, 0x10809, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE JO |
|
1263 { 0x1080a, 0x10836, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE KA |
|
1264 { 0x10837, 0x10839, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE XA |
|
1265 { 0x1083c, 0x1083d, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE ZA |
|
1266 { 0x1083f, 0x10840, MLineBreaker::EAlLineBreakClass }, //CYPRIOT SYLLABLE ZO |
|
1267 { 0x10900, 0x1091a, MLineBreaker::EAlLineBreakClass }, //PHOENICIAN LETTER ALF |
|
1268 { 0x1091f, 0x10920, MLineBreaker::EBaLineBreakClass }, //PHOENICIAN WORD SEPARATOR |
|
1269 { 0x10a00, 0x10a01, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI LETTER A |
|
1270 { 0x10a01, 0x10a04, MLineBreaker::ECmLineBreakClass }, //KHAROSHTHI VOWEL SIGN I |
|
1271 { 0x10a05, 0x10a07, MLineBreaker::ECmLineBreakClass }, //KHAROSHTHI VOWEL SIGN E |
|
1272 { 0x10a0c, 0x10a10, MLineBreaker::ECmLineBreakClass }, //KHAROSHTHI VOWEL LENGTH MARK |
|
1273 { 0x10a10, 0x10a14, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI LETTER KA |
|
1274 { 0x10a15, 0x10a18, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI LETTER CA |
|
1275 { 0x10a19, 0x10a34, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI LETTER NYA |
|
1276 { 0x10a38, 0x10a3b, MLineBreaker::ECmLineBreakClass }, //KHAROSHTHI SIGN BAR ABOVE |
|
1277 { 0x10a3f, 0x10a40, MLineBreaker::ECmLineBreakClass }, //KHAROSHTHI VIRAMA |
|
1278 { 0x10a40, 0x10a48, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI DIGIT ONE |
|
1279 { 0x10a50, 0x10a58, MLineBreaker::EBaLineBreakClass }, //KHAROSHTHI PUNCTUATION DOT |
|
1280 { 0x10a58, 0x10a59, MLineBreaker::EAlLineBreakClass }, //KHAROSHTHI PUNCTUATION LINES |
|
1281 { 0x12000, 0x1236f, MLineBreaker::EAlLineBreakClass }, //CUNEIFORM SIGN A |
|
1282 { 0x12400, 0x12463, MLineBreaker::EAlLineBreakClass }, //CUNEIFORM NUMERIC SIGN TWO ASH |
|
1283 { 0x12470, 0x12474, MLineBreaker::EBaLineBreakClass }, //CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER |
|
1284 { 0x1d000, 0x1d0f6, MLineBreaker::EAlLineBreakClass }, //BYZANTINE MUSICAL SYMBOL PSILI |
|
1285 { 0x1d100, 0x1d127, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL SINGLE BARLINE |
|
1286 { 0x1d12a, 0x1d165, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL DOUBLE SHARP |
|
1287 { 0x1d165, 0x1d16a, MLineBreaker::ECmLineBreakClass }, //MUSICAL SYMBOL COMBINING STEM |
|
1288 { 0x1d16a, 0x1d16d, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL FINGERED TREMOLO-1 |
|
1289 { 0x1d16d, 0x1d183, MLineBreaker::ECmLineBreakClass }, //MUSICAL SYMBOL COMBINING AUGMENTATION DOT |
|
1290 { 0x1d183, 0x1d185, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL ARPEGGIATO UP |
|
1291 { 0x1d185, 0x1d18c, MLineBreaker::ECmLineBreakClass }, //MUSICAL SYMBOL COMBINING DOIT |
|
1292 { 0x1d18c, 0x1d1aa, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL RINFORZANDO |
|
1293 { 0x1d1aa, 0x1d1ae, MLineBreaker::ECmLineBreakClass }, //MUSICAL SYMBOL COMBINING DOWN BOW |
|
1294 { 0x1d1ae, 0x1d1de, MLineBreaker::EAlLineBreakClass }, //MUSICAL SYMBOL PEDAL MARK |
|
1295 { 0x1d200, 0x1d242, MLineBreaker::EAlLineBreakClass }, //GREEK VOCAL NOTATION SYMBOL-1 |
|
1296 { 0x1d242, 0x1d245, MLineBreaker::ECmLineBreakClass }, //COMBINING GREEK MUSICAL TRISEME |
|
1297 { 0x1d245, 0x1d246, MLineBreaker::EAlLineBreakClass }, //GREEK MUSICAL LEIMMA |
|
1298 { 0x1d300, 0x1d357, MLineBreaker::EAlLineBreakClass }, //MONOGRAM FOR EARTH |
|
1299 { 0x1d360, 0x1d372, MLineBreaker::EAlLineBreakClass }, //COUNTING ROD UNIT DIGIT ONE |
|
1300 { 0x1d400, 0x1d455, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL BOLD CAPITAL A |
|
1301 { 0x1d456, 0x1d49d, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL ITALIC SMALL I |
|
1302 { 0x1d49e, 0x1d4a0, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT CAPITAL C |
|
1303 { 0x1d4a2, 0x1d4a3, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT CAPITAL G |
|
1304 { 0x1d4a5, 0x1d4a7, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT CAPITAL J |
|
1305 { 0x1d4a9, 0x1d4ad, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT CAPITAL N |
|
1306 { 0x1d4ae, 0x1d4ba, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT CAPITAL S |
|
1307 { 0x1d4bb, 0x1d4bc, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT SMALL F |
|
1308 { 0x1d4bd, 0x1d4c4, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT SMALL H |
|
1309 { 0x1d4c5, 0x1d506, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL SCRIPT SMALL P |
|
1310 { 0x1d507, 0x1d50b, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL FRAKTUR CAPITAL D |
|
1311 { 0x1d50d, 0x1d515, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL FRAKTUR CAPITAL J |
|
1312 { 0x1d516, 0x1d51d, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL FRAKTUR CAPITAL S |
|
1313 { 0x1d51e, 0x1d53a, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL FRAKTUR SMALL A |
|
1314 { 0x1d53b, 0x1d53f, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL DOUBLE-STRUCK CAPITAL D |
|
1315 { 0x1d540, 0x1d545, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL DOUBLE-STRUCK CAPITAL I |
|
1316 { 0x1d546, 0x1d547, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL DOUBLE-STRUCK CAPITAL O |
|
1317 { 0x1d54a, 0x1d551, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL DOUBLE-STRUCK CAPITAL S |
|
1318 { 0x1d552, 0x1d6a6, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL DOUBLE-STRUCK SMALL A |
|
1319 { 0x1d6a8, 0x1d7cc, MLineBreaker::EAlLineBreakClass }, //MATHEMATICAL BOLD CAPITAL ALPHA |
|
1320 { 0x1d7ce, 0x1d800, MLineBreaker::ENuLineBreakClass }, //MATHEMATICAL BOLD DIGIT ZERO |
|
1321 { 0x20000, 0x2a6d6, MLineBreaker::EIdLineBreakClass }, //<CJK Ideograph Extension B, First>..<CJK Ideograph Extension B, Last> |
|
1322 { 0x2f800, 0x2fa1e, MLineBreaker::EIdLineBreakClass }, //CJK COMPATIBILITY IDEOGRAPH-2F800 |
|
1323 { 0xe0001, 0xe0002, MLineBreaker::ECmLineBreakClass }, //LANGUAGE TAG |
|
1324 { 0xe0020, 0xe0080, MLineBreaker::ECmLineBreakClass }, //TAG SPACE |
|
1325 { 0xe0100, 0xe01f0, MLineBreaker::ECmLineBreakClass }, //VARIATION SELECTOR-17 |
|
1326 { 0xf0000, 0xffffd, MLineBreaker::EXxLineBreakClass }, //<Plane 15 Private Use, First>..<Plane 15 Private Use, Last> |
|
1327 { 0x100000, 0x10fffd, MLineBreaker::EXxLineBreakClass }, //<Plane 16 Private Use, First>..<Plane 16 Private Use, Last> |
|
1328 }; |
|
1329 |
|
1330 static const TInt TheLineBreakRanges = sizeof(TheLineBreakRangeTable) / sizeof(TheLineBreakRangeTable[0]); |
|
1331 |
|
/**
Sanity-checks TheLineBreakRangeTable. Only active when _DEBUG is defined;
otherwise the function body is empty.

Panics with ELineBreakPanic_BadTable if any entry has iStart >= iEnd, has a
class value that is not below MLineBreaker::ELineBreakClasses, or starts
before the end of the preceding entry.
*/
void TestLineBreakTables(void)
	{
#ifdef _DEBUG
	TUint previousEnd = 0;
	const TLineBreakRange* const tableEnd = TheLineBreakRangeTable + TheLineBreakRanges;
	for (const TLineBreakRange* range = TheLineBreakRangeTable; range < tableEnd; ++range)
		{
		const TBool emptyOrInverted = range->iStart >= range->iEnd;
		const TBool classOutOfRange = range->iClass >= MLineBreaker::ELineBreakClasses;
		const TBool overlapsPrevious = range->iStart < previousEnd;
		if (emptyOrInverted || classOutOfRange || overlapsPrevious)
			Panic(ELineBreakPanic_BadTable);
		previousEnd = range->iEnd;
		}
#endif
	}
|
1345 |
|
1346 TUint TLineBreakClassCache::LineBreakClass(TUint aChar) |
|
1347 { |
|
1348 if (aChar >= iStart && aChar < iEnd) |
|
1349 return iClass; |
|
1350 TUint new_start, new_end; |
|
1351 TUint new_class = iBreaker.LineBreakClass(aChar,new_start,new_end); |
|
1352 if (new_start < new_end - 1) |
|
1353 { |
|
1354 iStart = new_start; |
|
1355 iEnd = new_end; |
|
1356 iClass = new_class; |
|
1357 } |
|
1358 return new_class; |
|
1359 } |
|
1360 |
|
1361 EXPORT_C TBool MLineBreaker::LineBreakPossible( |
|
1362 TUint aPrevClass, TUint aNextClass, TBool aHaveSpaces) const |
|
1363 /** Returns whether line breaks are legal between a character of line breaking |
|
1364 class aPrevClass and aNextClass, with intervening spaces or not as determined |
|
1365 by aHaveSpaces. |
|
1366 |
|
1367 @param aPrevClass The class before the potential line break. |
|
1368 @param aNextClass The class after the potential line break. |
|
1369 @param aHaveSpaces Whether spaces separate the characters. |
|
1370 @return ETrue if and only if a line break is legal in the situation described. */ |
|
1371 { |
|
1372 if (aPrevClass >= ELineBreakClasses || aNextClass >= ELineBreakClasses) |
|
1373 return FALSE; |
|
1374 const TLineBreakRule& rule = TheLineBreakRuleTable[aPrevClass]; |
|
1375 TUint flag = 1 << aNextClass; |
|
1376 if (rule.iForbid & flag) |
|
1377 return FALSE; |
|
1378 return aHaveSpaces || (rule.iAllow & flag); |
|
1379 } |
|
1380 |
|
1381 EXPORT_C TUint MLineBreaker::LineBreakClass(TUint aCode,TUint& aRangeStart,TUint& aRangeEnd) const |
|
1382 /** Converts Unicode character into line breaking class. |
|
1383 |
|
1384 @param aCode Input Unicode value. |
|
1385 @param aRangeStart Returns the start of the contiguous range of characters |
|
1386 including aCode that have the returned line breaking class. |
|
1387 @param aRangeEnd Returns the end of the contiguous range of characters including |
|
1388 aCode that have the returned line breaking class. |
|
1389 @return The line breaking class for aCode. */ |
|
1390 { |
|
1391 aRangeStart = aCode; |
|
1392 aRangeEnd = aCode + 1; |
|
1393 |
|
1394 // First check for a space; this needs to be fast. |
|
1395 if (aCode == 0x0020) |
|
1396 return ESpLineBreakClass; |
|
1397 |
|
1398 // Now search the table. |
|
1399 const TLineBreakRange* base = TheLineBreakRangeTable; |
|
1400 const TLineBreakRange* end = base + TheLineBreakRanges; |
|
1401 while (base < end) |
|
1402 { |
|
1403 int n = end - base; |
|
1404 const TLineBreakRange* r = &base[n / 2]; |
|
1405 if (r->iStart > aCode) |
|
1406 end = r; |
|
1407 else if (r->iEnd <= aCode) |
|
1408 base = r + 1; |
|
1409 else |
|
1410 { |
|
1411 aRangeStart = r->iStart; |
|
1412 aRangeEnd = r->iEnd; |
|
1413 return r->iClass; |
|
1414 } |
|
1415 } |
|
1416 |
|
1417 // If the character was not in the table determine the line break class using the Unicode character category. |
|
1418 TChar::TCategory cat = TChar(aCode).GetCategory(); |
|
1419 switch (cat) |
|
1420 { |
|
1421 case TChar::ELuCategory: |
|
1422 case TChar::ELlCategory: |
|
1423 case TChar::ELtCategory: |
|
1424 case TChar::ELoCategory: |
|
1425 case TChar::ESmCategory: |
|
1426 case TChar::ESoCategory: |
|
1427 return EAlLineBreakClass; |
|
1428 |
|
1429 case TChar::ESkCategory: |
|
1430 if (TChar(aCode).GetCjkWidth() == TChar::EWide) |
|
1431 return ENsLineBreakClass; |
|
1432 else |
|
1433 return EAlLineBreakClass; |
|
1434 |
|
1435 case TChar::ELmCategory: |
|
1436 { |
|
1437 TChar::TCjkWidth cjk_width = TChar(aCode).GetCjkWidth(); |
|
1438 if (cjk_width == TChar::EWide || cjk_width == TChar::EHalfWidth) |
|
1439 return ENsLineBreakClass; |
|
1440 } |
|
1441 break; |
|
1442 |
|
1443 case TChar::EMnCategory: |
|
1444 case TChar::EMcCategory: |
|
1445 case TChar::EMeCategory: |
|
1446 case TChar::ECcCategory: |
|
1447 case TChar::ECfCategory: |
|
1448 return ECmLineBreakClass; |
|
1449 |
|
1450 case TChar::ENdCategory: |
|
1451 return ENuLineBreakClass; |
|
1452 |
|
1453 case TChar::EPsCategory: |
|
1454 return EOpLineBreakClass; |
|
1455 |
|
1456 case TChar::EPeCategory: |
|
1457 return EClLineBreakClass; |
|
1458 |
|
1459 case TChar::EScCategory: |
|
1460 return EPrLineBreakClass; |
|
1461 |
|
1462 case TChar::EPfCategory: |
|
1463 case TChar::EPiCategory: |
|
1464 return EQuLineBreakClass; |
|
1465 |
|
1466 case TChar::ECsCategory: |
|
1467 return ESgLineBreakClass; |
|
1468 |
|
1469 default: |
|
1470 break; |
|
1471 } |
|
1472 |
|
1473 // If it has still not been found it may be an ideographic character not covered above. |
|
1474 if (aCode >= 0x3000 && aCode <= 0x3FFF) |
|
1475 return EIdLineBreakClass; |
|
1476 |
|
1477 // Or it may be a surrogate; assume plane 1 is alphabetic, plane 2 is ideographic. |
|
1478 if (aCode >= 0x10000 && aCode <= 0x1FFFF) |
|
1479 return EAlLineBreakClass; |
|
1480 if (aCode >= 0x20000 && aCode <= 0x2FFFF) |
|
1481 return EIdLineBreakClass; |
|
1482 |
|
1483 // Return the catch-all class. |
|
1484 return EXxLineBreakClass; |
|
1485 } |
|
1486 |
|
1487 |
|
1488 EXPORT_C TBool MLineBreaker::IsHangingCharacter(TUint) const |
|
1489 /** Returns ETrue if the character value may be positioned outside the margins. |
|
1490 |
|
1491 @param aChar Chatacter to test |
|
1492 @return ETrue if the character value may be positioned outside the margins. */ |
|
1493 { |
|
1494 return EFalse; |
|
1495 } |
|
1496 |
|
1497 TInt MakeSurrogate(TInt aHigh, TInt aLow) |
|
1498 { |
|
1499 return (aHigh << 10) + (aLow & 0x3FF) |
|
1500 + (0x10000 - 0xD800*0x400); |
|
1501 } |
|
1502 |
|
1503 inline TBool IsSurrogate(TInt aChar) |
|
1504 { |
|
1505 return (aChar & 0xF800) == 0xD800; |
|
1506 } |
|
1507 |
|
1508 inline TBool IsHighSurrogate(TInt aChar) |
|
1509 { |
|
1510 return (aChar & 0xFC00) == 0xD800; |
|
1511 } |
|
1512 |
|
1513 inline TBool IsLowSurrogate(TInt aChar) |
|
1514 { |
|
1515 return (aChar & 0xFC00) == 0xDC00; |
|
1516 } |
|
1517 |
|
1518 /** |
|
1519 Move past the current character and return the next. |
|
1520 @param aPtr |
|
1521 Pointer to the current character. On return, contains a pointer to the next |
|
1522 character. |
|
1523 @param aLimit |
|
1524 Limit of the iteration. For positive aOffset (moving forwards) this should |
|
1525 be one less than the end of the text. For negative aOffset (moving |
|
1526 backwards) this should be the start of the text. |
|
1527 @param aOffset |
|
1528 1 for moving forward, -1 for moving backward. |
|
1529 @return |
|
1530 Unicode character value or -1 on error. |
|
1531 @internalComponent |
|
1532 */ |
|
1533 TInt MoveTextPtr(const TText*& aPtr, const TText* aLimit, TInt aOffset) |
|
1534 { |
|
1535 // if (aPtr - aLimit) has the same sign as aOffset then we are |
|
1536 // already too far. |
|
1537 if (0 <= ((aPtr - aLimit) ^ aOffset) |
|
1538 && aLimit) |
|
1539 return -1; |
|
1540 TInt c = 0; |
|
1541 for (;;) |
|
1542 { |
|
1543 if (aPtr == aLimit) |
|
1544 return -1; |
|
1545 aPtr += aOffset; |
|
1546 TInt lastc = c; |
|
1547 c = *aPtr; |
|
1548 if (!IsSurrogate(c)) |
|
1549 return c; |
|
1550 if (aOffset < 0) |
|
1551 { |
|
1552 if (IsHighSurrogate(c) && IsLowSurrogate(lastc)) |
|
1553 return MakeSurrogate(c, lastc); |
|
1554 } |
|
1555 else |
|
1556 { |
|
1557 if (IsHighSurrogate(lastc) && IsLowSurrogate(c)) |
|
1558 { |
|
1559 // found a surrogate pair |
|
1560 // Set the pointer to the first of the pair |
|
1561 --aPtr; |
|
1562 return MakeSurrogate(lastc, c); |
|
1563 } |
|
1564 } |
|
1565 } |
|
1566 } |
|
1567 |
|
1568 /** Find the line break class of the latest character within [aFirst, aP+1] |
|
1569 to have non-CM line break class. |
|
1570 @pre aP points to a character of CM class. |
|
1571 */ |
|
1572 inline TUint GetGlyphClusterLineBreakClass(const TText* aP, |
|
1573 const TText* aFirst, TLineBreakClassCache& aBreaker) |
|
1574 { |
|
1575 TInt ch = MoveTextPtr(aP, aFirst, -1); |
|
1576 while (0 <= ch) |
|
1577 { |
|
1578 TUint cl = aBreaker.LineBreakClass(ch); |
|
1579 if (cl != MLineBreaker::ECmLineBreakClass) |
|
1580 return cl; |
|
1581 ch = MoveTextPtr(aP, aFirst, -1); |
|
1582 } |
|
1583 return MLineBreaker::ECmLineBreakClass; |
|
1584 } |
|
1585 |
|
1586 void TLineBreakClassIterator::Set(const TText* aFirst, const TText* aText, TLineBreakClassCache& aBreaker) |
|
1587 { |
|
1588 iFirst = aFirst; |
|
1589 iCurrent = aText; |
|
1590 TInt c = *aText; |
|
1591 if (IsHighSurrogate(c)) |
|
1592 { |
|
1593 if (IsLowSurrogate(aText[1])) |
|
1594 c = MakeSurrogate(c, aText[1]); |
|
1595 } |
|
1596 iClass = aBreaker.LineBreakClass(c); |
|
1597 if (iClass == MLineBreaker::ECmLineBreakClass) |
|
1598 { |
|
1599 TUint baseLbCls = GetGlyphClusterLineBreakClass(iCurrent, iFirst, aBreaker); |
|
1600 if (baseLbCls == MLineBreaker::ESaLineBreakClass) |
|
1601 iClass = MLineBreaker::ESaLineBreakClass; |
|
1602 } |
|
1603 } |
|
1604 |
|
1605 void TLineBreakClassIterator::SetNull() |
|
1606 { |
|
1607 iFirst = iCurrent = 0; |
|
1608 } |
|
1609 |
|
1610 TBool TLineBreakClassIterator::Move(const TText* aLimit, |
|
1611 const TText* aLimitAfterSpaces, TInt aOffset, |
|
1612 TBool& aHasSpaces, TLineBreakClassCache& aBreaker) |
|
1613 { |
|
1614 TInt c = MoveTextPtr(iCurrent, aLimit, aOffset); |
|
1615 if (c < 0) |
|
1616 return EFalse; |
|
1617 TUint lbClass = aBreaker.LineBreakClass(c); |
|
1618 if (lbClass == MLineBreaker::ESpLineBreakClass) |
|
1619 { |
|
1620 aHasSpaces = ETrue; |
|
1621 while (lbClass == MLineBreaker::ESpLineBreakClass) |
|
1622 { |
|
1623 c = MoveTextPtr(iCurrent, aLimitAfterSpaces, aOffset); |
|
1624 // We have hit the limit, but there might be more text to search |
|
1625 // after the spaces. |
|
1626 if (c < 0) |
|
1627 { |
|
1628 iClass = MLineBreaker::ESpLineBreakClass; |
|
1629 return ETrue; |
|
1630 } |
|
1631 lbClass = aBreaker.LineBreakClass(c); |
|
1632 } |
|
1633 iClass = lbClass; |
|
1634 return ETrue; |
|
1635 } |
|
1636 |
|
1637 // Cm class characters taken on their base character's |
|
1638 // line break property in the combining character sequence |
|
1639 // See D14 in Conformance chapter of Unicode 3.0 book. |
|
1640 // We do this just for Complex content to maintain backwards |
|
1641 // compatibility otherwise original test cases fail. |
|
1642 if (lbClass == MLineBreaker::ECmLineBreakClass) |
|
1643 { |
|
1644 TUint baseLbCls = GetGlyphClusterLineBreakClass(iCurrent, iFirst, aBreaker); |
|
1645 if (baseLbCls == MLineBreaker::ESaLineBreakClass) |
|
1646 lbClass = MLineBreaker::ESaLineBreakClass; |
|
1647 } |
|
1648 |
|
1649 aHasSpaces = EFalse; |
|
1650 if (lbClass == MLineBreaker::ESaLineBreakClass |
|
1651 && iClass == MLineBreaker::ESaLineBreakClass) |
|
1652 { |
|
1653 // We have at least two SA characters, so we need to find the |
|
1654 // total extent of the SA block. GetLineBreakInContext will be asked |
|
1655 // to find the break. |
|
1656 const TText* p = iCurrent; |
|
1657 while (0 <= c && lbClass == MLineBreaker::ESaLineBreakClass) |
|
1658 { |
|
1659 iCurrent = p; |
|
1660 c = MoveTextPtr(p, aOffset < 0? iFirst : aLimitAfterSpaces, aOffset); |
|
1661 lbClass = aBreaker.LineBreakClass(c); |
|
1662 |
|
1663 // Cm class characters taken on their base character's |
|
1664 // line break property. See above. |
|
1665 if (lbClass == MLineBreaker::ECmLineBreakClass) |
|
1666 { |
|
1667 TUint baseLbCls = GetGlyphClusterLineBreakClass(p, iFirst, aBreaker); |
|
1668 if (baseLbCls == MLineBreaker::ESaLineBreakClass) |
|
1669 lbClass = MLineBreaker::ESaLineBreakClass; |
|
1670 } |
|
1671 if (lbClass == MLineBreaker::EZwLineBreakClass) |
|
1672 { |
|
1673 iCurrent = p; |
|
1674 } |
|
1675 } |
|
1676 return ETrue; |
|
1677 } |
|
1678 iClass = lbClass; |
|
1679 return ETrue; |
|
1680 } |
|
1681 |
|
1682 TLineBreakIterator::TLineBreakIterator(TLineBreakClassCache& aBreaker, |
|
1683 const TText* aText, TInt aLength, TBool aForwards, |
|
1684 TInt aMinBreakPos, TInt aMaxBreakPos) |
|
1685 : iBreaker(aBreaker), iText(aText), iTextLength(aLength), iHasSpaces(EFalse) |
|
1686 { |
|
1687 if (aMinBreakPos < 1) |
|
1688 aMinBreakPos = 1; |
|
1689 // Find the next valid character and see if it can hang |
|
1690 // over the margin. |
|
1691 const TText* endText = aText + aLength; |
|
1692 const TText* max = aText + aMaxBreakPos; |
|
1693 if (endText <= max && aText < endText) |
|
1694 { |
|
1695 // let 'max' point to last character |
|
1696 max = endText - 1; |
|
1697 if (IsLowSurrogate(*max)) |
|
1698 { |
|
1699 ASSERT(aLength > 1); |
|
1700 --max; |
|
1701 ASSERT(IsHighSurrogate(*max)); |
|
1702 } |
|
1703 } |
|
1704 TInt c = 0xFFFF; |
|
1705 while (max < endText) |
|
1706 { |
|
1707 TInt lastc = c; |
|
1708 c = *max; |
|
1709 if ((c & 0xF800) != 0xD800) |
|
1710 break; |
|
1711 if ((lastc & 0xFC00) == 0xD800 |
|
1712 && (c & 0xFC00) == 0xDC00) |
|
1713 { |
|
1714 c = MakeSurrogate(lastc, c); |
|
1715 break; |
|
1716 } |
|
1717 ++max; |
|
1718 } |
|
1719 if (c != 0xFFFF && max + 1 < endText |
|
1720 && aBreaker.Breaker().IsHangingCharacter(c)) |
|
1721 // Set max to just beyond hanging character |
|
1722 ++max; |
|
1723 // till here, 'max' points to the "end" of a character |
|
1724 |
|
1725 // set up the iteration. |
|
1726 iMinBreakPos = aText + aMinBreakPos; |
|
1727 iMaxBreakPos = aText + aMaxBreakPos; |
|
1728 // Increment or Decrement must be called to complete set up. |
|
1729 if (max < aText + aMinBreakPos) |
|
1730 { |
|
1731 // no possible line breaks |
|
1732 iLimit = 0; |
|
1733 iAfterBreak.SetNull(); |
|
1734 iBeforeBreak.SetNull(); |
|
1735 } |
|
1736 else if (aForwards) |
|
1737 { |
|
1738 iLimit = max; // point to the 'end' of the last character (low surrogate if supplementary) |
|
1739 iLimitAfterSpaces = endText - 1; |
|
1740 iAfterBreak.Set(iText, aText + aMinBreakPos - 1, aBreaker); |
|
1741 } |
|
1742 else |
|
1743 { |
|
1744 iLimit = aText + aMinBreakPos - 1; |
|
1745 iLimitAfterSpaces = iLimit; |
|
1746 iBeforeBreak.Set(iText, max - 1, aBreaker); |
|
1747 iBeforeBreak.Move(endText - 1, endText - 1, 1, iHasSpaces, aBreaker); |
|
1748 } |
|
1749 } |
|
1750 |
|
1751 TBool TLineBreakIterator::IsBreak(TBool aForwards) |
|
1752 { |
|
1753 TInt beforeClass = iBeforeBreak.Class(); |
|
1754 TInt afterClass = iAfterBreak.Class(); |
|
1755 const MLineBreaker& breaker = iBreaker.Breaker(); |
|
1756 |
|
1757 // Line break characters overhang the margin |
|
1758 // along with the spaces that precede them. |
|
1759 if (afterClass == MLineBreaker::EBkLineBreakClass) |
|
1760 { |
|
1761 const TText* p = iAfterBreak.Ptr(); |
|
1762 MoveTextPtr(p, iText + iTextLength, 1); |
|
1763 iAfterBreak.Set(iText, p, iBreaker); |
|
1764 return ETrue; |
|
1765 } |
|
1766 |
|
1767 // Find break within this run of SA text |
|
1768 const TText* startOfSaText = iBeforeBreak.Ptr(); |
|
1769 const TText* endOfSaText = iAfterBreak.Ptr(); |
|
1770 |
|
1771 if ((beforeClass != MLineBreaker::ESaLineBreakClass && *startOfSaText != KWhiteSpace) || |
|
1772 (afterClass != MLineBreaker::ESaLineBreakClass)) |
|
1773 // Complex (South-East Asian) line breaking rules not required. |
|
1774 return breaker.LineBreakPossible(beforeClass, afterClass, iHasSpaces); |
|
1775 |
|
1776 MoveTextPtr(endOfSaText, iText + iTextLength, 1); |
|
1777 const TText* minBreak = startOfSaText < iMinBreakPos ? |
|
1778 iMinBreakPos : startOfSaText + 1; |
|
1779 const TText* maxBreak = iMaxBreakPos < endOfSaText ? |
|
1780 iMaxBreakPos : endOfSaText - 1; |
|
1781 TInt breakPos; |
|
1782 TPtrC saText(startOfSaText, endOfSaText - startOfSaText); |
|
1783 if (!breaker.GetLineBreakInContext(saText, |
|
1784 minBreak - startOfSaText, maxBreak - startOfSaText, |
|
1785 aForwards, breakPos)) |
|
1786 return EFalse; |
|
1787 |
|
1788 // Convert the break position within SA text into iterator positions. |
|
1789 const TText* p = startOfSaText + breakPos; |
|
1790 iAfterBreak.Set(iText, p, iBreaker); |
|
1791 if (*p != KZeroWidthSpace) MoveTextPtr(p, 0, -1); |
|
1792 iBeforeBreak.Set(iText, p, iBreaker); |
|
1793 |
|
1794 return ETrue; |
|
1795 } |
|
1796 |
|
1797 TBool TLineBreakIterator::HasContingentBreak() const |
|
1798 { |
|
1799 return iBeforeBreak.Class() == MLineBreaker::ECbLineBreakClass |
|
1800 || iAfterBreak.Class() == MLineBreaker::ECbLineBreakClass; |
|
1801 } |
|
1802 |
|
1803 TInt TLineBreakIterator::PreviousClass() const |
|
1804 { |
|
1805 return iBeforeBreak.Class(); |
|
1806 } |
|
1807 |
|
1808 TInt TLineBreakIterator::NextClass() const |
|
1809 { |
|
1810 return iAfterBreak.Class(); |
|
1811 } |
|
1812 |
|
1813 TBool TLineBreakIterator::HasSpaces() const |
|
1814 { |
|
1815 return iHasSpaces; |
|
1816 } |
|
1817 |
|
1818 TBool TLineBreakIterator::Decrement() |
|
1819 { |
|
1820 iAfterBreak = iBeforeBreak; |
|
1821 return iBeforeBreak.Move(iLimit, iLimitAfterSpaces, -1, iHasSpaces, iBreaker); |
|
1822 } |
|
1823 |
|
1824 TBool TLineBreakIterator::Increment() |
|
1825 { |
|
1826 iBeforeBreak = iAfterBreak; |
|
1827 return iAfterBreak.Move(iLimit, iLimitAfterSpaces, 1, iHasSpaces, iBreaker); |
|
1828 } |
|
1829 |
|
1830 TInt TLineBreakIterator::BreakPos() const |
|
1831 { |
|
1832 const TText* t = iBeforeBreak.Ptr(); |
|
1833 MoveTextPtr(t, 0, 1); |
|
1834 return t - iText; |
|
1835 } |
|
1836 |
|
1837 TInt TLineBreakIterator::BeforeBreakPos() const |
|
1838 { |
|
1839 return iBeforeBreak.Ptr() - iText; |
|
1840 } |
|
1841 |
|
1842 TInt TLineBreakIterator::AfterBreakPos() const |
|
1843 { |
|
1844 return iAfterBreak.Ptr() - iText; |
|
1845 } |
|
1846 |
|
1847 TBool HasContingentBreak(TLineBreakIterator& aIterator, |
|
1848 MContingentLineBreaker& aCbDelegate) |
|
1849 { |
|
1850 if (aIterator.PreviousClass() == MLineBreaker::ECbLineBreakClass |
|
1851 && aCbDelegate.IsLegalBreakAfter( |
|
1852 aIterator.BeforeBreakPos(), aIterator.NextClass(), |
|
1853 aIterator.HasSpaces())) |
|
1854 return ETrue; |
|
1855 if (aIterator.NextClass() == MLineBreaker::ECbLineBreakClass |
|
1856 && aCbDelegate.IsLegalBreakBefore( |
|
1857 aIterator.AfterBreakPos(), aIterator.PreviousClass(), |
|
1858 aIterator.HasSpaces())) |
|
1859 return ETrue; |
|
1860 return EFalse; |
|
1861 } |
|
1862 |
|
1863 TBool FindBreak(TLineBreakIterator& aIterator, TBool aForwards, |
|
1864 MContingentLineBreaker* aCbDelegate) |
|
1865 { |
|
1866 while (aForwards? aIterator.Increment() : aIterator.Decrement()) |
|
1867 { |
|
1868 if (aCbDelegate && aIterator.HasContingentBreak()) |
|
1869 { |
|
1870 if (HasContingentBreak(aIterator, *aCbDelegate)) |
|
1871 { |
|
1872 return ETrue; |
|
1873 } |
|
1874 } |
|
1875 else if (aIterator.IsBreak(aForwards)) |
|
1876 { |
|
1877 return ETrue; |
|
1878 } |
|
1879 } |
|
1880 return EFalse; |
|
1881 } |
|
1882 |
|
1883 TBool HasContingentBreakL(TLineBreakIterator& aIterator, |
|
1884 MContingentLineBreakerL& aCbDelegate) |
|
1885 { |
|
1886 if (aIterator.PreviousClass() == MLineBreaker::ECbLineBreakClass |
|
1887 && aCbDelegate.IsLegalBreakAfterL( |
|
1888 aIterator.BeforeBreakPos(), aIterator.NextClass(), |
|
1889 aIterator.HasSpaces())) |
|
1890 return ETrue; |
|
1891 if (aIterator.NextClass() == MLineBreaker::ECbLineBreakClass |
|
1892 && aCbDelegate.IsLegalBreakBeforeL( |
|
1893 aIterator.AfterBreakPos(), aIterator.PreviousClass(), |
|
1894 aIterator.HasSpaces())) |
|
1895 return ETrue; |
|
1896 return EFalse; |
|
1897 } |
|
1898 |
|
1899 TBool FindBreakL(TLineBreakIterator& aIterator, TBool aForwards, |
|
1900 MContingentLineBreakerL* aCbDelegate) |
|
1901 { |
|
1902 while (aForwards? aIterator.Increment() : aIterator.Decrement()) |
|
1903 { |
|
1904 if (aCbDelegate && aIterator.HasContingentBreak()) |
|
1905 { |
|
1906 if (HasContingentBreakL(aIterator, *aCbDelegate)) |
|
1907 return ETrue; |
|
1908 } |
|
1909 else if (aIterator.IsBreak(aForwards)) |
|
1910 return ETrue; |
|
1911 } |
|
1912 return EFalse; |
|
1913 } |
|
1914 |
|
1915 EXPORT_C TBool MLineBreaker::GetLineBreak(const TDesC& aText, |
|
1916 TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards, |
|
1917 MContingentLineBreaker* aCbDelegate, |
|
1918 TInt& aBreakPos, TInt& aBreakPosAfterSpaces) const |
|
1919 /** Finds the line break according to the rules in the virtual functions. |
|
1920 |
|
1921 @param aText The text to be broken. |
|
1922 @param aMinBreakPos The smallest allowed result. |
|
1923 @param aMaxBreakPos The position of the first character that fails to fit the |
|
1924 measure. |
|
1925 @param aForwards ETrue to find the earliest legal break opportunity, EFalse |
|
1926 to find the last. |
|
1927 @param aCbDelegate Object to resolve breaks around characters with the CB (cointingent |
|
1928 break) line breaking class. Typically this class is only used for character |
|
1929 0xFFFD, which usually marks the position of embedded pictures. May be NULL, |
|
1930 in which case LineBreakPossible() determines whether these breaks are legal. |
|
1931 @param aBreakPos Returns the position of the break. |
|
1932 @param aBreakPosAfterSpaces Returns the position of the start of the next line. |
|
1933 @return ETrue if successful, EFalse if no legal line breaks were found. */ |
|
1934 { |
|
1935 TLineBreakClassCache cache(*this); |
|
1936 const TText* text = aText.Ptr(); |
|
1937 TLineBreakIterator it(cache, text, aText.Length(), |
|
1938 aForwards, aMinBreakPos, aMaxBreakPos); |
|
1939 if (!FindBreak(it, aForwards, aCbDelegate)) |
|
1940 return EFalse; |
|
1941 aBreakPos = it.BreakPos(); |
|
1942 aBreakPosAfterSpaces = it.AfterBreakPos(); |
|
1943 return ETrue; |
|
1944 } |
|
1945 |
|
1946 EXPORT_C TBool MLineBreaker::GetLineBreakL(const TDesC& aText, |
|
1947 TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards, |
|
1948 MContingentLineBreakerL* aCbDelegate, |
|
1949 TInt& aBreakPos, TInt& aBreakPosAfterSpaces) const |
|
1950 /** Exactly the same as GetLineBreak, but aCbDelegate's functions may leave so |
|
1951 the whole function may leave. |
|
1952 |
|
1953 @param aText The text to be broken. |
|
1954 @param aMinBreakPos The smallest allowed result. |
|
1955 @param aMaxBreakPos he position of the first character that fails to fit the |
|
1956 measure. |
|
1957 @param aForwards ETrue to find the earliest legal break opportunity, EFalse |
|
1958 to find the last. |
|
1959 @param aCbDelegate Object to resolve breaks around characters with the CB (cointingent |
|
1960 break) line breaking class. Typically this class is only used for character |
|
1961 0xFFFD, which usually marks the position of embedded pictures. May be NULL, |
|
1962 in which case LineBreakPossible() determines whether these breaks are legal. |
|
1963 @param aBreakPos Returns the position of the break. |
|
1964 @param aBreakPosAfterSpaces Returns the position of the start of the next line. |
|
1965 @return ETrue if successful, EFalse if no legal line breaks were found. */ |
|
1966 { |
|
1967 if (aMaxBreakPos < aMinBreakPos) |
|
1968 return EFalse; |
|
1969 TLineBreakClassCache cache(*this); |
|
1970 const TText* text = aText.Ptr(); |
|
1971 TLineBreakIterator it(cache, text, aText.Length(), |
|
1972 aForwards, aMinBreakPos, aMaxBreakPos); |
|
1973 if (!FindBreakL(it, aForwards, aCbDelegate)) |
|
1974 return EFalse; |
|
1975 aBreakPos = it.BreakPos(); |
|
1976 aBreakPosAfterSpaces = it.AfterBreakPos(); |
|
1977 return ETrue; |
|
1978 } |
|
1979 EXPORT_C void* MLineBreaker::ExtendedInterface(TUid&) const |
|
1980 /** Returns an interface to more functions, or 0 if these are unsupported. |
|
1981 Currently there are no other functions, so 0 is always returned. If this |
|
1982 function is overridden, it must base call if aUid is not recognised, |
|
1983 rather than just returning 0. |
|
1984 @param aUid Identifies the interface to be returned. |
|
1985 @return A pointer to the extended interface, or 0 if not supported. */ |
|
1986 { |
|
1987 return 0; |
|
1988 } |
|
1989 |
|
1990 |