|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the QtCore module of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 // Don't define it while compiling this module, or USERS of Qt will |
|
43 // not be able to link. |
|
44 #ifdef QT_NO_CAST_FROM_ASCII |
|
45 #undef QT_NO_CAST_FROM_ASCII |
|
46 #endif |
|
47 #ifdef QT_NO_CAST_TO_ASCII |
|
48 #undef QT_NO_CAST_TO_ASCII |
|
49 #endif |
|
50 #include "qchar.h" |
|
51 #include "qdatastream.h" |
|
52 #include "qtextcodec.h" |
|
53 |
|
54 #include "qunicodetables_p.h" |
|
55 |
|
56 #include "qunicodetables.cpp" |
|
57 |
|
58 QT_BEGIN_NAMESPACE |
|
59 |
|
60 #define LAST_UNICODE_CHAR 0x10ffff |
|
61 |
|
62 #ifndef QT_NO_CODEC_FOR_C_STRINGS |
|
63 #ifdef QT_NO_TEXTCODEC |
|
64 #define QT_NO_CODEC_FOR_C_STRINGS |
|
65 #endif |
|
66 #endif |
|
67 |
|
68 #define FLAG(x) (1 << (x)) |
|
69 |
|
70 /*! \class QLatin1Char |
|
71 \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character. |
|
72 |
|
73 \ingroup string-processing |
|
74 |
|
75 This class is only useful to avoid the codec for C strings business |
|
76 in the QChar(ch) constructor. You can avoid it by writing |
|
77 QChar(ch, 0). |
|
78 |
|
79 \sa QChar, QLatin1String, QString |
|
80 */ |
|
81 |
|
82 /*! |
|
83 \fn const char QLatin1Char::toLatin1() const |
|
84 |
|
85 Converts a Latin-1 character to an 8-bit ASCII representation of |
|
86 the character. |
|
87 */ |
|
88 |
|
89 /*! |
|
90 \fn const ushort QLatin1Char::unicode() const |
|
91 |
|
92 Converts a Latin-1 character to a 16-bit-encoded Unicode representation |
|
93 of the character. |
|
94 */ |
|
95 |
|
96 /*! |
|
97 \fn QLatin1Char::QLatin1Char(char c) |
|
98 |
|
99 Constructs a Latin-1 character for \a c. This constructor should be |
|
100 used when the encoding of the input character is known to be Latin-1. |
|
101 */ |
|
102 |
|
103 /*! |
|
104 \class QChar |
|
105 \brief The QChar class provides a 16-bit Unicode character. |
|
106 |
|
107 \ingroup string-processing |
|
108 \reentrant |
|
109 |
|
110 In Qt, Unicode characters are 16-bit entities without any markup |
|
111 or structure. This class represents such an entity. It is |
|
112 lightweight, so it can be used everywhere. Most compilers treat |
|
113 it like a \c{unsigned short}. |
|
114 |
|
115 QChar provides a full complement of testing/classification |
|
116 functions, converting to and from other formats, converting from |
|
117 composed to decomposed Unicode, and trying to compare and |
|
118 case-convert if you ask it to. |
|
119 |
|
120 The classification functions include functions like those in the |
|
121 standard C++ header \<cctype\> (formerly \<ctype.h\>), but |
|
122 operating on the full range of Unicode characters. They all |
|
123 return true if the character is a certain type of character; |
|
124 otherwise they return false. These classification functions are |
|
125 isNull() (returns true if the character is '\\0'), isPrint() |
|
126 (true if the character is any sort of printable character, |
|
127 including whitespace), isPunct() (any sort of punctuation), |
|
128 isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any |
|
129 sort of numeric character, not just 0-9), isLetterOrNumber(), and |
|
130 isDigit() (decimal digits). All of these are wrappers around |
|
131 category() which returns the Unicode-defined category of each |
|
132 character. |
|
133 |
|
134 QChar also provides direction(), which indicates the "natural" |
|
135 writing direction of this character. The joining() function |
|
136 indicates how the character joins with its neighbors (needed |
|
137 mostly for Arabic) and finally hasMirrored(), which indicates |
|
138 whether the character needs to be mirrored when it is printed in |
|
139 its "unnatural" writing direction. |
|
140 |
|
141 Composed Unicode characters (like \aring) can be converted to |
|
142 decomposed Unicode ("a" followed by "ring above") by using |
|
143 decomposition(). |
|
144 |
|
145 In Unicode, comparison is not necessarily possible and case |
|
146 conversion is very difficult at best. Unicode, covering the |
|
147 "entire" world, also includes most of the world's case and |
|
148 sorting problems. operator==() and friends will do comparison |
|
149 based purely on the numeric Unicode value (code point) of the |
|
150 characters, and toUpper() and toLower() will do case changes when |
|
151 the character has a well-defined uppercase/lowercase equivalent. |
|
152 For locale-dependent comparisons, use |
|
153 QString::localeAwareCompare(). |
|
154 |
|
155 The conversion functions include unicode() (to a scalar), |
|
156 toLatin1() (to scalar, but converts all non-Latin-1 characters to |
|
157 0), row() (gives the Unicode row), cell() (gives the Unicode |
|
158 cell), digitValue() (gives the integer value of any of the |
|
159 numerous digit characters), and a host of constructors. |
|
160 |
|
161 QChar provides constructors and cast operators that make it easy |
|
162 to convert to and from traditional 8-bit \c{char}s. If you |
|
163 defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as |
|
164 explained in the QString documentation, you will need to |
|
165 explicitly call fromAscii() or fromLatin1(), or use QLatin1Char, |
|
166 to construct a QChar from an 8-bit \c char, and you will need to |
|
167 call toAscii() or toLatin1() to get the 8-bit value back. |
|
168 |
|
169 \sa QString, Unicode, QLatin1Char |
|
170 */ |
|
171 |
|
172 /*! |
|
173 \enum QChar::UnicodeVersion |
|
174 |
|
175 Specifies which version of the \l{http://www.unicode.org/}{Unicode standard} |
|
176 introduced a certain character. |
|
177 |
|
178 \value Unicode_1_1 Version 1.1 |
|
179 \value Unicode_2_0 Version 2.0 |
|
180 \value Unicode_2_1_2 Version 2.1.2 |
|
181 \value Unicode_3_0 Version 3.0 |
|
182 \value Unicode_3_1 Version 3.1 |
|
183 \value Unicode_3_2 Version 3.2 |
|
184 \value Unicode_4_0 Version 4.0 |
|
185 \value Unicode_4_1 Version 4.1 |
|
186 \value Unicode_5_0 Version 5.0 |
|
187 \value Unicode_Unassigned The value is not assigned to any character |
|
188 in version 5.0 of Unicode. |
|
189 |
|
190 \sa unicodeVersion() |
|
191 */ |
|
192 |
|
193 /*! |
|
194 \enum QChar::Category |
|
195 |
|
196 This enum maps the Unicode character categories. |
|
197 |
|
198 The following categories are normative in Unicode: |
|
199 |
|
200 \value Mark_NonSpacing Unicode class name Mn |
|
201 |
|
202 \value Mark_SpacingCombining Unicode class name Mc |
|
203 |
|
204 \value Mark_Enclosing Unicode class name Me |
|
205 |
|
206 \value Number_DecimalDigit Unicode class name Nd |
|
207 |
|
208 \value Number_Letter Unicode class name Nl |
|
209 |
|
210 \value Number_Other Unicode class name No |
|
211 |
|
212 \value Separator_Space Unicode class name Zs |
|
213 |
|
214 \value Separator_Line Unicode class name Zl |
|
215 |
|
216 \value Separator_Paragraph Unicode class name Zp |
|
217 |
|
218 \value Other_Control Unicode class name Cc |
|
219 |
|
220 \value Other_Format Unicode class name Cf |
|
221 |
|
222 \value Other_Surrogate Unicode class name Cs |
|
223 |
|
224 \value Other_PrivateUse Unicode class name Co |
|
225 |
|
226 \value Other_NotAssigned Unicode class name Cn |
|
227 |
|
228 |
|
229 The following categories are informative in Unicode: |
|
230 |
|
231 \value Letter_Uppercase Unicode class name Lu |
|
232 |
|
233 \value Letter_Lowercase Unicode class name Ll |
|
234 |
|
235 \value Letter_Titlecase Unicode class name Lt |
|
236 |
|
237 \value Letter_Modifier Unicode class name Lm |
|
238 |
|
239 \value Letter_Other Unicode class name Lo |
|
240 |
|
241 \value Punctuation_Connector Unicode class name Pc |
|
242 |
|
243 \value Punctuation_Dash Unicode class name Pd |
|
244 |
|
245 \value Punctuation_Open Unicode class name Ps |
|
246 |
|
247 \value Punctuation_Close Unicode class name Pe |
|
248 |
|
249 \value Punctuation_InitialQuote Unicode class name Pi |
|
250 |
|
251 \value Punctuation_FinalQuote Unicode class name Pf |
|
252 |
|
253 \value Punctuation_Other Unicode class name Po |
|
254 |
|
255 \value Symbol_Math Unicode class name Sm |
|
256 |
|
257 \value Symbol_Currency Unicode class name Sc |
|
258 |
|
259 \value Symbol_Modifier Unicode class name Sk |
|
260 |
|
261 \value Symbol_Other Unicode class name So |
|
262 |
|
263 \value NoCategory Qt cannot find an appropriate category for the character. |
|
264 |
|
265 \omitvalue Punctuation_Dask |
|
266 |
|
267 \sa category() |
|
268 */ |
|
269 |
|
270 /*! |
|
271 \enum QChar::Direction |
|
272 |
|
273 This enum type defines the Unicode direction attributes. See the |
|
274 \l{http://www.unicode.org/}{Unicode Standard} for a description |
|
275 of the values. |
|
276 |
|
277 In order to conform to C/C++ naming conventions "Dir" is prepended |
|
278 to the codes used in the Unicode Standard. |
|
279 |
|
280 \value DirAL |
|
281 \value DirAN |
|
282 \value DirB |
|
283 \value DirBN |
|
284 \value DirCS |
|
285 \value DirEN |
|
286 \value DirES |
|
287 \value DirET |
|
288 \value DirL |
|
289 \value DirLRE |
|
290 \value DirLRO |
|
291 \value DirNSM |
|
292 \value DirON |
|
293 \value DirPDF |
|
294 \value DirR |
|
295 \value DirRLE |
|
296 \value DirRLO |
|
297 \value DirS |
|
298 \value DirWS |
|
299 |
|
300 \sa direction() |
|
301 */ |
|
302 |
|
303 /*! |
|
304 \enum QChar::Decomposition |
|
305 |
|
306 This enum type defines the Unicode decomposition attributes. See |
|
307 the \l{http://www.unicode.org/}{Unicode Standard} for a |
|
308 description of the values. |
|
309 |
|
310 \value NoDecomposition |
|
311 \value Canonical |
|
312 \value Circle |
|
313 \value Compat |
|
314 \value Final |
|
315 \value Font |
|
316 \value Fraction |
|
317 \value Initial |
|
318 \value Isolated |
|
319 \value Medial |
|
320 \value Narrow |
|
321 \value NoBreak |
|
322 \value Small |
|
323 \value Square |
|
324 \value Sub |
|
325 \value Super |
|
326 \value Vertical |
|
327 \value Wide |
|
328 |
|
329 \omitvalue Single |
|
330 |
|
331 \sa decomposition() |
|
332 */ |
|
333 |
|
334 /*! |
|
335 \enum QChar::Joining |
|
336 |
|
337 This enum type defines the Unicode joining attributes. See the |
|
338 \l{http://www.unicode.org/}{Unicode Standard} for a description |
|
339 of the values. |
|
340 |
|
341 \value Center |
|
342 \value Dual |
|
343 \value OtherJoining |
|
344 \value Right |
|
345 |
|
346 \sa joining() |
|
347 */ |
|
348 |
|
349 /*! |
|
350 \enum QChar::CombiningClass |
|
351 |
|
352 \internal |
|
353 |
|
354 This enum type defines names for some of the Unicode combining |
|
355 classes. See the \l{http://www.unicode.org/}{Unicode Standard} |
|
356 for a description of the values. |
|
357 |
|
358 \value Combining_Above |
|
359 \value Combining_AboveAttached |
|
360 \value Combining_AboveLeft |
|
361 \value Combining_AboveLeftAttached |
|
362 \value Combining_AboveRight |
|
363 \value Combining_AboveRightAttached |
|
364 \value Combining_Below |
|
365 \value Combining_BelowAttached |
|
366 \value Combining_BelowLeft |
|
367 \value Combining_BelowLeftAttached |
|
368 \value Combining_BelowRight |
|
369 \value Combining_BelowRightAttached |
|
370 \value Combining_DoubleAbove |
|
371 \value Combining_DoubleBelow |
|
372 \value Combining_IotaSubscript |
|
373 \value Combining_Left |
|
374 \value Combining_LeftAttached |
|
375 \value Combining_Right |
|
376 \value Combining_RightAttached |
|
377 */ |
|
378 |
|
379 /*! |
|
380 \enum QChar::SpecialCharacter |
|
381 |
|
382 \value Null A QChar with this value isNull(). |
|
383 \value Nbsp Non-breaking space. |
|
384 \value ReplacementCharacter |
|
385 \value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used. |
|
386 \value ByteOrderMark |
|
387 \value ByteOrderSwapped |
|
388 \value ParagraphSeparator |
|
389 \value LineSeparator |
|
390 |
|
391 \omitvalue null |
|
392 \omitvalue replacement |
|
393 \omitvalue byteOrderMark |
|
394 \omitvalue byteOrderSwapped |
|
395 \omitvalue nbsp |
|
396 */ |
|
397 |
|
398 /*! |
|
399 \fn void QChar::setCell(uchar cell) |
|
400 \internal |
|
401 */ |
|
402 |
|
403 /*! |
|
404 \fn void QChar::setRow(uchar row) |
|
405 \internal |
|
406 */ |
|
407 |
|
408 /*! |
|
409 \fn QChar::QChar() |
|
410 |
|
411 Constructs a null QChar ('\\0'). |
|
412 |
|
413 \sa isNull() |
|
414 */ |
|
415 |
|
416 /*! |
|
417 \fn QChar::QChar(QLatin1Char ch) |
|
418 |
|
419 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
|
420 */ |
|
421 |
|
422 /*! |
|
423 \fn QChar::QChar(SpecialCharacter ch) |
|
424 |
|
425 Constructs a QChar for the predefined character value \a ch. |
|
426 */ |
|
427 |
|
428 /*! |
|
429 Constructs a QChar corresponding to ASCII/Latin-1 character \a |
|
430 ch. |
|
431 */ |
|
432 QChar::QChar(char ch) |
|
433 { |
|
434 #ifndef QT_NO_CODEC_FOR_C_STRINGS |
|
435 if (QTextCodec::codecForCStrings()) |
|
436 // ##### |
|
437 ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode(); |
|
438 else |
|
439 #endif |
|
440 ucs = uchar(ch); |
|
441 } |
|
442 |
|
443 /*! |
|
444 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
|
445 */ |
|
446 QChar::QChar(uchar ch) |
|
447 { |
|
448 #ifndef QT_NO_CODEC_FOR_C_STRINGS |
|
449 if (QTextCodec::codecForCStrings()) { |
|
450 // ##### |
|
451 char c = char(ch); |
|
452 ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode(); |
|
453 } else |
|
454 #endif |
|
455 ucs = ch; |
|
456 } |
|
457 |
|
458 /*! |
|
459 \fn QChar::QChar(uchar cell, uchar row) |
|
460 |
|
461 Constructs a QChar for Unicode cell \a cell in row \a row. |
|
462 |
|
463 \sa cell(), row() |
|
464 */ |
|
465 |
|
466 /*! |
|
467 \fn QChar::QChar(ushort code) |
|
468 |
|
469 Constructs a QChar for the character with Unicode code point \a |
|
470 code. |
|
471 */ |
|
472 |
|
473 |
|
474 /*! |
|
475 \fn QChar::QChar(short code) |
|
476 |
|
477 Constructs a QChar for the character with Unicode code point \a |
|
478 code. |
|
479 */ |
|
480 |
|
481 |
|
482 /*! |
|
483 \fn QChar::QChar(uint code) |
|
484 |
|
485 Constructs a QChar for the character with Unicode code point \a |
|
486 code. |
|
487 */ |
|
488 |
|
489 |
|
490 /*! |
|
491 \fn QChar::QChar(int code) |
|
492 |
|
493 Constructs a QChar for the character with Unicode code point \a |
|
494 code. |
|
495 */ |
|
496 |
|
497 |
|
498 /*! |
|
499 \fn bool QChar::isNull() const |
|
500 |
|
501 Returns true if the character is the Unicode character 0x0000 |
|
502 ('\\0'); otherwise returns false. |
|
503 */ |
|
504 |
|
505 /*! |
|
506 \fn uchar QChar::cell() const |
|
507 |
|
508 Returns the cell (least significant byte) of the Unicode |
|
509 character. |
|
510 |
|
511 \sa row() |
|
512 */ |
|
513 |
|
514 /*! |
|
515 \fn uchar QChar::row() const |
|
516 |
|
517 Returns the row (most significant byte) of the Unicode character. |
|
518 |
|
519 \sa cell() |
|
520 */ |
|
521 |
|
522 /*! |
|
523 Returns true if the character is a printable character; otherwise |
|
524 returns false. This is any character not of category Cc or Cn. |
|
525 |
|
526 Note that this gives no indication of whether the character is |
|
527 available in a particular font. |
|
528 */ |
|
529 bool QChar::isPrint() const |
|
530 { |
|
531 const int test = FLAG(Other_Control) | |
|
532 FLAG(Other_NotAssigned); |
|
533 return !(FLAG(qGetProp(ucs)->category) & test); |
|
534 } |
|
535 |
|
536 /*! |
|
537 Returns true if the character is a separator character |
|
538 (Separator_* categories); otherwise returns false. |
|
539 */ |
|
540 bool QChar::isSpace() const |
|
541 { |
|
542 if(ucs >= 9 && ucs <=13) |
|
543 return true; |
|
544 const int test = FLAG(Separator_Space) | |
|
545 FLAG(Separator_Line) | |
|
546 FLAG(Separator_Paragraph); |
|
547 return FLAG(qGetProp(ucs)->category) & test; |
|
548 } |
|
549 |
|
550 /*! |
|
551 Returns true if the character is a mark (Mark_* categories); |
|
552 otherwise returns false. |
|
553 |
|
554 See QChar::Category for more information regarding marks. |
|
555 */ |
|
556 bool QChar::isMark() const |
|
557 { |
|
558 const int test = FLAG(Mark_NonSpacing) | |
|
559 FLAG(Mark_SpacingCombining) | |
|
560 FLAG(Mark_Enclosing); |
|
561 return FLAG(qGetProp(ucs)->category) & test; |
|
562 } |
|
563 |
|
564 /*! |
|
565 Returns true if the character is a punctuation mark (Punctuation_* |
|
566 categories); otherwise returns false. |
|
567 */ |
|
568 bool QChar::isPunct() const |
|
569 { |
|
570 const int test = FLAG(Punctuation_Connector) | |
|
571 FLAG(Punctuation_Dash) | |
|
572 FLAG(Punctuation_Open) | |
|
573 FLAG(Punctuation_Close) | |
|
574 FLAG(Punctuation_InitialQuote) | |
|
575 FLAG(Punctuation_FinalQuote) | |
|
576 FLAG(Punctuation_Other); |
|
577 return FLAG(qGetProp(ucs)->category) & test; |
|
578 } |
|
579 |
|
580 /*! |
|
581 Returns true if the character is a letter (Letter_* categories); |
|
582 otherwise returns false. |
|
583 */ |
|
584 bool QChar::isLetter() const |
|
585 { |
|
586 const int test = FLAG(Letter_Uppercase) | |
|
587 FLAG(Letter_Lowercase) | |
|
588 FLAG(Letter_Titlecase) | |
|
589 FLAG(Letter_Modifier) | |
|
590 FLAG(Letter_Other); |
|
591 return FLAG(qGetProp(ucs)->category) & test; |
|
592 } |
|
593 |
|
594 /*! |
|
595 Returns true if the character is a number (Number_* categories, |
|
596 not just 0-9); otherwise returns false. |
|
597 |
|
598 \sa isDigit() |
|
599 */ |
|
600 bool QChar::isNumber() const |
|
601 { |
|
602 const int test = FLAG(Number_DecimalDigit) | |
|
603 FLAG(Number_Letter) | |
|
604 FLAG(Number_Other); |
|
605 return FLAG(qGetProp(ucs)->category) & test; |
|
606 } |
|
607 |
|
608 /*! |
|
609 Returns true if the character is a letter or number (Letter_* or |
|
610 Number_* categories); otherwise returns false. |
|
611 */ |
|
612 bool QChar::isLetterOrNumber() const |
|
613 { |
|
614 const int test = FLAG(Letter_Uppercase) | |
|
615 FLAG(Letter_Lowercase) | |
|
616 FLAG(Letter_Titlecase) | |
|
617 FLAG(Letter_Modifier) | |
|
618 FLAG(Letter_Other) | |
|
619 FLAG(Number_DecimalDigit) | |
|
620 FLAG(Number_Letter) | |
|
621 FLAG(Number_Other); |
|
622 return FLAG(qGetProp(ucs)->category) & test; |
|
623 } |
|
624 |
|
625 |
|
626 /*! |
|
627 Returns true if the character is a decimal digit |
|
628 (Number_DecimalDigit); otherwise returns false. |
|
629 */ |
|
630 bool QChar::isDigit() const |
|
631 { |
|
632 return (qGetProp(ucs)->category == Number_DecimalDigit); |
|
633 } |
|
634 |
|
635 |
|
636 /*! |
|
637 Returns true if the character is a symbol (Symbol_* categories); |
|
638 otherwise returns false. |
|
639 */ |
|
640 bool QChar::isSymbol() const |
|
641 { |
|
642 const int test = FLAG(Symbol_Math) | |
|
643 FLAG(Symbol_Currency) | |
|
644 FLAG(Symbol_Modifier) | |
|
645 FLAG(Symbol_Other); |
|
646 return FLAG(qGetProp(ucs)->category) & test; |
|
647 } |
|
648 |
|
649 /*! |
|
650 \fn bool QChar::isHighSurrogate() const |
|
651 |
|
652 Returns true if the QChar is the high part of a utf16 surrogate |
|
653 (ie. if its code point is between 0xd800 and 0xdbff). |
|
654 */ |
|
655 |
|
656 /*! |
|
657 \fn bool QChar::isLowSurrogate() const |
|
658 |
|
659 Returns true if the QChar is the low part of a utf16 surrogate |
|
660 (ie. if its code point is between 0xdc00 and 0xdfff). |
|
661 */ |
|
662 |
|
663 /*! |
|
664 \fn static uint QChar::surrogateToUcs4(ushort high, ushort low) |
|
665 |
|
666 Converts a UTF16 surrogate pair with the given \a high and \a low values |
|
667 to its UCS-4 code point. |
|
668 */ |
|
669 |
|
670 /*! |
|
671 \fn static uint QChar::surrogateToUcs4(QChar high, QChar low) |
|
672 |
|
673 Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code |
|
674 point. |
|
675 */ |
|
676 |
|
677 /*! |
|
678 \fn static ushort QChar::highSurrogate(uint ucs4) |
|
679 |
|
680 Returns the high surrogate value of a ucs4 code point. |
|
681 The returned result is undefined if \a ucs4 is smaller than 0x10000. |
|
682 */ |
|
683 |
|
684 /*! |
|
685 \fn static ushort QChar::lowSurrogate(uint ucs4) |
|
686 |
|
687 Returns the low surrogate value of a ucs4 code point. |
|
688 The returned result is undefined if \a ucs4 is smaller than 0x10000. |
|
689 */ |
|
690 |
|
691 /*! |
|
692 Returns the numeric value of the digit, or -1 if the character is |
|
693 not a digit. |
|
694 */ |
|
695 int QChar::digitValue() const |
|
696 { |
|
697 return qGetProp(ucs)->digitValue; |
|
698 } |
|
699 |
|
700 /*! |
|
701 \overload |
|
702 Returns the numeric value of the digit, specified by the UCS-2-encoded |
|
703 character, \a ucs2, or -1 if the character is not a digit. |
|
704 */ |
|
705 int QChar::digitValue(ushort ucs2) |
|
706 { |
|
707 return qGetProp(ucs2)->digitValue; |
|
708 } |
|
709 |
|
710 /*! |
|
711 \overload |
|
712 Returns the numeric value of the digit specified by the UCS-4-encoded |
|
713 character, \a ucs4, or -1 if the character is not a digit. |
|
714 */ |
|
715 int QChar::digitValue(uint ucs4) |
|
716 { |
|
717 if (ucs4 > LAST_UNICODE_CHAR) |
|
718 return 0; |
|
719 return qGetProp(ucs4)->digitValue; |
|
720 } |
|
721 |
|
722 /*! |
|
723 Returns the character's category. |
|
724 */ |
|
725 QChar::Category QChar::category() const |
|
726 { |
|
727 return (QChar::Category) qGetProp(ucs)->category; |
|
728 } |
|
729 |
|
730 /*! |
|
731 \overload |
|
732 \since 4.3 |
|
733 Returns the category of the UCS-4-encoded character specified by \a ucs4. |
|
734 */ |
|
735 QChar::Category QChar::category(uint ucs4) |
|
736 { |
|
737 if (ucs4 > LAST_UNICODE_CHAR) |
|
738 return QChar::NoCategory; |
|
739 return (QChar::Category) qGetProp(ucs4)->category; |
|
740 } |
|
741 |
|
742 /*! |
|
743 \overload |
|
744 Returns the category of the UCS-2-encoded character specified by \a ucs2. |
|
745 */ |
|
746 QChar::Category QChar::category(ushort ucs2) |
|
747 { |
|
748 return (QChar::Category) qGetProp(ucs2)->category; |
|
749 } |
|
750 |
|
751 |
|
752 /*! |
|
753 Returns the character's direction. |
|
754 */ |
|
755 QChar::Direction QChar::direction() const |
|
756 { |
|
757 return (QChar::Direction) qGetProp(ucs)->direction; |
|
758 } |
|
759 |
|
760 /*! |
|
761 \overload |
|
762 Returns the direction of the UCS-4-encoded character specified by \a ucs4. |
|
763 */ |
|
764 QChar::Direction QChar::direction(uint ucs4) |
|
765 { |
|
766 if (ucs4 > LAST_UNICODE_CHAR) |
|
767 return QChar::DirL; |
|
768 return (QChar::Direction) qGetProp(ucs4)->direction; |
|
769 } |
|
770 |
|
771 /*! |
|
772 \overload |
|
773 Returns the direction of the UCS-2-encoded character specified by \a ucs2. |
|
774 */ |
|
775 QChar::Direction QChar::direction(ushort ucs2) |
|
776 { |
|
777 return (QChar::Direction) qGetProp(ucs2)->direction; |
|
778 } |
|
779 |
|
780 /*! |
|
781 Returns information about the joining properties of the character |
|
782 (needed for certain languages such as Arabic). |
|
783 */ |
|
784 QChar::Joining QChar::joining() const |
|
785 { |
|
786 return (QChar::Joining) qGetProp(ucs)->joining; |
|
787 } |
|
788 |
|
789 /*! |
|
790 \overload |
|
791 Returns information about the joining properties of the UCS-4-encoded |
|
792 character specified by \a ucs4 (needed for certain languages such as |
|
793 Arabic). |
|
794 */ |
|
795 QChar::Joining QChar::joining(uint ucs4) |
|
796 { |
|
797 if (ucs4 > LAST_UNICODE_CHAR) |
|
798 return QChar::OtherJoining; |
|
799 return (QChar::Joining) qGetProp(ucs4)->joining; |
|
800 } |
|
801 |
|
802 /*! |
|
803 \overload |
|
804 Returns information about the joining properties of the UCS-2-encoded |
|
805 character specified by \a ucs2 (needed for certain languages such as |
|
806 Arabic). |
|
807 */ |
|
808 QChar::Joining QChar::joining(ushort ucs2) |
|
809 { |
|
810 return (QChar::Joining) qGetProp(ucs2)->joining; |
|
811 } |
|
812 |
|
813 |
|
814 /*! |
|
815 Returns true if the character should be reversed if the text |
|
816 direction is reversed; otherwise returns false. |
|
817 |
|
818 Same as (ch.mirroredChar() != ch). |
|
819 |
|
820 \sa mirroredChar() |
|
821 */ |
|
822 bool QChar::hasMirrored() const |
|
823 { |
|
824 return qGetProp(ucs)->mirrorDiff != 0; |
|
825 } |
|
826 |
|
827 /*! |
|
828 \fn bool QChar::isLower() const |
|
829 |
|
830 Returns true if the character is a lowercase letter, i.e. |
|
831 category() is Letter_Lowercase. |
|
832 |
|
833 \sa isUpper(), toLower(), toUpper() |
|
834 */ |
|
835 |
|
836 /*! |
|
837 \fn bool QChar::isUpper() const |
|
838 |
|
839 Returns true if the character is an uppercase letter, i.e. |
|
840 category() is Letter_Uppercase. |
|
841 |
|
842 \sa isLower(), toUpper(), toLower() |
|
843 */ |
|
844 |
|
845 /*! |
|
846 \fn bool QChar::isTitleCase() const |
|
847 \since 4.3 |
|
848 |
|
849 Returns true if the character is a titlecase letter, i.e. |
|
850 category() is Letter_Titlecase. |
|
851 |
|
852 \sa isLower(), toUpper(), toLower(), toTitleCase() |
|
853 */ |
|
854 |
|
855 /*! |
|
856 Returns the mirrored character if this character is a mirrored |
|
857 character; otherwise returns the character itself. |
|
858 |
|
859 \sa hasMirrored() |
|
860 */ |
|
861 QChar QChar::mirroredChar() const |
|
862 { |
|
863 return ucs + qGetProp(ucs)->mirrorDiff; |
|
864 } |
|
865 |
|
866 /*! \overload |
|
867 Returns the mirrored character if the UCS-4-encoded character specified |
|
868 by \a ucs4 is a mirrored character; otherwise returns the character itself. |
|
869 |
|
870 \sa hasMirrored() |
|
871 */ |
|
872 uint QChar::mirroredChar(uint ucs4) |
|
873 { |
|
874 if (ucs4 > LAST_UNICODE_CHAR) |
|
875 return ucs4; |
|
876 return ucs4 + qGetProp(ucs4)->mirrorDiff; |
|
877 } |
|
878 |
|
879 /*! |
|
880 \overload |
|
881 Returns the mirrored character if the UCS-2-encoded character specified |
|
882 by \a ucs2 is a mirrored character; otherwise returns the character itself. |
|
883 |
|
884 \sa hasMirrored() |
|
885 */ |
|
886 ushort QChar::mirroredChar(ushort ucs2) |
|
887 { |
|
888 return ucs2 + qGetProp(ucs2)->mirrorDiff; |
|
889 } |
|
890 |
|
891 |
|
// Constants for the arithmetic decomposition of Hangul syllables:
// the first code point of the syllable (S), leading (L), vowel (V) and
// trailing (T) ranges, followed by the sizes of those ranges.
enum {
    // Range starts
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,

    // Range sizes
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = Hangul_VCount * Hangul_TCount,  // 588 syllables per leading part
    Hangul_SCount = Hangul_LCount * Hangul_NCount   // 11172 syllables in total
};
|
903 |
|
904 // buffer has to have a length of 3. It's needed for Hangul decomposition |
|
905 static const unsigned short * QT_FASTCALL decompositionHelper |
|
906 (uint ucs4, int *length, int *tag, unsigned short *buffer) |
|
907 { |
|
908 *length = 0; |
|
909 if (ucs4 > LAST_UNICODE_CHAR) |
|
910 return 0; |
|
911 if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) { |
|
912 int SIndex = ucs4 - Hangul_SBase; |
|
913 buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L |
|
914 buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V |
|
915 buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T |
|
916 *length = buffer[2] == Hangul_TBase ? 2 : 3; |
|
917 *tag = QChar::Canonical; |
|
918 return buffer; |
|
919 } |
|
920 |
|
921 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); |
|
922 if (index == 0xffff) |
|
923 return 0; |
|
924 const unsigned short *decomposition = uc_decomposition_map+index; |
|
925 *tag = (*decomposition) & 0xff; |
|
926 *length = (*decomposition) >> 8; |
|
927 return decomposition+1; |
|
928 } |
|
929 |
|
930 /*! |
|
931 Decomposes a character into its parts. Returns an empty string if |
|
932 no decomposition exists. |
|
933 */ |
|
934 QString QChar::decomposition() const |
|
935 { |
|
936 return decomposition(ucs); |
|
937 } |
|
938 |
|
939 /*! |
|
940 \overload |
|
941 Decomposes the UCS-4-encoded character specified by \a ucs4 into its |
|
942 constituent parts. Returns an empty string if no decomposition exists. |
|
943 */ |
|
944 QString QChar::decomposition(uint ucs4) |
|
945 { |
|
946 unsigned short buffer[3]; |
|
947 int length; |
|
948 int tag; |
|
949 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); |
|
950 return QString::fromUtf16(d, length); |
|
951 } |
|
952 |
|
953 /*! |
|
954 Returns the tag defining the composition of the character. Returns |
|
955 QChar::Single if no decomposition exists. |
|
956 */ |
|
957 QChar::Decomposition QChar::decompositionTag() const |
|
958 { |
|
959 return decompositionTag(ucs); |
|
960 } |
|
961 |
|
962 /*! |
|
963 \overload |
|
964 Returns the tag defining the composition of the UCS-4-encoded character |
|
965 specified by \a ucs4. Returns QChar::Single if no decomposition exists. |
|
966 */ |
|
967 QChar::Decomposition QChar::decompositionTag(uint ucs4) |
|
968 { |
|
969 if (ucs4 > LAST_UNICODE_CHAR) |
|
970 return QChar::NoDecomposition; |
|
971 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); |
|
972 if (index == 0xffff) |
|
973 return QChar::NoDecomposition; |
|
974 return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff); |
|
975 } |
|
976 |
|
977 /*! |
|
978 Returns the combining class for the character as defined in the |
|
979 Unicode standard. This is mainly useful as a positioning hint for |
|
980 marks attached to a base character. |
|
981 |
|
982 The Qt text rendering engine uses this information to correctly |
|
983 position non-spacing marks around a base character. |
|
984 */ |
|
985 unsigned char QChar::combiningClass() const |
|
986 { |
|
987 return (unsigned char) qGetProp(ucs)->combiningClass; |
|
988 } |
|
989 |
|
990 /*! \overload |
|
991 Returns the combining class for the UCS-4-encoded character specified by |
|
992 \a ucs4, as defined in the Unicode standard. |
|
993 */ |
|
994 unsigned char QChar::combiningClass(uint ucs4) |
|
995 { |
|
996 if (ucs4 > LAST_UNICODE_CHAR) |
|
997 return 0; |
|
998 return (unsigned char) qGetProp(ucs4)->combiningClass; |
|
999 } |
|
1000 |
|
1001 /*! \overload |
|
1002 Returns the combining class for the UCS-2-encoded character specified by |
|
1003 \a ucs2, as defined in the Unicode standard. |
|
1004 */ |
|
1005 unsigned char QChar::combiningClass(ushort ucs2) |
|
1006 { |
|
1007 return (unsigned char) qGetProp(ucs2)->combiningClass; |
|
1008 } |
|
1009 |
|
1010 |
|
1011 /*! |
|
1012 Returns the Unicode version that introduced this character. |
|
1013 */ |
|
1014 QChar::UnicodeVersion QChar::unicodeVersion() const |
|
1015 { |
|
1016 return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion; |
|
1017 } |
|
1018 |
|
1019 /*! \overload |
|
1020 Returns the Unicode version that introduced the character specified in |
|
1021 its UCS-4-encoded form as \a ucs4. |
|
1022 */ |
|
1023 QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) |
|
1024 { |
|
1025 if (ucs4 > LAST_UNICODE_CHAR) |
|
1026 return QChar::Unicode_Unassigned; |
|
1027 return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion; |
|
1028 } |
|
1029 |
|
1030 /*! \overload |
|
1031 Returns the Unicode version that introduced the character specified in |
|
1032 its UCS-2-encoded form as \a ucs2. |
|
1033 */ |
|
1034 QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2) |
|
1035 { |
|
1036 return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion; |
|
1037 } |
|
1038 |
|
1039 |
|
1040 /*! |
|
1041 Returns the lowercase equivalent if the character is uppercase or titlecase; |
|
1042 otherwise returns the character itself. |
|
1043 */ |
|
1044 QChar QChar::toLower() const |
|
1045 { |
|
1046 const QUnicodeTables::Properties *p = qGetProp(ucs); |
|
1047 if (!p->lowerCaseSpecial) |
|
1048 return ucs + p->lowerCaseDiff; |
|
1049 return ucs; |
|
1050 } |
|
1051 |
|
1052 /*! \overload |
|
1053 Returns the lowercase equivalent of the UCS-4-encoded character specified |
|
1054 by \a ucs4 if the character is uppercase or titlecase; otherwise returns |
|
1055 the character itself. |
|
1056 */ |
|
1057 uint QChar::toLower(uint ucs4) |
|
1058 { |
|
1059 if (ucs4 > LAST_UNICODE_CHAR) |
|
1060 return ucs4; |
|
1061 const QUnicodeTables::Properties *p = qGetProp(ucs4); |
|
1062 if (!p->lowerCaseSpecial) |
|
1063 return ucs4 + p->lowerCaseDiff; |
|
1064 return ucs4; |
|
1065 } |
|
1066 |
|
1067 /*! \overload |
|
1068 Returns the lowercase equivalent of the UCS-2-encoded character specified |
|
1069 by \a ucs2 if the character is uppercase or titlecase; otherwise returns |
|
1070 the character itself. |
|
1071 */ |
|
1072 ushort QChar::toLower(ushort ucs2) |
|
1073 { |
|
1074 const QUnicodeTables::Properties *p = qGetProp(ucs2); |
|
1075 if (!p->lowerCaseSpecial) |
|
1076 return ucs2 + p->lowerCaseDiff; |
|
1077 return ucs2; |
|
1078 } |
|
1079 |
|
1080 /*! |
|
1081 Returns the uppercase equivalent if the character is lowercase or titlecase; |
|
1082 otherwise returns the character itself. |
|
1083 */ |
|
1084 QChar QChar::toUpper() const |
|
1085 { |
|
1086 const QUnicodeTables::Properties *p = qGetProp(ucs); |
|
1087 if (!p->upperCaseSpecial) |
|
1088 return ucs + p->upperCaseDiff; |
|
1089 return ucs; |
|
1090 } |
|
1091 |
|
1092 /*! \overload |
|
1093 Returns the uppercase equivalent of the UCS-4-encoded character specified |
|
1094 by \a ucs4 if the character is lowercase or titlecase; otherwise returns |
|
1095 the character itself. |
|
1096 */ |
|
1097 uint QChar::toUpper(uint ucs4) |
|
1098 { |
|
1099 if (ucs4 > LAST_UNICODE_CHAR) |
|
1100 return ucs4; |
|
1101 const QUnicodeTables::Properties *p = qGetProp(ucs4); |
|
1102 if (!p->upperCaseSpecial) |
|
1103 return ucs4 + p->upperCaseDiff; |
|
1104 return ucs4; |
|
1105 } |
|
1106 |
|
1107 /*! \overload |
|
1108 Returns the uppercase equivalent of the UCS-2-encoded character specified |
|
1109 by \a ucs2 if the character is lowercase or titlecase; otherwise returns |
|
1110 the character itself. |
|
1111 */ |
|
1112 ushort QChar::toUpper(ushort ucs2) |
|
1113 { |
|
1114 const QUnicodeTables::Properties *p = qGetProp(ucs2); |
|
1115 if (!p->upperCaseSpecial) |
|
1116 return ucs2 + p->upperCaseDiff; |
|
1117 return ucs2; |
|
1118 } |
|
1119 |
|
1120 /*! |
|
1121 Returns the title case equivalent if the character is lowercase or uppercase; |
|
1122 otherwise returns the character itself. |
|
1123 */ |
|
1124 QChar QChar::toTitleCase() const |
|
1125 { |
|
1126 const QUnicodeTables::Properties *p = qGetProp(ucs); |
|
1127 if (!p->titleCaseSpecial) |
|
1128 return ucs + p->titleCaseDiff; |
|
1129 return ucs; |
|
1130 } |
|
1131 |
|
1132 /*! |
|
1133 \overload |
|
1134 Returns the title case equivalent of the UCS-4-encoded character specified |
|
1135 by \a ucs4 if the character is lowercase or uppercase; otherwise returns |
|
1136 the character itself. |
|
1137 */ |
|
1138 uint QChar::toTitleCase(uint ucs4) |
|
1139 { |
|
1140 if (ucs4 > LAST_UNICODE_CHAR) |
|
1141 return ucs4; |
|
1142 const QUnicodeTables::Properties *p = qGetProp(ucs4); |
|
1143 if (!p->titleCaseSpecial) |
|
1144 return ucs4 + p->titleCaseDiff; |
|
1145 return ucs4; |
|
1146 } |
|
1147 |
|
1148 /*! |
|
1149 \overload |
|
1150 Returns the title case equivalent of the UCS-2-encoded character specified |
|
1151 by \a ucs2 if the character is lowercase or uppercase; otherwise returns |
|
1152 the character itself. |
|
1153 */ |
|
1154 ushort QChar::toTitleCase(ushort ucs2) |
|
1155 { |
|
1156 const QUnicodeTables::Properties *p = qGetProp(ucs2); |
|
1157 if (!p->titleCaseSpecial) |
|
1158 return ucs2 + p->titleCaseDiff; |
|
1159 return ucs2; |
|
1160 } |
|
1161 |
|
1162 |
|
1163 static inline uint foldCase(const ushort *ch, const ushort *start) |
|
1164 { |
|
1165 uint c = *ch; |
|
1166 if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate()) |
|
1167 c = QChar::surrogateToUcs4(*(ch - 1), c); |
|
1168 return *ch + qGetProp(c)->caseFoldDiff; |
|
1169 } |
|
1170 |
|
1171 static inline uint foldCase(uint ch, uint &last) |
|
1172 { |
|
1173 uint c = ch; |
|
1174 if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate()) |
|
1175 c = QChar::surrogateToUcs4(last, c); |
|
1176 last = ch; |
|
1177 return ch + qGetProp(c)->caseFoldDiff; |
|
1178 } |
|
1179 |
|
1180 static inline ushort foldCase(ushort ch) |
|
1181 { |
|
1182 return ch + qGetProp(ch)->caseFoldDiff; |
|
1183 } |
|
1184 |
|
1185 /*! |
|
1186 Returns the case folded equivalent of the character. For most Unicode characters this |
|
1187 is the same as toLowerCase(). |
|
1188 */ |
|
1189 QChar QChar::toCaseFolded() const |
|
1190 { |
|
1191 return ucs + qGetProp(ucs)->caseFoldDiff; |
|
1192 } |
|
1193 |
|
1194 /*! |
|
1195 \overload |
|
1196 Returns the case folded equivalent of the UCS-4-encoded character specified |
|
1197 by \a ucs4. For most Unicode characters this is the same as toLowerCase(). |
|
1198 */ |
|
1199 uint QChar::toCaseFolded(uint ucs4) |
|
1200 { |
|
1201 if (ucs4 > LAST_UNICODE_CHAR) |
|
1202 return ucs4; |
|
1203 return ucs4 + qGetProp(ucs4)->caseFoldDiff; |
|
1204 } |
|
1205 |
|
1206 /*! |
|
1207 \overload |
|
1208 Returns the case folded equivalent of the UCS-2-encoded character specified |
|
1209 by \a ucs2. For most Unicode characters this is the same as toLowerCase(). |
|
1210 */ |
|
1211 ushort QChar::toCaseFolded(ushort ucs2) |
|
1212 { |
|
1213 return ucs2 + qGetProp(ucs2)->caseFoldDiff; |
|
1214 } |
|
1215 |
|
1216 |
|
1217 /*! |
|
1218 \fn char QChar::latin1() const |
|
1219 |
|
1220 Use toLatin1() instead. |
|
1221 */ |
|
1222 |
|
1223 /*! |
|
1224 \fn char QChar::ascii() const |
|
1225 |
|
1226 Use toAscii() instead. |
|
1227 */ |
|
1228 |
|
1229 /*! |
|
1230 \fn char QChar::toLatin1() const |
|
1231 |
|
1232 Returns the Latin-1 character equivalent to the QChar, or 0. This |
|
1233 is mainly useful for non-internationalized software. |
|
1234 |
|
1235 \sa toAscii(), unicode(), QTextCodec::codecForCStrings() |
|
1236 */ |
|
1237 |
|
1238 /*! |
|
1239 \fn char QChar::toAscii() const |
|
1240 Returns the character value of the QChar obtained using the current |
|
1241 codec used to read C strings, or 0 if the character is not representable |
|
1242 using this codec. The default codec handles Latin-1 encoded text, |
|
1243 but this can be changed to assist developers writing source code using |
|
1244 other encodings. |
|
1245 |
|
1246 The main purpose of this function is to preserve ASCII characters used |
|
1247 in C strings. This is mainly useful for developers of non-internationalized |
|
1248 software. |
|
1249 |
|
1250 \sa toLatin1(), unicode(), QTextCodec::codecForCStrings() |
|
1251 */ |
|
1252 #ifdef Q_COMPILER_MANGLES_RETURN_TYPE |
|
1253 const char QChar::toAscii() const |
|
1254 #else |
|
1255 char QChar::toAscii() const |
|
1256 #endif |
|
1257 { |
|
1258 #ifndef QT_NO_CODEC_FOR_C_STRINGS |
|
1259 if (QTextCodec::codecForCStrings()) |
|
1260 // ##### |
|
1261 return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0); |
|
1262 #endif |
|
1263 return ucs > 0xff ? 0 : char(ucs); |
|
1264 } |
|
1265 |
|
1266 /*! |
|
1267 \fn QChar QChar::fromLatin1(char c) |
|
1268 |
|
1269 Converts the Latin-1 character \a c to its equivalent QChar. This |
|
1270 is mainly useful for non-internationalized software. |
|
1271 |
|
1272 \sa fromAscii(), unicode(), QTextCodec::codecForCStrings() |
|
1273 */ |
|
1274 |
|
1275 /*! |
|
1276 Converts the ASCII character \a c to its equivalent QChar. This |
|
1277 is mainly useful for non-internationalized software. |
|
1278 |
|
1279 An alternative is to use QLatin1Char. |
|
1280 |
|
1281 \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings() |
|
1282 */ |
|
1283 QChar QChar::fromAscii(char c) |
|
1284 { |
|
1285 #ifndef QT_NO_CODEC_FOR_C_STRINGS |
|
1286 if (QTextCodec::codecForCStrings()) |
|
1287 // ##### |
|
1288 return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode(); |
|
1289 #endif |
|
1290 return QChar(ushort((uchar)c)); |
|
1291 } |
|
1292 |
|
1293 #ifndef QT_NO_DATASTREAM |
|
1294 /*! |
|
1295 \relates QChar |
|
1296 |
|
1297 Writes the char \a chr to the stream \a out. |
|
1298 |
|
1299 \sa {Format of the QDataStream operators} |
|
1300 */ |
|
1301 |
|
1302 QDataStream &operator<<(QDataStream &out, const QChar &chr) |
|
1303 { |
|
1304 out << quint16(chr.unicode()); |
|
1305 return out; |
|
1306 } |
|
1307 |
|
1308 |
|
1309 /*! |
|
1310 \relates QChar |
|
1311 |
|
1312 Reads a char from the stream \a in into char \a chr. |
|
1313 |
|
1314 \sa {Format of the QDataStream operators} |
|
1315 */ |
|
1316 |
|
1317 QDataStream &operator>>(QDataStream &in, QChar &chr) |
|
1318 { |
|
1319 quint16 u; |
|
1320 in >> u; |
|
1321 chr.unicode() = ushort(u); |
|
1322 return in; |
|
1323 } |
|
1324 #endif // QT_NO_DATASTREAM |
|
1325 |
|
1326 /*! |
|
1327 \fn ushort & QChar::unicode() |
|
1328 |
|
1329 Returns a reference to the numeric Unicode value of the QChar. |
|
1330 */ |
|
1331 |
|
1332 /*! |
|
1333 \fn ushort QChar::unicode() const |
|
1334 |
|
1335 \overload |
|
1336 */ |
|
1337 |
|
1338 /***************************************************************************** |
|
1339 Documentation of QChar related functions |
|
1340 *****************************************************************************/ |
|
1341 |
|
1342 /*! |
|
1343 \fn bool operator==(QChar c1, QChar c2) |
|
1344 |
|
1345 \relates QChar |
|
1346 |
|
1347 Returns true if \a c1 and \a c2 are the same Unicode character; |
|
1348 otherwise returns false. |
|
1349 */ |
|
1350 |
|
1351 /*! |
|
1352 \fn int operator!=(QChar c1, QChar c2) |
|
1353 |
|
1354 \relates QChar |
|
1355 |
|
1356 Returns true if \a c1 and \a c2 are not the same Unicode |
|
1357 character; otherwise returns false. |
|
1358 */ |
|
1359 |
|
1360 /*! |
|
1361 \fn int operator<=(QChar c1, QChar c2) |
|
1362 |
|
1363 \relates QChar |
|
1364 |
|
1365 Returns true if the numeric Unicode value of \a c1 is less than |
|
1366 or equal to that of \a c2; otherwise returns false. |
|
1367 */ |
|
1368 |
|
1369 /*! |
|
1370 \fn int operator>=(QChar c1, QChar c2) |
|
1371 |
|
1372 \relates QChar |
|
1373 |
|
1374 Returns true if the numeric Unicode value of \a c1 is greater than |
|
1375 or equal to that of \a c2; otherwise returns false. |
|
1376 */ |
|
1377 |
|
1378 /*! |
|
1379 \fn int operator<(QChar c1, QChar c2) |
|
1380 |
|
1381 \relates QChar |
|
1382 |
|
1383 Returns true if the numeric Unicode value of \a c1 is less than |
|
1384 that of \a c2; otherwise returns false. |
|
1385 */ |
|
1386 |
|
1387 /*! |
|
1388 \fn int operator>(QChar c1, QChar c2) |
|
1389 |
|
1390 \relates QChar |
|
1391 |
|
1392 Returns true if the numeric Unicode value of \a c1 is greater than |
|
1393 that of \a c2; otherwise returns false. |
|
1394 */ |
|
1395 |
|
1396 /*! |
|
1397 \fn bool QChar::mirrored() const |
|
1398 |
|
1399 Use hasMirrored() instead. |
|
1400 */ |
|
1401 |
|
1402 /*! |
|
1403 \fn QChar QChar::lower() const |
|
1404 |
|
1405 Use toLower() instead. |
|
1406 */ |
|
1407 |
|
1408 /*! |
|
1409 \fn QChar QChar::upper() const |
|
1410 |
|
1411 Use toUpper() instead. |
|
1412 */ |
|
1413 |
|
1414 /*! |
|
1415 \fn bool QChar::networkOrdered() |
|
1416 |
|
1417 See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead. |
|
1418 */ |
|
1419 |
|
1420 |
|
1421 // --------------------------------------------------------------------------- |
|
1422 |
|
1423 |
|
1424 static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from) |
|
1425 { |
|
1426 unsigned short buffer[3]; |
|
1427 |
|
1428 QString &s = *str; |
|
1429 |
|
1430 const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data()); |
|
1431 const unsigned short *uc = utf16 + s.length(); |
|
1432 while (uc != utf16 + from) { |
|
1433 uint ucs4 = *(--uc); |
|
1434 if (QChar(ucs4).isLowSurrogate() && uc != utf16) { |
|
1435 ushort high = *(uc - 1); |
|
1436 if (QChar(high).isHighSurrogate()) { |
|
1437 --uc; |
|
1438 ucs4 = QChar::surrogateToUcs4(high, ucs4); |
|
1439 } |
|
1440 } |
|
1441 if (QChar::unicodeVersion(ucs4) > version) |
|
1442 continue; |
|
1443 int length; |
|
1444 int tag; |
|
1445 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); |
|
1446 if (!d || (canonical && tag != QChar::Canonical)) |
|
1447 continue; |
|
1448 |
|
1449 s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length); |
|
1450 // since the insert invalidates the pointers and we do decomposition recursive |
|
1451 int pos = uc - utf16; |
|
1452 utf16 = reinterpret_cast<unsigned short *>(s.data()); |
|
1453 uc = utf16 + pos + length; |
|
1454 } |
|
1455 } |
|
1456 |
|
1457 |
|
1458 static ushort ligatureHelper(ushort u1, ushort u2) |
|
1459 { |
|
1460 // hangul L-V pair |
|
1461 int LIndex = u1 - Hangul_LBase; |
|
1462 if (0 <= LIndex && LIndex < Hangul_LCount) { |
|
1463 int VIndex = u2 - Hangul_VBase; |
|
1464 if (0 <= VIndex && VIndex < Hangul_VCount) |
|
1465 return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount; |
|
1466 } |
|
1467 |
|
1468 // hangul LV-T pair |
|
1469 int SIndex = u1 - Hangul_SBase; |
|
1470 if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) { |
|
1471 int TIndex = u2 - Hangul_TBase; |
|
1472 if (0 <= TIndex && TIndex <= Hangul_TCount) |
|
1473 return u1 + TIndex; |
|
1474 } |
|
1475 |
|
1476 const unsigned short index = GET_LIGATURE_INDEX(u2); |
|
1477 if (index == 0xffff) |
|
1478 return 0; |
|
1479 const unsigned short *ligatures = uc_ligature_map+index; |
|
1480 ushort length = *ligatures; |
|
1481 ++ligatures; |
|
1482 // ### use bsearch |
|
1483 for (uint i = 0; i < length; ++i) |
|
1484 if (ligatures[2*i] == u1) |
|
1485 return ligatures[2*i+1]; |
|
1486 return 0; |
|
1487 } |
|
1488 |
|
1489 static void composeHelper(QString *str, int from) |
|
1490 { |
|
1491 QString &s = *str; |
|
1492 |
|
1493 if (s.length() - from < 2) |
|
1494 return; |
|
1495 |
|
1496 // the loop can partly ignore high Unicode as all ligatures are in the BMP |
|
1497 int starter = 0; |
|
1498 int lastCombining = 0; |
|
1499 int pos = from; |
|
1500 while (pos < s.length()) { |
|
1501 uint uc = s.at(pos).unicode(); |
|
1502 if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { |
|
1503 ushort low = s.at(pos+1).unicode(); |
|
1504 if (QChar(low).isLowSurrogate()) { |
|
1505 uc = QChar::surrogateToUcs4(uc, low); |
|
1506 ++pos; |
|
1507 } |
|
1508 } |
|
1509 int combining = QChar::combiningClass(uc); |
|
1510 if (starter == pos - 1 || combining > lastCombining) { |
|
1511 // allowed to form ligature with S |
|
1512 QChar ligature = ligatureHelper(s.at(starter).unicode(), uc); |
|
1513 if (ligature.unicode()) { |
|
1514 s[starter] = ligature; |
|
1515 s.remove(pos, 1); |
|
1516 continue; |
|
1517 } |
|
1518 } |
|
1519 if (!combining) |
|
1520 starter = pos; |
|
1521 lastCombining = combining; |
|
1522 ++pos; |
|
1523 } |
|
1524 } |
|
1525 |
|
1526 |
|
1527 static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from) |
|
1528 { |
|
1529 QString &s = *str; |
|
1530 const int l = s.length()-1; |
|
1531 int pos = from; |
|
1532 while (pos < l) { |
|
1533 int p2 = pos+1; |
|
1534 uint u1 = s.at(pos).unicode(); |
|
1535 if (QChar(u1).isHighSurrogate()) { |
|
1536 ushort low = s.at(pos+1).unicode(); |
|
1537 if (QChar(low).isLowSurrogate()) { |
|
1538 p2++; |
|
1539 u1 = QChar::surrogateToUcs4(u1, low); |
|
1540 if (p2 >= l) |
|
1541 break; |
|
1542 } |
|
1543 } |
|
1544 uint u2 = s.at(p2).unicode(); |
|
1545 if (QChar(u2).isHighSurrogate() && p2 < l-1) { |
|
1546 ushort low = s.at(p2+1).unicode(); |
|
1547 if (QChar(low).isLowSurrogate()) { |
|
1548 p2++; |
|
1549 u2 = QChar::surrogateToUcs4(u2, low); |
|
1550 } |
|
1551 } |
|
1552 |
|
1553 int c2 = QChar::combiningClass(u2); |
|
1554 if (QChar::unicodeVersion(u2) > version) |
|
1555 c2 = 0; |
|
1556 |
|
1557 if (c2 == 0) { |
|
1558 pos = p2+1; |
|
1559 continue; |
|
1560 } |
|
1561 int c1 = QChar::combiningClass(u1); |
|
1562 if (QChar::unicodeVersion(u1) > version) |
|
1563 c1 = 0; |
|
1564 |
|
1565 if (c1 > c2) { |
|
1566 QChar *uc = s.data(); |
|
1567 int p = pos; |
|
1568 // exchange characters |
|
1569 if (u2 < 0x10000) { |
|
1570 uc[p++] = u2; |
|
1571 } else { |
|
1572 uc[p++] = QChar::highSurrogate(u2); |
|
1573 uc[p++] = QChar::lowSurrogate(u2); |
|
1574 } |
|
1575 if (u1 < 0x10000) { |
|
1576 uc[p++] = u1; |
|
1577 } else { |
|
1578 uc[p++] = QChar::highSurrogate(u1); |
|
1579 uc[p++] = QChar::lowSurrogate(u1); |
|
1580 } |
|
1581 if (pos > 0) |
|
1582 --pos; |
|
1583 if (pos > 0 && s.at(pos).isLowSurrogate()) |
|
1584 --pos; |
|
1585 } else { |
|
1586 ++pos; |
|
1587 if (u1 > 0x10000) |
|
1588 ++pos; |
|
1589 } |
|
1590 } |
|
1591 } |
|
1592 |
|
1593 int QT_FASTCALL QUnicodeTables::script(unsigned int uc) |
|
1594 { |
|
1595 if (uc > 0xffff) |
|
1596 return Common; |
|
1597 int script = uc_scripts[uc >> 7]; |
|
1598 if (script < ScriptSentinel) |
|
1599 return script; |
|
1600 script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount); |
|
1601 script = uc_scripts[script + (uc & 0x7f)]; |
|
1602 return script; |
|
1603 } |
|
1604 |
|
1605 |
|
1606 Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4) |
|
1607 { |
|
1608 return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class; |
|
1609 } |
|
1610 |
|
1611 |
|
1612 QT_END_NAMESPACE |