src/corelib/tools/qchar.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
child 7 f7bc934e204c
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtCore module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 // Don't define it while compiling this module, or USERS of Qt will
       
    43 // not be able to link.
       
    44 #ifdef QT_NO_CAST_FROM_ASCII
       
    45 #undef QT_NO_CAST_FROM_ASCII
       
    46 #endif
       
    47 #ifdef QT_NO_CAST_TO_ASCII
       
    48 #undef QT_NO_CAST_TO_ASCII
       
    49 #endif
       
    50 #include "qchar.h"
       
    51 #include "qdatastream.h"
       
    52 #include "qtextcodec.h"
       
    53 
       
    54 #include "qunicodetables_p.h"
       
    55 
       
    56 #include "qunicodetables.cpp"
       
    57 
       
    58 QT_BEGIN_NAMESPACE
       
    59 
       
    60 #define LAST_UNICODE_CHAR 0x10ffff
       
    61 
       
    62 #ifndef QT_NO_CODEC_FOR_C_STRINGS
       
    63 #ifdef QT_NO_TEXTCODEC
       
    64 #define QT_NO_CODEC_FOR_C_STRINGS
       
    65 #endif
       
    66 #endif
       
    67 
       
    68 #define FLAG(x) (1 << (x))
       
    69 
       
    70 /*! \class QLatin1Char
       
    71     \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
       
    72 
       
    73     \ingroup string-processing
       
    74 
       
    75     This class is only useful to avoid the codec for C strings business
       
    76     in the QChar(ch) constructor. You can avoid it by writing
       
    77     QChar(ch, 0).
       
    78 
       
    79     \sa QChar, QLatin1String, QString
       
    80 */
       
    81 
       
    82 /*!
       
    83     \fn const char QLatin1Char::toLatin1() const
       
    84 
       
    85     Converts a Latin-1 character to an 8-bit ASCII representation of
       
    86     the character.
       
    87 */
       
    88 
       
    89 /*!
       
    90     \fn const ushort QLatin1Char::unicode() const
       
    91 
       
    92     Converts a Latin-1 character to a 16-bit-encoded Unicode representation
       
    93     of the character.
       
    94 */
       
    95 
       
    96 /*!
       
    97     \fn QLatin1Char::QLatin1Char(char c)
       
    98 
       
    99     Constructs a Latin-1 character for \a c. This constructor should be
       
   100     used when the encoding of the input character is known to be Latin-1.
       
   101 */
       
   102 
       
   103 /*!
       
   104     \class QChar
       
   105     \brief The QChar class provides a 16-bit Unicode character.
       
   106 
       
   107     \ingroup string-processing
       
   108     \reentrant
       
   109 
       
   110     In Qt, Unicode characters are 16-bit entities without any markup
       
   111     or structure. This class represents such an entity. It is
       
   112     lightweight, so it can be used everywhere. Most compilers treat
       
   113     it like an \c{unsigned short}.
       
   114 
       
   115     QChar provides a full complement of testing/classification
       
   116     functions, converting to and from other formats, converting from
       
   117     composed to decomposed Unicode, and trying to compare and
       
   118     case-convert if you ask it to.
       
   119 
       
   120     The classification functions include functions like those in the
       
   121     standard C++ header \<cctype\> (formerly \<ctype.h\>), but
       
   122     operating on the full range of Unicode characters. They all
       
   123     return true if the character is a certain type of character;
       
   124     otherwise they return false. These classification functions are
       
   125     isNull() (returns true if the character is '\\0'), isPrint()
       
   126     (true if the character is any sort of printable character,
       
   127     including whitespace), isPunct() (any sort of punctuation),
       
   128     isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
       
   129     sort of numeric character, not just 0-9), isLetterOrNumber(), and
       
   130     isDigit() (decimal digits). All of these are wrappers around
       
   131     category() which returns the Unicode-defined category of each
       
   132     character.
       
   133 
       
   134     QChar also provides direction(), which indicates the "natural"
       
   135     writing direction of this character. The joining() function
       
   136     indicates how the character joins with its neighbors (needed
       
   137     mostly for Arabic) and finally hasMirrored(), which indicates
       
   138     whether the character needs to be mirrored when it is printed in
       
   139     its "unnatural" writing direction.
       
   140 
       
   141     Composed Unicode characters (like \aring) can be converted to
       
   142     decomposed Unicode ("a" followed by "ring above") by using
       
   143     decomposition().
       
   144 
       
   145     In Unicode, comparison is not necessarily possible and case
       
   146     conversion is very difficult at best. Unicode, covering the
       
   147     "entire" world, also includes most of the world's case and
       
   148     sorting problems. operator==() and friends will do comparison
       
   149     based purely on the numeric Unicode value (code point) of the
       
   150     characters, and toUpper() and toLower() will do case changes when
       
   151     the character has a well-defined uppercase/lowercase equivalent.
       
   152     For locale-dependent comparisons, use
       
   153     QString::localeAwareCompare().
       
   154 
       
   155     The conversion functions include unicode() (to a scalar),
       
   156     toLatin1() (to scalar, but converts all non-Latin-1 characters to
       
   157     0), row() (gives the Unicode row), cell() (gives the Unicode
       
   158     cell), digitValue() (gives the integer value of any of the
       
   159     numerous digit characters), and a host of constructors.
       
   160 
       
   161     QChar provides constructors and cast operators that make it easy
       
   162     to convert to and from traditional 8-bit \c{char}s. If you
       
   163     defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
       
   164     explained in the QString documentation, you will need to
       
   165     explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
       
   166     to construct a QChar from an 8-bit \c char, and you will need to
       
   167     call toAscii() or toLatin1() to get the 8-bit value back.
       
   168 
       
   169     \sa QString, Unicode, QLatin1Char
       
   170 */
       
   171 
       
   172 /*!
       
   173     \enum QChar::UnicodeVersion
       
   174 
       
   175     Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
       
   176     introduced a certain character.
       
   177 
       
   178     \value Unicode_1_1  Version 1.1
       
   179     \value Unicode_2_0  Version 2.0
       
   180     \value Unicode_2_1_2  Version 2.1.2
       
   181     \value Unicode_3_0  Version 3.0
       
   182     \value Unicode_3_1  Version 3.1
       
   183     \value Unicode_3_2  Version 3.2
       
   184     \value Unicode_4_0  Version 4.0
       
   185     \value Unicode_4_1  Version 4.1
       
   186     \value Unicode_5_0  Version 5.0
       
   187     \value Unicode_Unassigned  The value is not assigned to any character
       
   188         in version 5.0 of Unicode.
       
   189 
       
   190     \sa unicodeVersion()
       
   191 */
       
   192 
       
   193 /*!
       
   194     \enum QChar::Category
       
   195 
       
   196     This enum maps the Unicode character categories.
       
   197 
       
   198     The following categories are normative in Unicode:
       
   199 
       
   200     \value Mark_NonSpacing  Unicode class name Mn
       
   201 
       
   202     \value Mark_SpacingCombining  Unicode class name Mc
       
   203 
       
   204     \value Mark_Enclosing  Unicode class name Me
       
   205 
       
   206     \value Number_DecimalDigit  Unicode class name Nd
       
   207 
       
   208     \value Number_Letter  Unicode class name Nl
       
   209 
       
   210     \value Number_Other  Unicode class name No
       
   211 
       
   212     \value Separator_Space  Unicode class name Zs
       
   213 
       
   214     \value Separator_Line  Unicode class name Zl
       
   215 
       
   216     \value Separator_Paragraph  Unicode class name Zp
       
   217 
       
   218     \value Other_Control  Unicode class name Cc
       
   219 
       
   220     \value Other_Format  Unicode class name Cf
       
   221 
       
   222     \value Other_Surrogate  Unicode class name Cs
       
   223 
       
   224     \value Other_PrivateUse  Unicode class name Co
       
   225 
       
   226     \value Other_NotAssigned  Unicode class name Cn
       
   227 
       
   228 
       
   229     The following categories are informative in Unicode:
       
   230 
       
   231     \value Letter_Uppercase  Unicode class name Lu
       
   232 
       
   233     \value Letter_Lowercase  Unicode class name Ll
       
   234 
       
   235     \value Letter_Titlecase  Unicode class name Lt
       
   236 
       
   237     \value Letter_Modifier  Unicode class name Lm
       
   238 
       
   239     \value Letter_Other Unicode class name Lo
       
   240 
       
   241     \value Punctuation_Connector  Unicode class name Pc
       
   242 
       
   243     \value Punctuation_Dash  Unicode class name Pd
       
   244 
       
   245     \value Punctuation_Open  Unicode class name Ps
       
   246 
       
   247     \value Punctuation_Close  Unicode class name Pe
       
   248 
       
   249     \value Punctuation_InitialQuote  Unicode class name Pi
       
   250 
       
   251     \value Punctuation_FinalQuote  Unicode class name Pf
       
   252 
       
   253     \value Punctuation_Other  Unicode class name Po
       
   254 
       
   255     \value Symbol_Math  Unicode class name Sm
       
   256 
       
   257     \value Symbol_Currency  Unicode class name Sc
       
   258 
       
   259     \value Symbol_Modifier  Unicode class name Sk
       
   260 
       
   261     \value Symbol_Other  Unicode class name So
       
   262 
       
   263     \value NoCategory  Qt cannot find an appropriate category for the character.
       
   264 
       
   265     \omitvalue Punctuation_Dask
       
   266 
       
   267     \sa category()
       
   268 */
       
   269 
       
   270 /*!
       
   271     \enum QChar::Direction
       
   272 
       
   273     This enum type defines the Unicode direction attributes. See the
       
   274     \l{http://www.unicode.org/}{Unicode Standard} for a description
       
   275     of the values.
       
   276 
       
   277     In order to conform to C/C++ naming conventions "Dir" is prepended
       
   278     to the codes used in the Unicode Standard.
       
   279 
       
   280     \value DirAL
       
   281     \value DirAN
       
   282     \value DirB
       
   283     \value DirBN
       
   284     \value DirCS
       
   285     \value DirEN
       
   286     \value DirES
       
   287     \value DirET
       
   288     \value DirL
       
   289     \value DirLRE
       
   290     \value DirLRO
       
   291     \value DirNSM
       
   292     \value DirON
       
   293     \value DirPDF
       
   294     \value DirR
       
   295     \value DirRLE
       
   296     \value DirRLO
       
   297     \value DirS
       
   298     \value DirWS
       
   299 
       
   300     \sa direction()
       
   301 */
       
   302 
       
   303 /*!
       
   304     \enum QChar::Decomposition
       
   305 
       
   306     This enum type defines the Unicode decomposition attributes. See
       
   307     the \l{http://www.unicode.org/}{Unicode Standard} for a
       
   308     description of the values.
       
   309 
       
   310     \value NoDecomposition
       
   311     \value Canonical
       
   312     \value Circle
       
   313     \value Compat
       
   314     \value Final
       
   315     \value Font
       
   316     \value Fraction
       
   317     \value Initial
       
   318     \value Isolated
       
   319     \value Medial
       
   320     \value Narrow
       
   321     \value NoBreak
       
   322     \value Small
       
   323     \value Square
       
   324     \value Sub
       
   325     \value Super
       
   326     \value Vertical
       
   327     \value Wide
       
   328 
       
   329     \omitvalue Single
       
   330 
       
   331     \sa decomposition()
       
   332 */
       
   333 
       
   334 /*!
       
   335     \enum QChar::Joining
       
   336 
       
   337     This enum type defines the Unicode joining attributes. See the
       
   338     \l{http://www.unicode.org/}{Unicode Standard} for a description
       
   339     of the values.
       
   340 
       
   341     \value Center
       
   342     \value Dual
       
   343     \value OtherJoining
       
   344     \value Right
       
   345 
       
   346     \sa joining()
       
   347 */
       
   348 
       
   349 /*!
       
   350     \enum QChar::CombiningClass
       
   351 
       
   352     \internal
       
   353 
       
   354     This enum type defines names for some of the Unicode combining
       
   355     classes. See the \l{http://www.unicode.org/}{Unicode Standard}
       
   356     for a description of the values.
       
   357 
       
   358     \value Combining_Above
       
   359     \value Combining_AboveAttached
       
   360     \value Combining_AboveLeft
       
   361     \value Combining_AboveLeftAttached
       
   362     \value Combining_AboveRight
       
   363     \value Combining_AboveRightAttached
       
   364     \value Combining_Below
       
   365     \value Combining_BelowAttached
       
   366     \value Combining_BelowLeft
       
   367     \value Combining_BelowLeftAttached
       
   368     \value Combining_BelowRight
       
   369     \value Combining_BelowRightAttached
       
   370     \value Combining_DoubleAbove
       
   371     \value Combining_DoubleBelow
       
   372     \value Combining_IotaSubscript
       
   373     \value Combining_Left
       
   374     \value Combining_LeftAttached
       
   375     \value Combining_Right
       
   376     \value Combining_RightAttached
       
   377 */
       
   378 
       
   379 /*!
       
   380     \enum QChar::SpecialCharacter
       
   381 
       
   382     \value Null A QChar with this value isNull().
       
   383     \value Nbsp Non-breaking space.
       
   384     \value ReplacementCharacter
       
   385     \value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used.
       
   386     \value ByteOrderMark
       
   387     \value ByteOrderSwapped
       
   388     \value ParagraphSeparator
       
   389     \value LineSeparator
       
   390 
       
   391     \omitvalue null
       
   392     \omitvalue replacement
       
   393     \omitvalue byteOrderMark
       
   394     \omitvalue byteOrderSwapped
       
   395     \omitvalue nbsp
       
   396 */
       
   397 
       
   398 /*!
       
   399     \fn void QChar::setCell(uchar cell)
       
   400     \internal
       
   401 */
       
   402 
       
   403 /*!
       
   404     \fn void QChar::setRow(uchar row)
       
   405     \internal
       
   406 */
       
   407 
       
   408 /*!
       
   409     \fn QChar::QChar()
       
   410 
       
   411     Constructs a null QChar ('\\0').
       
   412 
       
   413     \sa isNull()
       
   414 */
       
   415 
       
   416 /*!
       
   417     \fn QChar::QChar(QLatin1Char ch)
       
   418 
       
   419     Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
       
   420 */
       
   421 
       
   422 /*!
       
   423     \fn QChar::QChar(SpecialCharacter ch)
       
   424 
       
   425     Constructs a QChar for the predefined character value \a ch.
       
   426 */
       
   427 
       
   428 /*!
       
   429     Constructs a QChar corresponding to ASCII/Latin-1 character \a
       
   430     ch.
       
   431 */
       
   432 QChar::QChar(char ch)
       
   433 {
       
   434 #ifndef QT_NO_CODEC_FOR_C_STRINGS
       
   435     if (QTextCodec::codecForCStrings())
       
   436         // #####
       
   437         ucs =  QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
       
   438     else
       
   439 #endif
       
   440         ucs = uchar(ch);
       
   441 }
       
   442 
       
   443 /*!
       
   444     Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
       
   445 */
       
   446 QChar::QChar(uchar ch)
       
   447 {
       
   448 #ifndef QT_NO_CODEC_FOR_C_STRINGS
       
   449     if (QTextCodec::codecForCStrings()) {
       
   450         // #####
       
   451         char c = char(ch);
       
   452         ucs =  QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
       
   453     } else
       
   454 #endif
       
   455         ucs = ch;
       
   456 }
       
   457 
       
   458 /*!
       
   459     \fn QChar::QChar(uchar cell, uchar row)
       
   460 
       
   461     Constructs a QChar for Unicode cell \a cell in row \a row.
       
   462 
       
   463     \sa cell(), row()
       
   464 */
       
   465 
       
   466 /*!
       
   467     \fn QChar::QChar(ushort code)
       
   468 
       
   469     Constructs a QChar for the character with Unicode code point \a
       
   470     code.
       
   471 */
       
   472 
       
   473 
       
   474 /*!
       
   475     \fn QChar::QChar(short code)
       
   476 
       
   477     Constructs a QChar for the character with Unicode code point \a
       
   478     code.
       
   479 */
       
   480 
       
   481 
       
   482 /*!
       
   483     \fn QChar::QChar(uint code)
       
   484 
       
   485     Constructs a QChar for the character with Unicode code point \a
       
   486     code.
       
   487 */
       
   488 
       
   489 
       
   490 /*!
       
   491     \fn QChar::QChar(int code)
       
   492 
       
   493     Constructs a QChar for the character with Unicode code point \a
       
   494     code.
       
   495 */
       
   496 
       
   497 
       
   498 /*!
       
   499     \fn bool QChar::isNull() const
       
   500 
       
   501     Returns true if the character is the Unicode character 0x0000
       
   502     ('\\0'); otherwise returns false.
       
   503 */
       
   504 
       
   505 /*!
       
   506     \fn uchar QChar::cell() const
       
   507 
       
   508     Returns the cell (least significant byte) of the Unicode
       
   509     character.
       
   510 
       
   511     \sa row()
       
   512 */
       
   513 
       
   514 /*!
       
   515     \fn uchar QChar::row() const
       
   516 
       
   517     Returns the row (most significant byte) of the Unicode character.
       
   518 
       
   519     \sa cell()
       
   520 */
       
   521 
       
   522 /*!
       
   523     Returns true if the character is a printable character; otherwise
       
   524     returns false. This is any character not of category Cc or Cn.
       
   525 
       
   526     Note that this gives no indication of whether the character is
       
   527     available in a particular font.
       
   528 */
       
   529 bool QChar::isPrint() const
       
   530 {
       
   531     const int test = FLAG(Other_Control) |
       
   532                      FLAG(Other_NotAssigned);
       
   533     return !(FLAG(qGetProp(ucs)->category) & test);
       
   534 }
       
   535 
       
   536 /*!
       
   537     Returns true if the character is a separator character
       
   538     (Separator_* categories); otherwise returns false.
       
   539 */
       
   540 bool QChar::isSpace() const
       
   541 {
       
   542     if(ucs >= 9 && ucs <=13)
       
   543         return true;
       
   544     const int test = FLAG(Separator_Space) |
       
   545                      FLAG(Separator_Line) |
       
   546                      FLAG(Separator_Paragraph);
       
   547     return FLAG(qGetProp(ucs)->category) & test;
       
   548 }
       
   549 
       
   550 /*!
       
   551     Returns true if the character is a mark (Mark_* categories);
       
   552     otherwise returns false.
       
   553     
       
   554     See QChar::Category for more information regarding marks.
       
   555 */
       
   556 bool QChar::isMark() const
       
   557 {
       
   558     const int test = FLAG(Mark_NonSpacing) |
       
   559                      FLAG(Mark_SpacingCombining) |
       
   560                      FLAG(Mark_Enclosing);
       
   561     return FLAG(qGetProp(ucs)->category) & test;
       
   562 }
       
   563 
       
   564 /*!
       
   565     Returns true if the character is a punctuation mark (Punctuation_*
       
   566     categories); otherwise returns false.
       
   567 */
       
   568 bool QChar::isPunct() const
       
   569 {
       
   570     const int test = FLAG(Punctuation_Connector) |
       
   571                      FLAG(Punctuation_Dash) |
       
   572                      FLAG(Punctuation_Open) |
       
   573                      FLAG(Punctuation_Close) |
       
   574                      FLAG(Punctuation_InitialQuote) |
       
   575                      FLAG(Punctuation_FinalQuote) |
       
   576                      FLAG(Punctuation_Other);
       
   577     return FLAG(qGetProp(ucs)->category) & test;
       
   578 }
       
   579 
       
   580 /*!
       
   581     Returns true if the character is a letter (Letter_* categories);
       
   582     otherwise returns false.
       
   583 */
       
   584 bool QChar::isLetter() const
       
   585 {
       
   586     const int test = FLAG(Letter_Uppercase) |
       
   587                      FLAG(Letter_Lowercase) |
       
   588                      FLAG(Letter_Titlecase) |
       
   589                      FLAG(Letter_Modifier) |
       
   590                      FLAG(Letter_Other);
       
   591     return FLAG(qGetProp(ucs)->category) & test;
       
   592 }
       
   593 
       
   594 /*!
       
   595     Returns true if the character is a number (Number_* categories,
       
   596     not just 0-9); otherwise returns false.
       
   597 
       
   598     \sa isDigit()
       
   599 */
       
   600 bool QChar::isNumber() const
       
   601 {
       
   602     const int test = FLAG(Number_DecimalDigit) |
       
   603                      FLAG(Number_Letter) |
       
   604                      FLAG(Number_Other);
       
   605     return FLAG(qGetProp(ucs)->category) & test;
       
   606 }
       
   607 
       
   608 /*!
       
   609     Returns true if the character is a letter or number (Letter_* or
       
   610     Number_* categories); otherwise returns false.
       
   611 */
       
   612 bool QChar::isLetterOrNumber() const
       
   613 {
       
   614     const int test = FLAG(Letter_Uppercase) |
       
   615                      FLAG(Letter_Lowercase) |
       
   616                      FLAG(Letter_Titlecase) |
       
   617                      FLAG(Letter_Modifier) |
       
   618                      FLAG(Letter_Other) |
       
   619                      FLAG(Number_DecimalDigit) |
       
   620                      FLAG(Number_Letter) |
       
   621                      FLAG(Number_Other);
       
   622     return FLAG(qGetProp(ucs)->category) & test;
       
   623 }
       
   624 
       
   625 
       
   626 /*!
       
   627     Returns true if the character is a decimal digit
       
   628     (Number_DecimalDigit); otherwise returns false.
       
   629 */
       
   630 bool QChar::isDigit() const
       
   631 {
       
   632     return (qGetProp(ucs)->category == Number_DecimalDigit);
       
   633 }
       
   634 
       
   635 
       
   636 /*!
       
   637     Returns true if the character is a symbol (Symbol_* categories);
       
   638     otherwise returns false.
       
   639 */
       
   640 bool QChar::isSymbol() const
       
   641 {
       
   642     const int test = FLAG(Symbol_Math) |
       
   643                      FLAG(Symbol_Currency) |
       
   644                      FLAG(Symbol_Modifier) |
       
   645                      FLAG(Symbol_Other);
       
   646     return FLAG(qGetProp(ucs)->category) & test;
       
   647 }
       
   648 
       
   649 /*!
       
   650   \fn bool QChar::isHighSurrogate() const
       
   651 
       
   652   Returns true if the QChar is the high part of a utf16 surrogate
       
   653   (ie. if its code point is between 0xd800 and 0xdbff).
       
   654 */
       
   655 
       
   656 /*!
       
   657   \fn bool QChar::isLowSurrogate() const
       
   658 
       
   659   Returns true if the QChar is the low part of a utf16 surrogate
       
   660   (ie. if its code point is between 0xdc00 and 0xdfff).
       
   661 */
       
   662 
       
   663 /*!
       
   664   \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
       
   665 
       
   666   Converts a UTF16 surrogate pair with the given \a high and \a low values
       
   667   to its UCS-4 code point.
       
   668 */
       
   669 
       
   670 /*!
       
   671   \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
       
   672 
       
   673   Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
       
   674   point.
       
   675 */
       
   676 
       
   677 /*!
       
   678   \fn static ushort QChar::highSurrogate(uint ucs4)
       
   679 
       
   680   Returns the high surrogate value of a ucs4 code point.
       
   681   The returned result is undefined if \a ucs4 is smaller than 0x10000.
       
   682 */
       
   683 
       
   684 /*!
       
   685   \fn static ushort QChar::lowSurrogate(uint ucs4)
       
   686 
       
   687   Returns the low surrogate value of a ucs4 code point.
       
   688   The returned result is undefined if \a ucs4 is smaller than 0x10000.
       
   689 */
       
   690 
       
   691 /*!
       
   692     Returns the numeric value of the digit, or -1 if the character is
       
   693     not a digit.
       
   694 */
       
   695 int QChar::digitValue() const
       
   696 {
       
   697     return qGetProp(ucs)->digitValue;
       
   698 }
       
   699 
       
   700 /*!
       
   701     \overload
       
   702     Returns the numeric value of the digit, specified by the UCS-2-encoded
       
   703     character, \a ucs2, or -1 if the character is not a digit.
       
   704 */
       
   705 int QChar::digitValue(ushort ucs2)
       
   706 {
       
   707     return qGetProp(ucs2)->digitValue;
       
   708 }
       
   709 
       
   710 /*!
       
   711     \overload
       
   712     Returns the numeric value of the digit specified by the UCS-4-encoded
       
   713     character, \a ucs4, or -1 if the character is not a digit.
       
   714 */
       
   715 int QChar::digitValue(uint ucs4)
       
   716 {
       
   717     if (ucs4 > LAST_UNICODE_CHAR)
       
   718         return 0;
       
   719     return qGetProp(ucs4)->digitValue;
       
   720 }
       
   721 
       
   722 /*!
       
   723     Returns the character's category.
       
   724 */
       
   725 QChar::Category QChar::category() const
       
   726 {
       
   727     return (QChar::Category) qGetProp(ucs)->category;
       
   728 }
       
   729 
       
   730 /*! 
       
   731     \overload
       
   732     \since 4.3
       
   733     Returns the category of the UCS-4-encoded character specified by \a ucs4.
       
   734  */
       
   735 QChar::Category QChar::category(uint ucs4)
       
   736 {
       
   737     if (ucs4 > LAST_UNICODE_CHAR)
       
   738         return QChar::NoCategory;
       
   739     return (QChar::Category) qGetProp(ucs4)->category;
       
   740 }
       
   741 
       
   742 /*! 
       
   743     \overload
       
   744     Returns the category of the UCS-2-encoded character specified by \a ucs2.
       
   745  */
       
   746 QChar::Category QChar::category(ushort ucs2)
       
   747 {
       
   748     return (QChar::Category) qGetProp(ucs2)->category;
       
   749 }
       
   750 
       
   751 
       
   752 /*!
       
   753     Returns the character's direction.
       
   754 */
       
   755 QChar::Direction QChar::direction() const
       
   756 {
       
   757     return (QChar::Direction) qGetProp(ucs)->direction;
       
   758 }
       
   759 
       
   760 /*! 
       
   761 \overload
       
   762 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
       
   763  */
       
   764 QChar::Direction QChar::direction(uint ucs4)
       
   765 {
       
   766     if (ucs4 > LAST_UNICODE_CHAR)
       
   767         return QChar::DirL;
       
   768     return (QChar::Direction) qGetProp(ucs4)->direction;
       
   769 }
       
   770 
       
   771 /*! 
       
   772 \overload
       
   773 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
       
   774  */
       
   775 QChar::Direction QChar::direction(ushort ucs2)
       
   776 {
       
   777     return (QChar::Direction) qGetProp(ucs2)->direction;
       
   778 }
       
   779 
       
   780 /*!
       
   781     Returns information about the joining properties of the character
       
   782     (needed for certain languages such as Arabic).
       
   783 */
       
   784 QChar::Joining QChar::joining() const
       
   785 {
       
   786     return (QChar::Joining) qGetProp(ucs)->joining;
       
   787 }
       
   788 
       
   789 /*! 
       
   790 \overload
       
   791 Returns information about the joining properties of the UCS-4-encoded
       
   792 character specified by \a ucs4 (needed for certain languages such as
       
   793 Arabic).
       
   794  */
       
   795 QChar::Joining QChar::joining(uint ucs4)
       
   796 {
       
   797     if (ucs4 > LAST_UNICODE_CHAR)
       
   798         return QChar::OtherJoining;
       
   799     return (QChar::Joining) qGetProp(ucs4)->joining;
       
   800 }
       
   801 
       
   802 /*! 
       
   803 \overload
       
   804 Returns information about the joining properties of the UCS-2-encoded
       
   805 character specified by \a ucs2 (needed for certain languages such as
       
   806 Arabic).
       
   807  */
       
   808 QChar::Joining QChar::joining(ushort ucs2)
       
   809 {
       
   810     return (QChar::Joining) qGetProp(ucs2)->joining;
       
   811 }
       
   812 
       
   813 
       
   814 /*!
       
   815     Returns true if the character should be reversed if the text
       
   816     direction is reversed; otherwise returns false.
       
   817 
       
   818     Same as (ch.mirroredChar() != ch).
       
   819 
       
   820     \sa mirroredChar()
       
   821 */
       
   822 bool QChar::hasMirrored() const
       
   823 {
       
   824     return qGetProp(ucs)->mirrorDiff != 0;
       
   825 }
       
   826 
       
   827 /*!
       
   828     \fn bool QChar::isLower() const
       
   829 
       
   830     Returns true if the character is a lowercase letter, i.e.
       
   831     category() is Letter_Lowercase.
       
   832 
       
   833     \sa isUpper(), toLower(), toUpper()
       
   834 */
       
   835 
       
   836 /*!
       
   837     \fn bool QChar::isUpper() const
       
   838 
       
   839     Returns true if the character is an uppercase letter, i.e.
       
   840     category() is Letter_Uppercase.
       
   841 
       
   842     \sa isLower(), toUpper(), toLower()
       
   843 */
       
   844 
       
   845 /*!
       
   846     \fn bool QChar::isTitleCase() const
       
   847     \since 4.3
       
   848 
       
   849     Returns true if the character is a titlecase letter, i.e.
       
   850     category() is Letter_Titlecase.
       
   851 
       
   852     \sa isLower(), toUpper(), toLower(), toTitleCase()
       
   853 */
       
   854 
       
   855 /*!
       
   856     Returns the mirrored character if this character is a mirrored
       
   857     character; otherwise returns the character itself.
       
   858 
       
   859     \sa hasMirrored()
       
   860 */
       
   861 QChar QChar::mirroredChar() const
       
   862 {
       
   863     return ucs + qGetProp(ucs)->mirrorDiff;
       
   864 }
       
   865 
       
   866 /*! \overload
       
   867 Returns the mirrored character if the UCS-4-encoded character specified
       
   868 by \a ucs4 is a mirrored character; otherwise returns the character itself.
       
   869 
       
   870 \sa hasMirrored()
       
   871  */
       
   872 uint QChar::mirroredChar(uint ucs4)
       
   873 {
       
   874     if (ucs4 > LAST_UNICODE_CHAR)
       
   875         return ucs4;
       
   876     return ucs4 + qGetProp(ucs4)->mirrorDiff;
       
   877 }
       
   878 
       
   879 /*! 
       
   880 \overload
       
   881 Returns the mirrored character if the UCS-2-encoded character specified
       
   882 by \a ucs2 is a mirrored character; otherwise returns the character itself.
       
   883 
       
   884 \sa hasMirrored()
       
   885  */
       
   886 ushort QChar::mirroredChar(ushort ucs2)
       
   887 {
       
   888     return ucs2 + qGetProp(ucs2)->mirrorDiff;
       
   889 }
       
   890 
       
   891 
       
   // Bases and counts for the arithmetic Hangul handling in
   // decompositionHelper() and ligatureHelper(): S = syllable,
   // L/V/T = the three parts a syllable is split into.
   enum {
       // first code unit of each range
       Hangul_SBase = 0xac00,
       Hangul_LBase = 0x1100,
       Hangul_VBase = 0x1161,
       Hangul_TBase = 0x11a7,
       // number of entries in each range
       Hangul_LCount = 19,
       Hangul_VCount = 21,
       Hangul_TCount = 28,
       Hangul_NCount = Hangul_VCount * Hangul_TCount,
       Hangul_SCount = 11172
   };
       
   903 
       
   904 // buffer has to have a length of 3. It's needed for Hangul decomposition
       
   905 static const unsigned short * QT_FASTCALL decompositionHelper
       
   906     (uint ucs4, int *length, int *tag, unsigned short *buffer)
       
   907 {
       
   908     *length = 0;
       
   909     if (ucs4 > LAST_UNICODE_CHAR)
       
   910         return 0;
       
   911     if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
       
   912         int SIndex = ucs4 - Hangul_SBase;
       
   913         buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
       
   914         buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
       
   915         buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
       
   916         *length = buffer[2] == Hangul_TBase ? 2 : 3;
       
   917         *tag = QChar::Canonical;
       
   918         return buffer;
       
   919     }
       
   920 
       
   921     const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
       
   922     if (index == 0xffff)
       
   923         return 0;
       
   924     const unsigned short *decomposition = uc_decomposition_map+index;
       
   925     *tag = (*decomposition) & 0xff;
       
   926     *length = (*decomposition) >> 8;
       
   927     return decomposition+1;
       
   928 }
       
   929 
       
   930 /*!
       
   931     Decomposes a character into its parts. Returns an empty string if
       
   932     no decomposition exists.
       
   933 */
       
   934 QString QChar::decomposition() const
       
   935 {
       
   936     return decomposition(ucs);
       
   937 }
       
   938 
       
   939 /*! 
       
   940 \overload
       
   941 Decomposes the UCS-4-encoded character specified by \a ucs4 into its
       
   942 constituent parts. Returns an empty string if no decomposition exists.
       
   943  */
       
   944 QString QChar::decomposition(uint ucs4)
       
   945 {
       
   946     unsigned short buffer[3];
       
   947     int length;
       
   948     int tag;
       
   949     const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
       
   950     return QString::fromUtf16(d, length);
       
   951 }
       
   952 
       
   953 /*!
       
   954     Returns the tag defining the composition of the character. Returns
       
   955     QChar::Single if no decomposition exists.
       
   956 */
       
   957 QChar::Decomposition QChar::decompositionTag() const
       
   958 {
       
   959     return decompositionTag(ucs);
       
   960 }
       
   961 
       
   962 /*! 
       
   963 \overload
       
   964 Returns the tag defining the composition of the UCS-4-encoded character
       
   965 specified by \a ucs4. Returns QChar::Single if no decomposition exists.
       
   966  */
       
   967 QChar::Decomposition QChar::decompositionTag(uint ucs4)
       
   968 {
       
   969     if (ucs4 > LAST_UNICODE_CHAR)
       
   970         return QChar::NoDecomposition;
       
   971     const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
       
   972     if (index == 0xffff)
       
   973         return QChar::NoDecomposition;
       
   974     return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
       
   975 }
       
   976 
       
   977 /*!
       
   978     Returns the combining class for the character as defined in the
       
   979     Unicode standard. This is mainly useful as a positioning hint for
       
   980     marks attached to a base character.
       
   981 
       
   982     The Qt text rendering engine uses this information to correctly
       
   983     position non-spacing marks around a base character.
       
   984 */
       
   985 unsigned char QChar::combiningClass() const
       
   986 {
       
   987     return (unsigned char) qGetProp(ucs)->combiningClass;
       
   988 }
       
   989 
       
   990 /*! \overload
       
   991 Returns the combining class for the UCS-4-encoded character specified by
       
   992 \a ucs4, as defined in the Unicode standard.
       
   993  */
       
   994 unsigned char QChar::combiningClass(uint ucs4)
       
   995 {
       
   996     if (ucs4 > LAST_UNICODE_CHAR)
       
   997         return 0;
       
   998     return (unsigned char) qGetProp(ucs4)->combiningClass;
       
   999 }
       
  1000 
       
  1001 /*! \overload
       
  1002 Returns the combining class for the UCS-2-encoded character specified by
       
  1003 \a ucs2, as defined in the Unicode standard.
       
  1004  */
       
  1005 unsigned char QChar::combiningClass(ushort ucs2)
       
  1006 {
       
  1007     return (unsigned char) qGetProp(ucs2)->combiningClass;
       
  1008 }
       
  1009 
       
  1010 
       
  1011 /*!
       
  1012     Returns the Unicode version that introduced this character.
       
  1013 */
       
  1014 QChar::UnicodeVersion QChar::unicodeVersion() const
       
  1015 {
       
  1016     return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
       
  1017 }
       
  1018 
       
  1019 /*! \overload
       
  1020 Returns the Unicode version that introduced the character specified in
       
  1021 its UCS-4-encoded form as \a ucs4.
       
  1022  */
       
  1023 QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
       
  1024 {
       
  1025     if (ucs4 > LAST_UNICODE_CHAR)
       
  1026         return QChar::Unicode_Unassigned;
       
  1027     return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
       
  1028 }
       
  1029 
       
  1030 /*! \overload
       
  1031 Returns the Unicode version that introduced the character specified in
       
  1032 its UCS-2-encoded form as \a ucs2.
       
  1033  */
       
  1034 QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
       
  1035 {
       
  1036     return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
       
  1037 }
       
  1038 
       
  1039 
       
  1040 /*!
       
  1041     Returns the lowercase equivalent if the character is uppercase or titlecase;
       
  1042     otherwise returns the character itself.
       
  1043 */
       
  1044 QChar QChar::toLower() const
       
  1045 {
       
  1046     const QUnicodeTables::Properties *p = qGetProp(ucs);
       
  1047     if (!p->lowerCaseSpecial)
       
  1048         return ucs + p->lowerCaseDiff;
       
  1049     return ucs;
       
  1050 }
       
  1051 
       
  1052 /*! \overload
       
  1053 Returns the lowercase equivalent of the UCS-4-encoded character specified
       
  1054 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
       
  1055 the character itself.
       
  1056  */
       
  1057 uint QChar::toLower(uint ucs4)
       
  1058 {
       
  1059     if (ucs4 > LAST_UNICODE_CHAR)
       
  1060         return ucs4;
       
  1061     const QUnicodeTables::Properties *p = qGetProp(ucs4);
       
  1062     if (!p->lowerCaseSpecial)
       
  1063         return ucs4 + p->lowerCaseDiff;
       
  1064     return ucs4;
       
  1065 }
       
  1066 
       
  1067 /*! \overload
       
  1068 Returns the lowercase equivalent of the UCS-2-encoded character specified
       
  1069 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
       
  1070 the character itself.
       
  1071  */
       
  1072 ushort QChar::toLower(ushort ucs2)
       
  1073 {
       
  1074     const QUnicodeTables::Properties *p = qGetProp(ucs2);
       
  1075     if (!p->lowerCaseSpecial)
       
  1076         return ucs2 + p->lowerCaseDiff;
       
  1077     return ucs2;
       
  1078 }
       
  1079 
       
  1080 /*!
       
  1081     Returns the uppercase equivalent if the character is lowercase or titlecase;
       
  1082     otherwise returns the character itself.
       
  1083 */
       
  1084 QChar QChar::toUpper() const
       
  1085 {
       
  1086     const QUnicodeTables::Properties *p = qGetProp(ucs);
       
  1087     if (!p->upperCaseSpecial)
       
  1088         return ucs + p->upperCaseDiff;
       
  1089     return ucs;
       
  1090 }
       
  1091 
       
  1092 /*! \overload
       
  1093 Returns the uppercase equivalent of the UCS-4-encoded character specified
       
  1094 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
       
  1095 the character itself.
       
  1096  */
       
  1097 uint QChar::toUpper(uint ucs4)
       
  1098 {
       
  1099     if (ucs4 > LAST_UNICODE_CHAR)
       
  1100         return ucs4;
       
  1101     const QUnicodeTables::Properties *p = qGetProp(ucs4);
       
  1102     if (!p->upperCaseSpecial)
       
  1103         return ucs4 + p->upperCaseDiff;
       
  1104     return ucs4;
       
  1105 }
       
  1106 
       
  1107 /*! \overload
       
  1108 Returns the uppercase equivalent of the UCS-2-encoded character specified
       
  1109 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
       
  1110 the character itself.
       
  1111  */
       
  1112 ushort QChar::toUpper(ushort ucs2)
       
  1113 {
       
  1114     const QUnicodeTables::Properties *p = qGetProp(ucs2);
       
  1115     if (!p->upperCaseSpecial)
       
  1116         return ucs2 + p->upperCaseDiff;
       
  1117     return ucs2;
       
  1118 }
       
  1119 
       
  1120 /*!
       
  1121     Returns the title case equivalent if the character is lowercase or uppercase;
       
  1122     otherwise returns the character itself.
       
  1123 */
       
  1124 QChar QChar::toTitleCase() const
       
  1125 {
       
  1126     const QUnicodeTables::Properties *p = qGetProp(ucs);
       
  1127     if (!p->titleCaseSpecial)
       
  1128         return ucs + p->titleCaseDiff;
       
  1129     return ucs;
       
  1130 }
       
  1131 
       
  1132 /*!
       
  1133     \overload
       
  1134     Returns the title case equivalent of the UCS-4-encoded character specified
       
  1135     by \a ucs4 if the character is lowercase or uppercase; otherwise returns
       
  1136     the character itself.
       
  1137 */
       
  1138 uint QChar::toTitleCase(uint ucs4)
       
  1139 {
       
  1140     if (ucs4 > LAST_UNICODE_CHAR)
       
  1141         return ucs4;
       
  1142     const QUnicodeTables::Properties *p = qGetProp(ucs4);
       
  1143     if (!p->titleCaseSpecial)
       
  1144         return ucs4 + p->titleCaseDiff;
       
  1145     return ucs4;
       
  1146 }
       
  1147 
       
  1148 /*!
       
  1149     \overload
       
  1150     Returns the title case equivalent of the UCS-2-encoded character specified
       
  1151     by \a ucs2 if the character is lowercase or uppercase; otherwise returns
       
  1152     the character itself.
       
  1153 */
       
  1154 ushort QChar::toTitleCase(ushort ucs2)
       
  1155 {
       
  1156     const QUnicodeTables::Properties *p = qGetProp(ucs2);
       
  1157     if (!p->titleCaseSpecial)
       
  1158         return ucs2 + p->titleCaseDiff;
       
  1159     return ucs2;
       
  1160 }
       
  1161 
       
  1162 
       
  1163 static inline uint foldCase(const ushort *ch, const ushort *start)
       
  1164 {
       
  1165     uint c = *ch;
       
  1166     if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
       
  1167         c = QChar::surrogateToUcs4(*(ch - 1), c);
       
  1168     return *ch + qGetProp(c)->caseFoldDiff;
       
  1169 }
       
  1170 
       
  1171 static inline uint foldCase(uint ch, uint &last)
       
  1172 {
       
  1173     uint c = ch;
       
  1174     if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
       
  1175         c = QChar::surrogateToUcs4(last, c);
       
  1176     last = ch;
       
  1177     return ch + qGetProp(c)->caseFoldDiff;
       
  1178 }
       
  1179 
       
  1180 static inline ushort foldCase(ushort ch)
       
  1181 {
       
  1182     return ch + qGetProp(ch)->caseFoldDiff;
       
  1183 }
       
  1184 
       
  1185 /*!
       
  1186     Returns the case folded equivalent of the character. For most Unicode characters this
       
  1187     is the same as toLowerCase().
       
  1188 */
       
  1189 QChar QChar::toCaseFolded() const
       
  1190 {
       
  1191     return ucs + qGetProp(ucs)->caseFoldDiff;
       
  1192 }
       
  1193 
       
  1194 /*!
       
  1195     \overload
       
  1196     Returns the case folded equivalent of the UCS-4-encoded character specified
       
  1197     by \a ucs4. For most Unicode characters this is the same as toLowerCase().
       
  1198 */
       
  1199 uint QChar::toCaseFolded(uint ucs4)
       
  1200 {
       
  1201     if (ucs4 > LAST_UNICODE_CHAR)
       
  1202         return ucs4;
       
  1203     return ucs4 + qGetProp(ucs4)->caseFoldDiff;
       
  1204 }
       
  1205 
       
  1206 /*!
       
  1207     \overload
       
  1208     Returns the case folded equivalent of the UCS-2-encoded character specified
       
  1209     by \a ucs2. For most Unicode characters this is the same as toLowerCase().
       
  1210 */
       
  1211 ushort QChar::toCaseFolded(ushort ucs2)
       
  1212 {
       
  1213     return ucs2 + qGetProp(ucs2)->caseFoldDiff;
       
  1214 }
       
  1215 
       
  1216 
       
  1217 /*!
       
  1218     \fn char QChar::latin1() const
       
  1219 
       
  1220     Use toLatin1() instead.
       
  1221 */
       
  1222 
       
  1223 /*!
       
  1224     \fn char QChar::ascii() const
       
  1225 
       
  1226     Use toAscii() instead.
       
  1227 */
       
  1228 
       
  1229 /*!
       
  1230     \fn char QChar::toLatin1() const
       
  1231 
       
  1232     Returns the Latin-1 character equivalent to the QChar, or 0. This
       
  1233     is mainly useful for non-internationalized software.
       
  1234 
       
  1235     \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
       
  1236 */
       
  1237 
       
  1238 /*!
       
  1239     \fn char QChar::toAscii() const
       
  1240     Returns the character value of the QChar obtained using the current
       
  1241     codec used to read C strings, or 0 if the character is not representable
       
  1242     using this codec. The default codec handles Latin-1 encoded text,
       
  1243     but this can be changed to assist developers writing source code using
       
  1244     other encodings.
       
  1245 
       
  1246     The main purpose of this function is to preserve ASCII characters used
       
  1247     in C strings. This is mainly useful for developers of non-internationalized
       
  1248     software.
       
  1249 
       
  1250     \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
       
  1251 */
       
  1252 #ifdef Q_COMPILER_MANGLES_RETURN_TYPE
       
  1253 const char QChar::toAscii() const
       
  1254 #else
       
  1255 char QChar::toAscii() const
       
  1256 #endif
       
  1257 {
       
  1258 #ifndef QT_NO_CODEC_FOR_C_STRINGS
       
  1259     if (QTextCodec::codecForCStrings())
       
  1260         // #####
       
  1261         return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
       
  1262 #endif
       
  1263     return ucs > 0xff ? 0 : char(ucs);
       
  1264 }
       
  1265 
       
  1266 /*!
       
  1267     \fn QChar QChar::fromLatin1(char c)
       
  1268 
       
  1269     Converts the Latin-1 character \a c to its equivalent QChar. This
       
  1270     is mainly useful for non-internationalized software.
       
  1271 
       
  1272     \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
       
  1273 */
       
  1274 
       
  1275 /*!
       
  1276     Converts the ASCII character \a c to its equivalent QChar. This
       
  1277     is mainly useful for non-internationalized software.
       
  1278 
       
  1279     An alternative is to use QLatin1Char.
       
  1280 
       
  1281     \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
       
  1282 */
       
  1283 QChar QChar::fromAscii(char c)
       
  1284 {
       
  1285 #ifndef QT_NO_CODEC_FOR_C_STRINGS
       
  1286     if (QTextCodec::codecForCStrings())
       
  1287         // #####
       
  1288         return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
       
  1289 #endif
       
  1290     return QChar(ushort((uchar)c));
       
  1291 }
       
  1292 
       
  1293 #ifndef QT_NO_DATASTREAM
       
  1294 /*!
       
  1295   \relates QChar
       
  1296 
       
  1297   Writes the char \a chr to the stream \a out.
       
  1298 
       
  1299   \sa {Format of the QDataStream operators}
       
  1300  */
       
  1301 
       
  1302 QDataStream &operator<<(QDataStream &out, const QChar &chr)
       
  1303 {
       
  1304     out << quint16(chr.unicode());
       
  1305     return out;
       
  1306 }
       
  1307 
       
  1308 
       
  1309 /*!
       
  1310   \relates QChar
       
  1311 
       
  1312   Reads a char from the stream \a in into char \a chr.
       
  1313 
       
  1314   \sa {Format of the QDataStream operators}
       
  1315  */
       
  1316 
       
  1317 QDataStream &operator>>(QDataStream &in, QChar &chr)
       
  1318 {
       
  1319     quint16 u;
       
  1320     in >> u;
       
  1321     chr.unicode() = ushort(u);
       
  1322     return in;
       
  1323 }
       
  1324 #endif // QT_NO_DATASTREAM
       
  1325 
       
  1326 /*!
       
  1327     \fn ushort & QChar::unicode()
       
  1328 
       
  1329     Returns a reference to the numeric Unicode value of the QChar.
       
  1330 */
       
  1331 
       
  1332 /*!
       
  1333     \fn ushort QChar::unicode() const
       
  1334 
       
  1335     \overload
       
  1336 */
       
  1337 
       
  1338 /*****************************************************************************
       
  1339   Documentation of QChar related functions
       
  1340  *****************************************************************************/
       
  1341 
       
  1342 /*!
       
  1343     \fn bool operator==(QChar c1, QChar c2)
       
  1344 
       
  1345     \relates QChar
       
  1346 
       
  1347     Returns true if \a c1 and \a c2 are the same Unicode character;
       
  1348     otherwise returns false.
       
  1349 */
       
  1350 
       
  1351 /*!
       
  1352     \fn int operator!=(QChar c1, QChar c2)
       
  1353 
       
  1354     \relates QChar
       
  1355 
       
  1356     Returns true if \a c1 and \a c2 are not the same Unicode
       
  1357     character; otherwise returns false.
       
  1358 */
       
  1359 
       
  1360 /*!
       
  1361     \fn int operator<=(QChar c1, QChar c2)
       
  1362 
       
  1363     \relates QChar
       
  1364 
       
  1365     Returns true if the numeric Unicode value of \a c1 is less than
       
  1366     or equal to that of \a c2; otherwise returns false.
       
  1367 */
       
  1368 
       
  1369 /*!
       
  1370     \fn int operator>=(QChar c1, QChar c2)
       
  1371 
       
  1372     \relates QChar
       
  1373 
       
  1374     Returns true if the numeric Unicode value of \a c1 is greater than
       
  1375     or equal to that of \a c2; otherwise returns false.
       
  1376 */
       
  1377 
       
  1378 /*!
       
  1379     \fn int operator<(QChar c1, QChar c2)
       
  1380 
       
  1381     \relates QChar
       
  1382 
       
  1383     Returns true if the numeric Unicode value of \a c1 is less than
       
  1384     that of \a c2; otherwise returns false.
       
  1385 */
       
  1386 
       
  1387 /*!
       
  1388     \fn int operator>(QChar c1, QChar c2)
       
  1389 
       
  1390     \relates QChar
       
  1391 
       
  1392     Returns true if the numeric Unicode value of \a c1 is greater than
       
  1393     that of \a c2; otherwise returns false.
       
  1394 */
       
  1395 
       
  1396 /*!
       
  1397     \fn bool QChar::mirrored() const
       
  1398 
       
  1399     Use hasMirrored() instead.
       
  1400 */
       
  1401 
       
  1402 /*!
       
  1403     \fn QChar QChar::lower() const
       
  1404 
       
  1405     Use toLower() instead.
       
  1406 */
       
  1407 
       
  1408 /*!
       
  1409     \fn QChar QChar::upper() const
       
  1410 
       
  1411     Use toUpper() instead.
       
  1412 */
       
  1413 
       
  1414 /*!
       
  1415     \fn bool QChar::networkOrdered()
       
  1416 
       
  1417     See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
       
  1418 */
       
  1419 
       
  1420 
       
  1421 // ---------------------------------------------------------------------------
       
  1422 
       
  1423 
       
  1424 static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
       
  1425 {
       
  1426     unsigned short buffer[3];
       
  1427 
       
  1428     QString &s = *str;
       
  1429 
       
  1430     const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
       
  1431     const unsigned short *uc = utf16 + s.length();
       
  1432     while (uc != utf16 + from) {
       
  1433         uint ucs4 = *(--uc);
       
  1434         if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
       
  1435             ushort high = *(uc - 1);
       
  1436             if (QChar(high).isHighSurrogate()) {
       
  1437                 --uc;
       
  1438                 ucs4 = QChar::surrogateToUcs4(high, ucs4);
       
  1439             }
       
  1440         }
       
  1441         if (QChar::unicodeVersion(ucs4) > version)
       
  1442             continue;
       
  1443         int length;
       
  1444         int tag;
       
  1445         const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
       
  1446         if (!d || (canonical && tag != QChar::Canonical))
       
  1447             continue;
       
  1448 
       
  1449         s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
       
  1450         // since the insert invalidates the pointers and we do decomposition recursive
       
  1451         int pos = uc - utf16;
       
  1452         utf16 = reinterpret_cast<unsigned short *>(s.data());
       
  1453         uc = utf16 + pos + length;
       
  1454     }
       
  1455 }
       
  1456 
       
  1457 
       
  1458 static ushort ligatureHelper(ushort u1, ushort u2)
       
  1459 {
       
  1460     // hangul L-V pair
       
  1461     int LIndex = u1 - Hangul_LBase;
       
  1462     if (0 <= LIndex && LIndex < Hangul_LCount) {
       
  1463         int VIndex = u2 - Hangul_VBase;
       
  1464         if (0 <= VIndex && VIndex < Hangul_VCount)
       
  1465             return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
       
  1466     }
       
  1467 
       
  1468     // hangul LV-T pair
       
  1469     int SIndex = u1 - Hangul_SBase;
       
  1470     if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
       
  1471         int TIndex = u2 - Hangul_TBase;
       
  1472         if (0 <= TIndex && TIndex <= Hangul_TCount)
       
  1473             return u1 + TIndex;
       
  1474     }
       
  1475 
       
  1476     const unsigned short index = GET_LIGATURE_INDEX(u2);
       
  1477     if (index == 0xffff)
       
  1478         return 0;
       
  1479     const unsigned short *ligatures = uc_ligature_map+index;
       
  1480     ushort length = *ligatures;
       
  1481     ++ligatures;
       
  1482     // ### use bsearch
       
  1483     for (uint i = 0; i < length; ++i)
       
  1484         if (ligatures[2*i] == u1)
       
  1485             return ligatures[2*i+1];
       
  1486     return 0;
       
  1487 }
       
  1488 
       
  1489 static void composeHelper(QString *str, int from)
       
  1490 {
       
  1491     QString &s = *str;
       
  1492 
       
  1493     if (s.length() - from < 2)
       
  1494         return;
       
  1495 
       
  1496     // the loop can partly ignore high Unicode as all ligatures are in the BMP
       
  1497     int starter = 0;
       
  1498     int lastCombining = 0;
       
  1499     int pos = from;
       
  1500     while (pos < s.length()) {
       
  1501         uint uc = s.at(pos).unicode();
       
  1502         if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
       
  1503             ushort low = s.at(pos+1).unicode();
       
  1504             if (QChar(low).isLowSurrogate()) {
       
  1505                 uc = QChar::surrogateToUcs4(uc, low);
       
  1506                 ++pos;
       
  1507             }
       
  1508         }
       
  1509         int combining = QChar::combiningClass(uc);
       
  1510         if (starter == pos - 1 || combining > lastCombining) {
       
  1511             // allowed to form ligature with S
       
  1512             QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
       
  1513             if (ligature.unicode()) {
       
  1514                 s[starter] = ligature;
       
  1515                 s.remove(pos, 1);
       
  1516                 continue;
       
  1517             }
       
  1518         }
       
  1519         if (!combining)
       
  1520             starter = pos;
       
  1521         lastCombining = combining;
       
  1522         ++pos;
       
  1523     }
       
  1524 }
       
  1525 
       
  1526 
       
  1527 static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
       
  1528 {
       
  1529     QString &s = *str;
       
  1530     const int l = s.length()-1;
       
  1531     int pos = from;
       
  1532     while (pos < l) {
       
  1533         int p2 = pos+1;
       
  1534         uint u1 = s.at(pos).unicode();
       
  1535         if (QChar(u1).isHighSurrogate()) {
       
  1536             ushort low = s.at(pos+1).unicode();
       
  1537             if (QChar(low).isLowSurrogate()) {
       
  1538                 p2++;
       
  1539                 u1 = QChar::surrogateToUcs4(u1, low);
       
  1540                 if (p2 >= l)
       
  1541                     break;
       
  1542             }
       
  1543         }
       
  1544         uint u2 = s.at(p2).unicode();
       
  1545         if (QChar(u2).isHighSurrogate() && p2 < l-1) {
       
  1546             ushort low = s.at(p2+1).unicode();
       
  1547             if (QChar(low).isLowSurrogate()) {
       
  1548                 p2++;
       
  1549                 u2 = QChar::surrogateToUcs4(u2, low);
       
  1550             }
       
  1551         }
       
  1552 
       
  1553         int c2 = QChar::combiningClass(u2);
       
  1554         if (QChar::unicodeVersion(u2) > version)
       
  1555             c2 = 0;
       
  1556 
       
  1557         if (c2 == 0) {
       
  1558             pos = p2+1;
       
  1559             continue;
       
  1560         }
       
  1561         int c1 = QChar::combiningClass(u1);
       
  1562         if (QChar::unicodeVersion(u1) > version)
       
  1563             c1 = 0;
       
  1564 
       
  1565         if (c1 > c2) {
       
  1566             QChar *uc = s.data();
       
  1567             int p = pos;
       
  1568             // exchange characters
       
  1569             if (u2 < 0x10000) {
       
  1570                 uc[p++] = u2;
       
  1571             } else {
       
  1572                 uc[p++] = QChar::highSurrogate(u2);
       
  1573                 uc[p++] = QChar::lowSurrogate(u2);
       
  1574             }
       
  1575             if (u1 < 0x10000) {
       
  1576                 uc[p++] = u1;
       
  1577             } else {
       
  1578                 uc[p++] = QChar::highSurrogate(u1);
       
  1579                 uc[p++] = QChar::lowSurrogate(u1);
       
  1580             }
       
  1581             if (pos > 0)
       
  1582                 --pos;
       
  1583             if (pos > 0 && s.at(pos).isLowSurrogate())
       
  1584                 --pos;
       
  1585         } else {
       
  1586             ++pos;
       
  1587             if (u1 > 0x10000)
       
  1588                 ++pos;
       
  1589         }
       
  1590     }
       
  1591 }
       
  1592 
       
  1593 int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
       
  1594 {
       
  1595     if (uc > 0xffff)
       
  1596         return Common;
       
  1597     int script = uc_scripts[uc >> 7];
       
  1598     if (script < ScriptSentinel)
       
  1599         return script;
       
  1600     script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);
       
  1601     script = uc_scripts[script + (uc & 0x7f)];
       
  1602     return script;
       
  1603 }
       
  1604 
       
  1605 
       
  1606 Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
       
  1607 {
       
  1608     return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
       
  1609 }
       
  1610 
       
  1611 
       
  1612 QT_END_NAMESPACE