--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/corelib/tools/qchar.cpp Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,1612 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// Don't define it while compiling this module, or USERS of Qt will
+// not be able to link.
+#ifdef QT_NO_CAST_FROM_ASCII
+#undef QT_NO_CAST_FROM_ASCII
+#endif
+#ifdef QT_NO_CAST_TO_ASCII
+#undef QT_NO_CAST_TO_ASCII
+#endif
+#include "qchar.h"
+#include "qdatastream.h"
+#include "qtextcodec.h"
+
+#include "qunicodetables_p.h"
+
+#include "qunicodetables.cpp"
+
+QT_BEGIN_NAMESPACE
+
+#define LAST_UNICODE_CHAR 0x10ffff
+
+#ifndef QT_NO_CODEC_FOR_C_STRINGS
+#ifdef QT_NO_TEXTCODEC
+#define QT_NO_CODEC_FOR_C_STRINGS
+#endif
+#endif
+
+#define FLAG(x) (1 << (x))
+
+/*! \class QLatin1Char
+ \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
+
+ \ingroup string-processing
+
+ This class is only useful to avoid the codec for C strings business
+ in the QChar(ch) constructor. You can avoid it by writing
+ QChar(ch, 0).
+
+ \sa QChar, QLatin1String, QString
+*/
+
+/*!
+ \fn const char QLatin1Char::toLatin1() const
+
+ Converts a Latin-1 character to an 8-bit ASCII representation of
+ the character.
+*/
+
+/*!
+ \fn const ushort QLatin1Char::unicode() const
+
+ Converts a Latin-1 character to an 16-bit-encoded Unicode representation
+ of the character.
+*/
+
+/*!
+ \fn QLatin1Char::QLatin1Char(char c)
+
+ Constructs a Latin-1 character for \a c. This constructor should be
+ used when the encoding of the input character is known to be Latin-1.
+*/
+
+/*!
+ \class QChar
+ \brief The QChar class provides a 16-bit Unicode character.
+
+ \ingroup string-processing
+ \reentrant
+
+ In Qt, Unicode characters are 16-bit entities without any markup
+ or structure. This class represents such an entity. It is
+ lightweight, so it can be used everywhere. Most compilers treat
+ it like a \c{unsigned short}.
+
+ QChar provides a full complement of testing/classification
+ functions, converting to and from other formats, converting from
+ composed to decomposed Unicode, and trying to compare and
+ case-convert if you ask it to.
+
+ The classification functions include functions like those in the
+ standard C++ header \<cctype\> (formerly \<ctype.h\>), but
+ operating on the full range of Unicode characters. They all
+ return true if the character is a certain type of character;
+ otherwise they return false. These classification functions are
+ isNull() (returns true if the character is '\\0'), isPrint()
+ (true if the character is any sort of printable character,
+ including whitespace), isPunct() (any sort of punctation),
+ isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
+ sort of numeric character, not just 0-9), isLetterOrNumber(), and
+ isDigit() (decimal digits). All of these are wrappers around
+ category() which return the Unicode-defined category of each
+ character.
+
+ QChar also provides direction(), which indicates the "natural"
+ writing direction of this character. The joining() function
+ indicates how the character joins with its neighbors (needed
+ mostly for Arabic) and finally hasMirrored(), which indicates
+ whether the character needs to be mirrored when it is printed in
+ its "unnatural" writing direction.
+
+ Composed Unicode characters (like \aring) can be converted to
+ decomposed Unicode ("a" followed by "ring above") by using
+ decomposition().
+
+ In Unicode, comparison is not necessarily possible and case
+ conversion is very difficult at best. Unicode, covering the
+ "entire" world, also includes most of the world's case and
+ sorting problems. operator==() and friends will do comparison
+ based purely on the numeric Unicode value (code point) of the
+ characters, and toUpper() and toLower() will do case changes when
+ the character has a well-defined uppercase/lowercase equivalent.
+ For locale-dependent comparisons, use
+ QString::localeAwareCompare().
+
+ The conversion functions include unicode() (to a scalar),
+ toLatin1() (to scalar, but converts all non-Latin-1 characters to
+ 0), row() (gives the Unicode row), cell() (gives the Unicode
+ cell), digitValue() (gives the integer value of any of the
+ numerous digit characters), and a host of constructors.
+
+ QChar provides constructors and cast operators that make it easy
+ to convert to and from traditional 8-bit \c{char}s. If you
+ defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
+ explained in the QString documentation, you will need to
+ explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
+ to construct a QChar from an 8-bit \c char, and you will need to
+ call toAscii() or toLatin1() to get the 8-bit value back.
+
+ \sa QString, Unicode, QLatin1Char
+*/
+
+/*!
+ \enum QChar::UnicodeVersion
+
+ Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
+ introduced a certain character.
+
+ \value Unicode_1_1 Version 1.1
+ \value Unicode_2_0 Version 2.0
+ \value Unicode_2_1_2 Version 2.1.2
+ \value Unicode_3_0 Version 3.0
+ \value Unicode_3_1 Version 3.1
+ \value Unicode_3_2 Version 3.2
+ \value Unicode_4_0 Version 4.0
+ \value Unicode_4_1 Version 4.1
+ \value Unicode_5_0 Version 5.0
+ \value Unicode_Unassigned The value is not assigned to any character
+ in version 5.0 of Unicode.
+
+ \sa unicodeVersion()
+*/
+
+/*!
+ \enum QChar::Category
+
+ This enum maps the Unicode character categories.
+
+ The following characters are normative in Unicode:
+
+ \value Mark_NonSpacing Unicode class name Mn
+
+ \value Mark_SpacingCombining Unicode class name Mc
+
+ \value Mark_Enclosing Unicode class name Me
+
+ \value Number_DecimalDigit Unicode class name Nd
+
+ \value Number_Letter Unicode class name Nl
+
+ \value Number_Other Unicode class name No
+
+ \value Separator_Space Unicode class name Zs
+
+ \value Separator_Line Unicode class name Zl
+
+ \value Separator_Paragraph Unicode class name Zp
+
+ \value Other_Control Unicode class name Cc
+
+ \value Other_Format Unicode class name Cf
+
+ \value Other_Surrogate Unicode class name Cs
+
+ \value Other_PrivateUse Unicode class name Co
+
+ \value Other_NotAssigned Unicode class name Cn
+
+
+ The following categories are informative in Unicode:
+
+ \value Letter_Uppercase Unicode class name Lu
+
+ \value Letter_Lowercase Unicode class name Ll
+
+ \value Letter_Titlecase Unicode class name Lt
+
+ \value Letter_Modifier Unicode class name Lm
+
+ \value Letter_Other Unicode class name Lo
+
+ \value Punctuation_Connector Unicode class name Pc
+
+ \value Punctuation_Dash Unicode class name Pd
+
+ \value Punctuation_Open Unicode class name Ps
+
+ \value Punctuation_Close Unicode class name Pe
+
+ \value Punctuation_InitialQuote Unicode class name Pi
+
+ \value Punctuation_FinalQuote Unicode class name Pf
+
+ \value Punctuation_Other Unicode class name Po
+
+ \value Symbol_Math Unicode class name Sm
+
+ \value Symbol_Currency Unicode class name Sc
+
+ \value Symbol_Modifier Unicode class name Sk
+
+ \value Symbol_Other Unicode class name So
+
+ \value NoCategory Qt cannot find an appropriate category for the character.
+
+ \omitvalue Punctuation_Dask
+
+ \sa category()
+*/
+
+/*!
+ \enum QChar::Direction
+
+ This enum type defines the Unicode direction attributes. See the
+ \l{http://www.unicode.org/}{Unicode Standard} for a description
+ of the values.
+
+ In order to conform to C/C++ naming conventions "Dir" is prepended
+ to the codes used in the Unicode Standard.
+
+ \value DirAL
+ \value DirAN
+ \value DirB
+ \value DirBN
+ \value DirCS
+ \value DirEN
+ \value DirES
+ \value DirET
+ \value DirL
+ \value DirLRE
+ \value DirLRO
+ \value DirNSM
+ \value DirON
+ \value DirPDF
+ \value DirR
+ \value DirRLE
+ \value DirRLO
+ \value DirS
+ \value DirWS
+
+ \sa direction()
+*/
+
+/*!
+ \enum QChar::Decomposition
+
+ This enum type defines the Unicode decomposition attributes. See
+ the \l{http://www.unicode.org/}{Unicode Standard} for a
+ description of the values.
+
+ \value NoDecomposition
+ \value Canonical
+ \value Circle
+ \value Compat
+ \value Final
+ \value Font
+ \value Fraction
+ \value Initial
+ \value Isolated
+ \value Medial
+ \value Narrow
+ \value NoBreak
+ \value Small
+ \value Square
+ \value Sub
+ \value Super
+ \value Vertical
+ \value Wide
+
+ \omitvalue Single
+
+ \sa decomposition()
+*/
+
+/*!
+ \enum QChar::Joining
+
+ This enum type defines the Unicode joining attributes. See the
+ \l{http://www.unicode.org/}{Unicode Standard} for a description
+ of the values.
+
+ \value Center
+ \value Dual
+ \value OtherJoining
+ \value Right
+
+ \sa joining()
+*/
+
+/*!
+ \enum QChar::CombiningClass
+
+ \internal
+
+ This enum type defines names for some of the Unicode combining
+ classes. See the \l{http://www.unicode.org/}{Unicode Standard}
+ for a description of the values.
+
+ \value Combining_Above
+ \value Combining_AboveAttached
+ \value Combining_AboveLeft
+ \value Combining_AboveLeftAttached
+ \value Combining_AboveRight
+ \value Combining_AboveRightAttached
+ \value Combining_Below
+ \value Combining_BelowAttached
+ \value Combining_BelowLeft
+ \value Combining_BelowLeftAttached
+ \value Combining_BelowRight
+ \value Combining_BelowRightAttached
+ \value Combining_DoubleAbove
+ \value Combining_DoubleBelow
+ \value Combining_IotaSubscript
+ \value Combining_Left
+ \value Combining_LeftAttached
+ \value Combining_Right
+ \value Combining_RightAttached
+*/
+
+/*!
+ \enum QChar::SpecialCharacter
+
+ \value Null A QChar with this value isNull().
+ \value Nbsp Non-breaking space.
+ \value ReplacementCharacter
+ \value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used.
+ \value ByteOrderMark
+ \value ByteOrderSwapped
+ \value ParagraphSeparator
+ \value LineSeparator
+
+ \omitvalue null
+ \omitvalue replacement
+ \omitvalue byteOrderMark
+ \omitvalue byteOrderSwapped
+ \omitvalue nbsp
+*/
+
+/*!
+ \fn void QChar::setCell(uchar cell)
+ \internal
+*/
+
+/*!
+ \fn void QChar::setRow(uchar row)
+ \internal
+*/
+
+/*!
+ \fn QChar::QChar()
+
+ Constructs a null QChar ('\\0').
+
+ \sa isNull()
+*/
+
+/*!
+ \fn QChar::QChar(QLatin1Char ch)
+
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
+*/
+
+/*!
+ \fn QChar::QChar(SpecialCharacter ch)
+
+ Constructs a QChar for the predefined character value \a ch.
+*/
+
+/*!
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a
+ ch.
+*/
+QChar::QChar(char ch)
+{
+#ifndef QT_NO_CODEC_FOR_C_STRINGS
+ if (QTextCodec::codecForCStrings())
+ // #####
+ ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
+ else
+#endif
+ ucs = uchar(ch);
+}
+
+/*!
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
+*/
+QChar::QChar(uchar ch)
+{
+#ifndef QT_NO_CODEC_FOR_C_STRINGS
+ if (QTextCodec::codecForCStrings()) {
+ // #####
+ char c = char(ch);
+ ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
+ } else
+#endif
+ ucs = ch;
+}
+
+/*!
+ \fn QChar::QChar(uchar cell, uchar row)
+
+ Constructs a QChar for Unicode cell \a cell in row \a row.
+
+ \sa cell(), row()
+*/
+
+/*!
+ \fn QChar::QChar(ushort code)
+
+ Constructs a QChar for the character with Unicode code point \a
+ code.
+*/
+
+
+/*!
+ \fn QChar::QChar(short code)
+
+ Constructs a QChar for the character with Unicode code point \a
+ code.
+*/
+
+
+/*!
+ \fn QChar::QChar(uint code)
+
+ Constructs a QChar for the character with Unicode code point \a
+ code.
+*/
+
+
+/*!
+ \fn QChar::QChar(int code)
+
+ Constructs a QChar for the character with Unicode code point \a
+ code.
+*/
+
+
+/*!
+ \fn bool QChar::isNull() const
+
+ Returns true if the character is the Unicode character 0x0000
+ ('\\0'); otherwise returns false.
+*/
+
+/*!
+ \fn uchar QChar::cell() const
+
+ Returns the cell (least significant byte) of the Unicode
+ character.
+
+ \sa row()
+*/
+
+/*!
+ \fn uchar QChar::row() const
+
+ Returns the row (most significant byte) of the Unicode character.
+
+ \sa cell()
+*/
+
+/*!
+ Returns true if the character is a printable character; otherwise
+ returns false. This is any character not of category Cc or Cn.
+
+ Note that this gives no indication of whether the character is
+ available in a particular font.
+*/
+bool QChar::isPrint() const
+{
+ const int test = FLAG(Other_Control) |
+ FLAG(Other_NotAssigned);
+ return !(FLAG(qGetProp(ucs)->category) & test);
+}
+
+/*!
+ Returns true if the character is a separator character
+ (Separator_* categories); otherwise returns false.
+*/
+bool QChar::isSpace() const
+{
+ if(ucs >= 9 && ucs <=13)
+ return true;
+ const int test = FLAG(Separator_Space) |
+ FLAG(Separator_Line) |
+ FLAG(Separator_Paragraph);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ Returns true if the character is a mark (Mark_* categories);
+ otherwise returns false.
+
+ See QChar::Category for more information regarding marks.
+*/
+bool QChar::isMark() const
+{
+ const int test = FLAG(Mark_NonSpacing) |
+ FLAG(Mark_SpacingCombining) |
+ FLAG(Mark_Enclosing);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ Returns true if the character is a punctuation mark (Punctuation_*
+ categories); otherwise returns false.
+*/
+bool QChar::isPunct() const
+{
+ const int test = FLAG(Punctuation_Connector) |
+ FLAG(Punctuation_Dash) |
+ FLAG(Punctuation_Open) |
+ FLAG(Punctuation_Close) |
+ FLAG(Punctuation_InitialQuote) |
+ FLAG(Punctuation_FinalQuote) |
+ FLAG(Punctuation_Other);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ Returns true if the character is a letter (Letter_* categories);
+ otherwise returns false.
+*/
+bool QChar::isLetter() const
+{
+ const int test = FLAG(Letter_Uppercase) |
+ FLAG(Letter_Lowercase) |
+ FLAG(Letter_Titlecase) |
+ FLAG(Letter_Modifier) |
+ FLAG(Letter_Other);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ Returns true if the character is a number (Number_* categories,
+ not just 0-9); otherwise returns false.
+
+ \sa isDigit()
+*/
+bool QChar::isNumber() const
+{
+ const int test = FLAG(Number_DecimalDigit) |
+ FLAG(Number_Letter) |
+ FLAG(Number_Other);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ Returns true if the character is a letter or number (Letter_* or
+ Number_* categories); otherwise returns false.
+*/
+bool QChar::isLetterOrNumber() const
+{
+ const int test = FLAG(Letter_Uppercase) |
+ FLAG(Letter_Lowercase) |
+ FLAG(Letter_Titlecase) |
+ FLAG(Letter_Modifier) |
+ FLAG(Letter_Other) |
+ FLAG(Number_DecimalDigit) |
+ FLAG(Number_Letter) |
+ FLAG(Number_Other);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+
+/*!
+ Returns true if the character is a decimal digit
+ (Number_DecimalDigit); otherwise returns false.
+*/
+bool QChar::isDigit() const
+{
+ return (qGetProp(ucs)->category == Number_DecimalDigit);
+}
+
+
+/*!
+ Returns true if the character is a symbol (Symbol_* categories);
+ otherwise returns false.
+*/
+bool QChar::isSymbol() const
+{
+ const int test = FLAG(Symbol_Math) |
+ FLAG(Symbol_Currency) |
+ FLAG(Symbol_Modifier) |
+ FLAG(Symbol_Other);
+ return FLAG(qGetProp(ucs)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isHighSurrogate() const
+
+ Returns true if the QChar is the high part of a utf16 surrogate
+ (ie. if its code point is between 0xd800 and 0xdbff).
+*/
+
+/*!
+ \fn bool QChar::isLowSurrogate() const
+
+ Returns true if the QChar is the low part of a utf16 surrogate
+ (ie. if its code point is between 0xdc00 and 0xdfff).
+*/
+
+/*!
+ \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
+
+ Converts a UTF16 surrogate pair with the given \a high and \a low values
+ to its UCS-4 code point.
+*/
+
+/*!
+ \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
+
+ Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
+ point.
+*/
+
+/*!
+ \fn static ushort QChar::highSurrogate(uint ucs4)
+
+ Returns the high surrogate value of a ucs4 code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
+*/
+
+/*!
+ \fn static ushort QChar::lowSurrogate(uint ucs4)
+
+ Returns the low surrogate value of a ucs4 code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
+*/
+
+/*!
+ Returns the numeric value of the digit, or -1 if the character is
+ not a digit.
+*/
+int QChar::digitValue() const
+{
+ return qGetProp(ucs)->digitValue;
+}
+
+/*!
+ \overload
+ Returns the numeric value of the digit, specified by the UCS-2-encoded
+ character, \a ucs2, or -1 if the character is not a digit.
+*/
+int QChar::digitValue(ushort ucs2)
+{
+ return qGetProp(ucs2)->digitValue;
+}
+
+/*!
+ \overload
+ Returns the numeric value of the digit specified by the UCS-4-encoded
+ character, \a ucs4, or -1 if the character is not a digit.
+*/
+int QChar::digitValue(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return 0;
+ return qGetProp(ucs4)->digitValue;
+}
+
+/*!
+ Returns the character's category.
+*/
+QChar::Category QChar::category() const
+{
+ return (QChar::Category) qGetProp(ucs)->category;
+}
+
+/*!
+ \overload
+ \since 4.3
+ Returns the category of the UCS-4-encoded character specified by \a ucs4.
+ */
+QChar::Category QChar::category(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return QChar::NoCategory;
+ return (QChar::Category) qGetProp(ucs4)->category;
+}
+
+/*!
+ \overload
+ Returns the category of the UCS-2-encoded character specified by \a ucs2.
+ */
+QChar::Category QChar::category(ushort ucs2)
+{
+ return (QChar::Category) qGetProp(ucs2)->category;
+}
+
+
+/*!
+ Returns the character's direction.
+*/
+QChar::Direction QChar::direction() const
+{
+ return (QChar::Direction) qGetProp(ucs)->direction;
+}
+
+/*!
+\overload
+Returns the direction of the UCS-4-encoded character specified by \a ucs4.
+ */
+QChar::Direction QChar::direction(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return QChar::DirL;
+ return (QChar::Direction) qGetProp(ucs4)->direction;
+}
+
+/*!
+\overload
+Returns the direction of the UCS-2-encoded character specified by \a ucs2.
+ */
+QChar::Direction QChar::direction(ushort ucs2)
+{
+ return (QChar::Direction) qGetProp(ucs2)->direction;
+}
+
+/*!
+ Returns information about the joining properties of the character
+ (needed for certain languages such as Arabic).
+*/
+QChar::Joining QChar::joining() const
+{
+ return (QChar::Joining) qGetProp(ucs)->joining;
+}
+
+/*!
+\overload
+Returns information about the joining properties of the UCS-4-encoded
+character specified by \a ucs4 (needed for certain languages such as
+Arabic).
+ */
+QChar::Joining QChar::joining(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return QChar::OtherJoining;
+ return (QChar::Joining) qGetProp(ucs4)->joining;
+}
+
+/*!
+\overload
+Returns information about the joining properties of the UCS-2-encoded
+character specified by \a ucs2 (needed for certain languages such as
+Arabic).
+ */
+QChar::Joining QChar::joining(ushort ucs2)
+{
+ return (QChar::Joining) qGetProp(ucs2)->joining;
+}
+
+
+/*!
+ Returns true if the character should be reversed if the text
+ direction is reversed; otherwise returns false.
+
+ Same as (ch.mirroredChar() != ch).
+
+ \sa mirroredChar()
+*/
+bool QChar::hasMirrored() const
+{
+ return qGetProp(ucs)->mirrorDiff != 0;
+}
+
+/*!
+ \fn bool QChar::isLower() const
+
+ Returns true if the character is a lowercase letter, i.e.
+ category() is Letter_Lowercase.
+
+ \sa isUpper(), toLower(), toUpper()
+*/
+
+/*!
+ \fn bool QChar::isUpper() const
+
+ Returns true if the character is an uppercase letter, i.e.
+ category() is Letter_Uppercase.
+
+ \sa isLower(), toUpper(), toLower()
+*/
+
+/*!
+ \fn bool QChar::isTitleCase() const
+ \since 4.3
+
+ Returns true if the character is a titlecase letter, i.e.
+ category() is Letter_Titlecase.
+
+ \sa isLower(), toUpper(), toLower(), toTitleCase()
+*/
+
+/*!
+ Returns the mirrored character if this character is a mirrored
+ character; otherwise returns the character itself.
+
+ \sa hasMirrored()
+*/
+QChar QChar::mirroredChar() const
+{
+ return ucs + qGetProp(ucs)->mirrorDiff;
+}
+
+/*! \overload
+Returns the mirrored character if the UCS-4-encoded character specified
+by \a ucs4 is a mirrored character; otherwise returns the character itself.
+
+\sa hasMirrored()
+ */
+uint QChar::mirroredChar(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return ucs4;
+ return ucs4 + qGetProp(ucs4)->mirrorDiff;
+}
+
+/*!
+\overload
+Returns the mirrored character if the UCS-2-encoded character specified
+by \a ucs2 is a mirrored character; otherwise returns the character itself.
+
+\sa hasMirrored()
+ */
+ushort QChar::mirroredChar(ushort ucs2)
+{
+ return ucs2 + qGetProp(ucs2)->mirrorDiff;
+}
+
+
+enum {
+ Hangul_SBase = 0xac00,
+ Hangul_LBase = 0x1100,
+ Hangul_VBase = 0x1161,
+ Hangul_TBase = 0x11a7,
+ Hangul_SCount = 11172,
+ Hangul_LCount = 19,
+ Hangul_VCount = 21,
+ Hangul_TCount = 28,
+ Hangul_NCount = 21*28
+};
+
+// buffer has to have a length of 3. It's needed for Hangul decomposition
+static const unsigned short * QT_FASTCALL decompositionHelper
+ (uint ucs4, int *length, int *tag, unsigned short *buffer)
+{
+ *length = 0;
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return 0;
+ if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
+ int SIndex = ucs4 - Hangul_SBase;
+ buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
+ buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
+ buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
+ *length = buffer[2] == Hangul_TBase ? 2 : 3;
+ *tag = QChar::Canonical;
+ return buffer;
+ }
+
+ const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
+ if (index == 0xffff)
+ return 0;
+ const unsigned short *decomposition = uc_decomposition_map+index;
+ *tag = (*decomposition) & 0xff;
+ *length = (*decomposition) >> 8;
+ return decomposition+1;
+}
+
+/*!
+ Decomposes a character into its parts. Returns an empty string if
+ no decomposition exists.
+*/
+QString QChar::decomposition() const
+{
+ return decomposition(ucs);
+}
+
+/*!
+\overload
+Decomposes the UCS-4-encoded character specified by \a ucs4 into its
+constituent parts. Returns an empty string if no decomposition exists.
+ */
+QString QChar::decomposition(uint ucs4)
+{
+ unsigned short buffer[3];
+ int length;
+ int tag;
+ const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
+ return QString::fromUtf16(d, length);
+}
+
+/*!
+ Returns the tag defining the composition of the character. Returns
+ QChar::Single if no decomposition exists.
+*/
+QChar::Decomposition QChar::decompositionTag() const
+{
+ return decompositionTag(ucs);
+}
+
+/*!
+\overload
+Returns the tag defining the composition of the UCS-4-encoded character
+specified by \a ucs4. Returns QChar::Single if no decomposition exists.
+ */
+QChar::Decomposition QChar::decompositionTag(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return QChar::NoDecomposition;
+ const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
+ if (index == 0xffff)
+ return QChar::NoDecomposition;
+ return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
+}
+
+/*!
+ Returns the combining class for the character as defined in the
+ Unicode standard. This is mainly useful as a positioning hint for
+ marks attached to a base character.
+
+ The Qt text rendering engine uses this information to correctly
+ position non-spacing marks around a base character.
+*/
+unsigned char QChar::combiningClass() const
+{
+ return (unsigned char) qGetProp(ucs)->combiningClass;
+}
+
+/*! \overload
+Returns the combining class for the UCS-4-encoded character specified by
+\a ucs4, as defined in the Unicode standard.
+ */
+unsigned char QChar::combiningClass(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return 0;
+ return (unsigned char) qGetProp(ucs4)->combiningClass;
+}
+
+/*! \overload
+Returns the combining class for the UCS-2-encoded character specified by
+\a ucs2, as defined in the Unicode standard.
+ */
+unsigned char QChar::combiningClass(ushort ucs2)
+{
+ return (unsigned char) qGetProp(ucs2)->combiningClass;
+}
+
+
+/*!
+ Returns the Unicode version that introduced this character.
+*/
+QChar::UnicodeVersion QChar::unicodeVersion() const
+{
+ return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
+}
+
+/*! \overload
+Returns the Unicode version that introduced the character specified in
+its UCS-4-encoded form as \a ucs4.
+ */
+QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return QChar::Unicode_Unassigned;
+ return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
+}
+
+/*! \overload
+Returns the Unicode version that introduced the character specified in
+its UCS-2-encoded form as \a ucs2.
+ */
+QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
+{
+ return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
+}
+
+
+/*!
+ Returns the lowercase equivalent if the character is uppercase or titlecase;
+ otherwise returns the character itself.
+*/
+QChar QChar::toLower() const
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs);
+ if (!p->lowerCaseSpecial)
+ return ucs + p->lowerCaseDiff;
+ return ucs;
+}
+
+/*! \overload
+Returns the lowercase equivalent of the UCS-4-encoded character specified
+by \a ucs4 if the character is uppercase or titlecase; otherwise returns
+the character itself.
+ */
+uint QChar::toLower(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return ucs4;
+ const QUnicodeTables::Properties *p = qGetProp(ucs4);
+ if (!p->lowerCaseSpecial)
+ return ucs4 + p->lowerCaseDiff;
+ return ucs4;
+}
+
+/*! \overload
+Returns the lowercase equivalent of the UCS-2-encoded character specified
+by \a ucs2 if the character is uppercase or titlecase; otherwise returns
+the character itself.
+ */
+ushort QChar::toLower(ushort ucs2)
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs2);
+ if (!p->lowerCaseSpecial)
+ return ucs2 + p->lowerCaseDiff;
+ return ucs2;
+}
+
+/*!
+ Returns the uppercase equivalent if the character is lowercase or titlecase;
+ otherwise returns the character itself.
+*/
+QChar QChar::toUpper() const
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs);
+ if (!p->upperCaseSpecial)
+ return ucs + p->upperCaseDiff;
+ return ucs;
+}
+
+/*! \overload
+Returns the uppercase equivalent of the UCS-4-encoded character specified
+by \a ucs4 if the character is lowercase or titlecase; otherwise returns
+the character itself.
+ */
+uint QChar::toUpper(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return ucs4;
+ const QUnicodeTables::Properties *p = qGetProp(ucs4);
+ if (!p->upperCaseSpecial)
+ return ucs4 + p->upperCaseDiff;
+ return ucs4;
+}
+
+/*! \overload
+Returns the uppercase equivalent of the UCS-2-encoded character specified
+by \a ucs2 if the character is lowercase or titlecase; otherwise returns
+the character itself.
+ */
+ushort QChar::toUpper(ushort ucs2)
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs2);
+ if (!p->upperCaseSpecial)
+ return ucs2 + p->upperCaseDiff;
+ return ucs2;
+}
+
+/*!
+ Returns the title case equivalent if the character is lowercase or uppercase;
+ otherwise returns the character itself.
+*/
+QChar QChar::toTitleCase() const
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs);
+ if (!p->titleCaseSpecial)
+ return ucs + p->titleCaseDiff;
+ return ucs;
+}
+
+/*!
+ \overload
+ Returns the title case equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is lowercase or uppercase; otherwise returns
+ the character itself.
+*/
+uint QChar::toTitleCase(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return ucs4;
+ const QUnicodeTables::Properties *p = qGetProp(ucs4);
+ if (!p->titleCaseSpecial)
+ return ucs4 + p->titleCaseDiff;
+ return ucs4;
+}
+
+/*!
+ \overload
+ Returns the title case equivalent of the UCS-2-encoded character specified
+ by \a ucs2 if the character is lowercase or uppercase; otherwise returns
+ the character itself.
+*/
+ushort QChar::toTitleCase(ushort ucs2)
+{
+ const QUnicodeTables::Properties *p = qGetProp(ucs2);
+ if (!p->titleCaseSpecial)
+ return ucs2 + p->titleCaseDiff;
+ return ucs2;
+}
+
+
+static inline uint foldCase(const ushort *ch, const ushort *start)
+{
+ uint c = *ch;
+ if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
+ c = QChar::surrogateToUcs4(*(ch - 1), c);
+ return *ch + qGetProp(c)->caseFoldDiff;
+}
+
+static inline uint foldCase(uint ch, uint &last)
+{
+ uint c = ch;
+ if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
+ c = QChar::surrogateToUcs4(last, c);
+ last = ch;
+ return ch + qGetProp(c)->caseFoldDiff;
+}
+
+static inline ushort foldCase(ushort ch)
+{
+ return ch + qGetProp(ch)->caseFoldDiff;
+}
+
+/*!
+ Returns the case folded equivalent of the character. For most Unicode characters this
+ is the same as toLowerCase().
+*/
+QChar QChar::toCaseFolded() const
+{
+ return ucs + qGetProp(ucs)->caseFoldDiff;
+}
+
+/*!
+ \overload
+ Returns the case folded equivalent of the UCS-4-encoded character specified
+ by \a ucs4. For most Unicode characters this is the same as toLowerCase().
+*/
+uint QChar::toCaseFolded(uint ucs4)
+{
+ if (ucs4 > LAST_UNICODE_CHAR)
+ return ucs4;
+ return ucs4 + qGetProp(ucs4)->caseFoldDiff;
+}
+
+/*!
+ \overload
+ Returns the case folded equivalent of the UCS-2-encoded character specified
+ by \a ucs2. For most Unicode characters this is the same as toLowerCase().
+*/
+ushort QChar::toCaseFolded(ushort ucs2)
+{
+ return ucs2 + qGetProp(ucs2)->caseFoldDiff;
+}
+
+
+/*!
+ \fn char QChar::latin1() const
+
+ Use toLatin1() instead.
+*/
+
+/*!
+ \fn char QChar::ascii() const
+
+ Use toAscii() instead.
+*/
+
+/*!
+ \fn char QChar::toLatin1() const
+
+ Returns the Latin-1 character equivalent to the QChar, or 0. This
+ is mainly useful for non-internationalized software.
+
+ \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
+*/
+
+/*!
+ \fn char QChar::toAscii() const
+ Returns the character value of the QChar obtained using the current
+ codec used to read C strings, or 0 if the character is not representable
+ using this codec. The default codec handles Latin-1 encoded text,
+ but this can be changed to assist developers writing source code using
+ other encodings.
+
+ The main purpose of this function is to preserve ASCII characters used
+ in C strings. This is mainly useful for developers of non-internationalized
+ software.
+
+ \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
+*/
+#ifdef Q_COMPILER_MANGLES_RETURN_TYPE
+const char QChar::toAscii() const
+#else
+char QChar::toAscii() const
+#endif
+{
+#ifndef QT_NO_CODEC_FOR_C_STRINGS
+ if (QTextCodec::codecForCStrings())
+ // #####
+ return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
+#endif
+ return ucs > 0xff ? 0 : char(ucs);
+}
+
+/*!
+ \fn QChar QChar::fromLatin1(char c)
+
+ Converts the Latin-1 character \a c to its equivalent QChar. This
+ is mainly useful for non-internationalized software.
+
+ \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
+*/
+
+/*!
+ Converts the ASCII character \a c to its equivalent QChar. This
+ is mainly useful for non-internationalized software.
+
+ An alternative is to use QLatin1Char.
+
+ \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
+*/
+QChar QChar::fromAscii(char c)
+{
+#ifndef QT_NO_CODEC_FOR_C_STRINGS
+ if (QTextCodec::codecForCStrings())
+ // #####
+ return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
+#endif
+ return QChar(ushort((uchar)c));
+}
+
+#ifndef QT_NO_DATASTREAM
+/*!
+ \relates QChar
+
+ Writes the char \a chr to the stream \a out.
+
+ \sa {Format of the QDataStream operators}
+ */
+
+QDataStream &operator<<(QDataStream &out, const QChar &chr)
+{
+ out << quint16(chr.unicode());
+ return out;
+}
+
+
+/*!
+ \relates QChar
+
+ Reads a char from the stream \a in into char \a chr.
+
+ \sa {Format of the QDataStream operators}
+ */
+
+QDataStream &operator>>(QDataStream &in, QChar &chr)
+{
+ quint16 u;
+ in >> u;
+ chr.unicode() = ushort(u);
+ return in;
+}
+#endif // QT_NO_DATASTREAM
+
+/*!
+ \fn ushort & QChar::unicode()
+
+ Returns a reference to the numeric Unicode value of the QChar.
+*/
+
+/*!
+ \fn ushort QChar::unicode() const
+
+ \overload
+*/
+
+/*****************************************************************************
+ Documentation of QChar related functions
+ *****************************************************************************/
+
+/*!
+ \fn bool operator==(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if \a c1 and \a c2 are the same Unicode character;
+ otherwise returns false.
+*/
+
+/*!
+ \fn int operator!=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if \a c1 and \a c2 are not the same Unicode
+ character; otherwise returns false.
+*/
+
+/*!
+ \fn int operator<=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if the numeric Unicode value of \a c1 is less than
+ or equal to that of \a c2; otherwise returns false.
+*/
+
+/*!
+ \fn int operator>=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if the numeric Unicode value of \a c1 is greater than
+ or equal to that of \a c2; otherwise returns false.
+*/
+
+/*!
+ \fn int operator<(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if the numeric Unicode value of \a c1 is less than
+ that of \a c2; otherwise returns false.
+*/
+
+/*!
+ \fn int operator>(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns true if the numeric Unicode value of \a c1 is greater than
+ that of \a c2; otherwise returns false.
+*/
+
+/*!
+ \fn bool QChar::mirrored() const
+
+ Use hasMirrored() instead.
+*/
+
+/*!
+ \fn QChar QChar::lower() const
+
+ Use toLower() instead.
+*/
+
+/*!
+ \fn QChar QChar::upper() const
+
+ Use toUpper() instead.
+*/
+
+/*!
+ \fn bool QChar::networkOrdered()
+
+ See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
+*/
+
+
+// ---------------------------------------------------------------------------
+
+
+static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
+{
+ unsigned short buffer[3];
+
+ QString &s = *str;
+
+ const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
+ const unsigned short *uc = utf16 + s.length();
+ while (uc != utf16 + from) {
+ uint ucs4 = *(--uc);
+ if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
+ ushort high = *(uc - 1);
+ if (QChar(high).isHighSurrogate()) {
+ --uc;
+ ucs4 = QChar::surrogateToUcs4(high, ucs4);
+ }
+ }
+ if (QChar::unicodeVersion(ucs4) > version)
+ continue;
+ int length;
+ int tag;
+ const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
+ if (!d || (canonical && tag != QChar::Canonical))
+ continue;
+
+ s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
+ // since the insert invalidates the pointers and we do decomposition recursive
+ int pos = uc - utf16;
+ utf16 = reinterpret_cast<unsigned short *>(s.data());
+ uc = utf16 + pos + length;
+ }
+}
+
+
+static ushort ligatureHelper(ushort u1, ushort u2)
+{
+ // hangul L-V pair
+ int LIndex = u1 - Hangul_LBase;
+ if (0 <= LIndex && LIndex < Hangul_LCount) {
+ int VIndex = u2 - Hangul_VBase;
+ if (0 <= VIndex && VIndex < Hangul_VCount)
+ return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
+ }
+
+ // hangul LV-T pair
+ int SIndex = u1 - Hangul_SBase;
+ if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
+ int TIndex = u2 - Hangul_TBase;
+ if (0 <= TIndex && TIndex <= Hangul_TCount)
+ return u1 + TIndex;
+ }
+
+ const unsigned short index = GET_LIGATURE_INDEX(u2);
+ if (index == 0xffff)
+ return 0;
+ const unsigned short *ligatures = uc_ligature_map+index;
+ ushort length = *ligatures;
+ ++ligatures;
+ // ### use bsearch
+ for (uint i = 0; i < length; ++i)
+ if (ligatures[2*i] == u1)
+ return ligatures[2*i+1];
+ return 0;
+}
+
+static void composeHelper(QString *str, int from)
+{
+ QString &s = *str;
+
+ if (s.length() - from < 2)
+ return;
+
+ // the loop can partly ignore high Unicode as all ligatures are in the BMP
+ int starter = 0;
+ int lastCombining = 0;
+ int pos = from;
+ while (pos < s.length()) {
+ uint uc = s.at(pos).unicode();
+ if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
+ ushort low = s.at(pos+1).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ uc = QChar::surrogateToUcs4(uc, low);
+ ++pos;
+ }
+ }
+ int combining = QChar::combiningClass(uc);
+ if (starter == pos - 1 || combining > lastCombining) {
+ // allowed to form ligature with S
+ QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
+ if (ligature.unicode()) {
+ s[starter] = ligature;
+ s.remove(pos, 1);
+ continue;
+ }
+ }
+ if (!combining)
+ starter = pos;
+ lastCombining = combining;
+ ++pos;
+ }
+}
+
+
+static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
+{
+ QString &s = *str;
+ const int l = s.length()-1;
+ int pos = from;
+ while (pos < l) {
+ int p2 = pos+1;
+ uint u1 = s.at(pos).unicode();
+ if (QChar(u1).isHighSurrogate()) {
+ ushort low = s.at(pos+1).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ p2++;
+ u1 = QChar::surrogateToUcs4(u1, low);
+ if (p2 >= l)
+ break;
+ }
+ }
+ uint u2 = s.at(p2).unicode();
+ if (QChar(u2).isHighSurrogate() && p2 < l-1) {
+ ushort low = s.at(p2+1).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ p2++;
+ u2 = QChar::surrogateToUcs4(u2, low);
+ }
+ }
+
+ int c2 = QChar::combiningClass(u2);
+ if (QChar::unicodeVersion(u2) > version)
+ c2 = 0;
+
+ if (c2 == 0) {
+ pos = p2+1;
+ continue;
+ }
+ int c1 = QChar::combiningClass(u1);
+ if (QChar::unicodeVersion(u1) > version)
+ c1 = 0;
+
+ if (c1 > c2) {
+ QChar *uc = s.data();
+ int p = pos;
+ // exchange characters
+ if (u2 < 0x10000) {
+ uc[p++] = u2;
+ } else {
+ uc[p++] = QChar::highSurrogate(u2);
+ uc[p++] = QChar::lowSurrogate(u2);
+ }
+ if (u1 < 0x10000) {
+ uc[p++] = u1;
+ } else {
+ uc[p++] = QChar::highSurrogate(u1);
+ uc[p++] = QChar::lowSurrogate(u1);
+ }
+ if (pos > 0)
+ --pos;
+ if (pos > 0 && s.at(pos).isLowSurrogate())
+ --pos;
+ } else {
+ ++pos;
+ if (u1 > 0x10000)
+ ++pos;
+ }
+ }
+}
+
+int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
+{
+ if (uc > 0xffff)
+ return Common;
+ int script = uc_scripts[uc >> 7];
+ if (script < ScriptSentinel)
+ return script;
+ script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);
+ script = uc_scripts[script + (uc & 0x7f)];
+ return script;
+}
+
+
+Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
+{
+ return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
+}
+
+
+QT_END_NAMESPACE