diff -r b72c6db6890b -r 5dc02b23752f src/corelib/tools/qchar.cpp --- a/src/corelib/tools/qchar.cpp Wed Jun 23 19:07:03 2010 +0300 +++ b/src/corelib/tools/qchar.cpp Tue Jul 06 15:10:48 2010 +0300 @@ -42,10 +42,10 @@ // Don't define it while compiling this module, or USERS of Qt will // not be able to link. #ifdef QT_NO_CAST_FROM_ASCII -#undef QT_NO_CAST_FROM_ASCII +# undef QT_NO_CAST_FROM_ASCII #endif #ifdef QT_NO_CAST_TO_ASCII -#undef QT_NO_CAST_TO_ASCII +# undef QT_NO_CAST_TO_ASCII #endif #include "qchar.h" #include "qdatastream.h" @@ -57,17 +57,16 @@ QT_BEGIN_NAMESPACE -#define LAST_UNICODE_CHAR 0x10ffff - #ifndef QT_NO_CODEC_FOR_C_STRINGS -#ifdef QT_NO_TEXTCODEC -#define QT_NO_CODEC_FOR_C_STRINGS -#endif +# ifdef QT_NO_TEXTCODEC +# define QT_NO_CODEC_FOR_C_STRINGS +# endif #endif #define FLAG(x) (1 << (x)) -/*! \class QLatin1Char +/*! + \class QLatin1Char \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character. \ingroup string-processing @@ -554,7 +553,7 @@ /*! Returns true if the character is a mark (Mark_* categories); otherwise returns false. - + See QChar::Category for more information regarding marks. */ bool QChar::isMark() const @@ -651,45 +650,71 @@ } /*! - \fn bool QChar::isHighSurrogate() const + \fn bool QChar::isHighSurrogate() const + + Returns true if the QChar is the high part of a utf16 surrogate + (ie. if its code point is between 0xd800 and 0xdbff, inclusive). +*/ - Returns true if the QChar is the high part of a utf16 surrogate - (ie. if its code point is between 0xd800 and 0xdbff). +/*! + \fn bool QChar::isLowSurrogate() const + + Returns true if the QChar is the low part of a utf16 surrogate + (ie. if its code point is between 0xdc00 and 0xdfff, inclusive). */ /*! - \fn bool QChar::isLowSurrogate() const + \fn static bool QChar::isHighSurrogate(uint ucs4) + \since 4.7 + + Returns true if the UCS-4-encoded character specified by \a ucs4 + is the high part of a utf16 surrogate + (ie. 
if its code point is between 0xd800 and 0xdbff, inclusive). +*/ - Returns true if the QChar is the low part of a utf16 surrogate - (ie. if its code point is between 0xdc00 and 0xdfff). +/*! + \fn static bool QChar::isLowSurrogate(uint ucs4) + \since 4.7 + + Returns true if the UCS-4-encoded character specified by \a ucs4 + is the low part of a utf16 surrogate + (ie. if its code point is between 0xdc00 and 0xdfff, inclusive). */ /*! - \fn static uint QChar::surrogateToUcs4(ushort high, ushort low) + \fn static bool QChar::requiresSurrogates(uint ucs4) + \since 4.7 - Converts a UTF16 surrogate pair with the given \a high and \a low values - to its UCS-4 code point. + Returns true if the UCS-4-encoded character specified by \a ucs4 + can be split into the high and low parts of a utf16 surrogate + (ie. if its code point is greater than or equal to 0x10000). +*/ + +/*! + \fn static uint QChar::surrogateToUcs4(ushort high, ushort low) + + Converts a UTF16 surrogate pair with the given \a high and \a low values + to its UCS-4 code point. */ /*! - \fn static uint QChar::surrogateToUcs4(QChar high, QChar low) + \fn static uint QChar::surrogateToUcs4(QChar high, QChar low) - Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code - point. + Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code point. */ /*! - \fn static ushort QChar::highSurrogate(uint ucs4) + \fn static ushort QChar::highSurrogate(uint ucs4) - Returns the high surrogate value of a ucs4 code point. - The returned result is undefined if \a ucs4 is smaller than 0x10000. + Returns the high surrogate value of a ucs4 code point. + The returned result is undefined if \a ucs4 is smaller than 0x10000. */ /*! - \fn static ushort QChar::lowSurrogate(uint ucs4) + \fn static ushort QChar::lowSurrogate(uint ucs4) - Returns the low surrogate value of a ucs4 code point. - The returned result is undefined if \a ucs4 is smaller than 0x10000. + Returns the low surrogate value of a ucs4 code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000. */ /*! @@ -718,7 +743,7 @@ */ int QChar::digitValue(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return 0; return qGetProp(ucs4)->digitValue; } @@ -731,22 +756,22 @@ return (QChar::Category) qGetProp(ucs)->category; } -/*! +/*! \overload \since 4.3 Returns the category of the UCS-4-encoded character specified by \a ucs4. - */ +*/ QChar::Category QChar::category(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return QChar::NoCategory; return (QChar::Category) qGetProp(ucs4)->category; } -/*! +/*! \overload Returns the category of the UCS-2-encoded character specified by \a ucs2. - */ +*/ QChar::Category QChar::category(ushort ucs2) { return (QChar::Category) qGetProp(ucs2)->category; @@ -761,21 +786,21 @@ return (QChar::Direction) qGetProp(ucs)->direction; } -/*! -\overload -Returns the direction of the UCS-4-encoded character specified by \a ucs4. - */ +/*! + \overload + Returns the direction of the UCS-4-encoded character specified by \a ucs4. +*/ QChar::Direction QChar::direction(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return QChar::DirL; return (QChar::Direction) qGetProp(ucs4)->direction; } -/*! -\overload -Returns the direction of the UCS-2-encoded character specified by \a ucs2. - */ +/*! + \overload + Returns the direction of the UCS-2-encoded character specified by \a ucs2. +*/ QChar::Direction QChar::direction(ushort ucs2) { return (QChar::Direction) qGetProp(ucs2)->direction; @@ -790,25 +815,25 @@ return (QChar::Joining) qGetProp(ucs)->joining; } -/*! -\overload -Returns information about the joining properties of the UCS-4-encoded -character specified by \a ucs4 (needed for certain languages such as -Arabic). - */ +/*! 
+ \overload + Returns information about the joining properties of the UCS-4-encoded + character specified by \a ucs4 (needed for certain languages such as + Arabic). +*/ QChar::Joining QChar::joining(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return QChar::OtherJoining; return (QChar::Joining) qGetProp(ucs4)->joining; } -/*! -\overload -Returns information about the joining properties of the UCS-2-encoded -character specified by \a ucs2 (needed for certain languages such as -Arabic). - */ +/*! + \overload + Returns information about the joining properties of the UCS-2-encoded + character specified by \a ucs2 (needed for certain languages such as + Arabic). +*/ QChar::Joining QChar::joining(ushort ucs2) { return (QChar::Joining) qGetProp(ucs2)->joining; @@ -867,26 +892,27 @@ return ucs + qGetProp(ucs)->mirrorDiff; } -/*! \overload -Returns the mirrored character if the UCS-4-encoded character specified -by \a ucs4 is a mirrored character; otherwise returns the character itself. +/*! + \overload + Returns the mirrored character if the UCS-4-encoded character specified + by \a ucs4 is a mirrored character; otherwise returns the character itself. -\sa hasMirrored() - */ + \sa hasMirrored() +*/ uint QChar::mirroredChar(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return ucs4; return ucs4 + qGetProp(ucs4)->mirrorDiff; } -/*! -\overload -Returns the mirrored character if the UCS-2-encoded character specified -by \a ucs2 is a mirrored character; otherwise returns the character itself. +/*! + \overload + Returns the mirrored character if the UCS-2-encoded character specified + by \a ucs2 is a mirrored character; otherwise returns the character itself. 
-\sa hasMirrored() - */ + \sa hasMirrored() +*/ ushort QChar::mirroredChar(ushort ucs2) { return ucs2 + qGetProp(ucs2)->mirrorDiff; @@ -910,7 +936,7 @@ (uint ucs4, int *length, int *tag, unsigned short *buffer) { *length = 0; - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return 0; if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) { int SIndex = ucs4 - Hangul_SBase; @@ -940,11 +966,11 @@ return decomposition(ucs); } -/*! -\overload -Decomposes the UCS-4-encoded character specified by \a ucs4 into its -constituent parts. Returns an empty string if no decomposition exists. - */ +/*! + \overload + Decomposes the UCS-4-encoded character specified by \a ucs4 into its + constituent parts. Returns an empty string if no decomposition exists. +*/ QString QChar::decomposition(uint ucs4) { unsigned short buffer[3]; @@ -963,14 +989,14 @@ return decompositionTag(ucs); } -/*! -\overload -Returns the tag defining the composition of the UCS-4-encoded character -specified by \a ucs4. Returns QChar::Single if no decomposition exists. - */ +/*! + \overload + Returns the tag defining the composition of the UCS-4-encoded character + specified by \a ucs4. Returns QChar::Single if no decomposition exists. +*/ QChar::Decomposition QChar::decompositionTag(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return QChar::NoDecomposition; const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); if (index == 0xffff) @@ -991,27 +1017,28 @@ return (unsigned char) qGetProp(ucs)->combiningClass; } -/*! \overload -Returns the combining class for the UCS-4-encoded character specified by -\a ucs4, as defined in the Unicode standard. - */ +/*! + \overload + Returns the combining class for the UCS-4-encoded character specified by + \a ucs4, as defined in the Unicode standard. 
+*/ unsigned char QChar::combiningClass(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return 0; return (unsigned char) qGetProp(ucs4)->combiningClass; } -/*! \overload -Returns the combining class for the UCS-2-encoded character specified by -\a ucs2, as defined in the Unicode standard. - */ +/*! + \overload + Returns the combining class for the UCS-2-encoded character specified by + \a ucs2, as defined in the Unicode standard. +*/ unsigned char QChar::combiningClass(ushort ucs2) { return (unsigned char) qGetProp(ucs2)->combiningClass; } - /*! Returns the Unicode version that introduced this character. */ @@ -1020,21 +1047,23 @@ return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion; } -/*! \overload -Returns the Unicode version that introduced the character specified in -its UCS-4-encoded form as \a ucs4. - */ +/*! + \overload + Returns the Unicode version that introduced the character specified in + its UCS-4-encoded form as \a ucs4. +*/ QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return QChar::Unicode_Unassigned; return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion; } -/*! \overload -Returns the Unicode version that introduced the character specified in -its UCS-2-encoded form as \a ucs2. - */ +/*! + \overload + Returns the Unicode version that introduced the character specified in + its UCS-2-encoded form as \a ucs2. +*/ QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2) { return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion; @@ -1053,14 +1082,15 @@ return ucs; } -/*! \overload -Returns the lowercase equivalent of the UCS-4-encoded character specified -by \a ucs4 if the character is uppercase or titlecase; otherwise returns -the character itself. - */ +/*! 
+ \overload + Returns the lowercase equivalent of the UCS-4-encoded character specified + by \a ucs4 if the character is uppercase or titlecase; otherwise returns + the character itself. +*/ uint QChar::toLower(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return ucs4; const QUnicodeTables::Properties *p = qGetProp(ucs4); if (!p->lowerCaseSpecial) @@ -1068,11 +1098,12 @@ return ucs4; } -/*! \overload -Returns the lowercase equivalent of the UCS-2-encoded character specified -by \a ucs2 if the character is uppercase or titlecase; otherwise returns -the character itself. - */ +/*! + \overload + Returns the lowercase equivalent of the UCS-2-encoded character specified + by \a ucs2 if the character is uppercase or titlecase; otherwise returns + the character itself. +*/ ushort QChar::toLower(ushort ucs2) { const QUnicodeTables::Properties *p = qGetProp(ucs2); @@ -1093,14 +1124,15 @@ return ucs; } -/*! \overload -Returns the uppercase equivalent of the UCS-4-encoded character specified -by \a ucs4 if the character is lowercase or titlecase; otherwise returns -the character itself. - */ +/*! + \overload + Returns the uppercase equivalent of the UCS-4-encoded character specified + by \a ucs4 if the character is lowercase or titlecase; otherwise returns + the character itself. +*/ uint QChar::toUpper(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return ucs4; const QUnicodeTables::Properties *p = qGetProp(ucs4); if (!p->upperCaseSpecial) @@ -1108,11 +1140,12 @@ return ucs4; } -/*! \overload -Returns the uppercase equivalent of the UCS-2-encoded character specified -by \a ucs2 if the character is lowercase or titlecase; otherwise returns -the character itself. - */ +/*! + \overload + Returns the uppercase equivalent of the UCS-2-encoded character specified + by \a ucs2 if the character is lowercase or titlecase; otherwise returns + the character itself. 
+*/ ushort QChar::toUpper(ushort ucs2) { const QUnicodeTables::Properties *p = qGetProp(ucs2); @@ -1141,7 +1174,7 @@ */ uint QChar::toTitleCase(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return ucs4; const QUnicodeTables::Properties *p = qGetProp(ucs4); if (!p->titleCaseSpecial) @@ -1202,7 +1235,7 @@ */ uint QChar::toCaseFolded(uint ucs4) { - if (ucs4 > LAST_UNICODE_CHAR) + if (ucs4 > UNICODE_LAST_CODEPOINT) return ucs4; return ucs4 + qGetProp(ucs4)->caseFoldDiff; } @@ -1296,28 +1329,25 @@ #ifndef QT_NO_DATASTREAM /*! - \relates QChar - - Writes the char \a chr to the stream \a out. + \relates QChar - \sa {Format of the QDataStream operators} - */ + Writes the char \a chr to the stream \a out. + \sa {Serializing Qt Data Types} +*/ QDataStream &operator<<(QDataStream &out, const QChar &chr) { out << quint16(chr.unicode()); return out; } - /*! - \relates QChar + \relates QChar - Reads a char from the stream \a in into char \a chr. + Reads a char from the stream \a in into char \a chr. - \sa {Format of the QDataStream operators} - */ - + \sa {Serializing Qt Data Types} +*/ QDataStream &operator>>(QDataStream &in, QChar &chr) { quint16 u; @@ -1450,9 +1480,9 @@ if (!d || (canonical && tag != QChar::Canonical)) continue; - s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length); + int pos = uc - utf16; + s.replace(pos, QChar::requiresSurrogates(ucs4) ? 
2 : 1, reinterpret_cast(d), length); // since the insert invalidates the pointers and we do decomposition recursive - int pos = uc - utf16; utf16 = reinterpret_cast(s.data()); uc = utf16 + pos + length; } @@ -1537,46 +1567,52 @@ int p2 = pos+1; uint u1 = s.at(pos).unicode(); if (QChar(u1).isHighSurrogate()) { - ushort low = s.at(pos+1).unicode(); + ushort low = s.at(p2).unicode(); if (QChar(low).isLowSurrogate()) { - p2++; u1 = QChar::surrogateToUcs4(u1, low); if (p2 >= l) break; + ++p2; } } uint u2 = s.at(p2).unicode(); - if (QChar(u2).isHighSurrogate() && p2 < l-1) { + if (QChar(u2).isHighSurrogate() && p2 < l) { ushort low = s.at(p2+1).unicode(); if (QChar(low).isLowSurrogate()) { - p2++; u2 = QChar::surrogateToUcs4(u2, low); + ++p2; } } - int c2 = QChar::combiningClass(u2); - if (QChar::unicodeVersion(u2) > version) - c2 = 0; - + ushort c2 = 0; + { + const QUnicodeTables::Properties *p = qGetProp(u2); + if ((QChar::UnicodeVersion)p->unicodeVersion <= version) + c2 = p->combiningClass; + } if (c2 == 0) { pos = p2+1; continue; } - int c1 = QChar::combiningClass(u1); - if (QChar::unicodeVersion(u1) > version) - c1 = 0; + + ushort c1 = 0; + { + const QUnicodeTables::Properties *p = qGetProp(u1); + if ((QChar::UnicodeVersion)p->unicodeVersion <= version) + c1 = p->combiningClass; + } if (c1 > c2) { QChar *uc = s.data(); int p = pos; // exchange characters - if (u2 < 0x10000) { + if (!QChar::requiresSurrogates(u2)) { uc[p++] = u2; } else { uc[p++] = QChar::highSurrogate(u2); uc[p++] = QChar::lowSurrogate(u2); } - if (u1 < 0x10000) { + if (!QChar::requiresSurrogates(u1)) { uc[p++] = u1; } else { uc[p++] = QChar::highSurrogate(u1); @@ -1588,7 +1624,7 @@ --pos; } else { ++pos; - if (u1 > 0x10000) + if (QChar::requiresSurrogates(u1)) ++pos; } } @@ -1606,11 +1642,9 @@ return script; } - Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4) { return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class; } - 
QT_END_NAMESPACE