--- a/src/corelib/tools/qchar.cpp Wed Jun 23 19:07:03 2010 +0300
+++ b/src/corelib/tools/qchar.cpp Tue Jul 06 15:10:48 2010 +0300
@@ -42,10 +42,10 @@
// Don't define it while compiling this module, or USERS of Qt will
// not be able to link.
#ifdef QT_NO_CAST_FROM_ASCII
-#undef QT_NO_CAST_FROM_ASCII
+# undef QT_NO_CAST_FROM_ASCII
#endif
#ifdef QT_NO_CAST_TO_ASCII
-#undef QT_NO_CAST_TO_ASCII
+# undef QT_NO_CAST_TO_ASCII
#endif
#include "qchar.h"
#include "qdatastream.h"
@@ -57,17 +57,16 @@
QT_BEGIN_NAMESPACE
-#define LAST_UNICODE_CHAR 0x10ffff
-
#ifndef QT_NO_CODEC_FOR_C_STRINGS
-#ifdef QT_NO_TEXTCODEC
-#define QT_NO_CODEC_FOR_C_STRINGS
-#endif
+# ifdef QT_NO_TEXTCODEC
+# define QT_NO_CODEC_FOR_C_STRINGS
+# endif
#endif
#define FLAG(x) (1 << (x))
-/*! \class QLatin1Char
+/*!
+ \class QLatin1Char
\brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
\ingroup string-processing
@@ -554,7 +553,7 @@
/*!
Returns true if the character is a mark (Mark_* categories);
otherwise returns false.
-
+
See QChar::Category for more information regarding marks.
*/
bool QChar::isMark() const
@@ -651,45 +650,71 @@
}
/*!
- \fn bool QChar::isHighSurrogate() const
+ \fn bool QChar::isHighSurrogate() const
+
+ Returns true if the QChar is the high part of a utf16 surrogate
+ (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
+*/
- Returns true if the QChar is the high part of a utf16 surrogate
- (ie. if its code point is between 0xd800 and 0xdbff).
+/*!
+ \fn bool QChar::isLowSurrogate() const
+
+ Returns true if the QChar is the low part of a utf16 surrogate
+ (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
*/
/*!
- \fn bool QChar::isLowSurrogate() const
+ \fn static bool QChar::isHighSurrogate(uint ucs4)
+ \since 4.7
+
+ Returns true if the UCS-4-encoded character specified by \a ucs4
+ is the high part of a utf16 surrogate
+ (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
+*/
- Returns true if the QChar is the low part of a utf16 surrogate
- (ie. if its code point is between 0xdc00 and 0xdfff).
+/*!
+ \fn static bool QChar::isLowSurrogate(uint ucs4)
+ \since 4.7
+
+ Returns true if the UCS-4-encoded character specified by \a ucs4
+ is the low part of a utf16 surrogate
+ (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
*/
/*!
- \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
+ \fn static bool QChar::requiresSurrogates(uint ucs4)
+ \since 4.7
- Converts a UTF16 surrogate pair with the given \a high and \a low values
- to its UCS-4 code point.
+ Returns true if the UCS-4-encoded character specified by \a ucs4
+ can be split into the high and low parts of a utf16 surrogate
+ (ie. if its code point is greater than or equal to 0x10000).
+*/
+
+/*!
+ \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
+
+ Converts a UTF16 surrogate pair with the given \a high and \a low values
+ to its UCS-4 code point.
*/
/*!
- \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
+ \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
- Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
- point.
+ Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code point.
*/
/*!
- \fn static ushort QChar::highSurrogate(uint ucs4)
+ \fn static ushort QChar::highSurrogate(uint ucs4)
- Returns the high surrogate value of a ucs4 code point.
- The returned result is undefined if \a ucs4 is smaller than 0x10000.
+ Returns the high surrogate value of a ucs4 code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
*/
/*!
- \fn static ushort QChar::lowSurrogate(uint ucs4)
+ \fn static ushort QChar::lowSurrogate(uint ucs4)
- Returns the low surrogate value of a ucs4 code point.
- The returned result is undefined if \a ucs4 is smaller than 0x10000.
+ Returns the low surrogate value of a ucs4 code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
*/
/*!
@@ -718,7 +743,7 @@
*/
int QChar::digitValue(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return 0;
return qGetProp(ucs4)->digitValue;
}
@@ -731,22 +756,22 @@
return (QChar::Category) qGetProp(ucs)->category;
}
-/*!
+/*!
\overload
\since 4.3
Returns the category of the UCS-4-encoded character specified by \a ucs4.
- */
+*/
QChar::Category QChar::category(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::NoCategory;
return (QChar::Category) qGetProp(ucs4)->category;
}
-/*!
+/*!
\overload
Returns the category of the UCS-2-encoded character specified by \a ucs2.
- */
+*/
QChar::Category QChar::category(ushort ucs2)
{
return (QChar::Category) qGetProp(ucs2)->category;
@@ -761,21 +786,21 @@
return (QChar::Direction) qGetProp(ucs)->direction;
}
-/*!
-\overload
-Returns the direction of the UCS-4-encoded character specified by \a ucs4.
- */
+/*!
+ \overload
+ Returns the direction of the UCS-4-encoded character specified by \a ucs4.
+*/
QChar::Direction QChar::direction(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::DirL;
return (QChar::Direction) qGetProp(ucs4)->direction;
}
-/*!
-\overload
-Returns the direction of the UCS-2-encoded character specified by \a ucs2.
- */
+/*!
+ \overload
+ Returns the direction of the UCS-2-encoded character specified by \a ucs2.
+*/
QChar::Direction QChar::direction(ushort ucs2)
{
return (QChar::Direction) qGetProp(ucs2)->direction;
@@ -790,25 +815,25 @@
return (QChar::Joining) qGetProp(ucs)->joining;
}
-/*!
-\overload
-Returns information about the joining properties of the UCS-4-encoded
-character specified by \a ucs4 (needed for certain languages such as
-Arabic).
- */
+/*!
+ \overload
+ Returns information about the joining properties of the UCS-4-encoded
+ character specified by \a ucs4 (needed for certain languages such as
+ Arabic).
+*/
QChar::Joining QChar::joining(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::OtherJoining;
return (QChar::Joining) qGetProp(ucs4)->joining;
}
-/*!
-\overload
-Returns information about the joining properties of the UCS-2-encoded
-character specified by \a ucs2 (needed for certain languages such as
-Arabic).
- */
+/*!
+ \overload
+ Returns information about the joining properties of the UCS-2-encoded
+ character specified by \a ucs2 (needed for certain languages such as
+ Arabic).
+*/
QChar::Joining QChar::joining(ushort ucs2)
{
return (QChar::Joining) qGetProp(ucs2)->joining;
@@ -867,26 +892,27 @@
return ucs + qGetProp(ucs)->mirrorDiff;
}
-/*! \overload
-Returns the mirrored character if the UCS-4-encoded character specified
-by \a ucs4 is a mirrored character; otherwise returns the character itself.
+/*!
+ \overload
+ Returns the mirrored character if the UCS-4-encoded character specified
+ by \a ucs4 is a mirrored character; otherwise returns the character itself.
-\sa hasMirrored()
- */
+ \sa hasMirrored()
+*/
uint QChar::mirroredChar(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return ucs4;
return ucs4 + qGetProp(ucs4)->mirrorDiff;
}
-/*!
-\overload
-Returns the mirrored character if the UCS-2-encoded character specified
-by \a ucs2 is a mirrored character; otherwise returns the character itself.
+/*!
+ \overload
+ Returns the mirrored character if the UCS-2-encoded character specified
+ by \a ucs2 is a mirrored character; otherwise returns the character itself.
-\sa hasMirrored()
- */
+ \sa hasMirrored()
+*/
ushort QChar::mirroredChar(ushort ucs2)
{
return ucs2 + qGetProp(ucs2)->mirrorDiff;
@@ -910,7 +936,7 @@
(uint ucs4, int *length, int *tag, unsigned short *buffer)
{
*length = 0;
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return 0;
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
int SIndex = ucs4 - Hangul_SBase;
@@ -940,11 +966,11 @@
return decomposition(ucs);
}
-/*!
-\overload
-Decomposes the UCS-4-encoded character specified by \a ucs4 into its
-constituent parts. Returns an empty string if no decomposition exists.
- */
+/*!
+ \overload
+ Decomposes the UCS-4-encoded character specified by \a ucs4 into its
+ constituent parts. Returns an empty string if no decomposition exists.
+*/
QString QChar::decomposition(uint ucs4)
{
unsigned short buffer[3];
@@ -963,14 +989,14 @@
return decompositionTag(ucs);
}
-/*!
-\overload
-Returns the tag defining the composition of the UCS-4-encoded character
-specified by \a ucs4. Returns QChar::Single if no decomposition exists.
- */
+/*!
+ \overload
+ Returns the tag defining the composition of the UCS-4-encoded character
+ specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
+*/
QChar::Decomposition QChar::decompositionTag(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::NoDecomposition;
const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
if (index == 0xffff)
@@ -991,27 +1017,28 @@
return (unsigned char) qGetProp(ucs)->combiningClass;
}
-/*! \overload
-Returns the combining class for the UCS-4-encoded character specified by
-\a ucs4, as defined in the Unicode standard.
- */
+/*!
+ \overload
+ Returns the combining class for the UCS-4-encoded character specified by
+ \a ucs4, as defined in the Unicode standard.
+*/
unsigned char QChar::combiningClass(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return 0;
return (unsigned char) qGetProp(ucs4)->combiningClass;
}
-/*! \overload
-Returns the combining class for the UCS-2-encoded character specified by
-\a ucs2, as defined in the Unicode standard.
- */
+/*!
+ \overload
+ Returns the combining class for the UCS-2-encoded character specified by
+ \a ucs2, as defined in the Unicode standard.
+*/
unsigned char QChar::combiningClass(ushort ucs2)
{
return (unsigned char) qGetProp(ucs2)->combiningClass;
}
-
/*!
Returns the Unicode version that introduced this character.
*/
@@ -1020,21 +1047,23 @@
return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
}
-/*! \overload
-Returns the Unicode version that introduced the character specified in
-its UCS-4-encoded form as \a ucs4.
- */
+/*!
+ \overload
+ Returns the Unicode version that introduced the character specified in
+ its UCS-4-encoded form as \a ucs4.
+*/
QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return QChar::Unicode_Unassigned;
return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
}
-/*! \overload
-Returns the Unicode version that introduced the character specified in
-its UCS-2-encoded form as \a ucs2.
- */
+/*!
+ \overload
+ Returns the Unicode version that introduced the character specified in
+ its UCS-2-encoded form as \a ucs2.
+*/
QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
{
return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
@@ -1053,14 +1082,15 @@
return ucs;
}
-/*! \overload
-Returns the lowercase equivalent of the UCS-4-encoded character specified
-by \a ucs4 if the character is uppercase or titlecase; otherwise returns
-the character itself.
- */
+/*!
+ \overload
+ Returns the lowercase equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is uppercase or titlecase; otherwise returns
+ the character itself.
+*/
uint QChar::toLower(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return ucs4;
const QUnicodeTables::Properties *p = qGetProp(ucs4);
if (!p->lowerCaseSpecial)
@@ -1068,11 +1098,12 @@
return ucs4;
}
-/*! \overload
-Returns the lowercase equivalent of the UCS-2-encoded character specified
-by \a ucs2 if the character is uppercase or titlecase; otherwise returns
-the character itself.
- */
+/*!
+ \overload
+ Returns the lowercase equivalent of the UCS-2-encoded character specified
+ by \a ucs2 if the character is uppercase or titlecase; otherwise returns
+ the character itself.
+*/
ushort QChar::toLower(ushort ucs2)
{
const QUnicodeTables::Properties *p = qGetProp(ucs2);
@@ -1093,14 +1124,15 @@
return ucs;
}
-/*! \overload
-Returns the uppercase equivalent of the UCS-4-encoded character specified
-by \a ucs4 if the character is lowercase or titlecase; otherwise returns
-the character itself.
- */
+/*!
+ \overload
+ Returns the uppercase equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is lowercase or titlecase; otherwise returns
+ the character itself.
+*/
uint QChar::toUpper(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return ucs4;
const QUnicodeTables::Properties *p = qGetProp(ucs4);
if (!p->upperCaseSpecial)
@@ -1108,11 +1140,12 @@
return ucs4;
}
-/*! \overload
-Returns the uppercase equivalent of the UCS-2-encoded character specified
-by \a ucs2 if the character is lowercase or titlecase; otherwise returns
-the character itself.
- */
+/*!
+ \overload
+ Returns the uppercase equivalent of the UCS-2-encoded character specified
+ by \a ucs2 if the character is lowercase or titlecase; otherwise returns
+ the character itself.
+*/
ushort QChar::toUpper(ushort ucs2)
{
const QUnicodeTables::Properties *p = qGetProp(ucs2);
@@ -1141,7 +1174,7 @@
*/
uint QChar::toTitleCase(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return ucs4;
const QUnicodeTables::Properties *p = qGetProp(ucs4);
if (!p->titleCaseSpecial)
@@ -1202,7 +1235,7 @@
*/
uint QChar::toCaseFolded(uint ucs4)
{
- if (ucs4 > LAST_UNICODE_CHAR)
+ if (ucs4 > UNICODE_LAST_CODEPOINT)
return ucs4;
return ucs4 + qGetProp(ucs4)->caseFoldDiff;
}
@@ -1296,28 +1329,25 @@
#ifndef QT_NO_DATASTREAM
/*!
- \relates QChar
-
- Writes the char \a chr to the stream \a out.
+ \relates QChar
- \sa {Format of the QDataStream operators}
- */
+ Writes the char \a chr to the stream \a out.
+ \sa {Serializing Qt Data Types}
+*/
QDataStream &operator<<(QDataStream &out, const QChar &chr)
{
out << quint16(chr.unicode());
return out;
}
-
/*!
- \relates QChar
+ \relates QChar
- Reads a char from the stream \a in into char \a chr.
+ Reads a char from the stream \a in into char \a chr.
- \sa {Format of the QDataStream operators}
- */
-
+ \sa {Serializing Qt Data Types}
+*/
QDataStream &operator>>(QDataStream &in, QChar &chr)
{
quint16 u;
@@ -1450,9 +1480,9 @@
if (!d || (canonical && tag != QChar::Canonical))
continue;
- s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
+ int pos = uc - utf16;
+ s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
// since the insert invalidates the pointers and we do decomposition recursive
- int pos = uc - utf16;
utf16 = reinterpret_cast<unsigned short *>(s.data());
uc = utf16 + pos + length;
}
@@ -1537,46 +1567,52 @@
int p2 = pos+1;
uint u1 = s.at(pos).unicode();
if (QChar(u1).isHighSurrogate()) {
- ushort low = s.at(pos+1).unicode();
+ ushort low = s.at(p2).unicode();
if (QChar(low).isLowSurrogate()) {
- p2++;
u1 = QChar::surrogateToUcs4(u1, low);
if (p2 >= l)
break;
+ ++p2;
}
}
uint u2 = s.at(p2).unicode();
- if (QChar(u2).isHighSurrogate() && p2 < l-1) {
+ if (QChar(u2).isHighSurrogate() && p2 < l) {
ushort low = s.at(p2+1).unicode();
if (QChar(low).isLowSurrogate()) {
- p2++;
u2 = QChar::surrogateToUcs4(u2, low);
+ ++p2;
}
}
- int c2 = QChar::combiningClass(u2);
- if (QChar::unicodeVersion(u2) > version)
- c2 = 0;
-
+ ushort c2 = 0;
+ {
+ const QUnicodeTables::Properties *p = qGetProp(u2);
+ if ((QChar::UnicodeVersion)p->unicodeVersion <= version)
+ c2 = p->combiningClass;
+ }
if (c2 == 0) {
pos = p2+1;
continue;
}
- int c1 = QChar::combiningClass(u1);
- if (QChar::unicodeVersion(u1) > version)
- c1 = 0;
+
+ ushort c1 = 0;
+ {
+ const QUnicodeTables::Properties *p = qGetProp(u1);
+ if ((QChar::UnicodeVersion)p->unicodeVersion <= version)
+ c1 = p->combiningClass;
+ }
if (c1 > c2) {
QChar *uc = s.data();
int p = pos;
// exchange characters
- if (u2 < 0x10000) {
+ if (!QChar::requiresSurrogates(u2)) {
uc[p++] = u2;
} else {
uc[p++] = QChar::highSurrogate(u2);
uc[p++] = QChar::lowSurrogate(u2);
}
- if (u1 < 0x10000) {
+ if (!QChar::requiresSurrogates(u1)) {
uc[p++] = u1;
} else {
uc[p++] = QChar::highSurrogate(u1);
@@ -1588,7 +1624,7 @@
--pos;
} else {
++pos;
- if (u1 > 0x10000)
+ if (QChar::requiresSurrogates(u1))
++pos;
}
}
@@ -1606,11 +1642,9 @@
return script;
}
-
Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
{
return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
}
-
QT_END_NAMESPACE