diff -r 000000000000 -r 1918ee327afb src/3rdparty/webkit/JavaScriptCore/runtime/UString.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/3rdparty/webkit/JavaScriptCore/runtime/UString.cpp Mon Jan 11 14:00:40 2010 +0000 @@ -0,0 +1,1772 @@ +/* + * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) + * Copyright (C) 2009 Google Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" +#include "UString.h" + +#include "JSGlobalObjectFunctions.h" +#include "Collector.h" +#include "dtoa.h" +#include "Identifier.h" +#include "Operations.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE(STRING_H) +#include +#endif +#if HAVE(STRINGS_H) +#include +#endif + +using namespace WTF; +using namespace WTF::Unicode; +using namespace std; + +// This can be tuned differently per platform by putting platform #ifs right here. +// If you don't define this macro at all, then copyChars will just call directly +// to memcpy. +#define USTRING_COPY_CHARS_INLINE_CUTOFF 20 + +namespace JSC { + +extern const double NaN; +extern const double Inf; + +// This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. +static const int minLengthToShare = 10; + +static inline size_t overflowIndicator() { return std::numeric_limits::max(); } +static inline size_t maxUChars() { return std::numeric_limits::max() / sizeof(UChar); } + +static inline PossiblyNull allocChars(size_t length) +{ + ASSERT(length); + if (length > maxUChars()) + return 0; + return tryFastMalloc(sizeof(UChar) * length); +} + +static inline PossiblyNull reallocChars(UChar* buffer, size_t length) +{ + ASSERT(length); + if (length > maxUChars()) + return 0; + return tryFastRealloc(buffer, sizeof(UChar) * length); +} + +static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) +{ +#ifdef USTRING_COPY_CHARS_INLINE_CUTOFF + if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) { + for (unsigned i = 0; i < numCharacters; ++i) + destination[i] = source[i]; + return; + } +#endif + memcpy(destination, source, numCharacters * sizeof(UChar)); +} + +COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes); + +CString::CString(const char* c) + : m_length(strlen(c)) + , m_data(new char[m_length + 1]) +{ + memcpy(m_data, c, m_length + 1); +} + +CString::CString(const char* c, size_t length) + : m_length(length) + , m_data(new char[length + 1]) +{ + memcpy(m_data, c, m_length); + m_data[m_length] = 0; +} + +CString::CString(const CString& b) +{ + m_length = b.m_length; + if (b.m_data) { + m_data = new char[m_length + 1]; + memcpy(m_data, b.m_data, m_length + 1); + } else + m_data = 0; +} + +CString::~CString() +{ + delete [] m_data; +} + +CString CString::adopt(char* c, size_t length) +{ + CString s; + s.m_data = c; + s.m_length = length; + return s; +} + +CString& CString::append(const CString& t) +{ + char* n; + n = new char[m_length + t.m_length + 1]; + if (m_length) + memcpy(n, m_data, m_length); + if (t.m_length) + memcpy(n + m_length, t.m_data, t.m_length); + m_length += t.m_length; + n[m_length] = 0; + + delete [] m_data; + m_data = n; + + return *this; +} + +CString& CString::operator=(const char* c) +{ + if (m_data) + delete [] m_data; + m_length = strlen(c); + m_data = new char[m_length + 1]; + memcpy(m_data, c, m_length + 1); + + return *this; +} + +CString& CString::operator=(const CString& str) +{ + if (this == &str) + return *this; + + if (m_data) + delete [] m_data; + m_length = str.m_length; + if (str.m_data) { + m_data = new char[m_length + 1]; + memcpy(m_data, str.m_data, m_length + 1); + } else + m_data = 0; + + return *this; +} + +bool operator==(const CString& c1, const CString& c2) +{ + size_t len = c1.size(); + return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0); +} + +// These static strings are immutable, except for rc, whose initial value is chosen to +// reduce the possibility of it becoming zero due to ref/deref not being thread-safe. +static UChar sharedEmptyChar; +UString::BaseString* UString::Rep::nullBaseString; +UString::BaseString* UString::Rep::emptyBaseString; +UString* UString::nullUString; + +static void initializeStaticBaseString(UString::BaseString& base) +{ + base.rc = INT_MAX / 2; + base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag); + base.checkConsistency(); +} + +void initializeUString() +{ + UString::Rep::nullBaseString = new UString::BaseString(0, 0); + initializeStaticBaseString(*UString::Rep::nullBaseString); + + UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0); + initializeStaticBaseString(*UString::Rep::emptyBaseString); + + UString::nullUString = new UString; +} + +static char* statBuffer = 0; // Only used for debugging via UString::ascii(). + +PassRefPtr UString::Rep::createCopying(const UChar* d, int l) +{ + UChar* copyD = static_cast(fastMalloc(l * sizeof(UChar))); + copyChars(copyD, d, l); + return create(copyD, l); +} + +PassRefPtr UString::Rep::createFromUTF8(const char* string) +{ + if (!string) + return &UString::Rep::null(); + + size_t length = strlen(string); + Vector buffer(length); + UChar* p = buffer.data(); + if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length)) + return &UString::Rep::null(); + + return UString::Rep::createCopying(buffer.data(), p - buffer.data()); +} + +PassRefPtr UString::Rep::create(UChar* string, int length, PassRefPtr sharedBuffer) +{ + PassRefPtr rep = create(string, length); + rep->baseString()->setSharedBuffer(sharedBuffer); + rep->checkConsistency(); + return rep; +} + +UString::SharedUChar* UString::Rep::sharedBuffer() +{ + UString::BaseString* base = baseString(); + if (len < minLengthToShare) + return 0; + + return base->sharedBuffer(); +} + +void UString::Rep::destroy() +{ + checkConsistency(); + + // Static null and empty strings can never be destroyed, but we cannot rely on + // reference counting, because ref/deref are not thread-safe. + if (!isStatic()) { + if (identifierTable()) + Identifier::remove(this); + + UString::BaseString* base = baseString(); + if (base == this) { + if (m_sharedBuffer) + m_sharedBuffer->deref(); + else + fastFree(base->buf); + } else + base->deref(); + + delete this; + } +} + +// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's +// or anything like that. +const unsigned PHI = 0x9e3779b9U; + +// Paul Hsieh's SuperFastHash +// http://www.azillionmonkeys.com/qed/hash.html +unsigned UString::Rep::computeHash(const UChar* s, int len) +{ + unsigned l = len; + uint32_t hash = PHI; + uint32_t tmp; + + int rem = l & 1; + l >>= 1; + + // Main loop + for (; l > 0; l--) { + hash += s[0]; + tmp = (s[1] << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2; + hash += hash >> 11; + } + + // Handle end case + if (rem) { + hash += s[0]; + hash ^= hash << 11; + hash += hash >> 17; + } + + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + // this avoids ever returning a hash code of 0, since that is used to + // signal "hash not computed yet", using a value that is likely to be + // effectively the same as 0 when the low bits are masked + if (hash == 0) + hash = 0x80000000; + + return hash; +} + +// Paul Hsieh's SuperFastHash +// http://www.azillionmonkeys.com/qed/hash.html +unsigned UString::Rep::computeHash(const char* s, int l) +{ + // This hash is designed to work on 16-bit chunks at a time. But since the normal case + // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they + // were 16-bit chunks, which should give matching results + + uint32_t hash = PHI; + uint32_t tmp; + + size_t rem = l & 1; + l >>= 1; + + // Main loop + for (; l > 0; l--) { + hash += static_cast(s[0]); + tmp = (static_cast(s[1]) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2; + hash += hash >> 11; + } + + // Handle end case + if (rem) { + hash += static_cast(s[0]); + hash ^= hash << 11; + hash += hash >> 17; + } + + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + // this avoids ever returning a hash code of 0, since that is used to + // signal "hash not computed yet", using a value that is likely to be + // effectively the same as 0 when the low bits are masked + if (hash == 0) + hash = 0x80000000; + + return hash; +} + +#ifndef NDEBUG +void UString::Rep::checkConsistency() const +{ + const UString::BaseString* base = baseString(); + + // There is no recursion for base strings. + ASSERT(base == base->baseString()); + + if (isStatic()) { + // There are only two static strings: null and empty. + ASSERT(!len); + + // Static strings cannot get in identifier tables, because they are globally shared. + ASSERT(!identifierTable()); + } + + // The string fits in buffer. + ASSERT(base->usedPreCapacity <= base->preCapacity); + ASSERT(base->usedCapacity <= base->capacity); + ASSERT(-offset <= base->usedPreCapacity); + ASSERT(offset + len <= base->usedCapacity); +} +#endif + +UString::SharedUChar* UString::BaseString::sharedBuffer() +{ + if (!m_sharedBuffer) + setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr(buf))); + return m_sharedBuffer; +} + +void UString::BaseString::setSharedBuffer(PassRefPtr sharedBuffer) +{ + // The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer + // is in a union with another variable to avoid making BaseString any larger. + if (m_sharedBuffer) + m_sharedBuffer->deref(); + m_sharedBuffer = sharedBuffer.releaseRef(); +} + +bool UString::BaseString::slowIsBufferReadOnly() +{ + // The buffer may not be modified as soon as the underlying data has been shared with another class. + if (m_sharedBuffer->isShared()) + return true; + + // At this point, we know it that the underlying buffer isn't shared outside of this base class, + // so get rid of m_sharedBuffer. + OwnPtr > mallocPtr(m_sharedBuffer->release()); + UChar* unsharedBuf = const_cast(mallocPtr->release()); + setSharedBuffer(0); + preCapacity += (buf - unsharedBuf); + buf = unsharedBuf; + return false; +} + +// Put these early so they can be inlined. +static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize) +{ + // Combine capacitySize & precapacitySize to produce a single size to allocate, + // check that doing so does not result in overflow. + size_t size = capacitySize + precapacitySize; + if (size < capacitySize) + return overflowIndicator(); + + // Small Strings (up to 4 pages): + // Expand the allocation size to 112.5% of the amount requested. This is largely sicking + // to our previous policy, however 112.5% is cheaper to calculate. + if (size < 0x4000) { + size_t expandedSize = ((size + (size >> 3)) | 15) + 1; + // Given the limited range within which we calculate the expansion in this + // fashion the above calculation should never overflow. + ASSERT(expandedSize >= size); + ASSERT(expandedSize < maxUChars()); + return expandedSize; + } + + // Medium Strings (up to 128 pages): + // For pages covering multiple pages over-allocation is less of a concern - any unused + // space will not be paged in if it is not used, so this is purely a VM overhead. For + // these strings allocate 2x the requested size. + if (size < 0x80000) { + size_t expandedSize = ((size + size) | 0xfff) + 1; + // Given the limited range within which we calculate the expansion in this + // fashion the above calculation should never overflow. + ASSERT(expandedSize >= size); + ASSERT(expandedSize < maxUChars()); + return expandedSize; + } + + // Large Strings (to infinity and beyond!): + // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow + // any individual string be responsible for. + size_t expandedSize = ((size + (size >> 3)) | 0xfff) + 1; + + // Check for overflow - any result that is at least as large as requested (but + // still below the limit) is okay. + if ((expandedSize >= size) && (expandedSize < maxUChars())) + return expandedSize; + return overflowIndicator(); +} + +static inline bool expandCapacity(UString::Rep* rep, int requiredLength) +{ + rep->checkConsistency(); + ASSERT(!rep->baseString()->isBufferReadOnly()); + + UString::BaseString* base = rep->baseString(); + + if (requiredLength > base->capacity) { + size_t newCapacity = expandedSize(requiredLength, base->preCapacity); + UChar* oldBuf = base->buf; + if (!reallocChars(base->buf, newCapacity).getValue(base->buf)) { + base->buf = oldBuf; + return false; + } + base->capacity = newCapacity - base->preCapacity; + } + if (requiredLength > base->usedCapacity) + base->usedCapacity = requiredLength; + + rep->checkConsistency(); + return true; +} + +bool UString::Rep::reserveCapacity(int capacity) +{ + // If this is an empty string there is no point 'growing' it - just allocate a new one. + // If the BaseString is shared with another string that is using more capacity than this + // string is, then growing the buffer won't help. + // If the BaseString's buffer is readonly, then it isn't allowed to grow. + UString::BaseString* base = baseString(); + if (!base->buf || !base->capacity || (offset + len) != base->usedCapacity || base->isBufferReadOnly()) + return false; + + // If there is already sufficient capacity, no need to grow! + if (capacity <= base->capacity) + return true; + + checkConsistency(); + + size_t newCapacity = expandedSize(capacity, base->preCapacity); + UChar* oldBuf = base->buf; + if (!reallocChars(base->buf, newCapacity).getValue(base->buf)) { + base->buf = oldBuf; + return false; + } + base->capacity = newCapacity - base->preCapacity; + + checkConsistency(); + return true; +} + +void UString::expandCapacity(int requiredLength) +{ + if (!JSC::expandCapacity(m_rep.get(), requiredLength)) + makeNull(); +} + +void UString::expandPreCapacity(int requiredPreCap) +{ + m_rep->checkConsistency(); + ASSERT(!m_rep->baseString()->isBufferReadOnly()); + + BaseString* base = m_rep->baseString(); + + if (requiredPreCap > base->preCapacity) { + size_t newCapacity = expandedSize(requiredPreCap, base->capacity); + int delta = newCapacity - base->capacity - base->preCapacity; + + UChar* newBuf; + if (!allocChars(newCapacity).getValue(newBuf)) { + makeNull(); + return; + } + copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity); + fastFree(base->buf); + base->buf = newBuf; + + base->preCapacity = newCapacity - base->capacity; + } + if (requiredPreCap > base->usedPreCapacity) + base->usedPreCapacity = requiredPreCap; + + m_rep->checkConsistency(); +} + +static PassRefPtr createRep(const char* c) +{ + if (!c) + return &UString::Rep::null(); + + if (!c[0]) + return &UString::Rep::empty(); + + size_t length = strlen(c); + UChar* d; + if (!allocChars(length).getValue(d)) + return &UString::Rep::null(); + else { + for (size_t i = 0; i < length; i++) + d[i] = static_cast(c[i]); // use unsigned char to zero-extend instead of sign-extend + return UString::Rep::create(d, static_cast(length)); + } + +} + +UString::UString(const char* c) + : m_rep(createRep(c)) +{ +} + +UString::UString(const UChar* c, int length) +{ + if (length == 0) + m_rep = &Rep::empty(); + else + m_rep = Rep::createCopying(c, length); +} + +UString::UString(UChar* c, int length, bool copy) +{ + if (length == 0) + m_rep = &Rep::empty(); + else if (copy) + m_rep = Rep::createCopying(c, length); + else + m_rep = Rep::create(c, length); +} + +UString::UString(const Vector& buffer) +{ + if (!buffer.size()) + m_rep = &Rep::empty(); + else + m_rep = Rep::createCopying(buffer.data(), buffer.size()); +} + +static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false) +{ + ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength); + + const int plusLength = plusOne ? 1 : 0; + if (currentCapacity > std::numeric_limits::max() - extendLength - plusLength) + CRASH(); + + return currentCapacity + extendLength + plusLength; +} + +static ALWAYS_INLINE PassRefPtr concatenate(PassRefPtr r, const UChar* tData, int tSize) +{ + RefPtr rep = r; + + rep->checkConsistency(); + + int thisSize = rep->size(); + int thisOffset = rep->offset; + int length = thisSize + tSize; + UString::BaseString* base = rep->baseString(); + + // possible cases: + if (tSize == 0) { + // t is empty + } else if (thisSize == 0) { + // this is empty + rep = UString::Rep::createCopying(tData, tSize); + } else if (rep == base && !base->isShared()) { + // this is direct and has refcount of 1 (so we can just alter it directly) + if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) + rep = &UString::Rep::null(); + if (rep->data()) { + copyChars(rep->data() + thisSize, tData, tSize); + rep->len = length; + rep->_hash = 0; + } + } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { + // this reaches the end of the buffer - extend it if it's long enough to append to + if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) + rep = &UString::Rep::null(); + if (rep->data()) { + copyChars(rep->data() + thisSize, tData, tSize); + rep = UString::Rep::create(rep, 0, length); + } + } else { + // This is shared in some way that prevents us from modifying base, so we must make a whole new string. + size_t newCapacity = expandedSize(length, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + rep = &UString::Rep::null(); + else { + copyChars(d, rep->data(), thisSize); + copyChars(d + thisSize, tData, tSize); + rep = UString::Rep::create(d, length); + rep->baseString()->capacity = newCapacity; + } + } + + rep->checkConsistency(); + + return rep.release(); +} + +static ALWAYS_INLINE PassRefPtr concatenate(PassRefPtr r, const char* t) +{ + RefPtr rep = r; + + rep->checkConsistency(); + + int thisSize = rep->size(); + int thisOffset = rep->offset; + int tSize = static_cast(strlen(t)); + int length = thisSize + tSize; + UString::BaseString* base = rep->baseString(); + + // possible cases: + if (thisSize == 0) { + // this is empty + rep = createRep(t); + } else if (tSize == 0) { + // t is empty, we'll just return *this below. + } else if (rep == base && !base->isShared()) { + // this is direct and has refcount of 1 (so we can just alter it directly) + expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); + UChar* d = rep->data(); + if (d) { + for (int i = 0; i < tSize; ++i) + d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend + rep->len = length; + rep->_hash = 0; + } + } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { + // this string reaches the end of the buffer - extend it + expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); + UChar* d = rep->data(); + if (d) { + for (int i = 0; i < tSize; ++i) + d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend + rep = UString::Rep::create(rep, 0, length); + } + } else { + // This is shared in some way that prevents us from modifying base, so we must make a whole new string. + size_t newCapacity = expandedSize(length, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + rep = &UString::Rep::null(); + else { + copyChars(d, rep->data(), thisSize); + for (int i = 0; i < tSize; ++i) + d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend + rep = UString::Rep::create(d, length); + rep->baseString()->capacity = newCapacity; + } + } + + rep->checkConsistency(); + + return rep.release(); +} + +PassRefPtr concatenate(UString::Rep* a, UString::Rep* b) +{ + a->checkConsistency(); + b->checkConsistency(); + + int aSize = a->size(); + int bSize = b->size(); + int aOffset = a->offset; + + // possible cases: + + UString::BaseString* aBase = a->baseString(); + if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity && !aBase->isBufferReadOnly()) { + // b is a single character (common fast case) + ++aBase->usedCapacity; + a->data()[aSize] = b->data()[0]; + return UString::Rep::create(a, 0, aSize + 1); + } + + // a is empty + if (aSize == 0) + return b; + // b is empty + if (bSize == 0) + return a; + + int bOffset = b->offset; + int length = aSize + bSize; + + UString::BaseString* bBase = b->baseString(); + if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize + && (-bOffset != bBase->usedPreCapacity || aSize >= bSize) && !aBase->isBufferReadOnly()) { + // - a reaches the end of its buffer so it qualifies for shared append + // - also, it's at least a quarter the length of b - appending to a much shorter + // string does more harm than good + // - however, if b qualifies for prepend and is longer than a, we'd rather prepend + + UString x(a); + x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length)); + if (!a->data() || !x.data()) + return 0; + copyChars(a->data() + aSize, b->data(), bSize); + PassRefPtr result = UString::Rep::create(a, 0, length); + + a->checkConsistency(); + b->checkConsistency(); + result->checkConsistency(); + + return result; + } + + if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize && !bBase->isBufferReadOnly()) { + // - b reaches the beginning of its buffer so it qualifies for shared prepend + // - also, it's at least a quarter the length of a - prepending to a much shorter + // string does more harm than good + UString y(b); + y.expandPreCapacity(-bOffset + aSize); + if (!b->data() || !y.data()) + return 0; + copyChars(b->data() - aSize, a->data(), aSize); + PassRefPtr result = UString::Rep::create(b, -aSize, length); + + a->checkConsistency(); + b->checkConsistency(); + result->checkConsistency(); + + return result; + } + + // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string + size_t newCapacity = expandedSize(length, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + return 0; + copyChars(d, a->data(), aSize); + copyChars(d + aSize, b->data(), bSize); + PassRefPtr result = UString::Rep::create(d, length); + result->baseString()->capacity = newCapacity; + + a->checkConsistency(); + b->checkConsistency(); + result->checkConsistency(); + + return result; +} + +PassRefPtr concatenate(UString::Rep* rep, int i) +{ + UChar buf[1 + sizeof(i) * 3]; + UChar* end = buf + sizeof(buf) / sizeof(UChar); + UChar* p = end; + + if (i == 0) + *--p = '0'; + else if (i == INT_MIN) { + char minBuf[1 + sizeof(i) * 3]; + sprintf(minBuf, "%d", INT_MIN); + return concatenate(rep, minBuf); + } else { + bool negative = false; + if (i < 0) { + negative = true; + i = -i; + } + while (i) { + *--p = static_cast((i % 10) + '0'); + i /= 10; + } + if (negative) + *--p = '-'; + } + + return concatenate(rep, p, static_cast(end - p)); + +} + +PassRefPtr concatenate(UString::Rep* rep, double d) +{ + // avoid ever printing -NaN, in JS conceptually there is only one NaN value + if (isnan(d)) + return concatenate(rep, "NaN"); + + if (d == 0.0) // stringify -0 as 0 + d = 0.0; + + char buf[80]; + int decimalPoint; + int sign; + + char result[80]; + WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL); + int length = static_cast(strlen(result)); + + int i = 0; + if (sign) + buf[i++] = '-'; + + if (decimalPoint <= 0 && decimalPoint > -6) { + buf[i++] = '0'; + buf[i++] = '.'; + for (int j = decimalPoint; j < 0; j++) + buf[i++] = '0'; + strcpy(buf + i, result); + } else if (decimalPoint <= 21 && decimalPoint > 0) { + if (length <= decimalPoint) { + strcpy(buf + i, result); + i += length; + for (int j = 0; j < decimalPoint - length; j++) + buf[i++] = '0'; + buf[i] = '\0'; + } else { + strncpy(buf + i, result, decimalPoint); + i += decimalPoint; + buf[i++] = '.'; + strcpy(buf + i, result + decimalPoint); + } + } else if (result[0] < '0' || result[0] > '9') + strcpy(buf + i, result); + else { + buf[i++] = result[0]; + if (length > 1) { + buf[i++] = '.'; + strcpy(buf + i, result + 1); + i += length - 1; + } + + buf[i++] = 'e'; + buf[i++] = (decimalPoint >= 0) ? '+' : '-'; + // decimalPoint can't be more than 3 digits decimal given the + // nature of float representation + int exponential = decimalPoint - 1; + if (exponential < 0) + exponential = -exponential; + if (exponential >= 100) + buf[i++] = static_cast('0' + exponential / 100); + if (exponential >= 10) + buf[i++] = static_cast('0' + (exponential % 100) / 10); + buf[i++] = static_cast('0' + exponential % 10); + buf[i++] = '\0'; + } + + return concatenate(rep, buf); +} + +UString UString::from(int i) +{ + UChar buf[1 + sizeof(i) * 3]; + UChar* end = buf + sizeof(buf) / sizeof(UChar); + UChar* p = end; + + if (i == 0) + *--p = '0'; + else if (i == INT_MIN) { + char minBuf[1 + sizeof(i) * 3]; + sprintf(minBuf, "%d", INT_MIN); + return UString(minBuf); + } else { + bool negative = false; + if (i < 0) { + negative = true; + i = -i; + } + while (i) { + *--p = static_cast((i % 10) + '0'); + i /= 10; + } + if (negative) + *--p = '-'; + } + + return UString(p, static_cast(end - p)); +} + +UString UString::from(long long i) +{ + UChar buf[1 + sizeof(i) * 3]; + UChar* end = buf + sizeof(buf) / sizeof(UChar); + UChar* p = end; + + if (i == 0) + *--p = '0'; + else if (i == std::numeric_limits::min()) { + char minBuf[1 + sizeof(i) * 3]; +#if PLATFORM(WIN_OS) + snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits::min()); +#else + snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits::min()); +#endif + return UString(minBuf); + } else { + bool negative = false; + if (i < 0) { + negative = true; + i = -i; + } + while (i) { + *--p = static_cast((i % 10) + '0'); + i /= 10; + } + if (negative) + *--p = '-'; + } + + return UString(p, static_cast(end - p)); +} + +UString UString::from(unsigned int u) +{ + UChar buf[sizeof(u) * 3]; + UChar* end = buf + sizeof(buf) / sizeof(UChar); + UChar* p = end; + + if (u == 0) + *--p = '0'; + else { + while (u) { + *--p = static_cast((u % 10) + '0'); + u /= 10; + } + } + + return UString(p, static_cast(end - p)); +} + +UString UString::from(long l) +{ + UChar buf[1 + sizeof(l) * 3]; + UChar* end = buf + sizeof(buf) / sizeof(UChar); + UChar* p = end; + + if (l == 0) + *--p = '0'; + else if (l == LONG_MIN) { + char minBuf[1 + sizeof(l) * 3]; + sprintf(minBuf, "%ld", LONG_MIN); + return UString(minBuf); + } else { + bool negative = false; + if (l < 0) { + negative = true; + l = -l; + } + while (l) { + *--p = static_cast((l % 10) + '0'); + l /= 10; + } + if (negative) + *--p = '-'; + } + + return UString(p, static_cast(end - p)); +} + +UString UString::from(double d) +{ + // avoid ever printing -NaN, in JS conceptually there is only one NaN value + if (isnan(d)) + return "NaN"; + if (!d) + return "0"; // -0 -> "0" + + char buf[80]; + int decimalPoint; + int sign; + + char result[80]; + WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL); + int length = static_cast(strlen(result)); + + int i = 0; + if (sign) + buf[i++] = '-'; + + if (decimalPoint <= 0 && decimalPoint > -6) { + buf[i++] = '0'; + buf[i++] = '.'; + for (int j = decimalPoint; j < 0; j++) + buf[i++] = '0'; + strcpy(buf + i, result); + } else if (decimalPoint <= 21 && decimalPoint > 0) { + if (length <= decimalPoint) { + strcpy(buf + i, result); + i += length; + for (int j = 0; j < decimalPoint - length; j++) + buf[i++] = '0'; + buf[i] = '\0'; + } else { + strncpy(buf + i, result, decimalPoint); + i += decimalPoint; + buf[i++] = '.'; + strcpy(buf + i, result + decimalPoint); + } + } else if (result[0] < '0' || result[0] > '9') + strcpy(buf + i, result); + else { + buf[i++] = result[0]; + if (length > 1) { + buf[i++] = '.'; + strcpy(buf + i, result + 1); + i += length - 1; + } + + buf[i++] = 'e'; + buf[i++] = (decimalPoint >= 0) ? '+' : '-'; + // decimalPoint can't be more than 3 digits decimal given the + // nature of float representation + int exponential = decimalPoint - 1; + if (exponential < 0) + exponential = -exponential; + if (exponential >= 100) + buf[i++] = static_cast('0' + exponential / 100); + if (exponential >= 10) + buf[i++] = static_cast('0' + (exponential % 100) / 10); + buf[i++] = static_cast('0' + exponential % 10); + buf[i++] = '\0'; + } + + return UString(buf); +} + +UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const +{ + m_rep->checkConsistency(); + + if (rangeCount == 1 && separatorCount == 0) { + int thisSize = size(); + int position = substringRanges[0].position; + int length = substringRanges[0].length; + if (position <= 0 && length >= thisSize) + return *this; + return UString::Rep::create(m_rep, max(0, position), min(thisSize, length)); + } + + int totalLength = 0; + for (int i = 0; i < rangeCount; i++) + totalLength += substringRanges[i].length; + for (int i = 0; i < separatorCount; i++) + totalLength += separators[i].size(); + + if (totalLength == 0) + return ""; + + UChar* buffer; + if (!allocChars(totalLength).getValue(buffer)) + return null(); + + int maxCount = max(rangeCount, separatorCount); + int bufferPos = 0; + for (int i = 0; i < maxCount; i++) { + if (i < rangeCount) { + copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length); + bufferPos += substringRanges[i].length; + } + if (i < separatorCount) { + copyChars(buffer + bufferPos, separators[i].data(), separators[i].size()); + bufferPos += separators[i].size(); + } + } + + return UString::Rep::create(buffer, totalLength); +} + +UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const +{ + m_rep->checkConsistency(); + + int replacementLength = replacement.size(); + int totalLength = size() - rangeLength + replacementLength; + if (totalLength == 0) + return ""; + + UChar* buffer; + if (!allocChars(totalLength).getValue(buffer)) + return null(); + + copyChars(buffer, data(), rangeStart); + copyChars(buffer + rangeStart, replacement.data(), replacementLength); + int rangeEnd = rangeStart + rangeLength; + copyChars(buffer + rangeStart + replacementLength, data() + rangeEnd, size() - rangeEnd); + + return UString::Rep::create(buffer, totalLength); +} + + +UString& UString::append(const UString &t) +{ + m_rep->checkConsistency(); + t.rep()->checkConsistency(); + + int thisSize = size(); + int thisOffset = m_rep->offset; + int tSize = t.size(); + int length = thisSize + tSize; + BaseString* base = m_rep->baseString(); + + // possible cases: + if (thisSize == 0) { + // this is empty + *this = t; + } else if (tSize == 0) { + // t is empty + } else if (m_rep == base && !base->isShared()) { + // this is direct and has refcount of 1 (so we can just alter it directly) + expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); + if (data()) { + copyChars(m_rep->data() + thisSize, t.data(), tSize); + m_rep->len = length; + m_rep->_hash = 0; + } + } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { + // this reaches the end of the buffer - extend it if it's long enough to append to + expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); + if (data()) { + copyChars(m_rep->data() + thisSize, t.data(), tSize); + m_rep = Rep::create(m_rep, 0, length); + } + } else { + // This is shared in some way that prevents us from modifying base, so we must make a whole new string. + size_t newCapacity = expandedSize(length, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + makeNull(); + else { + copyChars(d, data(), thisSize); + copyChars(d + thisSize, t.data(), tSize); + m_rep = Rep::create(d, length); + m_rep->baseString()->capacity = newCapacity; + } + } + + m_rep->checkConsistency(); + t.rep()->checkConsistency(); + + return *this; +} + +UString& UString::append(const UChar* tData, int tSize) +{ + m_rep = concatenate(m_rep.release(), tData, tSize); + return *this; +} + +UString& UString::append(const char* t) +{ + m_rep = concatenate(m_rep.release(), t); + return *this; +} + +UString& UString::append(UChar c) +{ + m_rep->checkConsistency(); + + int thisOffset = m_rep->offset; + int length = size(); + BaseString* base = m_rep->baseString(); + + // possible cases: + if (length == 0) { + // this is empty - must make a new m_rep because we don't want to pollute the shared empty one + size_t newCapacity = expandedSize(1, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + makeNull(); + else { + d[0] = c; + m_rep = Rep::create(d, 1); + m_rep->baseString()->capacity = newCapacity; + } + } else if (m_rep == base && !base->isShared()) { + // this is direct and has refcount of 1 (so we can just alter it directly) + expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); + UChar* d = m_rep->data(); + if (d) { + d[length] = c; + m_rep->len = length + 1; + m_rep->_hash = 0; + } + } else if (thisOffset + length == base->usedCapacity && length >= minShareSize && !base->isBufferReadOnly()) { + // this reaches the end of the string - extend it and share + expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); + UChar* d = m_rep->data(); + if (d) { + d[length] = c; + m_rep = Rep::create(m_rep, 0, length + 1); + } + } else { + // This is shared in some way that prevents us from modifying base, so we must make a whole new string. + size_t newCapacity = expandedSize(length + 1, 0); + UChar* d; + if (!allocChars(newCapacity).getValue(d)) + makeNull(); + else { + copyChars(d, data(), length); + d[length] = c; + m_rep = Rep::create(d, length + 1); + m_rep->baseString()->capacity = newCapacity; + } + } + + m_rep->checkConsistency(); + + return *this; +} + +bool UString::getCString(CStringBuffer& buffer) const +{ + int length = size(); + int neededSize = length + 1; + buffer.resize(neededSize); + char* buf = buffer.data(); + + UChar ored = 0; + const UChar* p = data(); + char* q = buf; + const UChar* limit = p + length; + while (p != limit) { + UChar c = p[0]; + ored |= c; + *q = static_cast(c); + ++p; + ++q; + } + *q = '\0'; + + return !(ored & 0xFF00); +} + +char* UString::ascii() const +{ + int length = size(); + int neededSize = length + 1; + delete[] statBuffer; + statBuffer = new char[neededSize]; + + const UChar* p = data(); + char* q = statBuffer; + const UChar* limit = p + length; + while (p != limit) { + *q = static_cast(p[0]); + ++p; + ++q; + } + *q = '\0'; + + return statBuffer; +} + +UString& UString::operator=(const char* c) +{ + if (!c) { + m_rep = &Rep::null(); + return *this; + } + + if (!c[0]) { + m_rep = &Rep::empty(); + return *this; + } + + int l = static_cast(strlen(c)); + UChar* d; + BaseString* base = m_rep->baseString(); + if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) { + d = base->buf; + m_rep->_hash = 0; + m_rep->len = l; + } else { + if (!allocChars(l).getValue(d)) { + makeNull(); + return *this; + } + m_rep = Rep::create(d, l); + } + for (int i = 0; i < l; i++) + d[i] = static_cast(c[i]); // use unsigned char to zero-extend instead of sign-extend + + return *this; +} + +bool UString::is8Bit() const +{ + const UChar* u = data(); + const UChar* limit = u + size(); + while (u < limit) { + if (u[0] > 0xFF) + return false; + ++u; + } + + return true; +} + +UChar UString::operator[](int pos) const +{ + if (pos >= size()) + return '\0'; + return data()[pos]; +} + +double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const +{ + if (size() == 1) { + UChar c = data()[0]; + if (isASCIIDigit(c)) + return c - '0'; + if (isASCIISpace(c) && tolerateEmptyString) + return 0; + return NaN; + } + + // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk + // after the number, so this is too strict a check. + CStringBuffer s; + if (!getCString(s)) + return NaN; + const char* c = s.data(); + + // skip leading white space + while (isASCIISpace(*c)) + c++; + + // empty string ? + if (*c == '\0') + return tolerateEmptyString ? 0.0 : NaN; + + double d; + + // hex number ? + if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { + const char* firstDigitPosition = c + 2; + c++; + d = 0.0; + while (*(++c)) { + if (*c >= '0' && *c <= '9') + d = d * 16.0 + *c - '0'; + else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) + d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0; + else + break; + } + + if (d >= mantissaOverflowLowerBound) + d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16); + } else { + // regular number ? + char* end; + d = WTF::strtod(c, &end); + if ((d != 0.0 || end != c) && d != Inf && d != -Inf) { + c = end; + } else { + double sign = 1.0; + + if (*c == '+') + c++; + else if (*c == '-') { + sign = -1.0; + c++; + } + + // We used strtod() to do the conversion. However, strtod() handles + // infinite values slightly differently than JavaScript in that it + // converts the string "inf" with any capitalization to infinity, + // whereas the ECMA spec requires that it be converted to NaN. + + if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') { + d = sign * Inf; + c += 8; + } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') + c = end; + else + return NaN; + } + } + + // allow trailing white space + while (isASCIISpace(*c)) + c++; + // don't allow anything after - unless tolerant=true + if (!tolerateTrailingJunk && *c != '\0') + d = NaN; + + return d; +} + +double UString::toDouble(bool tolerateTrailingJunk) const +{ + return toDouble(tolerateTrailingJunk, true); +} + +double UString::toDouble() const +{ + return toDouble(false, true); +} + +uint32_t UString::toUInt32(bool* ok) const +{ + double d = toDouble(); + bool b = true; + + if (d != static_cast(d)) { + b = false; + d = 0; + } + + if (ok) + *ok = b; + + return static_cast(d); +} + +uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const +{ + double d = toDouble(false, tolerateEmptyString); + bool b = true; + + if (d != static_cast(d)) { + b = false; + d = 0; + } + + if (ok) + *ok = b; + + return static_cast(d); +} + +uint32_t UString::toStrictUInt32(bool* ok) const +{ + if (ok) + *ok = false; + + // Empty string is not OK. + int len = m_rep->len; + if (len == 0) + return 0; + const UChar* p = m_rep->data(); + unsigned short c = p[0]; + + // If the first digit is 0, only 0 itself is OK. + if (c == '0') { + if (len == 1 && ok) + *ok = true; + return 0; + } + + // Convert to UInt32, checking for overflow. + uint32_t i = 0; + while (1) { + // Process character, turning it into a digit. + if (c < '0' || c > '9') + return 0; + const unsigned d = c - '0'; + + // Multiply by 10, checking for overflow out of 32 bits. + if (i > 0xFFFFFFFFU / 10) + return 0; + i *= 10; + + // Add in the digit, checking for overflow out of 32 bits. + const unsigned max = 0xFFFFFFFFU - d; + if (i > max) + return 0; + i += d; + + // Handle end of string. + if (--len == 0) { + if (ok) + *ok = true; + return i; + } + + // Get next character. + c = *(++p); + } +} + +int UString::find(const UString& f, int pos) const +{ + int fsz = f.size(); + + if (pos < 0) + pos = 0; + + if (fsz == 1) { + UChar ch = f[0]; + const UChar* end = data() + size(); + for (const UChar* c = data() + pos; c < end; c++) { + if (*c == ch) + return static_cast(c - data()); + } + return -1; + } + + int sz = size(); + if (sz < fsz) + return -1; + if (fsz == 0) + return pos; + const UChar* end = data() + sz - fsz; + int fsizeminusone = (fsz - 1) * sizeof(UChar); + const UChar* fdata = f.data(); + unsigned short fchar = fdata[0]; + ++fdata; + for (const UChar* c = data() + pos; c <= end; c++) { + if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone)) + return static_cast(c - data()); + } + + return -1; +} + +int UString::find(UChar ch, int pos) const +{ + if (pos < 0) + pos = 0; + const UChar* end = data() + size(); + for (const UChar* c = data() + pos; c < end; c++) { + if (*c == ch) + return static_cast(c - data()); + } + + return -1; +} + +int UString::rfind(const UString& f, int pos) const +{ + int sz = size(); + int fsz = f.size(); + if (sz < fsz) + return -1; + if (pos < 0) + pos = 0; + if (pos > sz - fsz) + pos = sz - fsz; + if (fsz == 0) + return pos; + int fsizeminusone = (fsz - 1) * sizeof(UChar); + const UChar* fdata = f.data(); + for (const UChar* c = data() + pos; c >= data(); c--) { + if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) + return static_cast(c - data()); + } + + return -1; +} + +int UString::rfind(UChar ch, int pos) const +{ + if (isEmpty()) + return -1; + if (pos + 1 >= size()) + pos = size() - 1; + for (const UChar* c = data() + pos; c >= data(); c--) { + if (*c == ch) + return static_cast(c - data()); + } + + return -1; +} + +UString UString::substr(int pos, int len) const +{ + int s = size(); + + if (pos < 0) + pos = 0; + else if (pos >= s) + pos = s; + if (len < 0) + len = s; + if (pos + len >= s) + len = s - pos; + + if (pos == 0 && len == s) + return *this; + + return UString(Rep::create(m_rep, pos, len)); +} + +bool operator==(const UString& s1, const char *s2) +{ + if (s2 == 0) + return s1.isEmpty(); + + const UChar* u = s1.data(); + const UChar* uend = u + s1.size(); + while (u != uend && *s2) { + if (u[0] != (unsigned char)*s2) + return false; + s2++; + u++; + } + + return u == uend && *s2 == 0; +} + +bool operator<(const UString& s1, const UString& s2) +{ + const int l1 = s1.size(); + const int l2 = s2.size(); + const int lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1.data(); + const UChar* c2 = s2.data(); + int l = 0; + while (l < lmin && *c1 == *c2) { + c1++; + c2++; + l++; + } + if (l < lmin) + return (c1[0] < c2[0]); + + return (l1 < l2); +} + +bool operator>(const UString& s1, const UString& s2) +{ + const int l1 = s1.size(); + const int l2 = s2.size(); + const int lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1.data(); + const UChar* c2 = s2.data(); + int l = 0; + while (l < lmin && *c1 == *c2) { + c1++; + c2++; + l++; + } + if (l < lmin) + return (c1[0] > c2[0]); + + return (l1 > l2); +} + +int compare(const UString& s1, const UString& s2) +{ + const int l1 = s1.size(); + const int l2 = s2.size(); + const int lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1.data(); + const UChar* c2 = s2.data(); + int l = 0; + while (l < lmin && *c1 == *c2) { + c1++; + c2++; + l++; + } + + if (l < lmin) + return (c1[0] > c2[0]) ? 1 : -1; + + if (l1 == l2) + return 0; + + return (l1 > l2) ? 1 : -1; +} + +bool equal(const UString::Rep* r, const UString::Rep* b) +{ + int length = r->len; + if (length != b->len) + return false; + const UChar* d = r->data(); + const UChar* s = b->data(); + for (int i = 0; i != length; ++i) { + if (d[i] != s[i]) + return false; + } + return true; +} + +CString UString::UTF8String(bool strict) const +{ + // Allocate a buffer big enough to hold all the characters. + const int length = size(); + Vector buffer(length * 3); + + // Convert to runs of 8-bit characters. + char* p = buffer.data(); + const UChar* d = reinterpret_cast(&data()[0]); + ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict); + if (result != conversionOK) + return CString(); + + return CString(buffer.data(), p - buffer.data()); +} + +// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. +NEVER_INLINE void UString::makeNull() +{ + m_rep = &Rep::null(); +} + +// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. +NEVER_INLINE UString::Rep* UString::nullRep() +{ + return &Rep::null(); +} + +} // namespace JSC