JavaScriptCore/wtf/text/WTFString.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * (C) 1999 Lars Knoll (knoll@kde.org)
       
     3  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
       
     4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
       
     5  *
       
     6  * This library is free software; you can redistribute it and/or
       
     7  * modify it under the terms of the GNU Library General Public
       
     8  * License as published by the Free Software Foundation; either
       
     9  * version 2 of the License, or (at your option) any later version.
       
    10  *
       
    11  * This library is distributed in the hope that it will be useful,
       
    12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    14  * Library General Public License for more details.
       
    15  *
       
    16  * You should have received a copy of the GNU Library General Public License
       
    17  * along with this library; see the file COPYING.LIB.  If not, write to
       
    18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    19  * Boston, MA 02110-1301, USA.
       
    20  */
       
    21 
       
    22 #include "config.h"
       
    23 #include "WTFString.h"
       
    24 
       
    25 #include <limits>
       
    26 #include <stdarg.h>
       
    27 #include <wtf/ASCIICType.h>
       
    28 #include <wtf/text/CString.h>
       
    29 #include <wtf/StringExtras.h>
       
    30 #include <wtf/Vector.h>
       
    31 #include <wtf/dtoa.h>
       
    32 #include <wtf/unicode/UTF8.h>
       
    33 #include <wtf/unicode/Unicode.h>
       
    34 
       
    35 using namespace WTF;
       
    36 using namespace WTF::Unicode;
       
    37 
       
    38 namespace WebCore {
       
    39 
       
    40 String::String(const UChar* str)
       
    41 {
       
    42     if (!str)
       
    43         return;
       
    44         
       
    45     int len = 0;
       
    46     while (str[len] != UChar(0))
       
    47         len++;
       
    48     
       
    49     m_impl = StringImpl::create(str, len);
       
    50 }
       
    51 
       
    52 void String::append(const String& str)
       
    53 {
       
    54     if (str.isEmpty())
       
    55        return;
       
    56 
       
    57     // FIXME: This is extremely inefficient. So much so that we might want to take this
       
    58     // out of String's API. We can make it better by optimizing the case where exactly
       
    59     // one String is pointing at this StringImpl, but even then it's going to require a
       
    60     // call to fastMalloc every single time.
       
    61     if (str.m_impl) {
       
    62         if (m_impl) {
       
    63             UChar* data;
       
    64             RefPtr<StringImpl> newImpl =
       
    65                 StringImpl::createUninitialized(m_impl->length() + str.length(), data);
       
    66             memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
       
    67             memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
       
    68             m_impl = newImpl.release();
       
    69         } else
       
    70             m_impl = str.m_impl;
       
    71     }
       
    72 }
       
    73 
       
    74 void String::append(char c)
       
    75 {
       
    76     // FIXME: This is extremely inefficient. So much so that we might want to take this
       
    77     // out of String's API. We can make it better by optimizing the case where exactly
       
    78     // one String is pointing at this StringImpl, but even then it's going to require a
       
    79     // call to fastMalloc every single time.
       
    80     if (m_impl) {
       
    81         UChar* data;
       
    82         RefPtr<StringImpl> newImpl =
       
    83             StringImpl::createUninitialized(m_impl->length() + 1, data);
       
    84         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
       
    85         data[m_impl->length()] = c;
       
    86         m_impl = newImpl.release();
       
    87     } else
       
    88         m_impl = StringImpl::create(&c, 1);
       
    89 }
       
    90 
       
    91 void String::append(UChar c)
       
    92 {
       
    93     // FIXME: This is extremely inefficient. So much so that we might want to take this
       
    94     // out of String's API. We can make it better by optimizing the case where exactly
       
    95     // one String is pointing at this StringImpl, but even then it's going to require a
       
    96     // call to fastMalloc every single time.
       
    97     if (m_impl) {
       
    98         UChar* data;
       
    99         RefPtr<StringImpl> newImpl =
       
   100             StringImpl::createUninitialized(m_impl->length() + 1, data);
       
   101         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
       
   102         data[m_impl->length()] = c;
       
   103         m_impl = newImpl.release();
       
   104     } else
       
   105         m_impl = StringImpl::create(&c, 1);
       
   106 }
       
   107 
       
   108 String operator+(const String& a, const String& b)
       
   109 {
       
   110     if (a.isEmpty())
       
   111         return b;
       
   112     if (b.isEmpty())
       
   113         return a;
       
   114     String c = a;
       
   115     c += b;
       
   116     return c;
       
   117 }
       
   118 
       
   119 String operator+(const String& s, const char* cs)
       
   120 {
       
   121     return s + String(cs);
       
   122 }
       
   123 
       
   124 String operator+(const char* cs, const String& s)
       
   125 {
       
   126     return String(cs) + s;
       
   127 }
       
   128 
       
    // Compares two Strings by forwarding to the codePointCompare overload that accepts
    // the values returned by impl(); the result of that overload is returned unchanged.
    129 int codePointCompare(const String& a, const String& b)
       
    130 {
       
    131     return codePointCompare(a.impl(), b.impl());
       
    132 }
       
   133 
       
   134 void String::insert(const String& str, unsigned pos)
       
   135 {
       
   136     if (str.isEmpty()) {
       
   137         if (str.isNull())
       
   138             return;
       
   139         if (isNull())
       
   140             m_impl = str.impl();
       
   141         return;
       
   142     }
       
   143     insert(str.characters(), str.length(), pos);
       
   144 }
       
   145 
       
   146 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
       
   147 {
       
   148     if (!m_impl) {
       
   149         if (!charactersToAppend)
       
   150             return;
       
   151         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
       
   152         return;
       
   153     }
       
   154 
       
   155     if (!lengthToAppend)
       
   156         return;
       
   157 
       
   158     ASSERT(charactersToAppend);
       
   159     UChar* data;
       
   160     RefPtr<StringImpl> newImpl =
       
   161         StringImpl::createUninitialized(length() + lengthToAppend, data);
       
   162     memcpy(data, characters(), length() * sizeof(UChar));
       
   163     memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
       
   164     m_impl = newImpl.release();
       
   165 }
       
   166 
       
   167 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
       
   168 {
       
   169     if (position >= length()) {
       
   170         append(charactersToInsert, lengthToInsert);
       
   171         return;
       
   172     }
       
   173 
       
   174     ASSERT(m_impl);
       
   175 
       
   176     if (!lengthToInsert)
       
   177         return;
       
   178 
       
   179     ASSERT(charactersToInsert);
       
   180     UChar* data;
       
   181     RefPtr<StringImpl> newImpl =
       
   182       StringImpl::createUninitialized(length() + lengthToInsert, data);
       
   183     memcpy(data, characters(), position * sizeof(UChar));
       
   184     memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
       
   185     memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
       
   186     m_impl = newImpl.release();
       
   187 }
       
   188 
       
   189 UChar32 String::characterStartingAt(unsigned i) const
       
   190 {
       
   191     if (!m_impl || i >= m_impl->length())
       
   192         return 0;
       
   193     return m_impl->characterStartingAt(i);
       
   194 }
       
   195 
       
   196 void String::truncate(unsigned position)
       
   197 {
       
   198     if (position >= length())
       
   199         return;
       
   200     UChar* data;
       
   201     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
       
   202     memcpy(data, characters(), position * sizeof(UChar));
       
   203     m_impl = newImpl.release();
       
   204 }
       
   205 
       
   206 void String::remove(unsigned position, int lengthToRemove)
       
   207 {
       
   208     if (lengthToRemove <= 0)
       
   209         return;
       
   210     if (position >= length())
       
   211         return;
       
   212     if (static_cast<unsigned>(lengthToRemove) > length() - position)
       
   213         lengthToRemove = length() - position;
       
   214     UChar* data;
       
   215     RefPtr<StringImpl> newImpl =
       
   216         StringImpl::createUninitialized(length() - lengthToRemove, data);
       
   217     memcpy(data, characters(), position * sizeof(UChar));
       
   218     memcpy(data + position, characters() + position + lengthToRemove,
       
   219         (length() - lengthToRemove - position) * sizeof(UChar));
       
   220     m_impl = newImpl.release();
       
   221 }
       
   222 
       
   223 String String::substring(unsigned pos, unsigned len) const
       
   224 {
       
   225     if (!m_impl) 
       
   226         return String();
       
   227     return m_impl->substring(pos, len);
       
   228 }
       
   229 
       
   230 String String::lower() const
       
   231 {
       
   232     if (!m_impl)
       
   233         return String();
       
   234     return m_impl->lower();
       
   235 }
       
   236 
       
   237 String String::upper() const
       
   238 {
       
   239     if (!m_impl)
       
   240         return String();
       
   241     return m_impl->upper();
       
   242 }
       
   243 
       
   244 String String::stripWhiteSpace() const
       
   245 {
       
   246     if (!m_impl)
       
   247         return String();
       
   248     return m_impl->stripWhiteSpace();
       
   249 }
       
   250 
       
   251 String String::simplifyWhiteSpace() const
       
   252 {
       
   253     if (!m_impl)
       
   254         return String();
       
   255     return m_impl->simplifyWhiteSpace();
       
   256 }
       
   257 
       
   258 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
       
   259 {
       
   260     if (!m_impl)
       
   261         return String();
       
   262     return m_impl->removeCharacters(findMatch);
       
   263 }
       
   264 
       
   265 String String::foldCase() const
       
   266 {
       
   267     if (!m_impl)
       
   268         return String();
       
   269     return m_impl->foldCase();
       
   270 }
       
   271 
       
   272 bool String::percentage(int& result) const
       
   273 {
       
   274     if (!m_impl || !m_impl->length())
       
   275         return false;
       
   276 
       
   277     if ((*m_impl)[m_impl->length() - 1] != '%')
       
   278        return false;
       
   279 
       
   280     result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
       
   281     return true;
       
   282 }
       
   283 
       
   284 const UChar* String::charactersWithNullTermination()
       
   285 {
       
   286     if (!m_impl)
       
   287         return 0;
       
   288     if (m_impl->hasTerminatingNullCharacter())
       
   289         return m_impl->characters();
       
   290     m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
       
   291     return m_impl->characters();
       
   292 }
       
   293 
       
   294 String String::format(const char *format, ...)
       
   295 {
       
   296 #if PLATFORM(QT)
       
   297     // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
       
   298     // https://bugs.webkit.org/show_bug.cgi?id=18994
       
   299     va_list args;
       
   300     va_start(args, format);
       
   301 
       
   302     QString buffer;
       
   303     buffer.vsprintf(format, args);
       
   304 
       
   305     va_end(args);
       
   306 
       
   307     return buffer;
       
   308 
       
   309 #elif OS(WINCE)
       
   310     va_list args;
       
   311     va_start(args, format);
       
   312 
       
   313     Vector<char, 256> buffer;
       
   314 
       
   315     int bufferSize = 256;
       
   316     buffer.resize(bufferSize);
       
   317     for (;;) {
       
   318         int written = vsnprintf(buffer.data(), bufferSize, format, args);
       
   319         va_end(args);
       
   320 
       
   321         if (written == 0)
       
   322             return String("");
       
   323         if (written > 0)
       
   324             return StringImpl::create(buffer.data(), written);
       
   325         
       
   326         bufferSize <<= 1;
       
   327         buffer.resize(bufferSize);
       
   328         va_start(args, format);
       
   329     }
       
   330 
       
   331 #else
       
   332     va_list args;
       
   333     va_start(args, format);
       
   334 
       
   335     Vector<char, 256> buffer;
       
   336 
       
   337     // Do the format once to get the length.
       
   338 #if COMPILER(MSVC)
       
   339     int result = _vscprintf(format, args);
       
   340 #else
       
   341     char ch;
       
   342     int result = vsnprintf(&ch, 1, format, args);
       
   343     // We need to call va_end() and then va_start() again here, as the
       
   344     // contents of args is undefined after the call to vsnprintf
       
   345     // according to http://man.cx/snprintf(3)
       
   346     //
       
   347     // Not calling va_end/va_start here happens to work on lots of
       
   348     // systems, but fails e.g. on 64bit Linux.
       
   349     va_end(args);
       
   350     va_start(args, format);
       
   351 #endif
       
   352 
       
   353     if (result == 0)
       
   354         return String("");
       
   355     if (result < 0)
       
   356         return String();
       
   357     unsigned len = result;
       
   358     buffer.grow(len + 1);
       
   359     
       
   360     // Now do the formatting again, guaranteed to fit.
       
   361     vsnprintf(buffer.data(), buffer.size(), format, args);
       
   362 
       
   363     va_end(args);
       
   364     
       
   365     return StringImpl::create(buffer.data(), len);
       
   366 #endif
       
   367 }
       
   368 
       
   369 String String::number(short n)
       
   370 {
       
   371     return String::format("%hd", n);
       
   372 }
       
   373 
       
   374 String String::number(unsigned short n)
       
   375 {
       
   376     return String::format("%hu", n);
       
   377 }
       
   378 
       
   379 String String::number(int n)
       
   380 {
       
   381     return String::format("%d", n);
       
   382 }
       
   383 
       
   384 String String::number(unsigned n)
       
   385 {
       
   386     return String::format("%u", n);
       
   387 }
       
   388 
       
   389 String String::number(long n)
       
   390 {
       
   391     return String::format("%ld", n);
       
   392 }
       
   393 
       
   394 String String::number(unsigned long n)
       
   395 {
       
   396     return String::format("%lu", n);
       
   397 }
       
   398 
       
   399 String String::number(long long n)
       
   400 {
       
   401 #if OS(WINDOWS) && !PLATFORM(QT)
       
   402     return String::format("%I64i", n);
       
   403 #else
       
   404     return String::format("%lli", n);
       
   405 #endif
       
   406 }
       
   407 
       
   408 String String::number(unsigned long long n)
       
   409 {
       
   410 #if OS(WINDOWS) && !PLATFORM(QT)
       
   411     return String::format("%I64u", n);
       
   412 #else
       
   413     return String::format("%llu", n);
       
   414 #endif
       
   415 }
       
   416     
       
   417 String String::number(double n)
       
   418 {
       
   419     return String::format("%.6lg", n);
       
   420 }
       
   421 
       
   422 int String::toIntStrict(bool* ok, int base) const
       
   423 {
       
   424     if (!m_impl) {
       
   425         if (ok)
       
   426             *ok = false;
       
   427         return 0;
       
   428     }
       
   429     return m_impl->toIntStrict(ok, base);
       
   430 }
       
   431 
       
   432 unsigned String::toUIntStrict(bool* ok, int base) const
       
   433 {
       
   434     if (!m_impl) {
       
   435         if (ok)
       
   436             *ok = false;
       
   437         return 0;
       
   438     }
       
   439     return m_impl->toUIntStrict(ok, base);
       
   440 }
       
   441 
       
   442 int64_t String::toInt64Strict(bool* ok, int base) const
       
   443 {
       
   444     if (!m_impl) {
       
   445         if (ok)
       
   446             *ok = false;
       
   447         return 0;
       
   448     }
       
   449     return m_impl->toInt64Strict(ok, base);
       
   450 }
       
   451 
       
   452 uint64_t String::toUInt64Strict(bool* ok, int base) const
       
   453 {
       
   454     if (!m_impl) {
       
   455         if (ok)
       
   456             *ok = false;
       
   457         return 0;
       
   458     }
       
   459     return m_impl->toUInt64Strict(ok, base);
       
   460 }
       
   461 
       
   462 intptr_t String::toIntPtrStrict(bool* ok, int base) const
       
   463 {
       
   464     if (!m_impl) {
       
   465         if (ok)
       
   466             *ok = false;
       
   467         return 0;
       
   468     }
       
   469     return m_impl->toIntPtrStrict(ok, base);
       
   470 }
       
   471 
       
   472 
       
   473 int String::toInt(bool* ok) const
       
   474 {
       
   475     if (!m_impl) {
       
   476         if (ok)
       
   477             *ok = false;
       
   478         return 0;
       
   479     }
       
   480     return m_impl->toInt(ok);
       
   481 }
       
   482 
       
   483 unsigned String::toUInt(bool* ok) const
       
   484 {
       
   485     if (!m_impl) {
       
   486         if (ok)
       
   487             *ok = false;
       
   488         return 0;
       
   489     }
       
   490     return m_impl->toUInt(ok);
       
   491 }
       
   492 
       
   493 int64_t String::toInt64(bool* ok) const
       
   494 {
       
   495     if (!m_impl) {
       
   496         if (ok)
       
   497             *ok = false;
       
   498         return 0;
       
   499     }
       
   500     return m_impl->toInt64(ok);
       
   501 }
       
   502 
       
   503 uint64_t String::toUInt64(bool* ok) const
       
   504 {
       
   505     if (!m_impl) {
       
   506         if (ok)
       
   507             *ok = false;
       
   508         return 0;
       
   509     }
       
   510     return m_impl->toUInt64(ok);
       
   511 }
       
   512 
       
   513 intptr_t String::toIntPtr(bool* ok) const
       
   514 {
       
   515     if (!m_impl) {
       
   516         if (ok)
       
   517             *ok = false;
       
   518         return 0;
       
   519     }
       
   520     return m_impl->toIntPtr(ok);
       
   521 }
       
   522 
       
   523 double String::toDouble(bool* ok) const
       
   524 {
       
   525     if (!m_impl) {
       
   526         if (ok)
       
   527             *ok = false;
       
   528         return 0.0;
       
   529     }
       
   530     return m_impl->toDouble(ok);
       
   531 }
       
   532 
       
   533 float String::toFloat(bool* ok) const
       
   534 {
       
   535     if (!m_impl) {
       
   536         if (ok)
       
   537             *ok = false;
       
   538         return 0.0f;
       
   539     }
       
   540     return m_impl->toFloat(ok);
       
   541 }
       
   542 
       
   543 String String::threadsafeCopy() const
       
   544 {
       
   545     if (!m_impl)
       
   546         return String();
       
   547     return m_impl->threadsafeCopy();
       
   548 }
       
   549 
       
   550 String String::crossThreadString() const
       
   551 {
       
   552     if (!m_impl)
       
   553         return String();
       
   554     return m_impl->crossThreadString();
       
   555 }
       
   556 
       
   557 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
       
   558 {
       
   559     result.clear();
       
   560 
       
   561     int startPos = 0;
       
   562     int endPos;
       
   563     while ((endPos = find(separator, startPos)) != -1) {
       
   564         if (allowEmptyEntries || startPos != endPos)
       
   565             result.append(substring(startPos, endPos - startPos));
       
   566         startPos = endPos + separator.length();
       
   567     }
       
   568     if (allowEmptyEntries || startPos != static_cast<int>(length()))
       
   569         result.append(substring(startPos));
       
   570 }
       
   571 
       
   572 void String::split(const String& separator, Vector<String>& result) const
       
   573 {
       
   574     return split(separator, false, result);
       
   575 }
       
   576 
       
   577 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
       
   578 {
       
   579     result.clear();
       
   580 
       
   581     int startPos = 0;
       
   582     int endPos;
       
   583     while ((endPos = find(separator, startPos)) != -1) {
       
   584         if (allowEmptyEntries || startPos != endPos)
       
   585             result.append(substring(startPos, endPos - startPos));
       
   586         startPos = endPos + 1;
       
   587     }
       
   588     if (allowEmptyEntries || startPos != static_cast<int>(length()))
       
   589         result.append(substring(startPos));
       
   590 }
       
   591 
       
   592 void String::split(UChar separator, Vector<String>& result) const
       
   593 {
       
   594     return split(String(&separator, 1), false, result);
       
   595 }
       
   596 
       
   597 Vector<char> String::ascii() const
       
   598 {
       
   599     if (m_impl) 
       
   600         return m_impl->ascii();
       
   601     
       
   602     const char* nullMsg = "(null impl)";
       
   603     Vector<char, 2048> buffer;
       
   604     for (int i = 0; nullMsg[i]; ++i)
       
   605         buffer.append(nullMsg[i]);
       
   606     
       
   607     buffer.append('\0');
       
   608     return buffer;
       
   609 }
       
   610 
       
   611 CString String::latin1() const
       
   612 {
       
   613     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
       
   614     // preserved, characters outside of this range are converted to '?'.
       
   615 
       
   616     unsigned length = this->length();
       
   617     const UChar* characters = this->characters();
       
   618 
       
   619     char* characterBuffer;
       
   620     CString result = CString::newUninitialized(length, characterBuffer);
       
   621 
       
   622     for (unsigned i = 0; i < length; ++i) {
       
   623         UChar ch = characters[i];
       
   624         characterBuffer[i] = ch > 255 ? '?' : ch;
       
   625     }
       
   626 
       
   627     return result;
       
   628 }
       
   629 
       
   630 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
       
   631 static inline void putUTF8Triple(char*& buffer, UChar ch)
       
   632 {
       
   633     ASSERT(ch >= 0x0800);
       
   634     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
       
   635     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
       
   636     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
       
   637 }
       
   638 
       
   639 CString String::utf8() const
       
   640 {
       
   641     unsigned length = this->length();
       
   642     const UChar* characters = this->characters();
       
   643 
       
   644     // Allocate a buffer big enough to hold all the characters
       
   645     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
       
   646     // Optimization ideas, if we find this function is hot:
       
   647     //  * We could speculatively create a CStringBuffer to contain 'length' 
       
   648     //    characters, and resize if necessary (i.e. if the buffer contains
       
   649     //    non-ascii characters). (Alternatively, scan the buffer first for
       
   650     //    ascii characters, so we know this will be sufficient).
       
   651     //  * We could allocate a CStringBuffer with an appropriate size to
       
   652     //    have a good chance of being able to write the string into the
       
   653     //    buffer without reallocing (say, 1.5 x length).
       
   654     Vector<char, 1024> bufferVector(length * 3);
       
   655 
       
   656     char* buffer = bufferVector.data();
       
   657     ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false);
       
   658     ASSERT(result != sourceIllegal); // Only produced from strict conversion.
       
   659     ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
       
   660 
       
   661     // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate
       
   662     // would have been handled in the middle of a string with non-strict conversion - which is to say,
       
   663     // simply encode it to UTF-8.
       
   664     if (result == sourceExhausted) {
       
   665         // This should be one unpaired high surrogate.
       
   666         ASSERT((characters + 1) == (this->characters() + length));
       
   667         ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
       
   668         // There should be room left, since one UChar hasn't been converted.
       
   669         ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
       
   670         putUTF8Triple(buffer, *characters);
       
   671     }
       
   672 
       
   673     return CString(bufferVector.data(), buffer - bufferVector.data());
       
   674 }
       
   675 
       
   676 String String::fromUTF8(const char* stringStart, size_t length)
       
   677 {
       
   678     if (!stringStart)
       
   679         return String();
       
   680 
       
   681     // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
       
   682     // the right length, if there are any multi-byte sequences this buffer will be too large.
       
   683     UChar* buffer;
       
   684     String stringBuffer(StringImpl::createUninitialized(length, buffer));
       
   685     UChar* bufferEnd = buffer + length;
       
   686 
       
   687     // Try converting into the buffer.
       
   688     const char* stringCurrent = stringStart;
       
   689     if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK)
       
   690         return String();
       
   691 
       
   692     // stringBuffer is full (the input must have been all ascii) so just return it!
       
   693     if (buffer == bufferEnd)
       
   694         return stringBuffer;
       
   695 
       
   696     // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
       
   697     unsigned utf16Length = buffer - stringBuffer.characters();
       
   698     ASSERT(utf16Length < length);
       
   699     return String(stringBuffer.characters(), utf16Length);
       
   700 }
       
   701 
       
   702 String String::fromUTF8(const char* string)
       
   703 {
       
   704     if (!string)
       
   705         return String();
       
   706     return fromUTF8(string, strlen(string));
       
   707 }
       
   708 
       
   709 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
       
   710 {
       
   711     String utf8 = fromUTF8(string, size);
       
   712     if (!utf8)
       
   713         return String(string, size);
       
   714     return utf8;
       
   715 }
       
   716 
       
   717 // String Operations
       
   718 
       
   719 static bool isCharacterAllowedInBase(UChar c, int base)
       
   720 {
       
   721     if (c > 0x7F)
       
   722         return false;
       
   723     if (isASCIIDigit(c))
       
   724         return c - '0' < base;
       
   725     if (isASCIIAlpha(c)) {
       
   726         if (base > 36)
       
   727             base = 36;
       
   728         return (c >= 'a' && c < 'a' + base - 10)
       
   729             || (c >= 'A' && c < 'A' + base - 10);
       
   730     }
       
   731     return false;
       
   732 }
       
   733 
       
   734 template <typename IntegralType>
       
   735 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
       
   736 {
       
   737     static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
       
   738     static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
       
   739     const IntegralType maxMultiplier = integralMax / base;
       
   740 
       
   741     IntegralType value = 0;
       
   742     bool isOk = false;
       
   743     bool isNegative = false;
       
   744 
       
   745     if (!data)
       
   746         goto bye;
       
   747 
       
   748     // skip leading whitespace
       
   749     while (length && isSpaceOrNewline(*data)) {
       
   750         length--;
       
   751         data++;
       
   752     }
       
   753 
       
   754     if (isSigned && length && *data == '-') {
       
   755         length--;
       
   756         data++;
       
   757         isNegative = true;
       
   758     } else if (length && *data == '+') {
       
   759         length--;
       
   760         data++;
       
   761     }
       
   762 
       
   763     if (!length || !isCharacterAllowedInBase(*data, base))
       
   764         goto bye;
       
   765 
       
   766     while (length && isCharacterAllowedInBase(*data, base)) {
       
   767         length--;
       
   768         IntegralType digitValue;
       
   769         UChar c = *data;
       
   770         if (isASCIIDigit(c))
       
   771             digitValue = c - '0';
       
   772         else if (c >= 'a')
       
   773             digitValue = c - 'a' + 10;
       
   774         else
       
   775             digitValue = c - 'A' + 10;
       
   776 
       
   777         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
       
   778             goto bye;
       
   779 
       
   780         value = base * value + digitValue;
       
   781         data++;
       
   782     }
       
   783 
       
   784 #if COMPILER(MSVC)
       
   785 #pragma warning(push, 0)
       
   786 #pragma warning(disable:4146)
       
   787 #endif
       
   788 
       
   789     if (isNegative)
       
   790         value = -value;
       
   791 
       
   792 #if COMPILER(MSVC)
       
   793 #pragma warning(pop)
       
   794 #endif
       
   795 
       
   796     // skip trailing space
       
   797     while (length && isSpaceOrNewline(*data)) {
       
   798         length--;
       
   799         data++;
       
   800     }
       
   801 
       
   802     if (!length)
       
   803         isOk = true;
       
   804 bye:
       
   805     if (ok)
       
   806         *ok = isOk;
       
   807     return isOk ? value : 0;
       
   808 }
       
   809 
       
   810 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
       
   811 {
       
   812     size_t i = 0;
       
   813 
       
   814     // Allow leading spaces.
       
   815     for (; i != length; ++i) {
       
   816         if (!isSpaceOrNewline(data[i]))
       
   817             break;
       
   818     }
       
   819     
       
   820     // Allow sign.
       
   821     if (i != length && (data[i] == '+' || data[i] == '-'))
       
   822         ++i;
       
   823     
       
   824     // Allow digits.
       
   825     for (; i != length; ++i) {
       
   826         if (!isASCIIDigit(data[i]))
       
   827             break;
       
   828     }
       
   829 
       
   830     return i;
       
   831 }
       
   832 
       
   833 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
       
   834 {
       
   835     return toIntegralType<int>(data, length, ok, base);
       
   836 }
       
   837 
       
   838 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
       
   839 {
       
   840     return toIntegralType<unsigned>(data, length, ok, base);
       
   841 }
       
   842 
       
   843 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
       
   844 {
       
   845     return toIntegralType<int64_t>(data, length, ok, base);
       
   846 }
       
   847 
       
   848 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
       
   849 {
       
   850     return toIntegralType<uint64_t>(data, length, ok, base);
       
   851 }
       
   852 
       
   853 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
       
   854 {
       
   855     return toIntegralType<intptr_t>(data, length, ok, base);
       
   856 }
       
   857 
       
   858 int charactersToInt(const UChar* data, size_t length, bool* ok)
       
   859 {
       
   860     return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
       
   861 }
       
   862 
       
   863 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
       
   864 {
       
   865     return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
       
   866 }
       
   867 
       
   868 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
       
   869 {
       
   870     return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
       
   871 }
       
   872 
       
   873 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
       
   874 {
       
   875     return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
       
   876 }
       
   877 
       
   878 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
       
   879 {
       
   880     return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
       
   881 }
       
   882 
       
   883 double charactersToDouble(const UChar* data, size_t length, bool* ok)
       
   884 {
       
   885     if (!length) {
       
   886         if (ok)
       
   887             *ok = false;
       
   888         return 0.0;
       
   889     }
       
   890 
       
   891     Vector<char, 256> bytes(length + 1);
       
   892     for (unsigned i = 0; i < length; ++i)
       
   893         bytes[i] = data[i] < 0x7F ? data[i] : '?';
       
   894     bytes[length] = '\0';
       
   895     char* end;
       
   896     double val = WTF::strtod(bytes.data(), &end);
       
   897     if (ok)
       
   898         *ok = (end == 0 || *end == '\0');
       
   899     return val;
       
   900 }
       
   901 
       
   902 float charactersToFloat(const UChar* data, size_t length, bool* ok)
       
   903 {
       
   904     // FIXME: This will return ok even when the string fits into a double but not a float.
       
   905     return static_cast<float>(charactersToDouble(data, length, ok));
       
   906 }
       
   907 
       
   908 } // namespace WebCore
       
   909 
       
   910 #ifndef NDEBUG
       
   911 // For use in the debugger - leaks memory
       
   912 WebCore::String* string(const char*);
       
   913 
       
   914 WebCore::String* string(const char* s)
       
   915 {
       
   916     return new WebCore::String(s);
       
   917 }
       
   918 #endif