|
1 /* |
|
2 * (C) 1999 Lars Knoll (knoll@kde.org) |
|
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. |
|
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
|
5 * |
|
6 * This library is free software; you can redistribute it and/or |
|
7 * modify it under the terms of the GNU Library General Public |
|
8 * License as published by the Free Software Foundation; either |
|
9 * version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This library is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Library General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Library General Public License |
|
17 * along with this library; see the file COPYING.LIB. If not, write to |
|
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
19 * Boston, MA 02110-1301, USA. |
|
20 */ |
|
21 |
|
22 #include "config.h" |
|
23 #include "WTFString.h" |
|
24 |
|
25 #include <limits> |
|
26 #include <stdarg.h> |
|
27 #include <wtf/ASCIICType.h> |
|
28 #include <wtf/text/CString.h> |
|
29 #include <wtf/StringExtras.h> |
|
30 #include <wtf/Vector.h> |
|
31 #include <wtf/dtoa.h> |
|
32 #include <wtf/unicode/UTF8.h> |
|
33 #include <wtf/unicode/Unicode.h> |
|
34 |
|
35 using namespace WTF; |
|
36 using namespace WTF::Unicode; |
|
37 |
|
38 namespace WebCore { |
|
39 |
|
40 String::String(const UChar* str) |
|
41 { |
|
42 if (!str) |
|
43 return; |
|
44 |
|
45 int len = 0; |
|
46 while (str[len] != UChar(0)) |
|
47 len++; |
|
48 |
|
49 m_impl = StringImpl::create(str, len); |
|
50 } |
|
51 |
|
52 void String::append(const String& str) |
|
53 { |
|
54 if (str.isEmpty()) |
|
55 return; |
|
56 |
|
57 // FIXME: This is extremely inefficient. So much so that we might want to take this |
|
58 // out of String's API. We can make it better by optimizing the case where exactly |
|
59 // one String is pointing at this StringImpl, but even then it's going to require a |
|
60 // call to fastMalloc every single time. |
|
61 if (str.m_impl) { |
|
62 if (m_impl) { |
|
63 UChar* data; |
|
64 RefPtr<StringImpl> newImpl = |
|
65 StringImpl::createUninitialized(m_impl->length() + str.length(), data); |
|
66 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
|
67 memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); |
|
68 m_impl = newImpl.release(); |
|
69 } else |
|
70 m_impl = str.m_impl; |
|
71 } |
|
72 } |
|
73 |
|
74 void String::append(char c) |
|
75 { |
|
76 // FIXME: This is extremely inefficient. So much so that we might want to take this |
|
77 // out of String's API. We can make it better by optimizing the case where exactly |
|
78 // one String is pointing at this StringImpl, but even then it's going to require a |
|
79 // call to fastMalloc every single time. |
|
80 if (m_impl) { |
|
81 UChar* data; |
|
82 RefPtr<StringImpl> newImpl = |
|
83 StringImpl::createUninitialized(m_impl->length() + 1, data); |
|
84 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
|
85 data[m_impl->length()] = c; |
|
86 m_impl = newImpl.release(); |
|
87 } else |
|
88 m_impl = StringImpl::create(&c, 1); |
|
89 } |
|
90 |
|
91 void String::append(UChar c) |
|
92 { |
|
93 // FIXME: This is extremely inefficient. So much so that we might want to take this |
|
94 // out of String's API. We can make it better by optimizing the case where exactly |
|
95 // one String is pointing at this StringImpl, but even then it's going to require a |
|
96 // call to fastMalloc every single time. |
|
97 if (m_impl) { |
|
98 UChar* data; |
|
99 RefPtr<StringImpl> newImpl = |
|
100 StringImpl::createUninitialized(m_impl->length() + 1, data); |
|
101 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
|
102 data[m_impl->length()] = c; |
|
103 m_impl = newImpl.release(); |
|
104 } else |
|
105 m_impl = StringImpl::create(&c, 1); |
|
106 } |
|
107 |
|
108 String operator+(const String& a, const String& b) |
|
109 { |
|
110 if (a.isEmpty()) |
|
111 return b; |
|
112 if (b.isEmpty()) |
|
113 return a; |
|
114 String c = a; |
|
115 c += b; |
|
116 return c; |
|
117 } |
|
118 |
|
119 String operator+(const String& s, const char* cs) |
|
120 { |
|
121 return s + String(cs); |
|
122 } |
|
123 |
|
124 String operator+(const char* cs, const String& s) |
|
125 { |
|
126 return String(cs) + s; |
|
127 } |
|
128 |
|
129 int codePointCompare(const String& a, const String& b) |
|
130 { |
|
131 return codePointCompare(a.impl(), b.impl()); |
|
132 } |
|
133 |
|
134 void String::insert(const String& str, unsigned pos) |
|
135 { |
|
136 if (str.isEmpty()) { |
|
137 if (str.isNull()) |
|
138 return; |
|
139 if (isNull()) |
|
140 m_impl = str.impl(); |
|
141 return; |
|
142 } |
|
143 insert(str.characters(), str.length(), pos); |
|
144 } |
|
145 |
|
146 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) |
|
147 { |
|
148 if (!m_impl) { |
|
149 if (!charactersToAppend) |
|
150 return; |
|
151 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
|
152 return; |
|
153 } |
|
154 |
|
155 if (!lengthToAppend) |
|
156 return; |
|
157 |
|
158 ASSERT(charactersToAppend); |
|
159 UChar* data; |
|
160 RefPtr<StringImpl> newImpl = |
|
161 StringImpl::createUninitialized(length() + lengthToAppend, data); |
|
162 memcpy(data, characters(), length() * sizeof(UChar)); |
|
163 memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); |
|
164 m_impl = newImpl.release(); |
|
165 } |
|
166 |
|
167 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) |
|
168 { |
|
169 if (position >= length()) { |
|
170 append(charactersToInsert, lengthToInsert); |
|
171 return; |
|
172 } |
|
173 |
|
174 ASSERT(m_impl); |
|
175 |
|
176 if (!lengthToInsert) |
|
177 return; |
|
178 |
|
179 ASSERT(charactersToInsert); |
|
180 UChar* data; |
|
181 RefPtr<StringImpl> newImpl = |
|
182 StringImpl::createUninitialized(length() + lengthToInsert, data); |
|
183 memcpy(data, characters(), position * sizeof(UChar)); |
|
184 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); |
|
185 memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); |
|
186 m_impl = newImpl.release(); |
|
187 } |
|
188 |
|
189 UChar32 String::characterStartingAt(unsigned i) const |
|
190 { |
|
191 if (!m_impl || i >= m_impl->length()) |
|
192 return 0; |
|
193 return m_impl->characterStartingAt(i); |
|
194 } |
|
195 |
|
196 void String::truncate(unsigned position) |
|
197 { |
|
198 if (position >= length()) |
|
199 return; |
|
200 UChar* data; |
|
201 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); |
|
202 memcpy(data, characters(), position * sizeof(UChar)); |
|
203 m_impl = newImpl.release(); |
|
204 } |
|
205 |
|
206 void String::remove(unsigned position, int lengthToRemove) |
|
207 { |
|
208 if (lengthToRemove <= 0) |
|
209 return; |
|
210 if (position >= length()) |
|
211 return; |
|
212 if (static_cast<unsigned>(lengthToRemove) > length() - position) |
|
213 lengthToRemove = length() - position; |
|
214 UChar* data; |
|
215 RefPtr<StringImpl> newImpl = |
|
216 StringImpl::createUninitialized(length() - lengthToRemove, data); |
|
217 memcpy(data, characters(), position * sizeof(UChar)); |
|
218 memcpy(data + position, characters() + position + lengthToRemove, |
|
219 (length() - lengthToRemove - position) * sizeof(UChar)); |
|
220 m_impl = newImpl.release(); |
|
221 } |
|
222 |
|
223 String String::substring(unsigned pos, unsigned len) const |
|
224 { |
|
225 if (!m_impl) |
|
226 return String(); |
|
227 return m_impl->substring(pos, len); |
|
228 } |
|
229 |
|
230 String String::lower() const |
|
231 { |
|
232 if (!m_impl) |
|
233 return String(); |
|
234 return m_impl->lower(); |
|
235 } |
|
236 |
|
237 String String::upper() const |
|
238 { |
|
239 if (!m_impl) |
|
240 return String(); |
|
241 return m_impl->upper(); |
|
242 } |
|
243 |
|
244 String String::stripWhiteSpace() const |
|
245 { |
|
246 if (!m_impl) |
|
247 return String(); |
|
248 return m_impl->stripWhiteSpace(); |
|
249 } |
|
250 |
|
251 String String::simplifyWhiteSpace() const |
|
252 { |
|
253 if (!m_impl) |
|
254 return String(); |
|
255 return m_impl->simplifyWhiteSpace(); |
|
256 } |
|
257 |
|
258 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const |
|
259 { |
|
260 if (!m_impl) |
|
261 return String(); |
|
262 return m_impl->removeCharacters(findMatch); |
|
263 } |
|
264 |
|
265 String String::foldCase() const |
|
266 { |
|
267 if (!m_impl) |
|
268 return String(); |
|
269 return m_impl->foldCase(); |
|
270 } |
|
271 |
|
272 bool String::percentage(int& result) const |
|
273 { |
|
274 if (!m_impl || !m_impl->length()) |
|
275 return false; |
|
276 |
|
277 if ((*m_impl)[m_impl->length() - 1] != '%') |
|
278 return false; |
|
279 |
|
280 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); |
|
281 return true; |
|
282 } |
|
283 |
|
284 const UChar* String::charactersWithNullTermination() |
|
285 { |
|
286 if (!m_impl) |
|
287 return 0; |
|
288 if (m_impl->hasTerminatingNullCharacter()) |
|
289 return m_impl->characters(); |
|
290 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); |
|
291 return m_impl->characters(); |
|
292 } |
|
293 |
|
294 String String::format(const char *format, ...) |
|
295 { |
|
296 #if PLATFORM(QT) |
|
297 // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. |
|
298 // https://bugs.webkit.org/show_bug.cgi?id=18994 |
|
299 va_list args; |
|
300 va_start(args, format); |
|
301 |
|
302 QString buffer; |
|
303 buffer.vsprintf(format, args); |
|
304 |
|
305 va_end(args); |
|
306 |
|
307 return buffer; |
|
308 |
|
309 #elif OS(WINCE) |
|
310 va_list args; |
|
311 va_start(args, format); |
|
312 |
|
313 Vector<char, 256> buffer; |
|
314 |
|
315 int bufferSize = 256; |
|
316 buffer.resize(bufferSize); |
|
317 for (;;) { |
|
318 int written = vsnprintf(buffer.data(), bufferSize, format, args); |
|
319 va_end(args); |
|
320 |
|
321 if (written == 0) |
|
322 return String(""); |
|
323 if (written > 0) |
|
324 return StringImpl::create(buffer.data(), written); |
|
325 |
|
326 bufferSize <<= 1; |
|
327 buffer.resize(bufferSize); |
|
328 va_start(args, format); |
|
329 } |
|
330 |
|
331 #else |
|
332 va_list args; |
|
333 va_start(args, format); |
|
334 |
|
335 Vector<char, 256> buffer; |
|
336 |
|
337 // Do the format once to get the length. |
|
338 #if COMPILER(MSVC) |
|
339 int result = _vscprintf(format, args); |
|
340 #else |
|
341 char ch; |
|
342 int result = vsnprintf(&ch, 1, format, args); |
|
343 // We need to call va_end() and then va_start() again here, as the |
|
344 // contents of args is undefined after the call to vsnprintf |
|
345 // according to http://man.cx/snprintf(3) |
|
346 // |
|
347 // Not calling va_end/va_start here happens to work on lots of |
|
348 // systems, but fails e.g. on 64bit Linux. |
|
349 va_end(args); |
|
350 va_start(args, format); |
|
351 #endif |
|
352 |
|
353 if (result == 0) |
|
354 return String(""); |
|
355 if (result < 0) |
|
356 return String(); |
|
357 unsigned len = result; |
|
358 buffer.grow(len + 1); |
|
359 |
|
360 // Now do the formatting again, guaranteed to fit. |
|
361 vsnprintf(buffer.data(), buffer.size(), format, args); |
|
362 |
|
363 va_end(args); |
|
364 |
|
365 return StringImpl::create(buffer.data(), len); |
|
366 #endif |
|
367 } |
|
368 |
|
369 String String::number(short n) |
|
370 { |
|
371 return String::format("%hd", n); |
|
372 } |
|
373 |
|
374 String String::number(unsigned short n) |
|
375 { |
|
376 return String::format("%hu", n); |
|
377 } |
|
378 |
|
379 String String::number(int n) |
|
380 { |
|
381 return String::format("%d", n); |
|
382 } |
|
383 |
|
384 String String::number(unsigned n) |
|
385 { |
|
386 return String::format("%u", n); |
|
387 } |
|
388 |
|
389 String String::number(long n) |
|
390 { |
|
391 return String::format("%ld", n); |
|
392 } |
|
393 |
|
394 String String::number(unsigned long n) |
|
395 { |
|
396 return String::format("%lu", n); |
|
397 } |
|
398 |
|
399 String String::number(long long n) |
|
400 { |
|
401 #if OS(WINDOWS) && !PLATFORM(QT) |
|
402 return String::format("%I64i", n); |
|
403 #else |
|
404 return String::format("%lli", n); |
|
405 #endif |
|
406 } |
|
407 |
|
408 String String::number(unsigned long long n) |
|
409 { |
|
410 #if OS(WINDOWS) && !PLATFORM(QT) |
|
411 return String::format("%I64u", n); |
|
412 #else |
|
413 return String::format("%llu", n); |
|
414 #endif |
|
415 } |
|
416 |
|
417 String String::number(double n) |
|
418 { |
|
419 return String::format("%.6lg", n); |
|
420 } |
|
421 |
|
422 int String::toIntStrict(bool* ok, int base) const |
|
423 { |
|
424 if (!m_impl) { |
|
425 if (ok) |
|
426 *ok = false; |
|
427 return 0; |
|
428 } |
|
429 return m_impl->toIntStrict(ok, base); |
|
430 } |
|
431 |
|
432 unsigned String::toUIntStrict(bool* ok, int base) const |
|
433 { |
|
434 if (!m_impl) { |
|
435 if (ok) |
|
436 *ok = false; |
|
437 return 0; |
|
438 } |
|
439 return m_impl->toUIntStrict(ok, base); |
|
440 } |
|
441 |
|
442 int64_t String::toInt64Strict(bool* ok, int base) const |
|
443 { |
|
444 if (!m_impl) { |
|
445 if (ok) |
|
446 *ok = false; |
|
447 return 0; |
|
448 } |
|
449 return m_impl->toInt64Strict(ok, base); |
|
450 } |
|
451 |
|
452 uint64_t String::toUInt64Strict(bool* ok, int base) const |
|
453 { |
|
454 if (!m_impl) { |
|
455 if (ok) |
|
456 *ok = false; |
|
457 return 0; |
|
458 } |
|
459 return m_impl->toUInt64Strict(ok, base); |
|
460 } |
|
461 |
|
462 intptr_t String::toIntPtrStrict(bool* ok, int base) const |
|
463 { |
|
464 if (!m_impl) { |
|
465 if (ok) |
|
466 *ok = false; |
|
467 return 0; |
|
468 } |
|
469 return m_impl->toIntPtrStrict(ok, base); |
|
470 } |
|
471 |
|
472 |
|
473 int String::toInt(bool* ok) const |
|
474 { |
|
475 if (!m_impl) { |
|
476 if (ok) |
|
477 *ok = false; |
|
478 return 0; |
|
479 } |
|
480 return m_impl->toInt(ok); |
|
481 } |
|
482 |
|
483 unsigned String::toUInt(bool* ok) const |
|
484 { |
|
485 if (!m_impl) { |
|
486 if (ok) |
|
487 *ok = false; |
|
488 return 0; |
|
489 } |
|
490 return m_impl->toUInt(ok); |
|
491 } |
|
492 |
|
493 int64_t String::toInt64(bool* ok) const |
|
494 { |
|
495 if (!m_impl) { |
|
496 if (ok) |
|
497 *ok = false; |
|
498 return 0; |
|
499 } |
|
500 return m_impl->toInt64(ok); |
|
501 } |
|
502 |
|
503 uint64_t String::toUInt64(bool* ok) const |
|
504 { |
|
505 if (!m_impl) { |
|
506 if (ok) |
|
507 *ok = false; |
|
508 return 0; |
|
509 } |
|
510 return m_impl->toUInt64(ok); |
|
511 } |
|
512 |
|
513 intptr_t String::toIntPtr(bool* ok) const |
|
514 { |
|
515 if (!m_impl) { |
|
516 if (ok) |
|
517 *ok = false; |
|
518 return 0; |
|
519 } |
|
520 return m_impl->toIntPtr(ok); |
|
521 } |
|
522 |
|
523 double String::toDouble(bool* ok) const |
|
524 { |
|
525 if (!m_impl) { |
|
526 if (ok) |
|
527 *ok = false; |
|
528 return 0.0; |
|
529 } |
|
530 return m_impl->toDouble(ok); |
|
531 } |
|
532 |
|
533 float String::toFloat(bool* ok) const |
|
534 { |
|
535 if (!m_impl) { |
|
536 if (ok) |
|
537 *ok = false; |
|
538 return 0.0f; |
|
539 } |
|
540 return m_impl->toFloat(ok); |
|
541 } |
|
542 |
|
543 String String::threadsafeCopy() const |
|
544 { |
|
545 if (!m_impl) |
|
546 return String(); |
|
547 return m_impl->threadsafeCopy(); |
|
548 } |
|
549 |
|
550 String String::crossThreadString() const |
|
551 { |
|
552 if (!m_impl) |
|
553 return String(); |
|
554 return m_impl->crossThreadString(); |
|
555 } |
|
556 |
|
557 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const |
|
558 { |
|
559 result.clear(); |
|
560 |
|
561 int startPos = 0; |
|
562 int endPos; |
|
563 while ((endPos = find(separator, startPos)) != -1) { |
|
564 if (allowEmptyEntries || startPos != endPos) |
|
565 result.append(substring(startPos, endPos - startPos)); |
|
566 startPos = endPos + separator.length(); |
|
567 } |
|
568 if (allowEmptyEntries || startPos != static_cast<int>(length())) |
|
569 result.append(substring(startPos)); |
|
570 } |
|
571 |
|
572 void String::split(const String& separator, Vector<String>& result) const |
|
573 { |
|
574 return split(separator, false, result); |
|
575 } |
|
576 |
|
577 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const |
|
578 { |
|
579 result.clear(); |
|
580 |
|
581 int startPos = 0; |
|
582 int endPos; |
|
583 while ((endPos = find(separator, startPos)) != -1) { |
|
584 if (allowEmptyEntries || startPos != endPos) |
|
585 result.append(substring(startPos, endPos - startPos)); |
|
586 startPos = endPos + 1; |
|
587 } |
|
588 if (allowEmptyEntries || startPos != static_cast<int>(length())) |
|
589 result.append(substring(startPos)); |
|
590 } |
|
591 |
|
592 void String::split(UChar separator, Vector<String>& result) const |
|
593 { |
|
594 return split(String(&separator, 1), false, result); |
|
595 } |
|
596 |
|
597 Vector<char> String::ascii() const |
|
598 { |
|
599 if (m_impl) |
|
600 return m_impl->ascii(); |
|
601 |
|
602 const char* nullMsg = "(null impl)"; |
|
603 Vector<char, 2048> buffer; |
|
604 for (int i = 0; nullMsg[i]; ++i) |
|
605 buffer.append(nullMsg[i]); |
|
606 |
|
607 buffer.append('\0'); |
|
608 return buffer; |
|
609 } |
|
610 |
|
611 CString String::latin1() const |
|
612 { |
|
613 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
|
614 // preserved, characters outside of this range are converted to '?'. |
|
615 |
|
616 unsigned length = this->length(); |
|
617 const UChar* characters = this->characters(); |
|
618 |
|
619 char* characterBuffer; |
|
620 CString result = CString::newUninitialized(length, characterBuffer); |
|
621 |
|
622 for (unsigned i = 0; i < length; ++i) { |
|
623 UChar ch = characters[i]; |
|
624 characterBuffer[i] = ch > 255 ? '?' : ch; |
|
625 } |
|
626 |
|
627 return result; |
|
628 } |
|
629 |
|
630 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. |
|
631 static inline void putUTF8Triple(char*& buffer, UChar ch) |
|
632 { |
|
633 ASSERT(ch >= 0x0800); |
|
634 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
|
635 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
|
636 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
|
637 } |
|
638 |
|
639 CString String::utf8() const |
|
640 { |
|
641 unsigned length = this->length(); |
|
642 const UChar* characters = this->characters(); |
|
643 |
|
644 // Allocate a buffer big enough to hold all the characters |
|
645 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
|
646 // Optimization ideas, if we find this function is hot: |
|
647 // * We could speculatively create a CStringBuffer to contain 'length' |
|
648 // characters, and resize if necessary (i.e. if the buffer contains |
|
649 // non-ascii characters). (Alternatively, scan the buffer first for |
|
650 // ascii characters, so we know this will be sufficient). |
|
651 // * We could allocate a CStringBuffer with an appropriate size to |
|
652 // have a good chance of being able to write the string into the |
|
653 // buffer without reallocing (say, 1.5 x length). |
|
654 Vector<char, 1024> bufferVector(length * 3); |
|
655 |
|
656 char* buffer = bufferVector.data(); |
|
657 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false); |
|
658 ASSERT(result != sourceIllegal); // Only produced from strict conversion. |
|
659 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion |
|
660 |
|
661 // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate |
|
662 // would have been handled in the middle of a string with non-strict conversion - which is to say, |
|
663 // simply encode it to UTF-8. |
|
664 if (result == sourceExhausted) { |
|
665 // This should be one unpaired high surrogate. |
|
666 ASSERT((characters + 1) == (this->characters() + length)); |
|
667 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); |
|
668 // There should be room left, since one UChar hasn't been converted. |
|
669 ASSERT((buffer + 3) <= (buffer + bufferVector.size())); |
|
670 putUTF8Triple(buffer, *characters); |
|
671 } |
|
672 |
|
673 return CString(bufferVector.data(), buffer - bufferVector.data()); |
|
674 } |
|
675 |
|
676 String String::fromUTF8(const char* stringStart, size_t length) |
|
677 { |
|
678 if (!stringStart) |
|
679 return String(); |
|
680 |
|
681 // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be |
|
682 // the right length, if there are any multi-byte sequences this buffer will be too large. |
|
683 UChar* buffer; |
|
684 String stringBuffer(StringImpl::createUninitialized(length, buffer)); |
|
685 UChar* bufferEnd = buffer + length; |
|
686 |
|
687 // Try converting into the buffer. |
|
688 const char* stringCurrent = stringStart; |
|
689 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK) |
|
690 return String(); |
|
691 |
|
692 // stringBuffer is full (the input must have been all ascii) so just return it! |
|
693 if (buffer == bufferEnd) |
|
694 return stringBuffer; |
|
695 |
|
696 // stringBuffer served its purpose as a buffer, copy the contents out into a new string. |
|
697 unsigned utf16Length = buffer - stringBuffer.characters(); |
|
698 ASSERT(utf16Length < length); |
|
699 return String(stringBuffer.characters(), utf16Length); |
|
700 } |
|
701 |
|
702 String String::fromUTF8(const char* string) |
|
703 { |
|
704 if (!string) |
|
705 return String(); |
|
706 return fromUTF8(string, strlen(string)); |
|
707 } |
|
708 |
|
709 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size) |
|
710 { |
|
711 String utf8 = fromUTF8(string, size); |
|
712 if (!utf8) |
|
713 return String(string, size); |
|
714 return utf8; |
|
715 } |
|
716 |
|
717 // String Operations |
|
718 |
|
719 static bool isCharacterAllowedInBase(UChar c, int base) |
|
720 { |
|
721 if (c > 0x7F) |
|
722 return false; |
|
723 if (isASCIIDigit(c)) |
|
724 return c - '0' < base; |
|
725 if (isASCIIAlpha(c)) { |
|
726 if (base > 36) |
|
727 base = 36; |
|
728 return (c >= 'a' && c < 'a' + base - 10) |
|
729 || (c >= 'A' && c < 'A' + base - 10); |
|
730 } |
|
731 return false; |
|
732 } |
|
733 |
|
734 template <typename IntegralType> |
|
735 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) |
|
736 { |
|
737 static const IntegralType integralMax = std::numeric_limits<IntegralType>::max(); |
|
738 static const bool isSigned = std::numeric_limits<IntegralType>::is_signed; |
|
739 const IntegralType maxMultiplier = integralMax / base; |
|
740 |
|
741 IntegralType value = 0; |
|
742 bool isOk = false; |
|
743 bool isNegative = false; |
|
744 |
|
745 if (!data) |
|
746 goto bye; |
|
747 |
|
748 // skip leading whitespace |
|
749 while (length && isSpaceOrNewline(*data)) { |
|
750 length--; |
|
751 data++; |
|
752 } |
|
753 |
|
754 if (isSigned && length && *data == '-') { |
|
755 length--; |
|
756 data++; |
|
757 isNegative = true; |
|
758 } else if (length && *data == '+') { |
|
759 length--; |
|
760 data++; |
|
761 } |
|
762 |
|
763 if (!length || !isCharacterAllowedInBase(*data, base)) |
|
764 goto bye; |
|
765 |
|
766 while (length && isCharacterAllowedInBase(*data, base)) { |
|
767 length--; |
|
768 IntegralType digitValue; |
|
769 UChar c = *data; |
|
770 if (isASCIIDigit(c)) |
|
771 digitValue = c - '0'; |
|
772 else if (c >= 'a') |
|
773 digitValue = c - 'a' + 10; |
|
774 else |
|
775 digitValue = c - 'A' + 10; |
|
776 |
|
777 if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) |
|
778 goto bye; |
|
779 |
|
780 value = base * value + digitValue; |
|
781 data++; |
|
782 } |
|
783 |
|
784 #if COMPILER(MSVC) |
|
785 #pragma warning(push, 0) |
|
786 #pragma warning(disable:4146) |
|
787 #endif |
|
788 |
|
789 if (isNegative) |
|
790 value = -value; |
|
791 |
|
792 #if COMPILER(MSVC) |
|
793 #pragma warning(pop) |
|
794 #endif |
|
795 |
|
796 // skip trailing space |
|
797 while (length && isSpaceOrNewline(*data)) { |
|
798 length--; |
|
799 data++; |
|
800 } |
|
801 |
|
802 if (!length) |
|
803 isOk = true; |
|
804 bye: |
|
805 if (ok) |
|
806 *ok = isOk; |
|
807 return isOk ? value : 0; |
|
808 } |
|
809 |
|
810 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) |
|
811 { |
|
812 size_t i = 0; |
|
813 |
|
814 // Allow leading spaces. |
|
815 for (; i != length; ++i) { |
|
816 if (!isSpaceOrNewline(data[i])) |
|
817 break; |
|
818 } |
|
819 |
|
820 // Allow sign. |
|
821 if (i != length && (data[i] == '+' || data[i] == '-')) |
|
822 ++i; |
|
823 |
|
824 // Allow digits. |
|
825 for (; i != length; ++i) { |
|
826 if (!isASCIIDigit(data[i])) |
|
827 break; |
|
828 } |
|
829 |
|
830 return i; |
|
831 } |
|
832 |
|
833 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) |
|
834 { |
|
835 return toIntegralType<int>(data, length, ok, base); |
|
836 } |
|
837 |
|
838 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) |
|
839 { |
|
840 return toIntegralType<unsigned>(data, length, ok, base); |
|
841 } |
|
842 |
|
843 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
|
844 { |
|
845 return toIntegralType<int64_t>(data, length, ok, base); |
|
846 } |
|
847 |
|
848 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
|
849 { |
|
850 return toIntegralType<uint64_t>(data, length, ok, base); |
|
851 } |
|
852 |
|
853 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) |
|
854 { |
|
855 return toIntegralType<intptr_t>(data, length, ok, base); |
|
856 } |
|
857 |
|
858 int charactersToInt(const UChar* data, size_t length, bool* ok) |
|
859 { |
|
860 return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
|
861 } |
|
862 |
|
863 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) |
|
864 { |
|
865 return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
|
866 } |
|
867 |
|
868 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) |
|
869 { |
|
870 return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
|
871 } |
|
872 |
|
873 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) |
|
874 { |
|
875 return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
|
876 } |
|
877 |
|
878 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) |
|
879 { |
|
880 return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
|
881 } |
|
882 |
|
883 double charactersToDouble(const UChar* data, size_t length, bool* ok) |
|
884 { |
|
885 if (!length) { |
|
886 if (ok) |
|
887 *ok = false; |
|
888 return 0.0; |
|
889 } |
|
890 |
|
891 Vector<char, 256> bytes(length + 1); |
|
892 for (unsigned i = 0; i < length; ++i) |
|
893 bytes[i] = data[i] < 0x7F ? data[i] : '?'; |
|
894 bytes[length] = '\0'; |
|
895 char* end; |
|
896 double val = WTF::strtod(bytes.data(), &end); |
|
897 if (ok) |
|
898 *ok = (end == 0 || *end == '\0'); |
|
899 return val; |
|
900 } |
|
901 |
|
902 float charactersToFloat(const UChar* data, size_t length, bool* ok) |
|
903 { |
|
904 // FIXME: This will return ok even when the string fits into a double but not a float. |
|
905 return static_cast<float>(charactersToDouble(data, length, ok)); |
|
906 } |
|
907 |
|
908 } // namespace WebCore |
|
909 |
|
910 #ifndef NDEBUG |
|
911 // For use in the debugger - leaks memory |
|
912 WebCore::String* string(const char*); |
|
913 |
|
914 WebCore::String* string(const char* s) |
|
915 { |
|
916 return new WebCore::String(s); |
|
917 } |
|
918 #endif |