|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 1998-2005, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * |
|
7 * File unistr.h |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 09/25/98 stephen Creation. |
|
13 * 11/11/98 stephen Changed per 11/9 code review. |
|
14 * 04/20/99 stephen Overhauled per 4/16 code review. |
|
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method |
|
16 * handleReplaceBetween(); other methods unchanged. |
|
17 * 06/25/01 grhoten Remove dependency on iostream. |
|
18 ****************************************************************************** |
|
19 */ |
|
20 |
|
21 #ifndef UNISTR_H |
|
22 #define UNISTR_H |
|
23 |
|
24 /** |
|
25 * \file |
|
26 * \brief C++ API: Unicode String |
|
27 */ |
|
28 |
|
29 #include "unicode/rep.h" |
|
30 |
|
31 struct UConverter; // unicode/ucnv.h |
|
32 class StringThreadTest; |
|
33 |
|
34 #ifndef U_COMPARE_CODE_POINT_ORDER |
|
35 /* see also ustring.h and unorm.h */ |
|
36 /** |
|
37 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: |
|
38 * Compare strings in code point order instead of code unit order. |
|
39 * @stable ICU 2.2 |
|
40 */ |
|
41 #define U_COMPARE_CODE_POINT_ORDER 0x8000 |
|
42 #endif |
|
43 |
|
44 #ifndef USTRING_H |
|
45 /* see ustring.h */ |
|
46 U_STABLE int32_t U_EXPORT2 |
|
47 u_strlen(const UChar *s); |
|
48 #endif |
|
49 |
|
50 U_NAMESPACE_BEGIN |
|
51 |
|
52 class Locale; // unicode/locid.h |
|
53 class StringCharacterIterator; |
|
54 class BreakIterator; // unicode/brkiter.h |
|
55 |
|
56 /* The <iostream> include has been moved to unicode/ustream.h */ |
|
57 |
|
58 /** |
|
59 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor |
|
60 * which constructs a Unicode string from an invariant-character char * string. |
|
61 * About invariant characters see utypes.h. |
|
62 * This constructor has no runtime dependency on conversion code and is |
|
63 * therefore recommended over ones taking a charset name string |
|
64 * (where the empty string "" indicates invariant-character conversion). |
|
65 * |
|
66 * @draft ICU 3.2 |
|
67 */ |
|
68 #define US_INV UnicodeString::kInvariant |
|
69 |
|
70 /** |
|
71 * Unicode String literals in C++. |
|
72 * Dependent on the platform properties, different UnicodeString |
|
73 * constructors should be used to create a UnicodeString object from |
|
74 * a string literal. |
|
75 * The macros are defined for maximum performance. |
|
76 * They work only for strings that contain "invariant characters", i.e., |
|
77 * only latin letters, digits, and some punctuation. |
|
78 * See utypes.h for details. |
|
79 * |
|
80 * The string parameter must be a C string literal. |
|
81 * The length of the string, not including the terminating |
|
82 * <code>NUL</code>, must be specified as a constant. |
|
83 * The U_STRING_DECL macro should be invoked exactly once for one |
|
84 * such string variable before it is used. |
|
85 * @stable ICU 2.0 |
|
86 */ |
|
87 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) |
|
88 # define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length) |
|
89 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY |
|
90 # define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length) |
|
91 #else |
|
92 # define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV) |
|
93 #endif |
|
94 |
|
95 /** |
|
96 * Unicode String literals in C++. |
|
97 * Dependent on the platform properties, different UnicodeString |
|
98 * constructors should be used to create a UnicodeString object from |
|
99 * a string literal. |
|
100 * The macros are defined for improved performance. |
|
101 * They work only for strings that contain "invariant characters", i.e., |
|
102 * only latin letters, digits, and some punctuation. |
|
103 * See utypes.h for details. |
|
104 * |
|
105 * The string parameter must be a C string literal. |
|
106 * @stable ICU 2.0 |
|
107 */ |
|
108 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) |
|
109 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1) |
|
110 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY |
|
111 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1) |
|
112 #else |
|
113 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV) |
|
114 #endif |
|
115 |
|
116 /** |
|
117 * UnicodeString is a string class that stores Unicode characters directly and provides |
|
118 * functionality similar to that of the Java String and StringBuffer classes. |
|
119 * It is a concrete implementation of the abstract class Replaceable (for transliteration). |
|
120 * |
|
121 * The UnicodeString class is not suitable for subclassing. |
|
122 * |
|
123 * <p>For an overview of Unicode strings in C and C++ see the |
|
124 * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p> |
|
125 * |
|
126 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. |
|
127 * A Unicode character may be stored with either one code unit |
|
128 * (the most common case) or with a matched pair of special code units |
|
129 * ("surrogates"). The data type for code units is UChar. |
|
130 * For single-character handling, a Unicode character code <em>point</em> is a value |
|
131 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> |
|
132 * |
|
133 * <p>Indexes and offsets into and lengths of strings always count code units, not code points. |
|
134 * This is the same as with multi-byte char* strings in traditional string handling. |
|
135 * Operations on partial strings typically do not test for code point boundaries. |
|
136 * If necessary, the user needs to take care of such boundaries by testing for the code unit |
|
137 * values or by using functions like |
|
138 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() |
|
139 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> |
|
140 * |
|
141 * UnicodeString methods are more lenient with regard to input parameter values |
|
142 * than other ICU APIs. In particular: |
|
143 * - If indexes are out of bounds for a UnicodeString object |
|
144 * (<0 or >length()) then they are "pinned" to the nearest boundary. |
|
145 * - If primitive string pointer values (e.g., const UChar * or char *) |
|
146 * for input strings are NULL, then those input string parameters are treated |
|
147 * as if they pointed to an empty string. |
|
148 * However, this is <em>not</em> the case for char * parameters for charset names |
|
149 * or other IDs. |
|
150 * - Most UnicodeString methods do not take a UErrorCode parameter because |
|
151 * there are usually very few opportunities for failure other than a shortage |
|
152 * of memory, error codes in low-level C++ string methods would be inconvenient, |
|
153 * and the error code as the last parameter (ICU convention) would prevent |
|
154 * the use of default parameter values. |
|
155 * Instead, such methods set the UnicodeString into a "bogus" state |
|
156 * (see isBogus()) if an error occurs. |
|
157 * |
|
158 * In string comparisons, two UnicodeString objects that are both "bogus" |
|
159 * compare equal (to be transitive and prevent endless loops in sorting), |
|
160 * and a "bogus" string compares less than any non-"bogus" one. |
|
161 * |
|
162 * Const UnicodeString methods are thread-safe. Multiple threads can use |
|
163 * const methods on the same UnicodeString object simultaneously, |
|
164 * but non-const methods must not be called concurrently (in multiple threads) |
|
165 * with any other (const or non-const) methods. |
|
166 * |
|
167 * Similarly, const UnicodeString & parameters are thread-safe. |
|
168 * One object may be passed in as such a parameter concurrently in multiple threads. |
|
169 * This includes the const UnicodeString & parameters for |
|
170 * copy construction, assignment, and cloning. |
|
171 * |
|
172 * <p>UnicodeString uses several storage methods. |
|
173 * String contents can be stored inside the UnicodeString object itself, |
|
174 * in an allocated and shared buffer, or in an outside buffer that is "aliased". |
|
175 * Most of this is done transparently, but careful aliasing in particular provides |
|
176 * significant performance improvements. |
|
177 * Also, the internal buffer is accessible via special functions. |
|
178 * For details see the |
|
179 * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p> |
|
180 * |
|
181 * @see utf.h |
|
182 * @see CharacterIterator |
|
183 * @stable ICU 2.0 |
|
184 */ |
|
185 class U_COMMON_API UnicodeString : public Replaceable |
|
186 { |
|
187 public: |
|
188 |
|
189 /** |
|
190 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor |
|
191 * which constructs a Unicode string from an invariant-character char * string. |
|
192 * Use the macro US_INV instead of the full qualification for this value. |
|
193 * |
|
194 * @see US_INV |
|
195 * @draft ICU 3.2 |
|
196 */ |
|
197 enum EInvariant { |
|
198 /** |
|
199 * @see EInvariant |
|
200 * @draft ICU 3.2 |
|
201 */ |
|
202 kInvariant |
|
203 }; |
|
204 |
|
205 //======================================== |
|
206 // Read-only operations |
|
207 //======================================== |
|
208 |
|
209 /* Comparison - bitwise only - for international comparison use collation */ |
|
210 |
|
211 /** |
|
212 * Equality operator. Performs only bitwise comparison. |
|
213 * @param text The UnicodeString to compare to this one. |
|
214 * @return TRUE if <TT>text</TT> contains the same characters as this one, |
|
215 * FALSE otherwise. |
|
216 * @stable ICU 2.0 |
|
217 */ |
|
218 inline UBool operator== (const UnicodeString& text) const; |
|
219 |
|
220 /** |
|
221 * Inequality operator. Performs only bitwise comparison. |
|
222 * @param text The UnicodeString to compare to this one. |
|
223 * @return FALSE if <TT>text</TT> contains the same characters as this one, |
|
224 * TRUE otherwise. |
|
225 * @stable ICU 2.0 |
|
226 */ |
|
227 inline UBool operator!= (const UnicodeString& text) const; |
|
228 |
|
229 /** |
|
230 * Greater than operator. Performs only bitwise comparison. |
|
231 * @param text The UnicodeString to compare to this one. |
|
232 * @return TRUE if the characters in this are bitwise |
|
233 * greater than the characters in <code>text</code>, FALSE otherwise |
|
234 * @stable ICU 2.0 |
|
235 */ |
|
236 inline UBool operator> (const UnicodeString& text) const; |
|
237 |
|
238 /** |
|
239 * Less than operator. Performs only bitwise comparison. |
|
240 * @param text The UnicodeString to compare to this one. |
|
241 * @return TRUE if the characters in this are bitwise |
|
242 * less than the characters in <code>text</code>, FALSE otherwise |
|
243 * @stable ICU 2.0 |
|
244 */ |
|
245 inline UBool operator< (const UnicodeString& text) const; |
|
246 |
|
247 /** |
|
248 * Greater than or equal operator. Performs only bitwise comparison. |
|
249 * @param text The UnicodeString to compare to this one. |
|
250 * @return TRUE if the characters in this are bitwise |
|
251 * greater than or equal to the characters in <code>text</code>, FALSE otherwise |
|
252 * @stable ICU 2.0 |
|
253 */ |
|
254 inline UBool operator>= (const UnicodeString& text) const; |
|
255 |
|
256 /** |
|
257 * Less than or equal operator. Performs only bitwise comparison. |
|
258 * @param text The UnicodeString to compare to this one. |
|
259 * @return TRUE if the characters in this are bitwise |
|
260 * less than or equal to the characters in <code>text</code>, FALSE otherwise |
|
261 * @stable ICU 2.0 |
|
262 */ |
|
263 inline UBool operator<= (const UnicodeString& text) const; |
|
264 |
|
265 /** |
|
266 * Compare the characters bitwise in this UnicodeString to |
|
267 * the characters in <code>text</code>. |
|
268 * @param text The UnicodeString to compare to this one. |
|
269 * @return The result of bitwise character comparison: 0 if this |
|
270 * contains the same characters as <code>text</code>, -1 if the characters in |
|
271 * this are bitwise less than the characters in <code>text</code>, +1 if the |
|
272 * characters in this are bitwise greater than the characters |
|
273 * in <code>text</code>. |
|
274 * @stable ICU 2.0 |
|
275 */ |
|
276 inline int8_t compare(const UnicodeString& text) const; |
|
277 |
|
278 /** |
|
279 * Compare the characters bitwise in the range |
|
280 * [<TT>start</TT>, <TT>start + length</TT>) of this string with the characters |
|
281 * in <TT>text</TT>. |
|
282 * @param start the offset in this string at which the compare operation begins |
|
283 * @param length the number of characters in this string to compare. |
|
284 * @param text the other text to be compared against this string. |
|
285 * @return The result of bitwise character comparison: 0 if the range of this |
|
286 * contains the same characters as <code>text</code>, -1 if the characters in |
|
287 * this are bitwise less than the characters in <code>text</code>, +1 if the |
|
288 * characters in this are bitwise greater than the characters |
|
289 * in <code>text</code>. |
|
290 * @stable ICU 2.0 |
|
291 */ |
|
292 inline int8_t compare(int32_t start, |
|
293 int32_t length, |
|
294 const UnicodeString& text) const; |
|
295 |
|
296 /** |
|
297 * Compare the characters bitwise in the range |
|
298 * [<TT>start</TT>, <TT>start + length</TT>) with the characters |
|
299 * in <TT>srcText</TT> in the range |
|
300 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
301 * @param start the offset at which the compare operation begins |
|
302 * @param length the number of characters in this to compare. |
|
303 * @param srcText the text to be compared |
|
304 * @param srcStart the offset into <TT>srcText</TT> to start comparison |
|
305 * @param srcLength the number of characters in <TT>src</TT> to compare |
|
306 * @return The result of bitwise character comparison: 0 if this |
|
307 * contains the same characters as <code>srcText</code>, -1 if the characters in |
|
308 * this are bitwise less than the characters in <code>srcText</code>, +1 if the |
|
309 * characters in this are bitwise greater than the characters |
|
310 * in <code>srcText</code>. |
|
311 * @stable ICU 2.0 |
|
312 */ |
|
313 inline int8_t compare(int32_t start, |
|
314 int32_t length, |
|
315 const UnicodeString& srcText, |
|
316 int32_t srcStart, |
|
317 int32_t srcLength) const; |
|
318 |
|
319 /** |
|
320 * Compare the characters bitwise in this UnicodeString with the first |
|
321 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. |
|
322 * @param srcChars The characters to compare to this UnicodeString. |
|
323 * @param srcLength the number of characters in <TT>srcChars</TT> to compare |
|
324 * @return The result of bitwise character comparison: 0 if this |
|
325 * contains the same characters as <code>srcChars</code>, -1 if the characters in |
|
326 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the |
|
327 * characters in this are bitwise greater than the characters |
|
328 * in <code>srcChars</code>. |
|
329 * @stable ICU 2.0 |
|
330 */ |
|
331 inline int8_t compare(const UChar *srcChars, |
|
332 int32_t srcLength) const; |
|
333 |
|
334 /** |
|
335 * Compare the characters bitwise in the range |
|
336 * [<TT>start</TT>, <TT>start + length</TT>) with the first |
|
337 * <TT>length</TT> characters in <TT>srcChars</TT> |
|
338 * @param start the offset at which the compare operation begins |
|
339 * @param length the number of characters to compare. |
|
340 * @param srcChars the characters to be compared |
|
341 * @return The result of bitwise character comparison: 0 if this |
|
342 * contains the same characters as <code>srcChars</code>, -1 if the characters in |
|
343 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the |
|
344 * characters in this are bitwise greater than the characters |
|
345 * in <code>srcChars</code>. |
|
346 * @stable ICU 2.0 |
|
347 */ |
|
348 inline int8_t compare(int32_t start, |
|
349 int32_t length, |
|
350 const UChar *srcChars) const; |
|
351 |
|
352 /** |
|
353 * Compare the characters bitwise in the range |
|
354 * [<TT>start</TT>, <TT>start + length</TT>) with the characters |
|
355 * in <TT>srcChars</TT> in the range |
|
356 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
357 * @param start the offset at which the compare operation begins |
|
358 * @param length the number of characters in this to compare |
|
359 * @param srcChars the characters to be compared |
|
360 * @param srcStart the offset into <TT>srcChars</TT> to start comparison |
|
361 * @param srcLength the number of characters in <TT>srcChars</TT> to compare |
|
362 * @return The result of bitwise character comparison: 0 if this |
|
363 * contains the same characters as <code>srcChars</code>, -1 if the characters in |
|
364 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the |
|
365 * characters in this are bitwise greater than the characters |
|
366 * in <code>srcChars</code>. |
|
367 * @stable ICU 2.0 |
|
368 */ |
|
369 inline int8_t compare(int32_t start, |
|
370 int32_t length, |
|
371 const UChar *srcChars, |
|
372 int32_t srcStart, |
|
373 int32_t srcLength) const; |
|
374 |
|
375 /** |
|
376 * Compare the characters bitwise in the range |
|
377 * [<TT>start</TT>, <TT>limit</TT>) with the characters |
|
378 * in <TT>srcText</TT> in the range |
|
379 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). |
|
380 * @param start the offset at which the compare operation begins |
|
381 * @param limit the offset immediately following the compare operation |
|
382 * @param srcText the text to be compared |
|
383 * @param srcStart the offset into <TT>srcText</TT> to start comparison |
|
384 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison |
|
385 * @return The result of bitwise character comparison: 0 if this |
|
386 * contains the same characters as <code>srcText</code>, -1 if the characters in |
|
387 * this are bitwise less than the characters in <code>srcText</code>, +1 if the |
|
388 * characters in this are bitwise greater than the characters |
|
389 * in <code>srcText</code>. |
|
390 * @stable ICU 2.0 |
|
391 */ |
|
392 inline int8_t compareBetween(int32_t start, |
|
393 int32_t limit, |
|
394 const UnicodeString& srcText, |
|
395 int32_t srcStart, |
|
396 int32_t srcLimit) const; |
|
397 |
|
398 /** |
|
399 * Compare two Unicode strings in code point order. |
|
400 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
401 * if supplementary characters are present: |
|
402 * |
|
403 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
404 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
405 * which means that they compare as less than some other BMP characters like U+feff. |
|
406 * This function compares Unicode strings in code point order. |
|
407 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
408 * |
|
409 * @param text Another string to compare this one to. |
|
410 * @return a negative/zero/positive integer corresponding to whether |
|
411 * this string is less than/equal to/greater than the second one |
|
412 * in code point order |
|
413 * @stable ICU 2.0 |
|
414 */ |
|
415 inline int8_t compareCodePointOrder(const UnicodeString& text) const; |
|
416 |
|
417 /** |
|
418 * Compare two Unicode strings in code point order. |
|
419 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
420 * if supplementary characters are present: |
|
421 * |
|
422 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
423 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
424 * which means that they compare as less than some other BMP characters like U+feff. |
|
425 * This function compares Unicode strings in code point order. |
|
426 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
427 * |
|
428 * @param start The start offset in this string at which the compare operation begins. |
|
429 * @param length The number of code units from this string to compare. |
|
430 * @param srcText Another string to compare this one to. |
|
431 * @return a negative/zero/positive integer corresponding to whether |
|
432 * this string is less than/equal to/greater than the second one |
|
433 * in code point order |
|
434 * @stable ICU 2.0 |
|
435 */ |
|
436 inline int8_t compareCodePointOrder(int32_t start, |
|
437 int32_t length, |
|
438 const UnicodeString& srcText) const; |
|
439 |
|
440 /** |
|
441 * Compare two Unicode strings in code point order. |
|
442 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
443 * if supplementary characters are present: |
|
444 * |
|
445 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
446 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
447 * which means that they compare as less than some other BMP characters like U+feff. |
|
448 * This function compares Unicode strings in code point order. |
|
449 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
450 * |
|
451 * @param start The start offset in this string at which the compare operation begins. |
|
452 * @param length The number of code units from this string to compare. |
|
453 * @param srcText Another string to compare this one to. |
|
454 * @param srcStart The start offset in that string at which the compare operation begins. |
|
455 * @param srcLength The number of code units from that string to compare. |
|
456 * @return a negative/zero/positive integer corresponding to whether |
|
457 * this string is less than/equal to/greater than the second one |
|
458 * in code point order |
|
459 * @stable ICU 2.0 |
|
460 */ |
|
461 inline int8_t compareCodePointOrder(int32_t start, |
|
462 int32_t length, |
|
463 const UnicodeString& srcText, |
|
464 int32_t srcStart, |
|
465 int32_t srcLength) const; |
|
466 |
|
467 /** |
|
468 * Compare two Unicode strings in code point order. |
|
469 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
470 * if supplementary characters are present: |
|
471 * |
|
472 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
473 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
474 * which means that they compare as less than some other BMP characters like U+feff. |
|
475 * This function compares Unicode strings in code point order. |
|
476 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
477 * |
|
478 * @param srcChars A pointer to another string to compare this one to. |
|
479 * @param srcLength The number of code units from that string to compare. |
|
480 * @return a negative/zero/positive integer corresponding to whether |
|
481 * this string is less than/equal to/greater than the second one |
|
482 * in code point order |
|
483 * @stable ICU 2.0 |
|
484 */ |
|
485 inline int8_t compareCodePointOrder(const UChar *srcChars, |
|
486 int32_t srcLength) const; |
|
487 |
|
488 /** |
|
489 * Compare two Unicode strings in code point order. |
|
490 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
491 * if supplementary characters are present: |
|
492 * |
|
493 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
494 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
495 * which means that they compare as less than some other BMP characters like U+feff. |
|
496 * This function compares Unicode strings in code point order. |
|
497 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
498 * |
|
499 * @param start The start offset in this string at which the compare operation begins. |
|
500 * @param length The number of code units from this string to compare. |
|
501 * @param srcChars A pointer to another string to compare this one to. |
|
502 * @return a negative/zero/positive integer corresponding to whether |
|
503 * this string is less than/equal to/greater than the second one |
|
504 * in code point order |
|
505 * @stable ICU 2.0 |
|
506 */ |
|
507 inline int8_t compareCodePointOrder(int32_t start, |
|
508 int32_t length, |
|
509 const UChar *srcChars) const; |
|
510 |
|
511 /** |
|
512 * Compare two Unicode strings in code point order. |
|
513 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
514 * if supplementary characters are present: |
|
515 * |
|
516 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
517 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
518 * which means that they compare as less than some other BMP characters like U+feff. |
|
519 * This function compares Unicode strings in code point order. |
|
520 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
521 * |
|
522 * @param start The start offset in this string at which the compare operation begins. |
|
523 * @param length The number of code units from this string to compare. |
|
524 * @param srcChars A pointer to another string to compare this one to. |
|
525 * @param srcStart The start offset in that string at which the compare operation begins. |
|
526 * @param srcLength The number of code units from that string to compare. |
|
527 * @return a negative/zero/positive integer corresponding to whether |
|
528 * this string is less than/equal to/greater than the second one |
|
529 * in code point order |
|
530 * @stable ICU 2.0 |
|
531 */ |
|
532 inline int8_t compareCodePointOrder(int32_t start, |
|
533 int32_t length, |
|
534 const UChar *srcChars, |
|
535 int32_t srcStart, |
|
536 int32_t srcLength) const; |
|
537 |
|
538 /** |
|
539 * Compare two Unicode strings in code point order. |
|
540 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
|
541 * if supplementary characters are present: |
|
542 * |
|
543 * In UTF-16, supplementary characters (with code points U+10000 and above) are |
|
544 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
|
545 * which means that they compare as less than some other BMP characters like U+feff. |
|
546 * This function compares Unicode strings in code point order. |
|
547 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. |
|
548 * |
|
549 * @param start The start offset in this string at which the compare operation begins. |
|
550 * @param limit The offset after the last code unit from this string to compare. |
|
551 * @param srcText Another string to compare this one to. |
|
552 * @param srcStart The start offset in that string at which the compare operation begins. |
|
553 * @param srcLimit The offset after the last code unit from that string to compare. |
|
554 * @return a negative/zero/positive integer corresponding to whether |
|
555 * this string is less than/equal to/greater than the second one |
|
556 * in code point order |
|
557 * @stable ICU 2.0 |
|
558 */ |
|
559 inline int8_t compareCodePointOrderBetween(int32_t start, |
|
560 int32_t limit, |
|
561 const UnicodeString& srcText, |
|
562 int32_t srcStart, |
|
563 int32_t srcLimit) const; |
|
564 |
|
565 /** |
|
566 * Compare two strings case-insensitively using full case folding. |
|
567 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). |
|
568 * |
|
569 * @param text Another string to compare this one to. |
|
570 * @param options A bit set of options: |
|
571 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
572 * Comparison in code unit order with default case folding. |
|
573 * |
|
574 * - U_COMPARE_CODE_POINT_ORDER |
|
575 * Set to choose code point order instead of code unit order |
|
576 * (see u_strCompare for details). |
|
577 * |
|
578 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
579 * |
|
580 * @return A negative, zero, or positive integer indicating the comparison result. |
|
581 * @stable ICU 2.0 |
|
582 */ |
|
583 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; |
|
584 |
|
585 /** |
|
586 * Compare two strings case-insensitively using full case folding. |
|
587 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). |
|
588 * |
|
589 * @param start The start offset in this string at which the compare operation begins. |
|
590 * @param length The number of code units from this string to compare. |
|
591 * @param srcText Another string to compare this one to. |
|
592 * @param options A bit set of options: |
|
593 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
594 * Comparison in code unit order with default case folding. |
|
595 * |
|
596 * - U_COMPARE_CODE_POINT_ORDER |
|
597 * Set to choose code point order instead of code unit order |
|
598 * (see u_strCompare for details). |
|
599 * |
|
600 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
601 * |
|
602 * @return A negative, zero, or positive integer indicating the comparison result. |
|
603 * @stable ICU 2.0 |
|
604 */ |
|
605 inline int8_t caseCompare(int32_t start, |
|
606 int32_t length, |
|
607 const UnicodeString& srcText, |
|
608 uint32_t options) const; |
|
609 |
|
610 /** |
|
611 * Compare two strings case-insensitively using full case folding. |
|
612 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). |
|
613 * |
|
614 * @param start The start offset in this string at which the compare operation begins. |
|
615 * @param length The number of code units from this string to compare. |
|
616 * @param srcText Another string to compare this one to. |
|
617 * @param srcStart The start offset in that string at which the compare operation begins. |
|
618 * @param srcLength The number of code units from that string to compare. |
|
619 * @param options A bit set of options: |
|
620 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
621 * Comparison in code unit order with default case folding. |
|
622 * |
|
623 * - U_COMPARE_CODE_POINT_ORDER |
|
624 * Set to choose code point order instead of code unit order |
|
625 * (see u_strCompare for details). |
|
626 * |
|
627 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
628 * |
|
629 * @return A negative, zero, or positive integer indicating the comparison result. |
|
630 * @stable ICU 2.0 |
|
631 */ |
|
632 inline int8_t caseCompare(int32_t start, |
|
633 int32_t length, |
|
634 const UnicodeString& srcText, |
|
635 int32_t srcStart, |
|
636 int32_t srcLength, |
|
637 uint32_t options) const; |
|
638 |
|
639 /** |
|
640 * Compare two strings case-insensitively using full case folding. |
|
641 * This is equivalent to this->foldCase(options).compare(folded), where folded is the srcChars text after foldCase(options). |
|
642 * |
|
643 * @param srcChars A pointer to another string to compare this one to. |
|
644 * @param srcLength The number of code units from that string to compare. |
|
645 * @param options A bit set of options: |
|
646 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
647 * Comparison in code unit order with default case folding. |
|
648 * |
|
649 * - U_COMPARE_CODE_POINT_ORDER |
|
650 * Set to choose code point order instead of code unit order |
|
651 * (see u_strCompare for details). |
|
652 * |
|
653 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
654 * |
|
655 * @return A negative, zero, or positive integer indicating the comparison result. |
|
656 * @stable ICU 2.0 |
|
657 */ |
|
658 inline int8_t caseCompare(const UChar *srcChars, |
|
659 int32_t srcLength, |
|
660 uint32_t options) const; |
|
661 |
|
662 /** |
|
663 * Compare two strings case-insensitively using full case folding. |
|
664 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). |
|
665 * |
|
666 * @param start The start offset in this string at which the compare operation begins. |
|
667 * @param length The number of code units from this string to compare. |
|
668 * @param srcChars A pointer to another string to compare this one to. |
|
669 * @param options A bit set of options: |
|
670 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
671 * Comparison in code unit order with default case folding. |
|
672 * |
|
673 * - U_COMPARE_CODE_POINT_ORDER |
|
674 * Set to choose code point order instead of code unit order |
|
675 * (see u_strCompare for details). |
|
676 * |
|
677 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
678 * |
|
679 * @return A negative, zero, or positive integer indicating the comparison result. |
|
680 * @stable ICU 2.0 |
|
681 */ |
|
682 inline int8_t caseCompare(int32_t start, |
|
683 int32_t length, |
|
684 const UChar *srcChars, |
|
685 uint32_t options) const; |
|
686 |
|
687 /** |
|
688 * Compare two strings case-insensitively using full case folding. |
|
689 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). |
|
690 * |
|
691 * @param start The start offset in this string at which the compare operation begins. |
|
692 * @param length The number of code units from this string to compare. |
|
693 * @param srcChars A pointer to another string to compare this one to. |
|
694 * @param srcStart The start offset in that string at which the compare operation begins. |
|
695 * @param srcLength The number of code units from that string to compare. |
|
696 * @param options A bit set of options: |
|
697 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
698 * Comparison in code unit order with default case folding. |
|
699 * |
|
700 * - U_COMPARE_CODE_POINT_ORDER |
|
701 * Set to choose code point order instead of code unit order |
|
702 * (see u_strCompare for details). |
|
703 * |
|
704 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
705 * |
|
706 * @return A negative, zero, or positive integer indicating the comparison result. |
|
707 * @stable ICU 2.0 |
|
708 */ |
|
709 inline int8_t caseCompare(int32_t start, |
|
710 int32_t length, |
|
711 const UChar *srcChars, |
|
712 int32_t srcStart, |
|
713 int32_t srcLength, |
|
714 uint32_t options) const; |
|
715 |
|
716 /** |
|
717 * Compare two strings case-insensitively using full case folding. |
|
718 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). |
|
719 * |
|
720 * @param start The start offset in this string at which the compare operation begins. |
|
721 * @param limit The offset after the last code unit from this string to compare. |
|
722 * @param srcText Another string to compare this one to. |
|
723 * @param srcStart The start offset in that string at which the compare operation begins. |
|
724 * @param srcLimit The offset after the last code unit from that string to compare. |
|
725 * @param options A bit set of options: |
|
726 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
|
727 * Comparison in code unit order with default case folding. |
|
728 * |
|
729 * - U_COMPARE_CODE_POINT_ORDER |
|
730 * Set to choose code point order instead of code unit order |
|
731 * (see u_strCompare for details). |
|
732 * |
|
733 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
734 * |
|
735 * @return A negative, zero, or positive integer indicating the comparison result. |
|
736 * @stable ICU 2.0 |
|
737 */ |
|
738 inline int8_t caseCompareBetween(int32_t start, |
|
739 int32_t limit, |
|
740 const UnicodeString& srcText, |
|
741 int32_t srcStart, |
|
742 int32_t srcLimit, |
|
743 uint32_t options) const; |
|
744 |
|
745 /** |
|
746 * Determine if this starts with the characters in <TT>text</TT> |
|
747 * @param text The text to match. |
|
748 * @return TRUE if this starts with the characters in <TT>text</TT>, |
|
749 * FALSE otherwise |
|
750 * @stable ICU 2.0 |
|
751 */ |
|
752 inline UBool startsWith(const UnicodeString& text) const; |
|
753 |
|
754 /** |
|
755 * Determine if this starts with the characters in <TT>srcText</TT> |
|
756 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
757 * @param srcText The text to match. |
|
758 * @param srcStart the offset into <TT>srcText</TT> to start matching |
|
759 * @param srcLength the number of characters in <TT>srcText</TT> to match |
|
760 * @return TRUE if this starts with the characters in <TT>srcText</TT>, |
|
761 * FALSE otherwise |
|
762 * @stable ICU 2.0 |
|
763 */ |
|
764 inline UBool startsWith(const UnicodeString& srcText, |
|
765 int32_t srcStart, |
|
766 int32_t srcLength) const; |
|
767 |
|
768 /** |
|
769 * Determine if this starts with the characters in <TT>srcChars</TT> |
|
770 * @param srcChars The characters to match. |
|
771 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
772 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, |
|
773 * FALSE otherwise |
|
774 * @stable ICU 2.0 |
|
775 */ |
|
776 inline UBool startsWith(const UChar *srcChars, |
|
777 int32_t srcLength) const; |
|
778 |
|
779 /** |

780 * Determine if this starts with the characters in <TT>srcChars</TT> |

781 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |

782 * @param srcChars The characters to match. |

783 * @param srcStart the offset into <TT>srcChars</TT> to start matching |

784 * @param srcLength the number of characters in <TT>srcChars</TT> to match |

785 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise |

786 * @stable ICU 2.0 |

787 */ |

788 inline UBool startsWith(const UChar *srcChars, |

789 int32_t srcStart, |

790 int32_t srcLength) const; |
|
791 |
|
792 /** |
|
793 * Determine if this ends with the characters in <TT>text</TT> |
|
794 * @param text The text to match. |
|
795 * @return TRUE if this ends with the characters in <TT>text</TT>, |
|
796 * FALSE otherwise |
|
797 * @stable ICU 2.0 |
|
798 */ |
|
799 inline UBool endsWith(const UnicodeString& text) const; |
|
800 |
|
801 /** |
|
802 * Determine if this ends with the characters in <TT>srcText</TT> |
|
803 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
804 * @param srcText The text to match. |
|
805 * @param srcStart the offset into <TT>srcText</TT> to start matching |
|
806 * @param srcLength the number of characters in <TT>srcText</TT> to match |
|
807 * @return TRUE if this ends with the characters in <TT>srcText</TT>, |
|
808 * FALSE otherwise |
|
809 * @stable ICU 2.0 |
|
810 */ |
|
811 inline UBool endsWith(const UnicodeString& srcText, |
|
812 int32_t srcStart, |
|
813 int32_t srcLength) const; |
|
814 |
|
815 /** |
|
816 * Determine if this ends with the characters in <TT>srcChars</TT> |
|
817 * @param srcChars The characters to match. |
|
818 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
819 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, |
|
820 * FALSE otherwise |
|
821 * @stable ICU 2.0 |
|
822 */ |
|
823 inline UBool endsWith(const UChar *srcChars, |
|
824 int32_t srcLength) const; |
|
825 |
|
826 /** |
|
827 * Determine if this ends with the characters in <TT>srcChars</TT> |
|
828 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
829 * @param srcChars The characters to match. |
|
830 * @param srcStart the offset into <TT>srcChars</TT> to start matching |
|
831 * @param srcLength the number of characters in <TT>srcChars</TT> to match |
|
832 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, |
|
833 * FALSE otherwise |
|
834 * @stable ICU 2.0 |
|
835 */ |
|
836 inline UBool endsWith(const UChar *srcChars, |
|
837 int32_t srcStart, |
|
838 int32_t srcLength) const; |
|
839 |
|
840 |
|
841 /* Searching - bitwise only */ |
|
842 |
|
843 /** |
|
844 * Locate in this the first occurrence of the characters in <TT>text</TT>, |
|
845 * using bitwise comparison. |
|
846 * @param text The text to search for. |
|
847 * @return The offset into this of the start of <TT>text</TT>, |
|
848 * or -1 if not found. |
|
849 * @stable ICU 2.0 |
|
850 */ |
|
851 inline int32_t indexOf(const UnicodeString& text) const; |
|
852 |
|
853 /** |
|
854 * Locate in this the first occurrence of the characters in <TT>text</TT> |
|
855 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
856 * @param text The text to search for. |
|
857 * @param start The offset at which searching will start. |
|
858 * @return The offset into this of the start of <TT>text</TT>, |
|
859 * or -1 if not found. |
|
860 * @stable ICU 2.0 |
|
861 */ |
|
862 inline int32_t indexOf(const UnicodeString& text, |
|
863 int32_t start) const; |
|
864 |
|
865 /** |
|
866 * Locate in this the first occurrence in the range |
|
867 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
868 * in <TT>text</TT>, using bitwise comparison. |
|
869 * @param text The text to search for. |
|
870 * @param start The offset at which searching will start. |
|
871 * @param length The number of characters to search |
|
872 * @return The offset into this of the start of <TT>text</TT>, |
|
873 * or -1 if not found. |
|
874 * @stable ICU 2.0 |
|
875 */ |
|
876 inline int32_t indexOf(const UnicodeString& text, |
|
877 int32_t start, |
|
878 int32_t length) const; |
|
879 |
|
880 /** |
|
881 * Locate in this the first occurrence in the range |
|
882 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
883 * in <TT>srcText</TT> in the range |
|
884 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
|
885 * using bitwise comparison. |
|
886 * @param srcText The text to search for. |
|
887 * @param srcStart the offset into <TT>srcText</TT> at which |
|
888 * to start matching |
|
889 * @param srcLength the number of characters in <TT>srcText</TT> to match |
|
890 * @param start the offset into this at which to start matching |
|
891 * @param length the number of characters in this to search |
|
892 * @return The offset into this of the start of <TT>srcText</TT>, |
|
893 * or -1 if not found. |
|
894 * @stable ICU 2.0 |
|
895 */ |
|
896 inline int32_t indexOf(const UnicodeString& srcText, |
|
897 int32_t srcStart, |
|
898 int32_t srcLength, |
|
899 int32_t start, |
|
900 int32_t length) const; |
|
901 |
|
902 /** |
|
903 * Locate in this the first occurrence of the characters in |
|
904 * <TT>srcChars</TT> |
|
905 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
906 * @param srcChars The text to search for. |
|
907 * @param srcLength the number of characters in <TT>srcChars</TT> to match |
|
908 * @param start the offset into this at which to start matching |
|
909 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
910 * or -1 if not found. |
|
911 * @stable ICU 2.0 |
|
912 */ |
|
913 inline int32_t indexOf(const UChar *srcChars, |
|
914 int32_t srcLength, |
|
915 int32_t start) const; |
|
916 |
|
917 /** |
|
918 * Locate in this the first occurrence in the range |
|
919 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
920 * in <TT>srcChars</TT>, using bitwise comparison. |
|
921 * @param srcChars The text to search for. |
|
922 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
923 * @param start The offset at which searching will start. |
|
924 * @param length The number of characters to search |
|
925 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
926 * or -1 if not found. |
|
927 * @stable ICU 2.0 |
|
928 */ |
|
929 inline int32_t indexOf(const UChar *srcChars, |
|
930 int32_t srcLength, |
|
931 int32_t start, |
|
932 int32_t length) const; |
|
933 |
|
934 /** |
|
935 * Locate in this the first occurrence in the range |
|
936 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
937 * in <TT>srcChars</TT> in the range |
|
938 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
|
939 * using bitwise comparison. |
|
940 * @param srcChars The text to search for. |
|
941 * @param srcStart the offset into <TT>srcChars</TT> at which |
|
942 * to start matching |
|
943 * @param srcLength the number of characters in <TT>srcChars</TT> to match |
|
944 * @param start the offset into this at which to start matching |
|
945 * @param length the number of characters in this to search |
|
946 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
947 * or -1 if not found. |
|
948 * @stable ICU 2.0 |
|
949 */ |
|
950 int32_t indexOf(const UChar *srcChars, |
|
951 int32_t srcStart, |
|
952 int32_t srcLength, |
|
953 int32_t start, |
|
954 int32_t length) const; |
|
955 |
|
956 /** |
|
957 * Locate in this the first occurrence of the BMP code point <code>c</code>, |
|
958 * using bitwise comparison. |
|
959 * @param c The code unit to search for. |
|
960 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
961 * @stable ICU 2.0 |
|
962 */ |
|
963 inline int32_t indexOf(UChar c) const; |
|
964 |
|
965 /** |
|
966 * Locate in this the first occurrence of the code point <TT>c</TT>, |
|
967 * using bitwise comparison. |
|
968 * |
|
969 * @param c The code point to search for. |
|
970 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
971 * @stable ICU 2.0 |
|
972 */ |
|
973 inline int32_t indexOf(UChar32 c) const; |
|
974 |
|
975 /** |
|
976 * Locate in this the first occurrence of the BMP code point <code>c</code>, |
|
977 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
978 * @param c The code unit to search for. |
|
979 * @param start The offset at which searching will start. |
|
980 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
981 * @stable ICU 2.0 |
|
982 */ |
|
983 inline int32_t indexOf(UChar c, |
|
984 int32_t start) const; |
|
985 |
|
986 /** |
|
987 * Locate in this the first occurrence of the code point <TT>c</TT> |
|
988 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
989 * |
|
990 * @param c The code point to search for. |
|
991 * @param start The offset at which searching will start. |
|
992 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
993 * @stable ICU 2.0 |
|
994 */ |
|
995 inline int32_t indexOf(UChar32 c, |
|
996 int32_t start) const; |
|
997 |
|
998 /** |
|
999 * Locate in this the first occurrence of the BMP code point <code>c</code> |
|
1000 * in the range [<TT>start</TT>, <TT>start + length</TT>), |
|
1001 * using bitwise comparison. |
|
1002 * @param c The code unit to search for. |
|
1003 * @param start the offset into this at which to start matching |
|
1004 * @param length the number of characters in this to search |
|
1005 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1006 * @stable ICU 2.0 |
|
1007 */ |
|
1008 inline int32_t indexOf(UChar c, |
|
1009 int32_t start, |
|
1010 int32_t length) const; |
|
1011 |
|
1012 /** |
|
1013 * Locate in this the first occurrence of the code point <TT>c</TT> |
|
1014 * in the range [<TT>start</TT>, <TT>start + length</TT>), |
|
1015 * using bitwise comparison. |
|
1016 * |
|
1017 * @param c The code point to search for. |
|
1018 * @param start the offset into this at which to start matching |
|
1019 * @param length the number of characters in this to search |
|
1020 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1021 * @stable ICU 2.0 |
|
1022 */ |
|
1023 inline int32_t indexOf(UChar32 c, |
|
1024 int32_t start, |
|
1025 int32_t length) const; |
|
1026 |
|
1027 /** |
|
1028 * Locate in this the last occurrence of the characters in <TT>text</TT>, |
|
1029 * using bitwise comparison. |
|
1030 * @param text The text to search for. |
|
1031 * @return The offset into this of the start of <TT>text</TT>, |
|
1032 * or -1 if not found. |
|
1033 * @stable ICU 2.0 |
|
1034 */ |
|
1035 inline int32_t lastIndexOf(const UnicodeString& text) const; |
|
1036 |
|
1037 /** |
|
1038 * Locate in this the last occurrence of the characters in <TT>text</TT> |
|
1039 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
1040 * @param text The text to search for. |
|
1041 * @param start The offset at which searching will start. |
|
1042 * @return The offset into this of the start of <TT>text</TT>, |
|
1043 * or -1 if not found. |
|
1044 * @stable ICU 2.0 |
|
1045 */ |
|
1046 inline int32_t lastIndexOf(const UnicodeString& text, |
|
1047 int32_t start) const; |
|
1048 |
|
1049 /** |
|
1050 * Locate in this the last occurrence in the range |
|
1051 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
1052 * in <TT>text</TT>, using bitwise comparison. |
|
1053 * @param text The text to search for. |
|
1054 * @param start The offset at which searching will start. |
|
1055 * @param length The number of characters to search |
|
1056 * @return The offset into this of the start of <TT>text</TT>, |
|
1057 * or -1 if not found. |
|
1058 * @stable ICU 2.0 |
|
1059 */ |
|
1060 inline int32_t lastIndexOf(const UnicodeString& text, |
|
1061 int32_t start, |
|
1062 int32_t length) const; |
|
1063 |
|
1064 /** |
|
1065 * Locate in this the last occurrence in the range |
|
1066 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
1067 * in <TT>srcText</TT> in the range |
|
1068 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
|
1069 * using bitwise comparison. |
|
1070 * @param srcText The text to search for. |
|
1071 * @param srcStart the offset into <TT>srcText</TT> at which |
|
1072 * to start matching |
|
1073 * @param srcLength the number of characters in <TT>srcText</TT> to match |
|
1074 * @param start the offset into this at which to start matching |
|
1075 * @param length the number of characters in this to search |
|
1076 * @return The offset into this of the start of <TT>srcText</TT>, |
|
1077 * or -1 if not found. |
|
1078 * @stable ICU 2.0 |
|
1079 */ |
|
1080 inline int32_t lastIndexOf(const UnicodeString& srcText, |
|
1081 int32_t srcStart, |
|
1082 int32_t srcLength, |
|
1083 int32_t start, |
|
1084 int32_t length) const; |
|
1085 |
|
1086 /** |
|
1087 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> |
|
1088 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
1089 * @param srcChars The text to search for. |
|
1090 * @param srcLength the number of characters in <TT>srcChars</TT> to match |
|
1091 * @param start the offset into this at which to start matching |
|
1092 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
1093 * or -1 if not found. |
|
1094 * @stable ICU 2.0 |
|
1095 */ |
|
1096 inline int32_t lastIndexOf(const UChar *srcChars, |
|
1097 int32_t srcLength, |
|
1098 int32_t start) const; |
|
1099 |
|
1100 /** |
|
1101 * Locate in this the last occurrence in the range |
|
1102 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
1103 * in <TT>srcChars</TT>, using bitwise comparison. |
|
1104 * @param srcChars The text to search for. |
|
1105 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
1106 * @param start The offset at which searching will start. |
|
1107 * @param length The number of characters to search |
|
1108 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
1109 * or -1 if not found. |
|
1110 * @stable ICU 2.0 |
|
1111 */ |
|
1112 inline int32_t lastIndexOf(const UChar *srcChars, |
|
1113 int32_t srcLength, |
|
1114 int32_t start, |
|
1115 int32_t length) const; |
|
1116 |
|
1117 /** |
|
1118 * Locate in this the last occurrence in the range |
|
1119 * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
|
1120 * in <TT>srcChars</TT> in the range |
|
1121 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
|
1122 * using bitwise comparison. |
|
1123 * @param srcChars The text to search for. |
|
1124 * @param srcStart the offset into <TT>srcChars</TT> at which |
|
1125 * to start matching |
|
1126 * @param srcLength the number of characters in <TT>srcChars</TT> to match |
|
1127 * @param start the offset into this at which to start matching |
|
1128 * @param length the number of characters in this to search |
|
1129 * @return The offset into this of the start of <TT>srcChars</TT>, |
|
1130 * or -1 if not found. |
|
1131 * @stable ICU 2.0 |
|
1132 */ |
|
1133 int32_t lastIndexOf(const UChar *srcChars, |
|
1134 int32_t srcStart, |
|
1135 int32_t srcLength, |
|
1136 int32_t start, |
|
1137 int32_t length) const; |
|
1138 |
|
1139 /** |
|
1140 * Locate in this the last occurrence of the BMP code point <code>c</code>, |
|
1141 * using bitwise comparison. |
|
1142 * @param c The code unit to search for. |
|
1143 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1144 * @stable ICU 2.0 |
|
1145 */ |
|
1146 inline int32_t lastIndexOf(UChar c) const; |
|
1147 |
|
1148 /** |
|
1149 * Locate in this the last occurrence of the code point <TT>c</TT>, |
|
1150 * using bitwise comparison. |
|
1151 * |
|
1152 * @param c The code point to search for. |
|
1153 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1154 * @stable ICU 2.0 |
|
1155 */ |
|
1156 inline int32_t lastIndexOf(UChar32 c) const; |
|
1157 |
|
1158 /** |
|
1159 * Locate in this the last occurrence of the BMP code point <code>c</code> |
|
1160 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
1161 * @param c The code unit to search for. |
|
1162 * @param start The offset at which searching will start. |
|
1163 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1164 * @stable ICU 2.0 |
|
1165 */ |
|
1166 inline int32_t lastIndexOf(UChar c, |
|
1167 int32_t start) const; |
|
1168 |
|
1169 /** |
|
1170 * Locate in this the last occurrence of the code point <TT>c</TT> |
|
1171 * starting at offset <TT>start</TT>, using bitwise comparison. |
|
1172 * |
|
1173 * @param c The code point to search for. |
|
1174 * @param start The offset at which searching will start. |
|
1175 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1176 * @stable ICU 2.0 |
|
1177 */ |
|
1178 inline int32_t lastIndexOf(UChar32 c, |
|
1179 int32_t start) const; |
|
1180 |
|
1181 /** |
|
1182 * Locate in this the last occurrence of the BMP code point <code>c</code> |
|
1183 * in the range [<TT>start</TT>, <TT>start + length</TT>), |
|
1184 * using bitwise comparison. |
|
1185 * @param c The code unit to search for. |
|
1186 * @param start the offset into this at which to start matching |
|
1187 * @param length the number of characters in this to search |
|
1188 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1189 * @stable ICU 2.0 |
|
1190 */ |
|
1191 inline int32_t lastIndexOf(UChar c, |
|
1192 int32_t start, |
|
1193 int32_t length) const; |
|
1194 |
|
1195 /** |
|
1196 * Locate in this the last occurrence of the code point <TT>c</TT> |
|
1197 * in the range [<TT>start</TT>, <TT>start + length</TT>), |
|
1198 * using bitwise comparison. |
|
1199 * |
|
1200 * @param c The code point to search for. |
|
1201 * @param start the offset into this at which to start matching |
|
1202 * @param length the number of characters in this to search |
|
1203 * @return The offset into this of <TT>c</TT>, or -1 if not found. |
|
1204 * @stable ICU 2.0 |
|
1205 */ |
|
1206 inline int32_t lastIndexOf(UChar32 c, |
|
1207 int32_t start, |
|
1208 int32_t length) const; |
|
1209 |
|
1210 |
|
1211 /* Character access */ |
|
1212 |
|
1213 /** |
|
1214 * Return the code unit at offset <tt>offset</tt>. |
|
1215 * If the offset is not valid (0..length()-1) then U+ffff is returned. |
|
1216 * @param offset a valid offset into the text |
|
1217 * @return the code unit at offset <tt>offset</tt> |
|
1218 * or 0xffff if the offset is not valid for this string |
|
1219 * @stable ICU 2.0 |
|
1220 */ |
|
1221 inline UChar charAt(int32_t offset) const; |
|
1222 |
|
1223 /** |
|
1224 * Return the code unit at offset <tt>offset</tt>. |
|
1225 * If the offset is not valid (0..length()-1) then U+ffff is returned. |
|
1226 * @param offset a valid offset into the text |
|
1227 * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string |
|
1228 * @stable ICU 2.0 |
|
1229 */ |
|
1230 inline UChar operator[] (int32_t offset) const; |
|
1231 |
|
1232 /** |
|
1233 * Return the code point that contains the code unit |
|
1234 * at offset <tt>offset</tt>. |
|
1235 * If the offset is not valid (0..length()-1) then U+ffff is returned. |
|
1236 * @param offset a valid offset into the text |
|
1237 * that indicates the text offset of any of the code units |
|
1238 * that will be assembled into a code point (21-bit value) and returned |
|
1239 * @return the code point of text at <tt>offset</tt> |
|
1240 * or 0xffff if the offset is not valid for this string |
|
1241 * @stable ICU 2.0 |
|
1242 */ |
|
1243 inline UChar32 char32At(int32_t offset) const; |
|
1244 |
|
1245 /** |
|
1246 * Adjust a random-access offset so that |
|
1247 * it points to the beginning of a Unicode character. |
|
1248 * The offset that is passed in points to |
|
1249 * any code unit of a code point, |
|
1250 * while the returned offset will point to the first code unit |
|
1251 * of the same code point. |
|
1252 * In UTF-16, if the input offset points to a second surrogate |
|
1253 * of a surrogate pair, then the returned offset will point |
|
1254 * to the first surrogate. |
|
1255 * @param offset a valid offset into one code point of the text |
|
1256 * @return offset of the first code unit of the same code point |
|
1257 * @see U16_SET_CP_START |
|
1258 * @stable ICU 2.0 |
|
1259 */ |
|
1260 inline int32_t getChar32Start(int32_t offset) const; |
|
1261 |
|
1262 /** |

1263 * Adjust a random-access offset so that |

1264 * it points behind a Unicode character. |

1265 * The offset that is passed in points behind |

1266 * any code unit of a code point, |

1267 * while the returned offset will point behind the last code unit |

1268 * of the same code point. |

1269 * In UTF-16, if the input offset points behind the first surrogate |

1270 * (i.e., to the second surrogate) |

1271 * of a surrogate pair, then the returned offset will point |

1272 * behind the second surrogate (i.e., to the code unit after the pair). |

1273 * @param offset a valid offset after any code unit of a code point of the text |

1274 * @return offset of the first code unit after the same code point |

1275 * @see U16_SET_CP_LIMIT |

1276 * @stable ICU 2.0 |

1277 */ |

1278 inline int32_t getChar32Limit(int32_t offset) const; |
|
1279 |
|
1280 /** |
|
1281 * Move the code unit index along the string by delta code points. |
|
1282 * Interpret the input index as a code unit-based offset into the string, |
|
1283 * move the index forward or backward by delta code points, and |
|
1284 * return the resulting index. |
|
1285 * The input index should point to the first code unit of a code point, |
|
1286 * if there is more than one. |
|
1287 * |
|
1288 * Both input and output indexes are code unit-based as for all |
|
1289 * string indexes/offsets in ICU (and other libraries, like MBCS char*). |
|
1290 * If delta<0 then the index is moved backward (toward the start of the string). |
|
1291 * If delta>0 then the index is moved forward (toward the end of the string). |
|
1292 * |
|
1293 * This behaves like CharacterIterator::move32(delta, kCurrent). |
|
1294 * |
|
1295 * Behavior for out-of-bounds indexes: |
|
1296 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., |
|
1297 * if the input index<0 then it is pinned to 0; |
|
1298 * if it is index>length() then it is pinned to length(). |
|
1299 * Afterwards, the index is moved by <code>delta</code> code points |
|
1300 * forward or backward, |
|
1301 * but no further backward than to 0 and no further forward than to length(). |
|
1302 * The resulting index return value will be in between 0 and length(), inclusively. |
|
1303 * |
|
1304 * Examples: |
|
1305 * <pre> |
|
1306 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 |
|
1307 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape(); |
|
1308 * |
|
1309 * // initial index: position of U+10000 |
|
1310 * int32_t index=1; |
|
1311 * |
|
1312 * // the following examples will all result in index==4, position of U+10ffff |
|
1313 * |
|
1314 * // skip 2 code points from some position in the string |
|
1315 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' |
|
1316 * |
|
1317 * // go to the 3rd code point from the start of s (0-based) |
|
1318 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' |
|
1319 * |
|
1320 * // go to the next-to-last code point of s |
|
1321 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff |
|
1322 * </pre> |
|
1323 * |
|
1324 * @param index input code unit index |
|
1325 * @param delta (signed) code point count to move the index forward or backward |
|
1326 * in the string |
|
1327 * @return the resulting code unit index |
|
1328 * @stable ICU 2.0 |
|
1329 */ |
|
1330 int32_t moveIndex32(int32_t index, int32_t delta) const; |
|
1331 |
|
1332 /* Substring extraction */ |
|
1333 |
|
1334 /** |
|
1335 * Copy the characters in the range |
|
1336 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, |
|
1337 * beginning at <tt>dstStart</tt>. |
|
1338 * If the string aliases to <code>dst</code> itself as an external buffer, |
|
1339 * then extract() will not copy the contents. |
|
1340 * |
|
1341 * @param start offset of first character which will be copied into the array |
|
1342 * @param length the number of characters to extract |
|
1343 * @param dst array in which to copy characters. The length of <tt>dst</tt> |
|
1344 * must be at least (<tt>dstStart + length</tt>). |
|
1345 * @param dstStart the offset in <TT>dst</TT> where the first character |
|
1346 * will be extracted |
|
1347 * @stable ICU 2.0 |
|
1348 */ |
|
1349 inline void extract(int32_t start, |
|
1350 int32_t length, |
|
1351 UChar *dst, |
|
1352 int32_t dstStart = 0) const; |
|
1353 |
|
1354 /** |
|
1355 * Copy the contents of the string into dest. |
|
1356 * This is a convenience function that |
|
1357 * checks if there is enough space in dest, |
|
1358 * extracts the entire string if possible, |
|
1359 * and NUL-terminates dest if possible. |
|
1360 * |
|
1361 * If the string fits into dest but cannot be NUL-terminated |
|
1362 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. |
|
1363 * If the string itself does not fit into dest |
|
1364 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. |
|
1365 * |
|
1366 * If the string aliases to <code>dest</code> itself as an external buffer, |
|
1367 * then extract() will not copy the contents. |
|
1368 * |
|
1369 * @param dest Destination string buffer. |
|
1370 * @param destCapacity Number of UChars available at dest. |
|
1371 * @param errorCode ICU error code. |
|
1372 * @return length() |
|
1373 * @stable ICU 2.0 |
|
1374 */ |
|
1375 int32_t |
|
1376 extract(UChar *dest, int32_t destCapacity, |
|
1377 UErrorCode &errorCode) const; |
|
1378 |
|
1379 /** |
|
1380 * Copy the characters in the range |
|
1381 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString |
|
1382 * <tt>target</tt>. |
|
1383 * @param start offset of first character which will be copied |
|
1384 * @param length the number of characters to extract |
|
1385 * @param target UnicodeString into which to copy characters. |
|
1386 * The function itself returns nothing; the extracted characters are placed in <TT>target</TT>. |
|
1387 * @stable ICU 2.0 |
|
1388 */ |
|
1389 inline void extract(int32_t start, |
|
1390 int32_t length, |
|
1391 UnicodeString& target) const; |
|
1392 |
|
1393 /** |
|
1394 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) |
|
1395 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. |
|
1396 * @param start offset of first character which will be copied into the array |
|
1397 * @param limit offset immediately following the last character to be copied |
|
1398 * @param dst array in which to copy characters. The length of <tt>dst</tt> |
|
1399 * must be at least (<tt>dstStart + (limit - start)</tt>). |
|
1400 * @param dstStart the offset in <TT>dst</TT> where the first character |
|
1401 * will be extracted |
|
1402 * @stable ICU 2.0 |
|
1403 */ |
|
1404 inline void extractBetween(int32_t start, |
|
1405 int32_t limit, |
|
1406 UChar *dst, |
|
1407 int32_t dstStart = 0) const; |
|
1408 |
|
1409 /** |
|
1410 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) |
|
1411 * into the UnicodeString <tt>target</tt>. Replaceable API. |
|
1412 * @param start offset of first character which will be copied |
|
1413 * @param limit offset immediately following the last character to be copied |
|
1414 * @param target UnicodeString into which to copy characters. |
|
1415 * The function itself returns nothing; the extracted characters are placed in <TT>target</TT>. |
|
1416 * @stable ICU 2.0 |
|
1417 */ |
|
1418 virtual void extractBetween(int32_t start, |
|
1419 int32_t limit, |
|
1420 UnicodeString& target) const; |
|
1421 |
|
1422 /** |
|
1423 * Copy the characters in the range |
|
1424 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters. |
|
1425 * All characters must be invariant (see utypes.h). |
|
1426 * Use US_INV as the last, signature-distinguishing parameter. |
|
1427 * |
|
1428 * This function does not write any more than <code>targetCapacity</code> |
|
1429 * characters but returns the length of the entire output string |
|
1430 * so that one can allocate a larger buffer and call the function again |
|
1431 * if necessary. |
|
1432 * The output string is NUL-terminated if possible. |
|
1433 * |
|
1434 * @param start offset of first character which will be copied |
|
1435 * @param startLength the number of characters to extract |
|
1436 * @param target the target buffer for extraction, can be NULL |
|
1437 * if targetCapacity is 0 |
|
1438 * @param targetCapacity the length of the target buffer |
|
1439 * @param inv Signature-distinguishing parameter, use US_INV. |
|
1440 * @return the output string length, not including the terminating NUL |
|
1441 * @draft ICU 3.2 |
|
1442 */ |
|
1443 int32_t extract(int32_t start, |
|
1444 int32_t startLength, |
|
1445 char *target, |
|
1446 int32_t targetCapacity, |
|
1447 enum EInvariant inv) const; |
|
1448 |
|
1449 #if !UCONFIG_NO_CONVERSION |
|
1450 |
|
1451 /** |
|
1452 * Copy the characters in the range |
|
1453 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters |
|
1454 * in a specified codepage. |
|
1455 * The output string is NUL-terminated. |
|
1456 * |
|
1457 * Recommendation: For invariant-character strings use |
|
1458 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const |
|
1459 * because it avoids object code dependencies of UnicodeString on |
|
1460 * the conversion code. |
|
1461 * |
|
1462 * @param start offset of first character which will be copied |
|
1463 * @param startLength the number of characters to extract |
|
1464 * @param target the target buffer for extraction |
|
1465 * @param codepage the desired codepage for the characters. 0 has |
|
1466 * the special meaning of the default codepage |
|
1467 * If <code>codepage</code> is an empty string (<code>""</code>), |
|
1468 * then a simple conversion is performed on the codepage-invariant |
|
1469 * subset ("invariant characters") of the platform encoding. See utypes.h. |
|
1470 * If <TT>target</TT> is NULL, then the number of bytes required for |
|
1471 * <TT>target</TT> is returned. It is assumed that the target is big enough |
|
1472 * to fit all of the characters. |
|
1473 * @return the output string length, not including the terminating NUL |
|
1474 * @stable ICU 2.0 |
|
1475 */ |
|
1476 inline int32_t extract(int32_t start, |
|
1477 int32_t startLength, |
|
1478 char *target, |
|
1479 const char *codepage = 0) const; |
|
1480 |
|
1481 /** |
|
1482 * Copy the characters in the range |
|
1483 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters |
|
1484 * in a specified codepage. |
|
1485 * This function does not write any more than <code>targetLength</code> |
|
1486 * characters but returns the length of the entire output string |
|
1487 * so that one can allocate a larger buffer and call the function again |
|
1488 * if necessary. |
|
1489 * The output string is NUL-terminated if possible. |
|
1490 * |
|
1491 * Recommendation: For invariant-character strings use |
|
1492 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const |
|
1493 * because it avoids object code dependencies of UnicodeString on |
|
1494 * the conversion code. |
|
1495 * |
|
1496 * @param start offset of first character which will be copied |
|
1497 * @param startLength the number of characters to extract |
|
1498 * @param target the target buffer for extraction |
|
1499 * @param targetLength the length of the target buffer |
|
1500 * @param codepage the desired codepage for the characters. 0 has |
|
1501 * the special meaning of the default codepage |
|
1502 * If <code>codepage</code> is an empty string (<code>""</code>), |
|
1503 * then a simple conversion is performed on the codepage-invariant |
|
1504 * subset ("invariant characters") of the platform encoding. See utypes.h. |
|
1505 * If <TT>target</TT> is NULL, then the number of bytes required for |
|
1506 * <TT>target</TT> is returned. |
|
1507 * @return the output string length, not including the terminating NUL |
|
1508 * @stable ICU 2.0 |
|
1509 */ |
|
1510 int32_t extract(int32_t start, |
|
1511 int32_t startLength, |
|
1512 char *target, |
|
1513 uint32_t targetLength, |
|
1514 const char *codepage = 0) const; |
|
1515 |
|
1516 /** |
|
1517 * Convert the UnicodeString into a codepage string using an existing UConverter. |
|
1518 * The output string is NUL-terminated if possible. |
|
1519 * |
|
1520 * This function avoids the overhead of opening and closing a converter if |
|
1521 * multiple strings are extracted. |
|
1522 * |
|
1523 * @param dest destination string buffer, can be NULL if destCapacity==0 |
|
1524 * @param destCapacity the number of chars available at dest |
|
1525 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), |
|
1526 * or NULL for the default converter |
|
1527 * @param errorCode normal ICU error code |
|
1528 * @return the length of the output string, not counting the terminating NUL; |
|
1529 * if the length is greater than destCapacity, then the string will not fit |
|
1530 * and a buffer of the indicated length would need to be passed in |
|
1531 * @stable ICU 2.0 |
|
1532 */ |
|
1533 int32_t extract(char *dest, int32_t destCapacity, |
|
1534 UConverter *cnv, |
|
1535 UErrorCode &errorCode) const; |
|
1536 |
|
1537 #endif |
|
1538 |
|
1539 /* Length operations */ |
|
1540 |
|
1541 /** |
|
1542 * Return the length of the UnicodeString object. |
|
1543 * The length is the number of UChar code units in the UnicodeString. |
|
1544 * If you want the number of code points, please use countChar32(). |
|
1545 * @return the length of the UnicodeString object |
|
1546 * @see countChar32 |
|
1547 * @stable ICU 2.0 |
|
1548 */ |
|
1549 inline int32_t length(void) const; |
|
1550 |
|
1551 /** |
|
1552 * Count Unicode code points in the length UChar code units of the string. |
|
1553 * A code point may occupy either one or two UChar code units. |
|
1554 * Counting code points involves reading all code units. |
|
1555 * |
|
1556 * This function is basically the inverse of moveIndex32(). |
|
1557 * |
|
1558 * @param start the index of the first code unit to check |
|
1559 * @param length the number of UChar code units to check |
|
1560 * @return the number of code points in the specified code units |
|
1561 * @see length |
|
1562 * @stable ICU 2.0 |
|
1563 */ |
|
1564 int32_t |
|
1565 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; |
|
1566 |
|
1567 /** |
|
1568 * Check if the length UChar code units of the string |
|
1569 * contain more Unicode code points than a certain number. |
|
1570 * This is more efficient than counting all code points in this part of the string |
|
1571 * and comparing that number with a threshold. |
|
1572 * This function may not need to scan the string at all if the length |
|
1573 * falls within a certain range, and |
|
1574 * never needs to count more than 'number+1' code points. |
|
1575 * Logically equivalent to (countChar32(start, length)>number). |
|
1576 * A Unicode code point may occupy either one or two UChar code units. |
|
1577 * |
|
1578 * @param start the index of the first code unit to check (0 for the entire string) |
|
1579 * @param length the number of UChar code units to check |
|
1580 * (use INT32_MAX for the entire string; remember that start/length |
|
1581 * values are pinned) |
|
1582 * @param number The number of code points in the (sub)string is compared against |
|
1583 * the 'number' parameter. |
|
1584 * @return Boolean value for whether the string contains more Unicode code points |
|
1585 * than 'number'. Same as (u_countChar32(s, length)>number). |
|
1586 * @see countChar32 |
|
1587 * @see u_strHasMoreChar32Than |
|
1588 * @stable ICU 2.4 |
|
1589 */ |
|
1590 UBool |
|
1591 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; |
|
1592 |
|
1593 /** |
|
1594 * Determine if this string is empty. |
|
1595 * @return TRUE if this string contains 0 characters, FALSE otherwise. |
|
1596 * @stable ICU 2.0 |
|
1597 */ |
|
1598 inline UBool isEmpty(void) const; |
|
1599 |
|
1600 /** |
|
1601 * Return the capacity of the internal buffer of the UnicodeString object. |
|
1602 * This is useful together with the getBuffer functions. |
|
1603 * See there for details. |
|
1604 * |
|
1605 * @return the number of UChars available in the internal buffer |
|
1606 * @see getBuffer |
|
1607 * @stable ICU 2.0 |
|
1608 */ |
|
1609 inline int32_t getCapacity(void) const; |
|
1610 |
|
1611 /* Other operations */ |
|
1612 |
|
1613 /** |
|
1614 * Generate a hash code for this object. |
|
1615 * @return The hash code of this UnicodeString. |
|
1616 * @stable ICU 2.0 |
|
1617 */ |
|
1618 inline int32_t hashCode(void) const; |
|
1619 |
|
1620 /** |
|
1621 * Determine if this object contains a valid string. |
|
1622 * A bogus string has no value. It is different from an empty string. |
|
1623 * It can be used to indicate that no string value is available. |
|
1624 * getBuffer() and getTerminatedBuffer() return NULL, and |
|
1625 * length() returns 0. |
|
1626 * |
|
1627 * @return TRUE if the string is bogus (has no valid value), FALSE otherwise |
|
1628 * @see setToBogus() |
|
1629 * @stable ICU 2.0 |
|
1630 */ |
|
1631 inline UBool isBogus(void) const; |
|
1632 |
|
1633 |
|
1634 //======================================== |
|
1635 // Write operations |
|
1636 //======================================== |
|
1637 |
|
1638 /* Assignment operations */ |
|
1639 |
|
1640 /** |
|
1641 * Assignment operator. Replace the characters in this UnicodeString |
|
1642 * with the characters from <TT>srcText</TT>. |
|
1643 * @param srcText The text containing the characters to replace |
|
1644 * @return a reference to this |
|
1645 * @stable ICU 2.0 |
|
1646 */ |
|
1647 UnicodeString &operator=(const UnicodeString &srcText); |
|
1648 |
|
1649 /** |
|
1650 * Almost the same as the assignment operator. |
|
1651 * Replace the characters in this UnicodeString |
|
1652 * with the characters from <code>srcText</code>. |
|
1653 * |
|
1654 * This function works the same for all strings except for ones that |
|
1655 * are readonly aliases. |
|
1656 * Starting with ICU 2.4, the assignment operator and the copy constructor |
|
1657 * allocate a new buffer and copy the buffer contents even for readonly aliases. |
|
1658 * This function implements the old, more efficient but less safe behavior |
|
1659 * of making this string also a readonly alias to the same buffer. |
|
1660 * The fastCopyFrom function must be used only if it is known that the lifetime of |
|
1661 * this UnicodeString does not exceed the lifetime of the aliased buffer |
|
1662 * including its contents, for example for strings from resource bundles |
|
1663 * or aliases to string contents. |
|
1664 * |
|
1665 * @param src The text containing the characters to replace. |
|
1666 * @return a reference to this |
|
1667 * @stable ICU 2.4 |
|
1668 */ |
|
1669 UnicodeString &fastCopyFrom(const UnicodeString &src); |
|
1670 |
|
1671 /** |
|
1672 * Assignment operator. Replace the characters in this UnicodeString |
|
1673 * with the code unit <TT>ch</TT>. |
|
1674 * @param ch the code unit to replace |
|
1675 * @return a reference to this |
|
1676 * @stable ICU 2.0 |
|
1677 */ |
|
1678 inline UnicodeString& operator= (UChar ch); |
|
1679 |
|
1680 /** |
|
1681 * Assignment operator. Replace the characters in this UnicodeString |
|
1682 * with the code point <TT>ch</TT>. |
|
1683 * @param ch the code point to replace |
|
1684 * @return a reference to this |
|
1685 * @stable ICU 2.0 |
|
1686 */ |
|
1687 inline UnicodeString& operator= (UChar32 ch); |
|
1688 |
|
1689 /** |
|
1690 * Set the text in the UnicodeString object to the characters |
|
1691 * in <TT>srcText</TT> in the range |
|
1692 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). |
|
1693 * <TT>srcText</TT> is not modified. |
|
1694 * @param srcText the source for the new characters |
|
1695 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
1696 * will be obtained |
|
1697 * @return a reference to this |
|
1698 * @stable ICU 2.2 |
|
1699 */ |
|
1700 inline UnicodeString& setTo(const UnicodeString& srcText, |
|
1701 int32_t srcStart); |
|
1702 |
|
1703 /** |
|
1704 * Set the text in the UnicodeString object to the characters |
|
1705 * in <TT>srcText</TT> in the range |
|
1706 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
1707 * <TT>srcText</TT> is not modified. |
|
1708 * @param srcText the source for the new characters |
|
1709 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
1710 * will be obtained |
|
1711 * @param srcLength the number of characters in <TT>srcText</TT> in the |
|
1712 * replace string. |
|
1713 * @return a reference to this |
|
1714 * @stable ICU 2.0 |
|
1715 */ |
|
1716 inline UnicodeString& setTo(const UnicodeString& srcText, |
|
1717 int32_t srcStart, |
|
1718 int32_t srcLength); |
|
1719 |
|
1720 /** |
|
1721 * Set the text in the UnicodeString object to the characters in |
|
1722 * <TT>srcText</TT>. |
|
1723 * <TT>srcText</TT> is not modified. |
|
1724 * @param srcText the source for the new characters |
|
1725 * @return a reference to this |
|
1726 * @stable ICU 2.0 |
|
1727 */ |
|
1728 inline UnicodeString& setTo(const UnicodeString& srcText); |
|
1729 |
|
1730 /** |
|
1731 * Set the characters in the UnicodeString object to the characters |
|
1732 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. |
|
1733 * @param srcChars the source for the new characters |
|
1734 * @param srcLength the number of Unicode characters in srcChars. |
|
1735 * @return a reference to this |
|
1736 * @stable ICU 2.0 |
|
1737 */ |
|
1738 inline UnicodeString& setTo(const UChar *srcChars, |
|
1739 int32_t srcLength); |
|
1740 |
|
1741 /** |
|
1742 * Set the characters in the UnicodeString object to the code unit |
|
1743 * <TT>srcChar</TT>. |
|
1744 * @param srcChar the code unit which becomes the UnicodeString's character |
|
1745 * content |
|
1746 * @return a reference to this |
|
1747 * @stable ICU 2.0 |
|
1748 */ |
|
1749 UnicodeString& setTo(UChar srcChar); |
|
1750 |
|
1751 /** |
|
1752 * Set the characters in the UnicodeString object to the code point |
|
1753 * <TT>srcChar</TT>. |
|
1754 * @param srcChar the code point which becomes the UnicodeString's character |
|
1755 * content |
|
1756 * @return a reference to this |
|
1757 * @stable ICU 2.0 |
|
1758 */ |
|
1759 UnicodeString& setTo(UChar32 srcChar); |
|
1760 |
|
1761 /** |
|
1762 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. |
|
1763 * The text will be used for the UnicodeString object, but |
|
1764 * it will not be released when the UnicodeString is destroyed. |
|
1765 * This has copy-on-write semantics: |
|
1766 * When the string is modified, then the buffer is first copied into |
|
1767 * newly allocated memory. |
|
1768 * The aliased buffer is never modified. |
|
1769 * When this string is copied with the copy constructor or the assignment operator, the text is copied; |

1770 * when fastCopyFrom() is used, the text is aliased again, so that both strings then alias the same readonly-text. |
|
1771 * |
|
1772 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. |
|
1773 * This must be true if <code>textLength==-1</code>. |
|
1774 * @param text The characters to alias for the UnicodeString. |
|
1775 * @param textLength The number of Unicode characters in <code>text</code> to alias. |
|
1776 * If -1, then this function will determine the length |
|
1777 * by calling <code>u_strlen()</code>. |
|
1778 * @return a reference to this |
|
1779 * @stable ICU 2.0 |
|
1780 */ |
|
1781 UnicodeString &setTo(UBool isTerminated, |
|
1782 const UChar *text, |
|
1783 int32_t textLength); |
|
1784 |
|
1785 /** |
|
1786 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. |
|
1787 * The text will be used for the UnicodeString object, but |
|
1788 * it will not be released when the UnicodeString is destroyed. |
|
1789 * This has write-through semantics: |
|
1790 * For as long as the capacity of the buffer is sufficient, write operations |
|
1791 * will directly affect the buffer. When more capacity is necessary, then |
|
1792 * a new buffer will be allocated and the contents copied as with regularly |
|
1793 * constructed strings. |
|
1794 * In an assignment to another UnicodeString, the buffer will be copied. |
|
1795 * The extract(UChar *dst) function detects whether the dst pointer is the same |
|
1796 * as the string buffer itself and will in this case not copy the contents. |
|
1797 * |
|
1798 * @param buffer The characters to alias for the UnicodeString. |
|
1799 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. |
|
1800 * @param buffCapacity The size of <code>buffer</code> in UChars. |
|
1801 * @return a reference to this |
|
1802 * @stable ICU 2.0 |
|
1803 */ |
|
1804 UnicodeString &setTo(UChar *buffer, |
|
1805 int32_t buffLength, |
|
1806 int32_t buffCapacity); |
|
1807 |
|
1808 /** |
|
1809 * Make this UnicodeString object invalid. |
|
1810 * The string will test TRUE with isBogus(). |
|
1811 * |
|
1812 * A bogus string has no value. It is different from an empty string. |
|
1813 * It can be used to indicate that no string value is available. |
|
1814 * getBuffer() and getTerminatedBuffer() return NULL, and |
|
1815 * length() returns 0. |
|
1816 * |
|
1817 * This utility function is used throughout the UnicodeString |
|
1818 * implementation to indicate that a UnicodeString operation failed, |
|
1819 * and may be used in other functions, |
|
1820 * especially but not exclusively when such functions do not |
|
1821 * take a UErrorCode for simplicity. |
|
1822 * |
|
1823 * The following methods, and no others, will clear a string object's bogus flag: |
|
1824 * - remove() |
|
1825 * - remove(0, INT32_MAX) |
|
1826 * - truncate(0) |
|
1827 * - operator=() (assignment operator) |
|
1828 * - setTo(...) |
|
1829 * |
|
1830 * The simplest way to turn a bogus string into an empty one |
|
1831 * is to use the remove() function. |
|
1832 * Examples for other functions that are equivalent to "set to empty string": |
|
1833 * \code |
|
1834 * if(s.isBogus()) { |
|
1835 * s.remove(); // set to an empty string (remove all), or |
|
1836 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or |
|
1837 * s.truncate(0); // set to an empty string (complete truncation), or |
|
1838 * s=UnicodeString(); // assign an empty string, or |
|
1839 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or |
|
1840 * static const UChar nul=0; |
|
1841 * s.setTo(&nul, 0); // set to an empty C Unicode string |
|
1842 * } |
|
1843 * \endcode |
|
1844 * |
|
1845 * @see isBogus() |
|
1846 * @stable ICU 2.0 |
|
1847 */ |
|
1848 void setToBogus(); |
|
1849 |
|
1850 /** |
|
1851 * Set the character at the specified offset to the specified character. |
|
1852 * @param offset A valid offset into the text of the character to set |
|
1853 * @param ch The new character |
|
1854 * @return A reference to this |
|
1855 * @stable ICU 2.0 |
|
1856 */ |
|
1857 UnicodeString& setCharAt(int32_t offset, |
|
1858 UChar ch); |
|
1859 |
|
1860 |
|
1861 /* Append operations */ |
|
1862 |
|
1863 /** |
|
1864 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString |
|
1865 * object. |
|
1866 * @param ch the code unit to be appended |
|
1867 * @return a reference to this |
|
1868 * @stable ICU 2.0 |
|
1869 */ |
|
1870 inline UnicodeString& operator+= (UChar ch); |
|
1871 |
|
1872 /** |
|
1873 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString |
|
1874 * object. |
|
1875 * @param ch the code point to be appended |
|
1876 * @return a reference to this |
|
1877 * @stable ICU 2.0 |
|
1878 */ |
|
1879 inline UnicodeString& operator+= (UChar32 ch); |
|
1880 |
|
1881 /** |
|
1882 * Append operator. Append the characters in <TT>srcText</TT> to the |
|
1883 * UnicodeString object. <TT>srcText</TT> is |
|
1884 * not modified. |
|
1885 * @param srcText the source for the new characters |
|
1886 * @return a reference to this |
|
1887 * @stable ICU 2.0 |
|
1888 */ |
|
1889 inline UnicodeString& operator+= (const UnicodeString& srcText); |
|
1890 |
|
1891 /** |
|
1892 * Append the characters |
|
1893 * in <TT>srcText</TT> in the range |
|
1894 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the |
|
1895 * UnicodeString object. <TT>srcText</TT> |
|
1896 * is not modified. |
|
1897 * @param srcText the source for the new characters |
|
1898 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
1899 * will be obtained |
|
1900 * @param srcLength the number of characters in <TT>srcText</TT> in |
|
1901 * the append string |
|
1902 * @return a reference to this |
|
1903 * @stable ICU 2.0 |
|
1904 */ |
|
1905 inline UnicodeString& append(const UnicodeString& srcText, |
|
1906 int32_t srcStart, |
|
1907 int32_t srcLength); |
|
1908 |
|
1909 /** |
|
1910 * Append the characters in <TT>srcText</TT> to the UnicodeString object. |

1911 * <TT>srcText</TT> is not modified. |
|
1912 * @param srcText the source for the new characters |
|
1913 * @return a reference to this |
|
1914 * @stable ICU 2.0 |
|
1915 */ |
|
1916 inline UnicodeString& append(const UnicodeString& srcText); |
|
1917 |
|
1918 /** |
|
1919 * Append the characters in <TT>srcChars</TT> in the range |
|
1920 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString |
|
1921 * object. |

1922 * <TT>srcChars</TT> is not modified. |
|
1923 * @param srcChars the source for the new characters |
|
1924 * @param srcStart the offset into <TT>srcChars</TT> where new characters |
|
1925 * will be obtained |
|
1926 * @param srcLength the number of characters in <TT>srcChars</TT> in |
|
1927 * the append string |
|
1928 * @return a reference to this |
|
1929 * @stable ICU 2.0 |
|
1930 */ |
|
1931 inline UnicodeString& append(const UChar *srcChars, |
|
1932 int32_t srcStart, |
|
1933 int32_t srcLength); |
|
1934 |
|
1935 /** |
|
1936 * Append the characters in <TT>srcChars</TT> to the UnicodeString object. |

1937 * <TT>srcChars</TT> is not modified. |
|
1938 * @param srcChars the source for the new characters |
|
1939 * @param srcLength the number of Unicode characters in <TT>srcChars</TT> |
|
1940 * @return a reference to this |
|
1941 * @stable ICU 2.0 |
|
1942 */ |
|
1943 inline UnicodeString& append(const UChar *srcChars, |
|
1944 int32_t srcLength); |
|
1945 |
|
1946 /** |
|
1947 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. |
|
1948 * @param srcChar the code unit to append |
|
1949 * @return a reference to this |
|
1950 * @stable ICU 2.0 |
|
1951 */ |
|
1952 inline UnicodeString& append(UChar srcChar); |
|
1953 |
|
1954 /** |
|
1955 * Append the code point <TT>srcChar</TT> to the UnicodeString object. |
|
1956 * @param srcChar the code point to append |
|
1957 * @return a reference to this |
|
1958 * @stable ICU 2.0 |
|
1959 */ |
|
1960 inline UnicodeString& append(UChar32 srcChar); |
|
1961 |
|
1962 |
|
1963 /* Insert operations */ |
|
1964 |
|
1965 /** |
|
1966 * Insert the characters in <TT>srcText</TT> in the range |
|
1967 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString |
|
1968 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. |
|
1969 * @param start the offset where the insertion begins |
|
1970 * @param srcText the source for the new characters |
|
1971 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
1972 * will be obtained |
|
1973 * @param srcLength the number of characters in <TT>srcText</TT> in |
|
1974 * the insert string |
|
1975 * @return a reference to this |
|
1976 * @stable ICU 2.0 |
|
1977 */ |
|
1978 inline UnicodeString& insert(int32_t start, |
|
1979 const UnicodeString& srcText, |
|
1980 int32_t srcStart, |
|
1981 int32_t srcLength); |
|
1982 |
|
1983 /** |
|
1984 * Insert the characters in <TT>srcText</TT> into the UnicodeString object |
|
1985 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. |
|
1986 * @param start the offset where the insertion begins |
|
1987 * @param srcText the source for the new characters |
|
1988 * @return a reference to this |
|
1989 * @stable ICU 2.0 |
|
1990 */ |
|
1991 inline UnicodeString& insert(int32_t start, |
|
1992 const UnicodeString& srcText); |
|
1993 |
|
1994 /** |
|
1995 * Insert the characters in <TT>srcChars</TT> in the range |
|
1996 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString |
|
1997 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. |
|
1998 * @param start the offset at which the insertion begins |
|
1999 * @param srcChars the source for the new characters |
|
2000 * @param srcStart the offset into <TT>srcChars</TT> where new characters |
|
2001 * will be obtained |
|
2002 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
2003 * in the insert string |
|
2004 * @return a reference to this |
|
2005 * @stable ICU 2.0 |
|
2006 */ |
|
2007 inline UnicodeString& insert(int32_t start, |
|
2008 const UChar *srcChars, |
|
2009 int32_t srcStart, |
|
2010 int32_t srcLength); |
|
2011 |
|
2012 /** |
|
2013 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object |
|
2014 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. |
|
2015 * @param start the offset where the insertion begins |
|
2016 * @param srcChars the source for the new characters |
|
2017 * @param srcLength the number of Unicode characters in srcChars. |
|
2018 * @return a reference to this |
|
2019 * @stable ICU 2.0 |
|
2020 */ |
|
2021 inline UnicodeString& insert(int32_t start, |
|
2022 const UChar *srcChars, |
|
2023 int32_t srcLength); |
|
2024 |
|
2025 /** |
|
2026 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at |
|
2027 * offset <TT>start</TT>. |
|
2028 * @param start the offset at which the insertion occurs |
|
2029 * @param srcChar the code unit to insert |
|
2030 * @return a reference to this |
|
2031 * @stable ICU 2.0 |
|
2032 */ |
|
2033 inline UnicodeString& insert(int32_t start, |
|
2034 UChar srcChar); |
|
2035 |
|
2036 /** |
|
2037 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at |
|
2038 * offset <TT>start</TT>. |
|
2039 * @param start the offset at which the insertion occurs |
|
2040 * @param srcChar the code point to insert |
|
2041 * @return a reference to this |
|
2042 * @stable ICU 2.0 |
|
2043 */ |
|
2044 inline UnicodeString& insert(int32_t start, |
|
2045 UChar32 srcChar); |
|
2046 |
|
2047 |
|
2048 /* Replace operations */ |
|
2049 |
|
2050 /** |
|
2051 * Replace the characters in the range |
|
2052 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in |
|
2053 * <TT>srcText</TT> in the range |
|
2054 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
|
2055 * <TT>srcText</TT> is not modified. |
|
2056 * @param start the offset at which the replace operation begins |
|
2057 * @param length the number of characters to replace. The character at |
|
2058 * <TT>start + length</TT> is not modified. |
|
2059 * @param srcText the source for the new characters |
|
2060 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
2061 * will be obtained |
|
2062 * @param srcLength the number of characters in <TT>srcText</TT> in |
|
2063 * the replace string |
|
2064 * @return a reference to this |
|
2065 * @stable ICU 2.0 |
|
2066 */ |
|
2067 UnicodeString& replace(int32_t start, |
|
2068 int32_t length, |
|
2069 const UnicodeString& srcText, |
|
2070 int32_t srcStart, |
|
2071 int32_t srcLength); |
|
2072 |
|
2073 /** |
|
2074 * Replace the characters in the range |
|
2075 * [<TT>start</TT>, <TT>start + length</TT>) |
|
2076 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is |
|
2077 * not modified. |
|
2078 * @param start the offset at which the replace operation begins |
|
2079 * @param length the number of characters to replace. The character at |
|
2080 * <TT>start + length</TT> is not modified. |
|
2081 * @param srcText the source for the new characters |
|
2082 * @return a reference to this |
|
2083 * @stable ICU 2.0 |
|
2084 */ |
|
2085 UnicodeString& replace(int32_t start, |
|
2086 int32_t length, |
|
2087 const UnicodeString& srcText); |
|
2088 |
|
2089 /** |
|
2090 * Replace the characters in the range |
|
2091 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in |
|
2092 * <TT>srcChars</TT> in the range |
|
2093 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> |
|
2094 * is not modified. |
|
2095 * @param start the offset at which the replace operation begins |
|
2096 * @param length the number of characters to replace. The character at |
|
2097 * <TT>start + length</TT> is not modified. |
|
2098 * @param srcChars the source for the new characters |
|
2099 * @param srcStart the offset into <TT>srcChars</TT> where new characters |
|
2100 * will be obtained |
|
2101 * @param srcLength the number of characters in <TT>srcChars</TT> |
|
2102 * in the replace string |
|
2103 * @return a reference to this |
|
2104 * @stable ICU 2.0 |
|
2105 */ |
|
2106 UnicodeString& replace(int32_t start, |
|
2107 int32_t length, |
|
2108 const UChar *srcChars, |
|
2109 int32_t srcStart, |
|
2110 int32_t srcLength); |
|
2111 |
|
2112 /** |
|
2113 * Replace the characters in the range |
|
2114 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in |
|
2115 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. |
|
2116 * @param start the offset at which the replace operation begins |
|
2117 * @param length number of characters to replace. The character at |
|
2118 * <TT>start + length</TT> is not modified. |
|
2119 * @param srcChars the source for the new characters |
|
2120 * @param srcLength the number of Unicode characters in srcChars |
|
2121 * @return a reference to this |
|
2122 * @stable ICU 2.0 |
|
2123 */ |
|
2124 inline UnicodeString& replace(int32_t start, |
|
2125 int32_t length, |
|
2126 const UChar *srcChars, |
|
2127 int32_t srcLength); |
|
2128 |
|
2129 /** |
|
2130 * Replace the characters in the range |
|
2131 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit |
|
2132 * <TT>srcChar</TT>. |
|
2133 * @param start the offset at which the replace operation begins |
|
2134 * @param length the number of characters to replace. The character at |
|
2135 * <TT>start + length</TT> is not modified. |
|
2136 * @param srcChar the new code unit |
|
2137 * @return a reference to this |
|
2138 * @stable ICU 2.0 |
|
2139 */ |
|
2140 inline UnicodeString& replace(int32_t start, |
|
2141 int32_t length, |
|
2142 UChar srcChar); |
|
2143 |
|
2144 /** |
|
2145 * Replace the characters in the range |
|
2146 * [<TT>start</TT>, <TT>start + length</TT>) with the code point |
|
2147 * <TT>srcChar</TT>. |
|
2148 * @param start the offset at which the replace operation begins |
|
2149 * @param length the number of characters to replace. The character at |
|
2150 * <TT>start + length</TT> is not modified. |
|
2151 * @param srcChar the new code point |
|
2152 * @return a reference to this |
|
2153 * @stable ICU 2.0 |
|
2154 */ |
|
2155 inline UnicodeString& replace(int32_t start, |
|
2156 int32_t length, |
|
2157 UChar32 srcChar); |
|
2158 |
|
2159 /** |
|
2160 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) |
|
2161 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. |
|
2162 * @param start the offset at which the replace operation begins |
|
2163 * @param limit the offset immediately following the replace range |
|
2164 * @param srcText the source for the new characters |
|
2165 * @return a reference to this |
|
2166 * @stable ICU 2.0 |
|
2167 */ |
|
2168 inline UnicodeString& replaceBetween(int32_t start, |
|
2169 int32_t limit, |
|
2170 const UnicodeString& srcText); |
|
2171 |
|
2172 /** |
|
2173 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) |
|
2174 * with the characters in <TT>srcText</TT> in the range |
|
2175 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. |
|
2176 * @param start the offset at which the replace operation begins |
|
2177 * @param limit the offset immediately following the replace range |
|
2178 * @param srcText the source for the new characters |
|
2179 * @param srcStart the offset into <TT>srcText</TT> where new characters |
|
2180 * will be obtained |
|
2181 * @param srcLimit the offset immediately following the range to copy |
|
2182 * in <TT>srcText</TT> |
|
2183 * @return a reference to this |
|
2184 * @stable ICU 2.0 |
|
2185 */ |
|
2186 inline UnicodeString& replaceBetween(int32_t start, |
|
2187 int32_t limit, |
|
2188 const UnicodeString& srcText, |
|
2189 int32_t srcStart, |
|
2190 int32_t srcLimit); |
|
2191 |
|
2192 /** |
|
2193 * Replace a substring of this object with the given text. |
|
2194 * @param start the beginning index, inclusive; <code>0 <= start |
|
2195 * <= limit</code>. |
|
2196 * @param limit the ending index, exclusive; <code>start <= limit |
|
2197 * <= length()</code>. |
|
2198 * @param text the text to replace characters <code>start</code> |
|
2199 * to <code>limit - 1</code> |
|
2200 * @stable ICU 2.0 |
|
2201 */ |
|
2202 virtual void handleReplaceBetween(int32_t start, |
|
2203 int32_t limit, |
|
2204 const UnicodeString& text); |
|
2205 |
|
2206 /** |
|
2207 * Replaceable API |
|
2208 * @return TRUE if it has MetaData |
|
2209 * @stable ICU 2.4 |
|
2210 */ |
|
2211 virtual UBool hasMetaData() const; |
|
2212 |
|
2213 /** |
|
2214 * Copy a substring of this object, retaining attribute (out-of-band) |
|
2215 * information. This method is used to duplicate or reorder substrings. |
|
2216 * The destination index must not overlap the source range. |
|
2217 * |
|
2218 * @param start the beginning index, inclusive; <code>0 <= start <= |
|
2219 * limit</code>. |
|
2220 * @param limit the ending index, exclusive; <code>start <= limit <= |
|
2221 * length()</code>. |
|
2222 * @param dest the destination index. The characters from |
|
2223 * <code>start..limit-1</code> will be copied to <code>dest</code>. |
|
2224 * Implementations of this method may assume that <code>dest <= start || |
|
2225 * dest >= limit</code>. |
|
2226 * @stable ICU 2.0 |
|
2227 */ |
|
2228 virtual void copy(int32_t start, int32_t limit, int32_t dest); |
|
2229 |
|
2230 /* Search and replace operations */ |
|
2231 |
|
2232 /** |
|
2233 * Replace all occurrences of characters in oldText with the characters |
|
2234 * in newText |
|
2235 * @param oldText the text containing the search text |
|
2236 * @param newText the text containing the replacement text |
|
2237 * @return a reference to this |
|
2238 * @stable ICU 2.0 |
|
2239 */ |
|
2240 inline UnicodeString& findAndReplace(const UnicodeString& oldText, |
|
2241 const UnicodeString& newText); |
|
2242 |
|
2243 /** |
|
2244 * Replace all occurrences of characters in oldText with characters |
|
2245 * in newText |
|
2246 * in the range [<TT>start</TT>, <TT>start + length</TT>). |
|
2247 * @param start the start of the range in which replace will be performed |
|
2248 * @param length the length of the range in which replace will be performed |
|
2249 * @param oldText the text containing the search text |
|
2250 * @param newText the text containing the replacement text |
|
2251 * @return a reference to this |
|
2252 * @stable ICU 2.0 |
|
2253 */ |
|
2254 inline UnicodeString& findAndReplace(int32_t start, |
|
2255 int32_t length, |
|
2256 const UnicodeString& oldText, |
|
2257 const UnicodeString& newText); |
|
2258 |
|
2259 /** |
|
2260 * Replace all occurrences of characters in oldText in the range |
|
2261 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters |
|
2262 * in newText in the range |
|
2263 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) |
|
2264 * in the range [<TT>start</TT>, <TT>start + length</TT>). |
|
2265 * @param start the start of the range in which replace will be performed |
|
2266 * @param length the length of the range in which replace will be performed |
|
2267 * @param oldText the text containing the search text |
|
2268 * @param oldStart the start of the search range in <TT>oldText</TT> |
|
2269 * @param oldLength the length of the search range in <TT>oldText</TT> |
|
2270 * @param newText the text containing the replacement text |
|
2271 * @param newStart the start of the replacement range in <TT>newText</TT> |
|
2272 * @param newLength the length of the replacement range in <TT>newText</TT> |
|
2273 * @return a reference to this |
|
2274 * @stable ICU 2.0 |
|
2275 */ |
|
2276 UnicodeString& findAndReplace(int32_t start, |
|
2277 int32_t length, |
|
2278 const UnicodeString& oldText, |
|
2279 int32_t oldStart, |
|
2280 int32_t oldLength, |
|
2281 const UnicodeString& newText, |
|
2282 int32_t newStart, |
|
2283 int32_t newLength); |
|
2284 |
|
2285 |
|
2286 /* Remove operations */ |
|
2287 |
|
2288 /** |
|
2289 * Remove all characters from the UnicodeString object. |
|
2290 * @return a reference to this |
|
2291 * @stable ICU 2.0 |
|
2292 */ |
|
2293 inline UnicodeString& remove(void); |
|
2294 |
|
2295 /** |
|
2296 * Remove the characters in the range |
|
2297 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. |
|
2298 * @param start the offset of the first character to remove |
|
2299 * @param length the number of characters to remove |
|
2300 * @return a reference to this |
|
2301 * @stable ICU 2.0 |
|
2302 */ |
|
2303 inline UnicodeString& remove(int32_t start, |
|
2304 int32_t length = (int32_t)INT32_MAX); |
|
2305 |
|
2306 /** |
|
2307 * Remove the characters in the range |
|
2308 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. |
|
2309 * @param start the offset of the first character to remove |
|
2310 * @param limit the offset immediately following the range to remove |
|
2311 * @return a reference to this |
|
2312 * @stable ICU 2.0 |
|
2313 */ |
|
2314 inline UnicodeString& removeBetween(int32_t start, |
|
2315 int32_t limit = (int32_t)INT32_MAX); |
|
2316 |
|
2317 |
|
2318 /* Length operations */ |
|
2319 |
|
2320 /** |
|
2321 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. |
|
2322 * If the length of this UnicodeString is less than targetLength, |
|
2323 * targetLength - length() copies of padChar will be added to the |
|
2324 * beginning of this UnicodeString. |
|
2325 * @param targetLength the desired length of the string |
|
2326 * @param padChar the character to use for padding. Defaults to |
|
2327 * space (U+0020) |
|
2328 * @return TRUE if the text was padded, FALSE otherwise. |
|
2329 * @stable ICU 2.0 |
|
2330 */ |
|
2331 UBool padLeading(int32_t targetLength, |
|
2332 UChar padChar = 0x0020); |
|
2333 |
|
2334 /** |
|
2335 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. |
|
2336 * If the length of this UnicodeString is less than targetLength, |
|
2337 * targetLength - length() copies of padChar will be added to the |
|
2338 * end of this UnicodeString. |
|
2339 * @param targetLength the desired length of the string |
|
2340 * @param padChar the character to use for padding. Defaults to |
|
2341 * space (U+0020) |
|
2342 * @return TRUE if the text was padded, FALSE otherwise. |
|
2343 * @stable ICU 2.0 |
|
2344 */ |
|
2345 UBool padTrailing(int32_t targetLength, |
|
2346 UChar padChar = 0x0020); |
|
2347 |
|
2348 /** |
|
2349 * Truncate this UnicodeString to the <TT>targetLength</TT>. |
|
2350 * @param targetLength the desired length of this UnicodeString. |
|
2351 * @return TRUE if the text was truncated, FALSE otherwise |
|
2352 * @stable ICU 2.0 |
|
2353 */ |
|
2354 inline UBool truncate(int32_t targetLength); |
|
2355 |
|
2356 /** |
|
2357 * Trims leading and trailing whitespace from this UnicodeString. |
|
2358 * @return a reference to this |
|
2359 * @stable ICU 2.0 |
|
2360 */ |
|
2361 UnicodeString& trim(void); |
|
2362 |
|
2363 |
|
2364 /* Miscellaneous operations */ |
|
2365 |
|
2366 /** |
|
2367 * Reverse this UnicodeString in place. |
|
2368 * @return a reference to this |
|
2369 * @stable ICU 2.0 |
|
2370 */ |
|
2371 inline UnicodeString& reverse(void); |
|
2372 |
|
2373 /** |
|
2374 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in |
|
2375 * this UnicodeString. |
|
2376 * @param start the start of the range to reverse |
|
2377 * @param length the number of characters to reverse |
|
2378 * @return a reference to this |
|
2379 * @stable ICU 2.0 |
|
2380 */ |
|
2381 inline UnicodeString& reverse(int32_t start, |
|
2382 int32_t length); |
|
2383 |
|
2384 /** |
|
2385 * Convert the characters in this to UPPER CASE following the conventions of |
|
2386 * the default locale. |
|
2387 * @return A reference to this. |
|
2388 * @stable ICU 2.0 |
|
2389 */ |
|
2390 UnicodeString& toUpper(void); |
|
2391 |
|
2392 /** |
|
2393 * Convert the characters in this to UPPER CASE following the conventions of |
|
2394 * a specific locale. |
|
2395 * @param locale The locale containing the conventions to use. |
|
2396 * @return A reference to this. |
|
2397 * @stable ICU 2.0 |
|
2398 */ |
|
2399 UnicodeString& toUpper(const Locale& locale); |
|
2400 |
|
2401 /** |
|
2402 * Convert the characters in this to lower case following the conventions of |
|
2403 * the default locale. |
|
2404 * @return A reference to this. |
|
2405 * @stable ICU 2.0 |
|
2406 */ |
|
2407 UnicodeString& toLower(void); |
|
2408 |
|
2409 /** |
|
2410 * Convert the characters in this to lower case following the conventions of |
|
2411 * a specific locale. |
|
2412 * @param locale The locale containing the conventions to use. |
|
2413 * @return A reference to this. |
|
2414 * @stable ICU 2.0 |
|
2415 */ |
|
2416 UnicodeString& toLower(const Locale& locale); |
|
2417 |
|
2418 #if !UCONFIG_NO_BREAK_ITERATION |
|
2419 |
|
2420 /** |
|
2421 * Titlecase this string, convenience function using the default locale. |
|
2422 * |
|
2423 * Casing is locale-dependent and context-sensitive. |
|
2424 * Titlecasing uses a break iterator to find the first characters of words |
|
2425 * that are to be titlecased. It titlecases those characters and lowercases |
|
2426 * all others. |
|
2427 * |
|
2428 * The titlecase break iterator can be provided to customize for arbitrary |
|
2429 * styles, using rules and dictionaries beyond the standard iterators. |
|
2430 * It may be more efficient to always provide an iterator to avoid |
|
2431 * opening and closing one for each string. |
|
2432 * The standard titlecase iterator for the root locale implements the |
|
2433 * algorithm of Unicode TR 21. |
|
2434 * |
|
2435 * This function uses only the first() and next() methods of the |
|
2436 * provided break iterator. |
|
2437 * |
|
2438 * @param titleIter A break iterator to find the first characters of words |
|
2439 * that are to be titlecased. |
|
2440 * If none is provided (0), then a standard titlecase |
|
2441 * break iterator is opened. |
|
2442 * Otherwise the provided iterator is set to the string's text. |
|
2443 * @return A reference to this. |
|
2444 * @stable ICU 2.1 |
|
2445 */ |
|
2446 UnicodeString &toTitle(BreakIterator *titleIter); |
|
2447 |
|
2448 /** |
|
2449 * Titlecase this string. |
|
2450 * |
|
2451 * Casing is locale-dependent and context-sensitive. |
|
2452 * Titlecasing uses a break iterator to find the first characters of words |
|
2453 * that are to be titlecased. It titlecases those characters and lowercases |
|
2454 * all others. |
|
2455 * |
|
2456 * The titlecase break iterator can be provided to customize for arbitrary |
|
2457 * styles, using rules and dictionaries beyond the standard iterators. |
|
2458 * It may be more efficient to always provide an iterator to avoid |
|
2459 * opening and closing one for each string. |
|
2460 * The standard titlecase iterator for the root locale implements the |
|
2461 * algorithm of Unicode TR 21. |
|
2462 * |
|
2463 * This function uses only the first() and next() methods of the |
|
2464 * provided break iterator. |
|
2465 * |
|
2466 * @param titleIter A break iterator to find the first characters of words |
|
2467 * that are to be titlecased. |
|
2468 * If none is provided (0), then a standard titlecase |
|
2469 * break iterator is opened. |
|
2470 * Otherwise the provided iterator is set to the string's text. |
|
2471 * @param locale The locale to consider. |
|
2472 * @return A reference to this. |
|
2473 * @stable ICU 2.1 |
|
2474 */ |
|
2475 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); |
|
2476 |
|
2477 #endif |
|
2478 |
|
2479 /** |
|
2480 * Case-fold the characters in this string. |
|
2481 * Case-folding is locale-independent and not context-sensitive, |
|
2482 * but there is an option for whether to include or exclude mappings for dotted I |
|
2483 * and dotless i that are marked with 'I' in CaseFolding.txt. |
|
2484 * The result may be longer or shorter than the original. |
|
2485 * |
|
2486 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
2487 * @return A reference to this. |
|
2488 * @stable ICU 2.0 |
|
2489 */ |
|
2490 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); |
|
2491 |
|
2492 //======================================== |
|
2493 // Access to the internal buffer |
|
2494 //======================================== |
|
2495 |
|
2496 /** |
|
2497 * Get a read/write pointer to the internal buffer. |
|
2498 * The buffer is guaranteed to be large enough for at least minCapacity UChars, |
|
2499 * writable, and is still owned by the UnicodeString object. |
|
2500 * Calls to getBuffer(minCapacity) must not be nested, and |
|
2501 * must be matched with calls to releaseBuffer(newLength). |
|
2502 * If the string buffer was read-only or shared, |
|
2503 * then it will be reallocated and copied. |
|
2504 * |
|
2505 * An attempted nested call will return 0, and will not further modify the |
|
2506 * state of the UnicodeString object. |
|
2507 * It also returns 0 if the string is bogus. |
|
2508 * |
|
2509 * The actual capacity of the string buffer may be larger than minCapacity. |
|
2510 * getCapacity() returns the actual capacity. |
|
2511 * For many operations, the full capacity should be used to avoid reallocations. |
|
2512 * |
|
2513 * While the buffer is "open" between getBuffer(minCapacity) |
|
2514 * and releaseBuffer(newLength), the following applies: |
|
2515 * - The string length is set to 0. |
|
2516 * - Any read API call on the UnicodeString object will behave like on a 0-length string. |
|
2517 * - Any write API call on the UnicodeString object is disallowed and will have no effect. |
|
2518 * - You can read from and write to the returned buffer. |
|
2519 * - The previous string contents will still be in the buffer; |
|
2520 * if you want to use it, then you need to call length() before getBuffer(minCapacity). |
|
2521 * If the length() was greater than minCapacity, then any contents after minCapacity |
|
2522 * may be lost. |
|
2523 * The buffer contents is not NUL-terminated by getBuffer(). |
|
2524 * If length()<getCapacity() then you can terminate it by writing a NUL |
|
2525 * at index length(). |
|
2526 * - You must call releaseBuffer(newLength) in order to |
|
2527 * return to normal UnicodeString operation. |
|
2528 * |
|
2529 * @param minCapacity the minimum number of UChars that are to be available |
|
2530 * in the buffer, starting at the returned pointer; |
|
2531 * default to the current string capacity if minCapacity==-1 |
|
2532 * @return a writable pointer to the internal string buffer, |
|
2533 * or 0 if an error occurs (nested calls, out of memory) |
|
2534 * |
|
2535 * @see releaseBuffer |
|
2536 * @see getTerminatedBuffer() |
|
2537 * @stable ICU 2.0 |
|
2538 */ |
|
2539 UChar *getBuffer(int32_t minCapacity); |
|
2540 |
|
2541 /** |
|
2542 * Release a read/write buffer on a UnicodeString object with an |
|
2543 * "open" getBuffer(minCapacity). |
|
2544 * This function must be called in a matched pair with getBuffer(minCapacity). |
|
2545 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". |
|
2546 * |
|
2547 * It will set the string length to newLength, at most to the current capacity. |
|
2548 * If newLength==-1 then it will set the length according to the |
|
2549 * first NUL in the buffer, or to the capacity if there is no NUL. |
|
2550 * |
|
2551 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. |
|
2552 * |
|
2553 * @param newLength the new length of the UnicodeString object; |
|
2554 * defaults to the current capacity if newLength is greater than that; |
|
2555 * if newLength==-1, it defaults to u_strlen(buffer) but not more than |
|
2556 * the current capacity of the string |
|
2557 * |
|
2558 * @see getBuffer(int32_t minCapacity) |
|
2559 * @stable ICU 2.0 |
|
2560 */ |
|
2561 void releaseBuffer(int32_t newLength=-1); |
|
2562 |
|
2563 /** |
|
2564 * Get a read-only pointer to the internal buffer. |
|
2565 * This can be called at any time on a valid UnicodeString. |
|
2566 * |
|
2567 * It returns 0 if the string is bogus, or |
|
2568 * during an "open" getBuffer(minCapacity). |
|
2569 * |
|
2570 * It can be called as many times as desired. |
|
2571 * The pointer that it returns will remain valid until the UnicodeString object is modified, |
|
2572 * at which time the pointer is semantically invalidated and must not be used any more. |
|
2573 * |
|
2574 * The capacity of the buffer can be determined with getCapacity(). |
|
2575 * The part after length() may or may not be initialized and valid, |
|
2576 * depending on the history of the UnicodeString object. |
|
2577 * |
|
2578 * The buffer contents is (probably) not NUL-terminated. |
|
2579 * You can check if it is with |
|
2580 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. |
|
2581 * (See getTerminatedBuffer().) |
|
2582 * |
|
2583 * The buffer may reside in read-only memory. Its contents must not |
|
2584 * be modified. |
|
2585 * |
|
2586 * @return a read-only pointer to the internal string buffer, |
|
2587 * or 0 if the string is empty or bogus |
|
2588 * |
|
2589 * @see getBuffer(int32_t minCapacity) |
|
2590 * @see getTerminatedBuffer() |
|
2591 * @stable ICU 2.0 |
|
2592 */ |
|
2593 inline const UChar *getBuffer() const; |
|
2594 |
|
2595 /** |
|
2596 * Get a read-only pointer to the internal buffer, |
|
2597 * making sure that it is NUL-terminated. |
|
2598 * This can be called at any time on a valid UnicodeString. |
|
2599 * |
|
2600 * It returns 0 if the string is bogus, or |
|
2601 * during an "open" getBuffer(minCapacity), or if the buffer cannot |
|
2602 * be NUL-terminated (because memory allocation failed). |
|
2603 * |
|
2604 * It can be called as many times as desired. |
|
2605 * The pointer that it returns will remain valid until the UnicodeString object is modified, |
|
2606 * at which time the pointer is semantically invalidated and must not be used any more. |
|
2607 * |
|
2608 * The capacity of the buffer can be determined with getCapacity(). |
|
2609 * The part after length()+1 may or may not be initialized and valid, |
|
2610 * depending on the history of the UnicodeString object. |
|
2611 * |
|
2612 * The buffer contents is guaranteed to be NUL-terminated. |
|
2613 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL |
|
2614 * is written. |
|
2615 * For this reason, this function is not const, unlike getBuffer(). |
|
2616 * Note that a UnicodeString may also contain NUL characters as part of its contents. |
|
2617 * |
|
2618 * The buffer may reside in read-only memory. Its contents must not |
|
2619 * be modified. |
|
2620 * |
|
2621 * @return a read-only pointer to the internal string buffer, |
|
2622 * or 0 if the string is empty or bogus |
|
2623 * |
|
2624 * @see getBuffer(int32_t minCapacity) |
|
2625 * @see getBuffer() |
|
2626 * @stable ICU 2.2 |
|
2627 */ |
|
2628 inline const UChar *getTerminatedBuffer(); |
|
2629 |
|
2630 //======================================== |
|
2631 // Constructors |
|
2632 //======================================== |
|
2633 |
|
2634 /** Construct an empty UnicodeString. |
|
2635 * @stable ICU 2.0 |
|
2636 */ |
|
2637 UnicodeString(); |
|
2638 |
|
2639 /** |
|
2640 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars |
|
2641 * @param capacity the number of UChars this UnicodeString should hold |
|
2642 * before a resize is necessary; if count is greater than 0 and count |
|
2643 * code points c take up more space than capacity, then capacity is adjusted |
|
2644 * accordingly. |
|
2645 * @param c is used to initially fill the string |
|
2646 * @param count specifies how many code points c are to be written in the |
|
2647 * string |
|
2648 * @stable ICU 2.0 |
|
2649 */ |
|
2650 UnicodeString(int32_t capacity, UChar32 c, int32_t count); |
|
2651 |
|
2652 /** |
|
2653 * Single UChar (code unit) constructor. |
|
2654 * @param ch the character to place in the UnicodeString |
|
2655 * @stable ICU 2.0 |
|
2656 */ |
|
2657 UnicodeString(UChar ch); |
|
2658 |
|
2659 /** |
|
2660 * Single UChar32 (code point) constructor. |
|
2661 * @param ch the character to place in the UnicodeString |
|
2662 * @stable ICU 2.0 |
|
2663 */ |
|
2664 UnicodeString(UChar32 ch); |
|
2665 |
|
2666 /** |
|
2667 * UChar* constructor. |
|
2668 * @param text The characters to place in the UnicodeString. <TT>text</TT> |
|
2669 * must be NUL (U+0000) terminated. |
|
2670 * @stable ICU 2.0 |
|
2671 */ |
|
2672 UnicodeString(const UChar *text); |
|
2673 |
|
2674 /** |
|
2675 * UChar* constructor. |
|
2676 * @param text The characters to place in the UnicodeString. |
|
2677 * @param textLength The number of Unicode characters in <TT>text</TT> |
|
2678 * to copy. |
|
2679 * @stable ICU 2.0 |
|
2680 */ |
|
2681 UnicodeString(const UChar *text, |
|
2682 int32_t textLength); |
|
2683 |
|
2684 /** |
|
2685 * Readonly-aliasing UChar* constructor. |
|
2686 * The text will be used for the UnicodeString object, but |
|
2687 * it will not be released when the UnicodeString is destroyed. |
|
2688 * This has copy-on-write semantics: |
|
2689 * When the string is modified, then the buffer is first copied into |
|
2690 * newly allocated memory. |
|
2691 * The aliased buffer is never modified. |
|
2692 * In an assignment to another UnicodeString, the text will be aliased again, |
|
2693 * so that both strings then alias the same readonly-text. |
|
2694 * |
|
2695 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. |
|
2696 * This must be true if <code>textLength==-1</code>. |
|
2697 * @param text The characters to alias for the UnicodeString. |
|
2698 * @param textLength The number of Unicode characters in <code>text</code> to alias. |
|
2699 * If -1, then this constructor will determine the length |
|
2700 * by calling <code>u_strlen()</code>. |
|
2701 * @stable ICU 2.0 |
|
2702 */ |
|
2703 UnicodeString(UBool isTerminated, |
|
2704 const UChar *text, |
|
2705 int32_t textLength); |
|
2706 |
|
2707 /** |
|
2708 * Writable-aliasing UChar* constructor. |
|
2709 * The text will be used for the UnicodeString object, but |
|
2710 * it will not be released when the UnicodeString is destroyed. |
|
2711 * This has write-through semantics: |
|
2712 * For as long as the capacity of the buffer is sufficient, write operations |
|
2713 * will directly affect the buffer. When more capacity is necessary, then |
|
2714 * a new buffer will be allocated and the contents copied as with regularly |
|
2715 * constructed strings. |
|
2716 * In an assignment to another UnicodeString, the buffer will be copied. |
|
2717 * The extract(UChar *dst) function detects whether the dst pointer is the same |
|
2718 * as the string buffer itself and will in this case not copy the contents. |
|
2719 * |
|
2720 * @param buffer The characters to alias for the UnicodeString. |
|
2721 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. |
|
2722 * @param buffCapacity The size of <code>buffer</code> in UChars. |
|
2723 * @stable ICU 2.0 |
|
2724 */ |
|
2725 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); |
|
2726 |
|
2727 #if !UCONFIG_NO_CONVERSION |
|
2728 |
|
2729 /** |
|
2730 * char* constructor. |
|
2731 * @param codepageData an array of bytes, null-terminated |
|
2732 * @param codepage the encoding of <TT>codepageData</TT>. The special |
|
2733 * value 0 for <TT>codepage</TT> indicates that the text is in the |
|
2734 * platform's default codepage. |
|
2735 * |
|
2736 * If <code>codepage</code> is an empty string (<code>""</code>), |
|
2737 * then a simple conversion is performed on the codepage-invariant |
|
2738 * subset ("invariant characters") of the platform encoding. See utypes.h. |
|
2739 * Recommendation: For invariant-character strings use the constructor |
|
2740 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) |
|
2741 * because it avoids object code dependencies of UnicodeString on |
|
2742 * the conversion code. |
|
2743 * |
|
2744 * @stable ICU 2.0 |
|
2745 */ |
|
2746 UnicodeString(const char *codepageData, |
|
2747 const char *codepage = 0); |
|
2748 |
|
2749 /** |
|
2750 * char* constructor. |
|
2751 * @param codepageData an array of bytes. |
|
2752 * @param dataLength The number of bytes in <TT>codepageData</TT>. |
|
2753 * @param codepage the encoding of <TT>codepageData</TT>. The special |
|
2754 * value 0 for <TT>codepage</TT> indicates that the text is in the |
|
2755 * platform's default codepage. |
|
2756 * If <code>codepage</code> is an empty string (<code>""</code>), |
|
2757 * then a simple conversion is performed on the codepage-invariant |
|
2758 * subset ("invariant characters") of the platform encoding. See utypes.h. |
|
2759 * Recommendation: For invariant-character strings use the constructor |
|
2760 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) |
|
2761 * because it avoids object code dependencies of UnicodeString on |
|
2762 * the conversion code. |
|
2763 * |
|
2764 * @stable ICU 2.0 |
|
2765 */ |
|
2766 UnicodeString(const char *codepageData, |
|
2767 int32_t dataLength, |
|
2768 const char *codepage = 0); |
|
2769 |
|
2770 /** |
|
2771 * char * / UConverter constructor. |
|
2772 * This constructor uses an existing UConverter object to |
|
2773 * convert the codepage string to Unicode and construct a UnicodeString |
|
2774 * from that. |
|
2775 * |
|
2776 * The converter is reset at first. |
|
2777 * If the error code indicates a failure before this constructor is called, |
|
2778 * or if an error occurs during conversion or construction, |
|
2779 * then the string will be bogus. |
|
2780 * |
|
2781 * This function avoids the overhead of opening and closing a converter if |
|
2782 * multiple strings are constructed. |
|
2783 * |
|
2784 * @param src input codepage string |
|
2785 * @param srcLength length of the input string, can be -1 for NUL-terminated strings |
|
2786 * @param cnv converter object (ucnv_resetToUnicode() will be called), |
|
2787 * can be NULL for the default converter |
|
2788 * @param errorCode normal ICU error code |
|
2789 * @stable ICU 2.0 |
|
2790 */ |
|
2791 UnicodeString( |
|
2792 const char *src, int32_t srcLength, |
|
2793 UConverter *cnv, |
|
2794 UErrorCode &errorCode); |
|
2795 |
|
2796 #endif |
|
2797 |
|
2798 /** |
|
2799 * Constructs a Unicode string from an invariant-character char * string. |
|
2800 * About invariant characters see utypes.h. |
|
2801 * This constructor has no runtime dependency on conversion code and is |
|
2802 * therefore recommended over ones taking a charset name string |
|
2803 * (where the empty string "" indicates invariant-character conversion). |
|
2804 * |
|
2805 * Use the macro US_INV as the third, signature-distinguishing parameter. |
|
2806 * |
|
2807 * For example: |
|
2808 * \code |
|
2809 * void fn(const char *s) { |
|
2810 * UnicodeString ustr(s, -1, US_INV); |
|
2811 * // use ustr ... |
|
2812 * } |
|
2813 * \endcode |
|
2814 * |
|
2815 * @param src String using only invariant characters. |
|
2816 * @param length Length of src, or -1 if NUL-terminated. |
|
2817 * @param inv Signature-distinguishing parameter, use US_INV. |
|
2818 * |
|
2819 * @see US_INV |
|
2820 * @draft ICU 3.2 |
|
2821 */ |
|
2822 UnicodeString(const char *src, int32_t length, enum EInvariant inv); |
|
2823 |
|
2824 |
|
2825 /** |
|
2826 * Copy constructor. |
|
2827 * @param that The UnicodeString object to copy. |
|
2828 * @stable ICU 2.0 |
|
2829 */ |
|
2830 UnicodeString(const UnicodeString& that); |
|
2831 |
|
2832 /** |
|
2833 * 'Substring' constructor from tail of source string. |
|
2834 * @param src The UnicodeString object to copy. |
|
2835 * @param srcStart The offset into <tt>src</tt> at which to start copying. |
|
2836 * @stable ICU 2.2 |
|
2837 */ |
|
2838 UnicodeString(const UnicodeString& src, int32_t srcStart); |
|
2839 |
|
2840 /** |
|
2841 * 'Substring' constructor from subrange of source string. |
|
2842 * @param src The UnicodeString object to copy. |
|
2843 * @param srcStart The offset into <tt>src</tt> at which to start copying. |
|
2844 * @param srcLength The number of characters from <tt>src</tt> to copy. |
|
2845 * @stable ICU 2.2 |
|
2846 */ |
|
2847 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); |
|
2848 |
|
2849 /** |
|
2850 * Clone this object, an instance of a subclass of Replaceable. |
|
2851 * Clones can be used concurrently in multiple threads. |
|
2852 * If a subclass does not implement clone(), or if an error occurs, |
|
2853 * then NULL is returned. |
|
2854 * The clone functions in all subclasses return a pointer to a Replaceable |
|
2855 * because some compilers do not support covariant (same-as-this) |
|
2856 * return types; cast to the appropriate subclass if necessary. |
|
2857 * The caller must delete the clone. |
|
2858 * |
|
2859 * @return a clone of this object |
|
2860 * |
|
2861 * @see Replaceable::clone |
|
2862 * @see getDynamicClassID |
|
2863 * @stable ICU 2.6 |
|
2864 */ |
|
2865 virtual Replaceable *clone() const; |
|
2866 |
|
2867 /** Destructor. |
|
2868 * @stable ICU 2.0 |
|
2869 */ |
|
2870 virtual ~UnicodeString(); |
|
2871 |
|
2872 |
|
2873 /* Miscellaneous operations */ |
|
2874 |
|
2875 /** |
|
2876 * Unescape a string of characters and return a string containing |
|
2877 * the result. The following escape sequences are recognized: |
|
2878 * |
|
2879 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] |
|
2880 * \\Uhhhhhhhh 8 hex digits |
|
2881 * \\xhh 1-2 hex digits |
|
2882 * \\ooo 1-3 octal digits; o in [0-7] |
|
2883 * \\cX control-X; X is masked with 0x1F |
|
2884 * |
|
2885 * as well as the standard ANSI C escapes: |
|
2886 * |
|
2887 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, |
|
2888 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, |
|
2889 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C |
|
2890 * |
|
2891 * Anything else following a backslash is generically escaped. For |
|
2892 * example, "[a\\-z]" returns "[a-z]". |
|
2893 * |
|
2894 * If an escape sequence is ill-formed, this method returns an empty |
|
2895 * string. An example of an ill-formed sequence is "\\u" followed by |
|
2896 * fewer than 4 hex digits. |
|
2897 * |
|
2898 * This function is similar to u_unescape() but not identical to it. |
|
2899 * The latter takes a source char*, so it does escape recognition |
|
2900 * and also invariant conversion. |
|
2901 * |
|
2902 * @return a string with backslash escapes interpreted, or an |
|
2903 * empty string on error. |
|
2904 * @see UnicodeString#unescapeAt() |
|
2905 * @see u_unescape() |
|
2906 * @see u_unescapeAt() |
|
2907 * @stable ICU 2.0 |
|
2908 */ |
|
2909 UnicodeString unescape() const; |
|
2910 |
|
2911 /** |
|
2912 * Unescape a single escape sequence and return the represented |
|
2913 * character. See unescape() for a listing of the recognized escape |
|
2914 * sequences. The character at offset-1 is assumed (without |
|
2915 * checking) to be a backslash. If the escape sequence is |
|
2916 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is |
|
2917 * returned. |
|
2918 * |
|
2919 * @param offset an input output parameter. On input, it is the |
|
2920 * offset into this string where the escape sequence is located, |
|
2921 * after the initial backslash. On output, it is advanced after the |
|
2922 * last character parsed. On error, it is not advanced at all. |
|
2923 * @return the character represented by the escape sequence at |
|
2924 * offset, or (UChar32)0xFFFFFFFF on error. |
|
2925 * @see UnicodeString#unescape() |
|
2926 * @see u_unescape() |
|
2927 * @see u_unescapeAt() |
|
2928 * @stable ICU 2.0 |
|
2929 */ |
|
2930 UChar32 unescapeAt(int32_t &offset) const; |
|
2931 |
|
2932 /** |
|
2933 * ICU "poor man's RTTI", returns a UClassID for this class. |
|
2934 * |
|
2935 * @stable ICU 2.2 |
|
2936 */ |
|
2937 static UClassID U_EXPORT2 getStaticClassID(); |
|
2938 |
|
2939 /** |
|
2940 * ICU "poor man's RTTI", returns a UClassID for the actual class. |
|
2941 * |
|
2942 * @stable ICU 2.2 |
|
2943 */ |
|
2944 virtual UClassID getDynamicClassID() const; |
|
2945 |
|
2946 //======================================== |
|
2947 // Implementation methods |
|
2948 //======================================== |
|
2949 |
|
2950 protected: |
|
2951 /** |
|
2952 * Implement Replaceable::getLength() (see jitterbug 1027). |
|
2953 * @stable ICU 2.4 |
|
2954 */ |
|
2955 virtual int32_t getLength() const; |
|
2956 |
|
2957 /** |
|
2958 * The change in Replaceable to use virtual getCharAt() allows |
|
2959 * UnicodeString::charAt() to be inline again (see jitterbug 709). |
|
2960 * @stable ICU 2.4 |
|
2961 */ |
|
2962 virtual UChar getCharAt(int32_t offset) const; |
|
2963 |
|
2964 /** |
|
2965 * The change in Replaceable to use virtual getChar32At() allows |
|
2966 * UnicodeString::char32At() to be inline again (see jitterbug 709). |
|
2967 * @stable ICU 2.4 |
|
2968 */ |
|
2969 virtual UChar32 getChar32At(int32_t offset) const; |
|
2970 |
|
2971 private: |
|
2972 |
|
2973 inline int8_t |
|
2974 doCompare(int32_t start, |
|
2975 int32_t length, |
|
2976 const UnicodeString& srcText, |
|
2977 int32_t srcStart, |
|
2978 int32_t srcLength) const; |
|
2979 |
|
2980 int8_t doCompare(int32_t start, |
|
2981 int32_t length, |
|
2982 const UChar *srcChars, |
|
2983 int32_t srcStart, |
|
2984 int32_t srcLength) const; |
|
2985 |
|
2986 inline int8_t |
|
2987 doCompareCodePointOrder(int32_t start, |
|
2988 int32_t length, |
|
2989 const UnicodeString& srcText, |
|
2990 int32_t srcStart, |
|
2991 int32_t srcLength) const; |
|
2992 |
|
2993 int8_t doCompareCodePointOrder(int32_t start, |
|
2994 int32_t length, |
|
2995 const UChar *srcChars, |
|
2996 int32_t srcStart, |
|
2997 int32_t srcLength) const; |
|
2998 |
|
2999 inline int8_t |
|
3000 doCaseCompare(int32_t start, |
|
3001 int32_t length, |
|
3002 const UnicodeString &srcText, |
|
3003 int32_t srcStart, |
|
3004 int32_t srcLength, |
|
3005 uint32_t options) const; |
|
3006 |
|
3007 int8_t |
|
3008 doCaseCompare(int32_t start, |
|
3009 int32_t length, |
|
3010 const UChar *srcChars, |
|
3011 int32_t srcStart, |
|
3012 int32_t srcLength, |
|
3013 uint32_t options) const; |
|
3014 |
|
3015 int32_t doIndexOf(UChar c, |
|
3016 int32_t start, |
|
3017 int32_t length) const; |
|
3018 |
|
3019 int32_t doIndexOf(UChar32 c, |
|
3020 int32_t start, |
|
3021 int32_t length) const; |
|
3022 |
|
3023 int32_t doLastIndexOf(UChar c, |
|
3024 int32_t start, |
|
3025 int32_t length) const; |
|
3026 |
|
3027 int32_t doLastIndexOf(UChar32 c, |
|
3028 int32_t start, |
|
3029 int32_t length) const; |
|
3030 |
|
3031 void doExtract(int32_t start, |
|
3032 int32_t length, |
|
3033 UChar *dst, |
|
3034 int32_t dstStart) const; |
|
3035 |
|
3036 inline void doExtract(int32_t start, |
|
3037 int32_t length, |
|
3038 UnicodeString& target) const; |
|
3039 |
|
3040 inline UChar doCharAt(int32_t offset) const; |
|
3041 |
|
3042 UnicodeString& doReplace(int32_t start, |
|
3043 int32_t length, |
|
3044 const UnicodeString& srcText, |
|
3045 int32_t srcStart, |
|
3046 int32_t srcLength); |
|
3047 |
|
3048 UnicodeString& doReplace(int32_t start, |
|
3049 int32_t length, |
|
3050 const UChar *srcChars, |
|
3051 int32_t srcStart, |
|
3052 int32_t srcLength); |
|
3053 |
|
3054 UnicodeString& doReverse(int32_t start, |
|
3055 int32_t length); |
|
3056 |
|
3057 // calculate hash code |
|
3058 int32_t doHashCode(void) const; |
|
3059 |
|
3060 // get pointer to start of array |
|
3061 inline UChar* getArrayStart(void); |
|
3062 inline const UChar* getArrayStart(void) const; |
|
3063 |
|
3064 // allocate the array; result may be fStackBuffer |
|
3065 // sets refCount to 1 if appropriate |
|
3066 // sets fArray, fCapacity, and fFlags |
|
3067 // returns boolean for success or failure |
|
3068 UBool allocate(int32_t capacity); |
|
3069 |
|
3070 // release the array if owned |
|
3071 void releaseArray(void); |
|
3072 |
|
3073 // turn a bogus string into an empty one |
|
3074 void unBogus(); |
|
3075 |
|
3076 // implements assignment operator, copy constructor, and fastCopyFrom() |
|
3077 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); |
|
3078 |
|
3079 // Pin start and limit to acceptable values. |
|
3080 inline void pinIndex(int32_t& start) const; |
|
3081 inline void pinIndices(int32_t& start, |
|
3082 int32_t& length) const; |
|
3083 |
|
3084 #if !UCONFIG_NO_CONVERSION |
|
3085 |
|
3086 /* Internal extract() using UConverter. */ |
|
3087 int32_t doExtract(int32_t start, int32_t length, |
|
3088 char *dest, int32_t destCapacity, |
|
3089 UConverter *cnv, |
|
3090 UErrorCode &errorCode) const; |
|
3091 |
|
3092 /* |
|
3093 * Real constructor for converting from codepage data. |
|
3094 * It assumes that it is called with !fRefCounted. |
|
3095 * |
|
3096 * If <code>codepage==0</code>, then the default converter |
|
3097 * is used for the platform encoding. |
|
3098 * If <code>codepage</code> is an empty string (<code>""</code>), |
|
3099 * then a simple conversion is performed on the codepage-invariant |
|
3100 * subset ("invariant characters") of the platform encoding. See utypes.h. |
|
3101 */ |
|
3102 void doCodepageCreate(const char *codepageData, |
|
3103 int32_t dataLength, |
|
3104 const char *codepage); |
|
3105 |
|
3106 /* |
|
3107 * Worker function for creating a UnicodeString from |
|
3108 * a codepage string using a UConverter. |
|
3109 */ |
|
3110 void |
|
3111 doCodepageCreate(const char *codepageData, |
|
3112 int32_t dataLength, |
|
3113 UConverter *converter, |
|
3114 UErrorCode &status); |
|
3115 |
|
3116 #endif |
|
3117 |
|
3118 /* |
|
3119 * This function is called when write access to the array |
|
3120 * is necessary. |
|
3121 * |
|
3122 * We need to make a copy of the array if |
|
3123 * the buffer is read-only, or |
|
3124 * the buffer is refCounted (shared), and refCount>1, or |
|
3125 * the buffer is too small. |
|
3126 * |
|
3127 * Return FALSE if memory could not be allocated. |
|
3128 */ |
|
3129 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, |
|
3130 int32_t growCapacity = -1, |
|
3131 UBool doCopyArray = TRUE, |
|
3132 int32_t **pBufferToDelete = 0, |
|
3133 UBool forceClone = FALSE); |
|
3134 |
|
3135 // common function for case mappings |
|
3136 UnicodeString & |
|
3137 caseMap(BreakIterator *titleIter, |
|
3138 const char *locale, |
|
3139 uint32_t options, |
|
3140 int32_t toWhichCase); |
|
3141 |
|
3142 // ref counting |
|
3143 void addRef(void); |
|
3144 int32_t removeRef(void); |
|
3145 int32_t refCount(void) const; |
|
3146 |
|
3147 // constants |
|
3148 enum { |
|
3149 US_STACKBUF_SIZE=7, // Size of stack buffer for small strings |
|
3150 kInvalidUChar=0xffff, // invalid UChar index |
|
3151 kGrowSize=128, // grow size for this buffer |
|
3152 kInvalidHashCode=0, // invalid hash code |
|
3153 kEmptyHashCode=1, // hash code for empty string |
|
3154 |
|
3155 // bit flag values for fFlags |
|
3156 kIsBogus=1, // this string is bogus, i.e., not valid or NULL |
|
3157 kUsingStackBuffer=2,// fArray==fStackBuffer |
|
3158 kRefCounted=4, // there is a refCount field before the characters in fArray |
|
3159 kBufferIsReadonly=8,// do not write to this buffer |
|
3160 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), |
|
3161 // and releaseBuffer(newLength) must be called |
|
3162 |
|
3163 // combined values for convenience |
|
3164 kShortString=kUsingStackBuffer, |
|
3165 kLongString=kRefCounted, |
|
3166 kReadonlyAlias=kBufferIsReadonly, |
|
3167 kWritableAlias=0 |
|
3168 }; |
|
3169 |
|
3170 friend class StringCharacterIterator; |
|
3171 friend class StringThreadTest; |
|
3172 |
|
3173 /* |
|
3174 * The following are all the class fields that are stored |
|
3175 * in each UnicodeString object. |
|
3176 * Note that UnicodeString has virtual functions, |
|
3177 * therefore there is an implicit vtable pointer |
|
3178 * as the first real field. |
|
3179 * The fields should be aligned such that no padding is |
|
3180 * necessary, mostly by having larger types first. |
|
3181 * On 32-bit machines, the size should be 32 bytes, |
|
3182 * on 64-bit machines (8-byte pointers), it should be 40 bytes. |
|
3183 */ |
|
3184 // (implicit) *vtable; |
|
3185 int32_t fLength; // number of characters in fArray |
|
3186 int32_t fCapacity; // sizeof fArray |
|
3187 UChar *fArray; // the Unicode data |
|
3188 uint16_t fFlags; // bit flags: see constants above |
|
3189 UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings |
|
3190 |
|
3191 }; |
|
3192 |
|
3193 /** |
|
3194 * Create a new UnicodeString with the concatenation of two others. |
|
3195 * |
|
3196 * @param s1 The first string to be copied to the new one. |
|
3197 * @param s2 The second string to be copied to the new one, after s1. |
|
3198 * @return UnicodeString(s1).append(s2) |
|
3199 * @stable ICU 2.8 |
|
3200 */ |
|
3201 U_COMMON_API UnicodeString U_EXPORT2 |
|
3202 operator+ (const UnicodeString &s1, const UnicodeString &s2); |
|
3203 |
|
3204 U_NAMESPACE_END |
|
3205 |
|
3206 // inline implementations -------------------------------------------------- *** |
|
3207 |
|
3208 //======================================== |
|
3209 // Array copying |
|
3210 //======================================== |
|
3211 /** |
|
3212 * Copy an array of UnicodeString OBJECTS (not pointers). |
|
3213 * @internal |
|
3214 */ |
|
3215 inline void |
|
3216 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count) |
|
3217 { while(count-- > 0) *dst++ = *src++; } |
|
3218 |
|
3219 /** |
|
3220 * Copy an array of UnicodeString OBJECTS (not pointers). |
|
3221 * @internal |
|
3222 */ |
|
3223 inline void |
|
3224 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart, |
|
3225 U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count) |
|
3226 { uprv_arrayCopy(src+srcStart, dst+dstStart, count); } |
|
3227 |
|
3228 U_NAMESPACE_BEGIN |
|
3229 |
|
3230 //======================================== |
|
3231 // Inline members |
|
3232 //======================================== |
|
3233 |
|
3234 //======================================== |
|
3235 // Privates |
|
3236 //======================================== |
|
3237 |
|
3238 inline void |
|
3239 UnicodeString::pinIndex(int32_t& start) const |
|
3240 { |
|
3241 // pin index |
|
3242 if(start < 0) { |
|
3243 start = 0; |
|
3244 } else if(start > fLength) { |
|
3245 start = fLength; |
|
3246 } |
|
3247 } |
|
3248 |
|
3249 inline void |
|
3250 UnicodeString::pinIndices(int32_t& start, |
|
3251 int32_t& _length) const |
|
3252 { |
|
3253 // pin indices |
|
3254 if(start < 0) { |
|
3255 start = 0; |
|
3256 } else if(start > fLength) { |
|
3257 start = fLength; |
|
3258 } |
|
3259 if(_length < 0) { |
|
3260 _length = 0; |
|
3261 } else if(_length > (fLength - start)) { |
|
3262 _length = (fLength - start); |
|
3263 } |
|
3264 } |
|
3265 |
|
3266 inline UChar* |
|
3267 UnicodeString::getArrayStart() |
|
3268 { return fArray; } |
|
3269 |
|
3270 inline const UChar* |
|
3271 UnicodeString::getArrayStart() const |
|
3272 { return fArray; } |
|
3273 |
|
3274 //======================================== |
|
3275 // Read-only implementation methods |
|
3276 //======================================== |
|
3277 inline int32_t |
|
3278 UnicodeString::length() const |
|
3279 { return fLength; } |
|
3280 |
|
3281 inline int32_t |
|
3282 UnicodeString::getCapacity() const |
|
3283 { return fCapacity; } |
|
3284 |
|
3285 inline int32_t |
|
3286 UnicodeString::hashCode() const |
|
3287 { return doHashCode(); } |
|
3288 |
|
3289 inline UBool |
|
3290 UnicodeString::isBogus() const |
|
3291 { return (UBool)(fFlags & kIsBogus); } |
|
3292 |
|
3293 inline const UChar * |
|
3294 UnicodeString::getBuffer() const { |
|
3295 if(!(fFlags&(kIsBogus|kOpenGetBuffer))) { |
|
3296 return fArray; |
|
3297 } else { |
|
3298 return 0; |
|
3299 } |
|
3300 } |
|
3301 |
|
3302 //======================================== |
|
3303 // Read-only alias methods |
|
3304 //======================================== |
|
3305 inline int8_t |
|
3306 UnicodeString::doCompare(int32_t start, |
|
3307 int32_t length, |
|
3308 const UnicodeString& srcText, |
|
3309 int32_t srcStart, |
|
3310 int32_t srcLength) const |
|
3311 { |
|
3312 if(srcText.isBogus()) { |
|
3313 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise |
|
3314 } else { |
|
3315 srcText.pinIndices(srcStart, srcLength); |
|
3316 return doCompare(start, length, srcText.fArray, srcStart, srcLength); |
|
3317 } |
|
3318 } |
|
3319 |
|
3320 inline UBool |
|
3321 UnicodeString::operator== (const UnicodeString& text) const |
|
3322 { |
|
3323 if(isBogus()) { |
|
3324 return text.isBogus(); |
|
3325 } else { |
|
3326 return |
|
3327 !text.isBogus() && |
|
3328 fLength == text.fLength && |
|
3329 doCompare(0, fLength, text, 0, text.fLength) == 0; |
|
3330 } |
|
3331 } |
|
3332 |
|
3333 inline UBool |
|
3334 UnicodeString::operator!= (const UnicodeString& text) const |
|
3335 { return (! operator==(text)); } |
|
3336 |
|
3337 inline UBool |
|
3338 UnicodeString::operator> (const UnicodeString& text) const |
|
3339 { return doCompare(0, fLength, text, 0, text.fLength) == 1; } |
|
3340 |
|
3341 inline UBool |
|
3342 UnicodeString::operator< (const UnicodeString& text) const |
|
3343 { return doCompare(0, fLength, text, 0, text.fLength) == -1; } |
|
3344 |
|
3345 inline UBool |
|
3346 UnicodeString::operator>= (const UnicodeString& text) const |
|
3347 { return doCompare(0, fLength, text, 0, text.fLength) != -1; } |
|
3348 |
|
3349 inline UBool |
|
3350 UnicodeString::operator<= (const UnicodeString& text) const |
|
3351 { return doCompare(0, fLength, text, 0, text.fLength) != 1; } |
|
3352 |
|
3353 inline int8_t |
|
3354 UnicodeString::compare(const UnicodeString& text) const |
|
3355 { return doCompare(0, fLength, text, 0, text.fLength); } |
|
3356 |
|
3357 inline int8_t |
|
3358 UnicodeString::compare(int32_t start, |
|
3359 int32_t _length, |
|
3360 const UnicodeString& srcText) const |
|
3361 { return doCompare(start, _length, srcText, 0, srcText.fLength); } |
|
3362 |
|
3363 inline int8_t |
|
3364 UnicodeString::compare(const UChar *srcChars, |
|
3365 int32_t srcLength) const |
|
3366 { return doCompare(0, fLength, srcChars, 0, srcLength); } |
|
3367 |
|
3368 inline int8_t |
|
3369 UnicodeString::compare(int32_t start, |
|
3370 int32_t _length, |
|
3371 const UnicodeString& srcText, |
|
3372 int32_t srcStart, |
|
3373 int32_t srcLength) const |
|
3374 { return doCompare(start, _length, srcText, srcStart, srcLength); } |
|
3375 |
|
3376 inline int8_t |
|
3377 UnicodeString::compare(int32_t start, |
|
3378 int32_t _length, |
|
3379 const UChar *srcChars) const |
|
3380 { return doCompare(start, _length, srcChars, 0, _length); } |
|
3381 |
|
3382 inline int8_t |
|
3383 UnicodeString::compare(int32_t start, |
|
3384 int32_t _length, |
|
3385 const UChar *srcChars, |
|
3386 int32_t srcStart, |
|
3387 int32_t srcLength) const |
|
3388 { return doCompare(start, _length, srcChars, srcStart, srcLength); } |
|
3389 |
|
3390 inline int8_t |
|
3391 UnicodeString::compareBetween(int32_t start, |
|
3392 int32_t limit, |
|
3393 const UnicodeString& srcText, |
|
3394 int32_t srcStart, |
|
3395 int32_t srcLimit) const |
|
3396 { return doCompare(start, limit - start, |
|
3397 srcText, srcStart, srcLimit - srcStart); } |
|
3398 |
|
3399 inline int8_t |
|
3400 UnicodeString::doCompareCodePointOrder(int32_t start, |
|
3401 int32_t length, |
|
3402 const UnicodeString& srcText, |
|
3403 int32_t srcStart, |
|
3404 int32_t srcLength) const |
|
3405 { |
|
3406 if(srcText.isBogus()) { |
|
3407 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise |
|
3408 } else { |
|
3409 srcText.pinIndices(srcStart, srcLength); |
|
3410 return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength); |
|
3411 } |
|
3412 } |
|
3413 |
|
3414 inline int8_t |
|
3415 UnicodeString::compareCodePointOrder(const UnicodeString& text) const |
|
3416 { return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); } |
|
3417 |
|
3418 inline int8_t |
|
3419 UnicodeString::compareCodePointOrder(int32_t start, |
|
3420 int32_t _length, |
|
3421 const UnicodeString& srcText) const |
|
3422 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); } |
|
3423 |
|
3424 inline int8_t |
|
3425 UnicodeString::compareCodePointOrder(const UChar *srcChars, |
|
3426 int32_t srcLength) const |
|
3427 { return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); } |
|
3428 |
|
3429 inline int8_t |
|
3430 UnicodeString::compareCodePointOrder(int32_t start, |
|
3431 int32_t _length, |
|
3432 const UnicodeString& srcText, |
|
3433 int32_t srcStart, |
|
3434 int32_t srcLength) const |
|
3435 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } |
|
3436 |
|
3437 inline int8_t |
|
3438 UnicodeString::compareCodePointOrder(int32_t start, |
|
3439 int32_t _length, |
|
3440 const UChar *srcChars) const |
|
3441 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } |
|
3442 |
|
3443 inline int8_t |
|
3444 UnicodeString::compareCodePointOrder(int32_t start, |
|
3445 int32_t _length, |
|
3446 const UChar *srcChars, |
|
3447 int32_t srcStart, |
|
3448 int32_t srcLength) const |
|
3449 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } |
|
3450 |
|
3451 inline int8_t |
|
3452 UnicodeString::compareCodePointOrderBetween(int32_t start, |
|
3453 int32_t limit, |
|
3454 const UnicodeString& srcText, |
|
3455 int32_t srcStart, |
|
3456 int32_t srcLimit) const |
|
3457 { return doCompareCodePointOrder(start, limit - start, |
|
3458 srcText, srcStart, srcLimit - srcStart); } |
|
3459 |
|
3460 inline int8_t |
|
3461 UnicodeString::doCaseCompare(int32_t start, |
|
3462 int32_t length, |
|
3463 const UnicodeString &srcText, |
|
3464 int32_t srcStart, |
|
3465 int32_t srcLength, |
|
3466 uint32_t options) const |
|
3467 { |
|
3468 if(srcText.isBogus()) { |
|
3469 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise |
|
3470 } else { |
|
3471 srcText.pinIndices(srcStart, srcLength); |
|
3472 return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options); |
|
3473 } |
|
3474 } |
|
3475 |
|
3476 inline int8_t |
|
3477 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { |
|
3478 return doCaseCompare(0, fLength, text, 0, text.fLength, options); |
|
3479 } |
|
3480 |
|
3481 inline int8_t |
|
3482 UnicodeString::caseCompare(int32_t start, |
|
3483 int32_t _length, |
|
3484 const UnicodeString &srcText, |
|
3485 uint32_t options) const { |
|
3486 return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options); |
|
3487 } |
|
3488 |
|
3489 inline int8_t |
|
3490 UnicodeString::caseCompare(const UChar *srcChars, |
|
3491 int32_t srcLength, |
|
3492 uint32_t options) const { |
|
3493 return doCaseCompare(0, fLength, srcChars, 0, srcLength, options); |
|
3494 } |
|
3495 |
|
3496 inline int8_t |
|
3497 UnicodeString::caseCompare(int32_t start, |
|
3498 int32_t _length, |
|
3499 const UnicodeString &srcText, |
|
3500 int32_t srcStart, |
|
3501 int32_t srcLength, |
|
3502 uint32_t options) const { |
|
3503 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); |
|
3504 } |
|
3505 |
|
3506 inline int8_t |
|
3507 UnicodeString::caseCompare(int32_t start, |
|
3508 int32_t _length, |
|
3509 const UChar *srcChars, |
|
3510 uint32_t options) const { |
|
3511 return doCaseCompare(start, _length, srcChars, 0, _length, options); |
|
3512 } |
|
3513 |
|
3514 inline int8_t |
|
3515 UnicodeString::caseCompare(int32_t start, |
|
3516 int32_t _length, |
|
3517 const UChar *srcChars, |
|
3518 int32_t srcStart, |
|
3519 int32_t srcLength, |
|
3520 uint32_t options) const { |
|
3521 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); |
|
3522 } |
|
3523 |
|
3524 inline int8_t |
|
3525 UnicodeString::caseCompareBetween(int32_t start, |
|
3526 int32_t limit, |
|
3527 const UnicodeString &srcText, |
|
3528 int32_t srcStart, |
|
3529 int32_t srcLimit, |
|
3530 uint32_t options) const { |
|
3531 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); |
|
3532 } |
|
3533 |
|
3534 inline int32_t |
|
3535 UnicodeString::indexOf(const UnicodeString& srcText, |
|
3536 int32_t srcStart, |
|
3537 int32_t srcLength, |
|
3538 int32_t start, |
|
3539 int32_t _length) const |
|
3540 { |
|
3541 if(!srcText.isBogus()) { |
|
3542 srcText.pinIndices(srcStart, srcLength); |
|
3543 if(srcLength > 0) { |
|
3544 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); |
|
3545 } |
|
3546 } |
|
3547 return -1; |
|
3548 } |
|
3549 |
|
3550 inline int32_t |
|
3551 UnicodeString::indexOf(const UnicodeString& text) const |
|
3552 { return indexOf(text, 0, text.fLength, 0, fLength); } |
|
3553 |
|
3554 inline int32_t |
|
3555 UnicodeString::indexOf(const UnicodeString& text, |
|
3556 int32_t start) const { |
|
3557 pinIndex(start); |
|
3558 return indexOf(text, 0, text.fLength, start, fLength - start); |
|
3559 } |
|
3560 |
|
3561 inline int32_t |
|
3562 UnicodeString::indexOf(const UnicodeString& text, |
|
3563 int32_t start, |
|
3564 int32_t _length) const |
|
3565 { return indexOf(text, 0, text.fLength, start, _length); } |
|
3566 |
|
3567 inline int32_t |
|
3568 UnicodeString::indexOf(const UChar *srcChars, |
|
3569 int32_t srcLength, |
|
3570 int32_t start) const { |
|
3571 pinIndex(start); |
|
3572 return indexOf(srcChars, 0, srcLength, start, fLength - start); |
|
3573 } |
|
3574 |
|
3575 inline int32_t |
|
3576 UnicodeString::indexOf(const UChar *srcChars, |
|
3577 int32_t srcLength, |
|
3578 int32_t start, |
|
3579 int32_t _length) const |
|
3580 { return indexOf(srcChars, 0, srcLength, start, _length); } |
|
3581 |
|
3582 inline int32_t |
|
3583 UnicodeString::indexOf(UChar c, |
|
3584 int32_t start, |
|
3585 int32_t _length) const |
|
3586 { return doIndexOf(c, start, _length); } |
|
3587 |
|
3588 inline int32_t |
|
3589 UnicodeString::indexOf(UChar32 c, |
|
3590 int32_t start, |
|
3591 int32_t _length) const |
|
3592 { return doIndexOf(c, start, _length); } |
|
3593 |
|
3594 inline int32_t |
|
3595 UnicodeString::indexOf(UChar c) const |
|
3596 { return doIndexOf(c, 0, fLength); } |
|
3597 |
|
3598 inline int32_t |
|
3599 UnicodeString::indexOf(UChar32 c) const |
|
3600 { return indexOf(c, 0, fLength); } |
|
3601 |
|
3602 inline int32_t |
|
3603 UnicodeString::indexOf(UChar c, |
|
3604 int32_t start) const { |
|
3605 pinIndex(start); |
|
3606 return doIndexOf(c, start, fLength - start); |
|
3607 } |
|
3608 |
|
3609 inline int32_t |
|
3610 UnicodeString::indexOf(UChar32 c, |
|
3611 int32_t start) const { |
|
3612 pinIndex(start); |
|
3613 return indexOf(c, start, fLength - start); |
|
3614 } |
|
3615 |
|
3616 inline int32_t |
|
3617 UnicodeString::lastIndexOf(const UChar *srcChars, |
|
3618 int32_t srcLength, |
|
3619 int32_t start, |
|
3620 int32_t _length) const |
|
3621 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } |
|
3622 |
|
3623 inline int32_t |
|
3624 UnicodeString::lastIndexOf(const UChar *srcChars, |
|
3625 int32_t srcLength, |
|
3626 int32_t start) const { |
|
3627 pinIndex(start); |
|
3628 return lastIndexOf(srcChars, 0, srcLength, start, fLength - start); |
|
3629 } |
|
3630 |
|
3631 inline int32_t |
|
3632 UnicodeString::lastIndexOf(const UnicodeString& srcText, |
|
3633 int32_t srcStart, |
|
3634 int32_t srcLength, |
|
3635 int32_t start, |
|
3636 int32_t _length) const |
|
3637 { |
|
3638 if(!srcText.isBogus()) { |
|
3639 srcText.pinIndices(srcStart, srcLength); |
|
3640 if(srcLength > 0) { |
|
3641 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); |
|
3642 } |
|
3643 } |
|
3644 return -1; |
|
3645 } |
|
3646 |
|
3647 inline int32_t |
|
3648 UnicodeString::lastIndexOf(const UnicodeString& text, |
|
3649 int32_t start, |
|
3650 int32_t _length) const |
|
3651 { return lastIndexOf(text, 0, text.fLength, start, _length); } |
|
3652 |
|
3653 inline int32_t |
|
3654 UnicodeString::lastIndexOf(const UnicodeString& text, |
|
3655 int32_t start) const { |
|
3656 pinIndex(start); |
|
3657 return lastIndexOf(text, 0, text.fLength, start, fLength - start); |
|
3658 } |
|
3659 |
|
3660 inline int32_t |
|
3661 UnicodeString::lastIndexOf(const UnicodeString& text) const |
|
3662 { return lastIndexOf(text, 0, text.fLength, 0, fLength); } |
|
3663 |
|
3664 inline int32_t |
|
3665 UnicodeString::lastIndexOf(UChar c, |
|
3666 int32_t start, |
|
3667 int32_t _length) const |
|
3668 { return doLastIndexOf(c, start, _length); } |
|
3669 |
|
3670 inline int32_t |
|
3671 UnicodeString::lastIndexOf(UChar32 c, |
|
3672 int32_t start, |
|
3673 int32_t _length) const { |
|
3674 return doLastIndexOf(c, start, _length); |
|
3675 } |
|
3676 |
|
3677 inline int32_t |
|
3678 UnicodeString::lastIndexOf(UChar c) const |
|
3679 { return doLastIndexOf(c, 0, fLength); } |
|
3680 |
|
3681 inline int32_t |
|
3682 UnicodeString::lastIndexOf(UChar32 c) const { |
|
3683 return lastIndexOf(c, 0, fLength); |
|
3684 } |
|
3685 |
|
3686 inline int32_t |
|
3687 UnicodeString::lastIndexOf(UChar c, |
|
3688 int32_t start) const { |
|
3689 pinIndex(start); |
|
3690 return doLastIndexOf(c, start, fLength - start); |
|
3691 } |
|
3692 |
|
3693 inline int32_t |
|
3694 UnicodeString::lastIndexOf(UChar32 c, |
|
3695 int32_t start) const { |
|
3696 pinIndex(start); |
|
3697 return lastIndexOf(c, start, fLength - start); |
|
3698 } |
|
3699 |
|
3700 inline UBool |
|
3701 UnicodeString::startsWith(const UnicodeString& text) const |
|
3702 { return compare(0, text.fLength, text, 0, text.fLength) == 0; } |
|
3703 |
|
3704 inline UBool |
|
3705 UnicodeString::startsWith(const UnicodeString& srcText, |
|
3706 int32_t srcStart, |
|
3707 int32_t srcLength) const |
|
3708 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } |
|
3709 |
|
3710 inline UBool |
|
3711 UnicodeString::startsWith(const UChar *srcChars, |
|
3712 int32_t srcLength) const |
|
3713 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } |
|
3714 |
|
3715 inline UBool |
|
3716 UnicodeString::startsWith(const UChar *srcChars, |
|
3717 int32_t srcStart, |
|
3718 int32_t srcLength) const |
|
3719 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;} |
|
3720 |
|
3721 inline UBool |
|
3722 UnicodeString::endsWith(const UnicodeString& text) const |
|
3723 { return doCompare(fLength - text.fLength, text.fLength, |
|
3724 text, 0, text.fLength) == 0; } |
|
3725 |
|
3726 inline UBool |
|
3727 UnicodeString::endsWith(const UnicodeString& srcText, |
|
3728 int32_t srcStart, |
|
3729 int32_t srcLength) const { |
|
3730 srcText.pinIndices(srcStart, srcLength); |
|
3731 return doCompare(fLength - srcLength, srcLength, |
|
3732 srcText, srcStart, srcLength) == 0; |
|
3733 } |
|
3734 |
|
3735 inline UBool |
|
3736 UnicodeString::endsWith(const UChar *srcChars, |
|
3737 int32_t srcLength) const { |
|
3738 if(srcLength < 0) { |
|
3739 srcLength = u_strlen(srcChars); |
|
3740 } |
|
3741 return doCompare(fLength - srcLength, srcLength, |
|
3742 srcChars, 0, srcLength) == 0; |
|
3743 } |
|
3744 |
|
3745 inline UBool |
|
3746 UnicodeString::endsWith(const UChar *srcChars, |
|
3747 int32_t srcStart, |
|
3748 int32_t srcLength) const { |
|
3749 if(srcLength < 0) { |
|
3750 srcLength = u_strlen(srcChars + srcStart); |
|
3751 } |
|
3752 return doCompare(fLength - srcLength, srcLength, |
|
3753 srcChars, srcStart, srcLength) == 0; |
|
3754 } |
|
3755 |
|
3756 //======================================== |
|
3757 // replace |
|
3758 //======================================== |
|
3759 inline UnicodeString& |
|
3760 UnicodeString::replace(int32_t start, |
|
3761 int32_t _length, |
|
3762 const UnicodeString& srcText) |
|
3763 { return doReplace(start, _length, srcText, 0, srcText.fLength); } |
|
3764 |
|
3765 inline UnicodeString& |
|
3766 UnicodeString::replace(int32_t start, |
|
3767 int32_t _length, |
|
3768 const UnicodeString& srcText, |
|
3769 int32_t srcStart, |
|
3770 int32_t srcLength) |
|
3771 { return doReplace(start, _length, srcText, srcStart, srcLength); } |
|
3772 |
|
3773 inline UnicodeString& |
|
3774 UnicodeString::replace(int32_t start, |
|
3775 int32_t _length, |
|
3776 const UChar *srcChars, |
|
3777 int32_t srcLength) |
|
3778 { return doReplace(start, _length, srcChars, 0, srcLength); } |
|
3779 |
|
3780 inline UnicodeString& |
|
3781 UnicodeString::replace(int32_t start, |
|
3782 int32_t _length, |
|
3783 const UChar *srcChars, |
|
3784 int32_t srcStart, |
|
3785 int32_t srcLength) |
|
3786 { return doReplace(start, _length, srcChars, srcStart, srcLength); } |
|
3787 |
|
3788 inline UnicodeString& |
|
3789 UnicodeString::replace(int32_t start, |
|
3790 int32_t _length, |
|
3791 UChar srcChar) |
|
3792 { return doReplace(start, _length, &srcChar, 0, 1); } |
|
3793 |
|
3794 inline UnicodeString& |
|
3795 UnicodeString::replace(int32_t start, |
|
3796 int32_t _length, |
|
3797 UChar32 srcChar) { |
|
3798 UChar buffer[U16_MAX_LENGTH]; |
|
3799 int32_t count = 0; |
|
3800 UBool isError = FALSE; |
|
3801 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); |
|
3802 return doReplace(start, _length, buffer, 0, count); |
|
3803 } |
|
3804 |
|
3805 inline UnicodeString& |
|
3806 UnicodeString::replaceBetween(int32_t start, |
|
3807 int32_t limit, |
|
3808 const UnicodeString& srcText) |
|
3809 { return doReplace(start, limit - start, srcText, 0, srcText.fLength); } |
|
3810 |
|
3811 inline UnicodeString& |
|
3812 UnicodeString::replaceBetween(int32_t start, |
|
3813 int32_t limit, |
|
3814 const UnicodeString& srcText, |
|
3815 int32_t srcStart, |
|
3816 int32_t srcLimit) |
|
3817 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } |
|
3818 |
|
3819 inline UnicodeString& |
|
3820 UnicodeString::findAndReplace(const UnicodeString& oldText, |
|
3821 const UnicodeString& newText) |
|
3822 { return findAndReplace(0, fLength, oldText, 0, oldText.fLength, |
|
3823 newText, 0, newText.fLength); } |
|
3824 |
|
3825 inline UnicodeString& |
|
3826 UnicodeString::findAndReplace(int32_t start, |
|
3827 int32_t _length, |
|
3828 const UnicodeString& oldText, |
|
3829 const UnicodeString& newText) |
|
3830 { return findAndReplace(start, _length, oldText, 0, oldText.fLength, |
|
3831 newText, 0, newText.fLength); } |
|
3832 |
|
3833 // ============================ |
|
3834 // extract |
|
3835 // ============================ |
|
3836 inline void |
|
3837 UnicodeString::doExtract(int32_t start, |
|
3838 int32_t _length, |
|
3839 UnicodeString& target) const |
|
3840 { target.replace(0, target.fLength, *this, start, _length); } |
|
3841 |
|
3842 inline void |
|
3843 UnicodeString::extract(int32_t start, |
|
3844 int32_t _length, |
|
3845 UChar *target, |
|
3846 int32_t targetStart) const |
|
3847 { doExtract(start, _length, target, targetStart); } |
|
3848 |
|
3849 inline void |
|
3850 UnicodeString::extract(int32_t start, |
|
3851 int32_t _length, |
|
3852 UnicodeString& target) const |
|
3853 { doExtract(start, _length, target); } |
|
3854 |
|
3855 #if !UCONFIG_NO_CONVERSION |
|
3856 |
|
3857 inline int32_t |
|
3858 UnicodeString::extract(int32_t start, |
|
3859 int32_t _length, |
|
3860 char *dst, |
|
3861 const char *codepage) const |
|
3862 |
|
3863 { |
|
3864 // This dstSize value will be checked explicitly |
|
3865 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); |
|
3866 } |
|
3867 |
|
3868 #endif |
|
3869 |
|
3870 inline void |
|
3871 UnicodeString::extractBetween(int32_t start, |
|
3872 int32_t limit, |
|
3873 UChar *dst, |
|
3874 int32_t dstStart) const { |
|
3875 pinIndex(start); |
|
3876 pinIndex(limit); |
|
3877 doExtract(start, limit - start, dst, dstStart); |
|
3878 } |
|
3879 |
|
3880 inline UChar |
|
3881 UnicodeString::doCharAt(int32_t offset) const |
|
3882 { |
|
3883 if((uint32_t)offset < (uint32_t)fLength) { |
|
3884 return fArray[offset]; |
|
3885 } else { |
|
3886 return kInvalidUChar; |
|
3887 } |
|
3888 } |
|
3889 |
|
3890 inline UChar |
|
3891 UnicodeString::charAt(int32_t offset) const |
|
3892 { return doCharAt(offset); } |
|
3893 |
|
3894 inline UChar |
|
3895 UnicodeString::operator[] (int32_t offset) const |
|
3896 { return doCharAt(offset); } |
|
3897 |
|
3898 inline UChar32 |
|
3899 UnicodeString::char32At(int32_t offset) const |
|
3900 { |
|
3901 if((uint32_t)offset < (uint32_t)fLength) { |
|
3902 UChar32 c; |
|
3903 U16_GET(fArray, 0, offset, fLength, c); |
|
3904 return c; |
|
3905 } else { |
|
3906 return kInvalidUChar; |
|
3907 } |
|
3908 } |
|
3909 |
|
3910 inline int32_t |
|
3911 UnicodeString::getChar32Start(int32_t offset) const { |
|
3912 if((uint32_t)offset < (uint32_t)fLength) { |
|
3913 U16_SET_CP_START(fArray, 0, offset); |
|
3914 return offset; |
|
3915 } else { |
|
3916 return 0; |
|
3917 } |
|
3918 } |
|
3919 |
|
3920 inline int32_t |
|
3921 UnicodeString::getChar32Limit(int32_t offset) const { |
|
3922 if((uint32_t)offset < (uint32_t)fLength) { |
|
3923 U16_SET_CP_LIMIT(fArray, 0, offset, fLength); |
|
3924 return offset; |
|
3925 } else { |
|
3926 return fLength; |
|
3927 } |
|
3928 } |
|
3929 |
|
3930 inline UBool |
|
3931 UnicodeString::isEmpty() const { |
|
3932 return fLength == 0; |
|
3933 } |
|
3934 |
|
3935 //======================================== |
|
3936 // Write implementation methods |
|
3937 //======================================== |
|
3938 inline const UChar * |
|
3939 UnicodeString::getTerminatedBuffer() { |
|
3940 if(fFlags&(kIsBogus|kOpenGetBuffer)) { |
|
3941 return 0; |
|
3942 } else if(fLength<fCapacity && fArray[fLength]==0) { |
|
3943 return fArray; |
|
3944 } else if(cloneArrayIfNeeded(fLength+1)) { |
|
3945 fArray[fLength]=0; |
|
3946 return fArray; |
|
3947 } else { |
|
3948 return 0; |
|
3949 } |
|
3950 } |
|
3951 |
|
3952 inline UnicodeString& |
|
3953 UnicodeString::operator= (UChar ch) |
|
3954 { return doReplace(0, fLength, &ch, 0, 1); } |
|
3955 |
|
3956 inline UnicodeString& |
|
3957 UnicodeString::operator= (UChar32 ch) |
|
3958 { return replace(0, fLength, ch); } |
|
3959 |
|
3960 inline UnicodeString& |
|
3961 UnicodeString::setTo(const UnicodeString& srcText, |
|
3962 int32_t srcStart, |
|
3963 int32_t srcLength) |
|
3964 { |
|
3965 unBogus(); |
|
3966 return doReplace(0, fLength, srcText, srcStart, srcLength); |
|
3967 } |
|
3968 |
|
3969 inline UnicodeString& |
|
3970 UnicodeString::setTo(const UnicodeString& srcText, |
|
3971 int32_t srcStart) |
|
3972 { |
|
3973 unBogus(); |
|
3974 srcText.pinIndex(srcStart); |
|
3975 return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart); |
|
3976 } |
|
3977 |
|
3978 inline UnicodeString& |
|
3979 UnicodeString::setTo(const UnicodeString& srcText) |
|
3980 { |
|
3981 unBogus(); |
|
3982 return doReplace(0, fLength, srcText, 0, srcText.fLength); |
|
3983 } |
|
3984 |
|
3985 inline UnicodeString& |
|
3986 UnicodeString::setTo(const UChar *srcChars, |
|
3987 int32_t srcLength) |
|
3988 { |
|
3989 unBogus(); |
|
3990 return doReplace(0, fLength, srcChars, 0, srcLength); |
|
3991 } |
|
3992 |
|
3993 inline UnicodeString& |
|
3994 UnicodeString::setTo(UChar srcChar) |
|
3995 { |
|
3996 unBogus(); |
|
3997 return doReplace(0, fLength, &srcChar, 0, 1); |
|
3998 } |
|
3999 |
|
4000 inline UnicodeString& |
|
4001 UnicodeString::setTo(UChar32 srcChar) |
|
4002 { |
|
4003 unBogus(); |
|
4004 return replace(0, fLength, srcChar); |
|
4005 } |
|
4006 |
|
4007 inline UnicodeString& |
|
4008 UnicodeString::operator+= (UChar ch) |
|
4009 { return doReplace(fLength, 0, &ch, 0, 1); } |
|
4010 |
|
4011 inline UnicodeString& |
|
4012 UnicodeString::operator+= (UChar32 ch) { |
|
4013 UChar buffer[U16_MAX_LENGTH]; |
|
4014 int32_t _length = 0; |
|
4015 UBool isError = FALSE; |
|
4016 U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError); |
|
4017 return doReplace(fLength, 0, buffer, 0, _length); |
|
4018 } |
|
4019 |
|
4020 inline UnicodeString& |
|
4021 UnicodeString::operator+= (const UnicodeString& srcText) |
|
4022 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); } |
|
4023 |
|
4024 inline UnicodeString& |
|
4025 UnicodeString::append(const UnicodeString& srcText, |
|
4026 int32_t srcStart, |
|
4027 int32_t srcLength) |
|
4028 { return doReplace(fLength, 0, srcText, srcStart, srcLength); } |
|
4029 |
|
4030 inline UnicodeString& |
|
4031 UnicodeString::append(const UnicodeString& srcText) |
|
4032 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); } |
|
4033 |
|
4034 inline UnicodeString& |
|
4035 UnicodeString::append(const UChar *srcChars, |
|
4036 int32_t srcStart, |
|
4037 int32_t srcLength) |
|
4038 { return doReplace(fLength, 0, srcChars, srcStart, srcLength); } |
|
4039 |
|
4040 inline UnicodeString& |
|
4041 UnicodeString::append(const UChar *srcChars, |
|
4042 int32_t srcLength) |
|
4043 { return doReplace(fLength, 0, srcChars, 0, srcLength); } |
|
4044 |
|
4045 inline UnicodeString& |
|
4046 UnicodeString::append(UChar srcChar) |
|
4047 { return doReplace(fLength, 0, &srcChar, 0, 1); } |
|
4048 |
|
4049 inline UnicodeString& |
|
4050 UnicodeString::append(UChar32 srcChar) { |
|
4051 UChar buffer[U16_MAX_LENGTH]; |
|
4052 int32_t _length = 0; |
|
4053 UBool isError = FALSE; |
|
4054 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); |
|
4055 return doReplace(fLength, 0, buffer, 0, _length); |
|
4056 } |
|
4057 |
|
4058 inline UnicodeString& |
|
4059 UnicodeString::insert(int32_t start, |
|
4060 const UnicodeString& srcText, |
|
4061 int32_t srcStart, |
|
4062 int32_t srcLength) |
|
4063 { return doReplace(start, 0, srcText, srcStart, srcLength); } |
|
4064 |
|
4065 inline UnicodeString& |
|
4066 UnicodeString::insert(int32_t start, |
|
4067 const UnicodeString& srcText) |
|
4068 { return doReplace(start, 0, srcText, 0, srcText.fLength); } |
|
4069 |
|
4070 inline UnicodeString& |
|
4071 UnicodeString::insert(int32_t start, |
|
4072 const UChar *srcChars, |
|
4073 int32_t srcStart, |
|
4074 int32_t srcLength) |
|
4075 { return doReplace(start, 0, srcChars, srcStart, srcLength); } |
|
4076 |
|
4077 inline UnicodeString& |
|
4078 UnicodeString::insert(int32_t start, |
|
4079 const UChar *srcChars, |
|
4080 int32_t srcLength) |
|
4081 { return doReplace(start, 0, srcChars, 0, srcLength); } |
|
4082 |
|
4083 inline UnicodeString& |
|
4084 UnicodeString::insert(int32_t start, |
|
4085 UChar srcChar) |
|
4086 { return doReplace(start, 0, &srcChar, 0, 1); } |
|
4087 |
|
4088 inline UnicodeString& |
|
4089 UnicodeString::insert(int32_t start, |
|
4090 UChar32 srcChar) |
|
4091 { return replace(start, 0, srcChar); } |
|
4092 |
|
4093 |
|
4094 inline UnicodeString& |
|
4095 UnicodeString::remove() |
|
4096 { |
|
4097 // remove() of a bogus string makes the string empty and non-bogus |
|
4098 if(isBogus()) { |
|
4099 unBogus(); |
|
4100 } else { |
|
4101 fLength = 0; |
|
4102 } |
|
4103 return *this; |
|
4104 } |
|
4105 |
|
4106 inline UnicodeString& |
|
4107 UnicodeString::remove(int32_t start, |
|
4108 int32_t _length) |
|
4109 { |
|
4110 if(start <= 0 && _length == INT32_MAX) { |
|
4111 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus |
|
4112 return remove(); |
|
4113 } else { |
|
4114 return doReplace(start, _length, NULL, 0, 0); |
|
4115 } |
|
4116 } |
|
4117 |
|
4118 inline UnicodeString& |
|
4119 UnicodeString::removeBetween(int32_t start, |
|
4120 int32_t limit) |
|
4121 { return doReplace(start, limit - start, NULL, 0, 0); } |
|
4122 |
|
4123 inline UBool |
|
4124 UnicodeString::truncate(int32_t targetLength) |
|
4125 { |
|
4126 if(isBogus() && targetLength == 0) { |
|
4127 // truncate(0) of a bogus string makes the string empty and non-bogus |
|
4128 unBogus(); |
|
4129 return FALSE; |
|
4130 } else if((uint32_t)targetLength < (uint32_t)fLength) { |
|
4131 fLength = targetLength; |
|
4132 return TRUE; |
|
4133 } else { |
|
4134 return FALSE; |
|
4135 } |
|
4136 } |
|
4137 |
|
4138 inline UnicodeString& |
|
4139 UnicodeString::reverse() |
|
4140 { return doReverse(0, fLength); } |
|
4141 |
|
4142 inline UnicodeString& |
|
4143 UnicodeString::reverse(int32_t start, |
|
4144 int32_t _length) |
|
4145 { return doReverse(start, _length); } |
|
4146 |
|
4147 U_NAMESPACE_END |
|
4148 |
|
4149 #endif |