fontservices/textshaperplugin/IcuSource/common/unicode/unistr.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 **********************************************************************
       
     3 *   Copyright (C) 1998-2005, International Business Machines
       
     4 *   Corporation and others.  All Rights Reserved.
       
     5 **********************************************************************
       
     6 *
       
     7 * File unistr.h
       
     8 *
       
     9 * Modification History:
       
    10 *
       
    11 *   Date        Name        Description
       
    12 *   09/25/98    stephen     Creation.
       
    13 *   11/11/98    stephen     Changed per 11/9 code review.
       
    14 *   04/20/99    stephen     Overhauled per 4/16 code review.
       
    15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
       
    16 *                           handleReplaceBetween(); other methods unchanged.
       
    17 *   06/25/01    grhoten     Remove dependency on iostream.
       
    18 ******************************************************************************
       
    19 */
       
    20 
       
    21 #ifndef UNISTR_H
       
    22 #define UNISTR_H
       
    23 
       
    24 /**
       
    25  * \file 
       
    26  * \brief C++ API: Unicode String 
       
    27  */
       
    28 
       
    29 #include "unicode/rep.h"
       
    30 
       
    31 struct UConverter;          // unicode/ucnv.h
       
    32 class  StringThreadTest;
       
    33 
       
    34 #ifndef U_COMPARE_CODE_POINT_ORDER
       
    35 /* Defined here only if it is not already defined; see also ustring.h and unorm.h */
       
    36 /**
       
    37  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
       
    38  * Compare strings in code point order instead of code unit order.
       
    39  * @stable ICU 2.2
       
    40  */
       
    41 #define U_COMPARE_CODE_POINT_ORDER  0x8000
       
    42 #endif /* U_COMPARE_CODE_POINT_ORDER */
       
    43 
       
    44 #ifndef USTRING_H
       
    45 /* Declared here only when USTRING_H is not defined, i.e. ustring.h has presumably not been included yet; see ustring.h */
       
    46 U_STABLE int32_t U_EXPORT2
       
    47 u_strlen(const UChar *s);
       
    48 #endif /* USTRING_H */
       
    49 
       
    50 U_NAMESPACE_BEGIN
       
    51 
       
    52 class Locale;               // unicode/locid.h
       
    53 class StringCharacterIterator;
       
    54 class BreakIterator;        // unicode/brkiter.h
       
    55 
       
    56 /* The <iostream> include has been moved to unicode/ustream.h */
       
    57 
       
    58 /**
       
    59  * Shorthand for UnicodeString::kInvariant, the constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
       
    60  * which constructs a Unicode string from an invariant-character char * string.
       
    61  * For details about invariant characters see utypes.h.
       
    62  * This constructor has no runtime dependency on conversion code and is
       
    63  * therefore recommended over ones taking a charset name string
       
    64  * (where the empty string "" indicates invariant-character conversion).
       
    65  * @see UnicodeString::EInvariant
       
    66  * @draft ICU 3.2
       
    67  */
       
    68 #define US_INV UnicodeString::kInvariant
       
    69 
       
    70 /**
       
    71  * UNICODE_STRING(cs, _length): Unicode String literals in C++.
       
    72  * Depending on the platform properties, different UnicodeString
       
    73  * constructors are used to create a UnicodeString object from
       
    74  * a string literal: the first branch below pastes L onto cs and casts the resulting wide literal to const UChar *, the second casts cs itself to const UChar *, and the third passes cs to the constructor taking US_INV.
       
    75  * The macros are defined for maximum performance.
       
    76  * They work only for strings that contain "invariant characters", i.e.,
       
    77  * only latin letters, digits, and some punctuation.
       
    78  * See utypes.h for details.
       
    79  *
       
    80  * The string parameter (cs) must be a C string literal.
       
    81  * The length of the string (_length), not including the terminating
       
    82  * <code>NUL</code>, must be specified as a constant.
       
    83  * The U_STRING_DECL macro should be invoked exactly once for one
       
    84  * such string variable before it is used. NOTE(review): U_STRING_DECL is neither defined nor used in this part of the header, and UNICODE_STRING expands to a constructor call rather than a variable declaration; this sentence looks carried over from another macro's documentation - confirm before relying on it.
       
    85  * @stable ICU 2.0
       
    86  */
       
    87 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
       
    88 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
       
    89 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
       
    90 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
       
    91 #else
       
    92 #   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
       
    93 #endif
       
    94 
       
    95 /**
       
    96  * UNICODE_STRING_SIMPLE(cs): Unicode String literals in C++.
       
    97  * Depending on the platform properties, different UnicodeString
       
    98  * constructors are used to create a UnicodeString object from
       
    99  * a string literal: the first branch below pastes L onto cs and casts the resulting wide literal to const UChar *, the second casts cs itself to const UChar *, and the third passes cs to the constructor taking US_INV.
       
   100  * The macros are defined for improved performance.
       
   101  * They work only for strings that contain "invariant characters", i.e.,
       
   102  * only latin letters, digits, and some punctuation.
       
   103  * See utypes.h for details.
       
   104  *
       
   105  * The string parameter (cs) must be a C string literal. No length is supplied by the caller: every branch passes -1 as the length argument (presumably asking the constructor to determine the length itself - confirm against the constructor documentation).
       
   106  * @stable ICU 2.0
       
   107  */
       
   108 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
       
   109 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
       
   110 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
       
   111 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
       
   112 #else
       
   113 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
       
   114 #endif
       
   115 
       
   116 /**
       
   117  * UnicodeString is a string class that stores Unicode characters directly and provides
       
   118  * functionality similar to that of the Java String and StringBuffer classes.
       
   119  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
       
   120  *
       
   121  * The UnicodeString class is not suitable for subclassing.
       
   122  *
       
   123  * <p>For an overview of Unicode strings in C and C++ see the
       
   124  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
       
   125  *
       
   126  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
       
   127  * A Unicode character may be stored with either one code unit
       
   128  * (the most common case) or with a matched pair of special code units
       
   129  * ("surrogates"). The data type for code units is UChar. 
       
   130  * For single-character handling, a Unicode character code <em>point</em> is a value
       
   131  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
       
   132  *
       
   133  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
       
   134  * This is the same as with multi-byte char* strings in traditional string handling.
       
   135  * Operations on partial strings typically do not test for code point boundaries.
       
   136  * If necessary, the user needs to take care of such boundaries by testing for the code unit
       
   137  * values or by using functions like
       
   138  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
       
   139  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
       
   140  *
       
   141  * UnicodeString methods are more lenient with regard to input parameter values
       
   142  * than other ICU APIs. In particular:
       
   143  * - If indexes are out of bounds for a UnicodeString object
       
   144  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
       
   145  * - If primitive string pointer values (e.g., const UChar * or char *)
       
   146  *   for input strings are NULL, then those input string parameters are treated
       
   147  *   as if they pointed to an empty string.
       
   148  *   However, this is <em>not</em> the case for char * parameters for charset names
       
   149  *   or other IDs.
       
   150  * - Most UnicodeString methods do not take a UErrorCode parameter because
       
   151  *   there are usually very few opportunities for failure other than a shortage
       
   152  *   of memory, error codes in low-level C++ string methods would be inconvenient,
       
   153  *   and the error code as the last parameter (ICU convention) would prevent
       
   154  *   the use of default parameter values.
       
   155  *   Instead, such methods set the UnicodeString into a "bogus" state
       
   156  *   (see isBogus()) if an error occurs.
       
   157  *
       
   158  * In string comparisons, two UnicodeString objects that are both "bogus"
       
   159  * compare equal (to be transitive and prevent endless loops in sorting),
       
   160  * and a "bogus" string compares less than any non-"bogus" one.
       
   161  *
       
   162  * Const UnicodeString methods are thread-safe. Multiple threads can use
       
   163  * const methods on the same UnicodeString object simultaneously,
       
   164  * but non-const methods must not be called concurrently (in multiple threads)
       
   165  * with any other (const or non-const) methods.
       
   166  *
       
   167  * Similarly, const UnicodeString & parameters are thread-safe.
       
   168  * One object may be passed in as such a parameter concurrently in multiple threads.
       
   169  * This includes the const UnicodeString & parameters for
       
   170  * copy construction, assignment, and cloning.
       
   171  *
       
   172  * <p>UnicodeString uses several storage methods.
       
   173  * String contents can be stored inside the UnicodeString object itself,
       
   174  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
       
   175  * Most of this is done transparently, but careful aliasing in particular provides
       
   176  * significant performance improvements.
       
   177  * Also, the internal buffer is accessible via special functions.
       
   178  * For details see the
       
   179  * <a href="http://icu.sourceforge.net/userguide/strings.html">User Guide Strings chapter</a>.</p>
       
   180  *
       
   181  * @see utf.h
       
   182  * @see CharacterIterator
       
   183  * @stable ICU 2.0
       
   184  */
       
   185 class U_COMMON_API UnicodeString : public Replaceable
       
   186 {
       
   187 public:
       
   188 
       
   189   /**
       
   190    * Type of the constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
       
   191    * which constructs a Unicode string from an invariant-character char * string.
       
   192    * Use the macro US_INV instead of the full qualification for this value.
       
   193    *
       
   194    * @see US_INV
       
   195    * @draft ICU 3.2
       
   196    */
       
   197   enum EInvariant {
       
   198     /** The single enumerator of this enum; the US_INV macro expands to UnicodeString::kInvariant.
       
   199      * @see EInvariant
       
   200      * @draft ICU 3.2
       
   201      */
       
   202     kInvariant
       
   203   };
       
   204 
       
   205   //========================================
       
   206   // Read-only operations
       
   207   //========================================
       
   208 
       
   209   /* Comparison - bitwise only - for international comparison use collation */
       
   210 
       
   211   /**
       
   212    * Equality operator. Performs only bitwise comparison.
       
   213    * @param text The UnicodeString to compare to this one.
       
   214    * @return TRUE if <TT>text</TT> contains the same characters as this one,
       
   215    * FALSE otherwise.
       
   216    * @stable ICU 2.0
       
   217    */
       
   218   inline UBool operator== (const UnicodeString& text) const;
       
   219 
       
   220   /**
       
   221    * Inequality operator. Performs only bitwise comparison.
       
   222    * @param text The UnicodeString to compare to this one.
       
   223    * @return FALSE if <TT>text</TT> contains the same characters as this one,
       
   224    * TRUE otherwise.
       
   225    * @stable ICU 2.0
       
   226    */
       
   227   inline UBool operator!= (const UnicodeString& text) const;
       
   228 
       
   229   /**
       
   230    * Greater than operator. Performs only bitwise comparison.
       
   231    * @param text The UnicodeString to compare to this one.
       
   232    * @return TRUE if the characters in this are bitwise
       
   233    * greater than the characters in <code>text</code>, FALSE otherwise
       
   234    * @stable ICU 2.0
       
   235    */
       
   236   inline UBool operator> (const UnicodeString& text) const;
       
   237 
       
   238   /**
       
   239    * Less than operator. Performs only bitwise comparison.
       
   240    * @param text The UnicodeString to compare to this one.
       
   241    * @return TRUE if the characters in this are bitwise
       
   242    * less than the characters in <code>text</code>, FALSE otherwise
       
   243    * @stable ICU 2.0
       
   244    */
       
   245   inline UBool operator< (const UnicodeString& text) const;
       
   246 
       
   247   /**
       
   248    * Greater than or equal operator. Performs only bitwise comparison.
       
   249    * @param text The UnicodeString to compare to this one.
       
   250    * @return TRUE if the characters in this are bitwise
       
   251    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
       
   252    * @stable ICU 2.0
       
   253    */
       
   254   inline UBool operator>= (const UnicodeString& text) const;
       
   255 
       
   256   /**
       
   257    * Less than or equal operator. Performs only bitwise comparison.
       
   258    * @param text The UnicodeString to compare to this one.
       
   259    * @return TRUE if the characters in this are bitwise
       
   260    * less than or equal to the characters in <code>text</code>, FALSE otherwise
       
   261    * @stable ICU 2.0
       
   262    */
       
   263   inline UBool operator<= (const UnicodeString& text) const;
       
   264 
       
   265   /**
       
   266    * Compare the characters bitwise in this UnicodeString to
       
   267    * the characters in <code>text</code>.
       
   268    * @param text The UnicodeString to compare to this one.
       
   269    * @return The result of bitwise character comparison: 0 if this
       
   270    * contains the same characters as <code>text</code>, -1 if the characters in
       
   271    * this are bitwise less than the characters in <code>text</code>, +1 if the
       
   272    * characters in this are bitwise greater than the characters
       
   273    * in <code>text</code>.
       
   274    * @stable ICU 2.0
       
   275    */
       
   276   inline int8_t compare(const UnicodeString& text) const;
       
   277 
       
   278   /**
       
   279    * Compare the characters bitwise in the range
       
   280    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
       
   281    * in <TT>text</TT>.
       
   282    * @param start the offset at which the compare operation begins
       
   283    * @param length the number of characters in this to compare.
       
   284    * @param text the other text to be compared against this string.
       
   285    * @return The result of bitwise character comparison: 0 if this
       
   286    * contains the same characters as <code>text</code>, -1 if the characters in
       
   287    * this are bitwise less than the characters in <code>text</code>, +1 if the
       
   288    * characters in this are bitwise greater than the characters
       
   289    * in <code>text</code>.
       
   290    * @stable ICU 2.0
       
   291    */
       
   292   inline int8_t compare(int32_t start,
       
   293          int32_t length,
       
   294          const UnicodeString& text) const;
       
   295 
       
   296   /**
       
   297    * Compare the characters bitwise in the range
       
   298    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
       
   299    * in <TT>srcText</TT> in the range
       
   300    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   301    * @param start the offset at which the compare operation begins
       
   302    * @param length the number of characters in this to compare.
       
   303    * @param srcText the text to be compared
       
   304    * @param srcStart the offset into <TT>srcText</TT> to start comparison
       
   305    * @param srcLength the number of characters in <TT>srcText</TT> to compare
       
   306    * @return The result of bitwise character comparison: 0 if this
       
   307    * contains the same characters as <code>srcText</code>, -1 if the characters in
       
   308    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
       
   309    * characters in this are bitwise greater than the characters
       
   310    * in <code>srcText</code>.
       
   311    * @stable ICU 2.0
       
   312    */
       
   313    inline int8_t compare(int32_t start,
       
   314          int32_t length,
       
   315          const UnicodeString& srcText,
       
   316          int32_t srcStart,
       
   317          int32_t srcLength) const;
       
   318 
       
   319   /**
       
   320    * Compare the characters bitwise in this UnicodeString with the first
       
   321    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
       
   322    * @param srcChars The characters to compare to this UnicodeString.
       
   323    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
       
   324    * @return The result of bitwise character comparison: 0 if this
       
   325    * contains the same characters as <code>srcChars</code>, -1 if the characters in
       
   326    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
       
   327    * characters in this are bitwise greater than the characters
       
   328    * in <code>srcChars</code>.
       
   329    * @stable ICU 2.0
       
   330    */
       
   331   inline int8_t compare(const UChar *srcChars,
       
   332          int32_t srcLength) const;
       
   333 
       
   334   /**
       
   335    * Compare the characters bitwise in the range
       
   336    * [<TT>start</TT>, <TT>start + length</TT>) with the first
       
   337    * <TT>length</TT> characters in <TT>srcChars</TT>
       
   338    * @param start the offset at which the compare operation begins
       
   339    * @param length the number of characters to compare.
       
   340    * @param srcChars the characters to be compared
       
   341    * @return The result of bitwise character comparison: 0 if this
       
   342    * contains the same characters as <code>srcChars</code>, -1 if the characters in
       
   343    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
       
   344    * characters in this are bitwise greater than the characters
       
   345    * in <code>srcChars</code>.
       
   346    * @stable ICU 2.0
       
   347    */
       
   348   inline int8_t compare(int32_t start,
       
   349          int32_t length,
       
   350          const UChar *srcChars) const;
       
   351 
       
   352   /**
       
   353    * Compare the characters bitwise in the range
       
   354    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
       
   355    * in <TT>srcChars</TT> in the range
       
   356    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   357    * @param start the offset at which the compare operation begins
       
   358    * @param length the number of characters in this to compare
       
   359    * @param srcChars the characters to be compared
       
   360    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
       
   361    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
       
   362    * @return The result of bitwise character comparison: 0 if this
       
   363    * contains the same characters as <code>srcChars</code>, -1 if the characters in
       
   364    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
       
   365    * characters in this are bitwise greater than the characters
       
   366    * in <code>srcChars</code>.
       
   367    * @stable ICU 2.0
       
   368    */
       
   369   inline int8_t compare(int32_t start,
       
   370          int32_t length,
       
   371          const UChar *srcChars,
       
   372          int32_t srcStart,
       
   373          int32_t srcLength) const;
       
   374 
       
   375   /**
       
   376    * Compare the characters bitwise in the range
       
   377    * [<TT>start</TT>, <TT>limit</TT>) with the characters
       
   378    * in <TT>srcText</TT> in the range
       
   379    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
       
   380    * @param start the offset at which the compare operation begins
       
   381    * @param limit the offset immediately following the compare operation
       
   382    * @param srcText the text to be compared
       
   383    * @param srcStart the offset into <TT>srcText</TT> to start comparison
       
   384    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
       
   385    * @return The result of bitwise character comparison: 0 if this
       
   386    * contains the same characters as <code>srcText</code>, -1 if the characters in
       
   387    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
       
   388    * characters in this are bitwise greater than the characters
       
   389    * in <code>srcText</code>.
       
   390    * @stable ICU 2.0
       
   391    */
       
   392   inline int8_t compareBetween(int32_t start,
       
   393             int32_t limit,
       
   394             const UnicodeString& srcText,
       
   395             int32_t srcStart,
       
   396             int32_t srcLimit) const;
       
   397 
       
   398   /**
       
   399    * Compare two Unicode strings in code point order.
       
   400    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   401    * if supplementary characters are present:
       
   402    *
       
   403    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   404    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   405    * which means that they compare as less than some other BMP characters like U+feff.
       
   406    * This function compares Unicode strings in code point order.
       
   407    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   408    *
       
   409    * @param text Another string to compare this one to.
       
   410    * @return a negative/zero/positive integer corresponding to whether
       
   411    * this string is less than/equal to/greater than the second one
       
   412    * in code point order
       
   413    * @stable ICU 2.0
       
   414    */
       
   415   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
       
   416 
       
   417   /**
       
   418    * Compare two Unicode strings in code point order.
       
   419    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   420    * if supplementary characters are present:
       
   421    *
       
   422    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   423    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   424    * which means that they compare as less than some other BMP characters like U+feff.
       
   425    * This function compares Unicode strings in code point order.
       
   426    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   427    *
       
   428    * @param start The start offset in this string at which the compare operation begins.
       
   429    * @param length The number of code units from this string to compare.
       
   430    * @param srcText Another string to compare this one to.
       
   431    * @return a negative/zero/positive integer corresponding to whether
       
   432    * this string is less than/equal to/greater than the second one
       
   433    * in code point order
       
   434    * @stable ICU 2.0
       
   435    */
       
   436   inline int8_t compareCodePointOrder(int32_t start,
       
   437                                       int32_t length,
       
   438                                       const UnicodeString& srcText) const;
       
   439 
       
   440   /**
       
   441    * Compare two Unicode strings in code point order.
       
   442    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   443    * if supplementary characters are present:
       
   444    *
       
   445    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   446    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   447    * which means that they compare as less than some other BMP characters like U+feff.
       
   448    * This function compares Unicode strings in code point order.
       
   449    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   450    *
       
   451    * @param start The start offset in this string at which the compare operation begins.
       
   452    * @param length The number of code units from this string to compare.
       
   453    * @param srcText Another string to compare this one to.
       
   454    * @param srcStart The start offset in that string at which the compare operation begins.
       
   455    * @param srcLength The number of code units from that string to compare.
       
   456    * @return a negative/zero/positive integer corresponding to whether
       
   457    * this string is less than/equal to/greater than the second one
       
   458    * in code point order
       
   459    * @stable ICU 2.0
       
   460    */
       
   461    inline int8_t compareCodePointOrder(int32_t start,
       
   462                                        int32_t length,
       
   463                                        const UnicodeString& srcText,
       
   464                                        int32_t srcStart,
       
   465                                        int32_t srcLength) const;
       
   466 
       
   467   /**
       
   468    * Compare two Unicode strings in code point order.
       
   469    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   470    * if supplementary characters are present:
       
   471    *
       
   472    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   473    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   474    * which means that they compare as less than some other BMP characters like U+feff.
       
   475    * This function compares Unicode strings in code point order.
       
   476    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   477    *
       
   478    * @param srcChars A pointer to another string to compare this one to.
       
   479    * @param srcLength The number of code units from that string to compare.
       
   480    * @return a negative/zero/positive integer corresponding to whether
       
   481    * this string is less than/equal to/greater than the second one
       
   482    * in code point order
       
   483    * @stable ICU 2.0
       
   484    */
       
   485   inline int8_t compareCodePointOrder(const UChar *srcChars,
       
   486                                       int32_t srcLength) const;
       
   487 
       
   488   /**
       
   489    * Compare two Unicode strings in code point order.
       
   490    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   491    * if supplementary characters are present:
       
   492    *
       
   493    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   494    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   495    * which means that they compare as less than some other BMP characters like U+feff.
       
   496    * This function compares Unicode strings in code point order.
       
   497    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   498    *
       
   499    * @param start The start offset in this string at which the compare operation begins.
       
   500    * @param length The number of code units from this string to compare.
       
   501    * @param srcChars A pointer to another string to compare this one to.
       
   502    * @return a negative/zero/positive integer corresponding to whether
       
   503    * this string is less than/equal to/greater than the second one
       
   504    * in code point order
       
   505    * @stable ICU 2.0
       
   506    */
       
   507   inline int8_t compareCodePointOrder(int32_t start,
       
   508                                       int32_t length,
       
   509                                       const UChar *srcChars) const;
       
   510 
       
   511   /**
       
   512    * Compare two Unicode strings in code point order.
       
   513    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   514    * if supplementary characters are present:
       
   515    *
       
   516    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   517    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   518    * which means that they compare as less than some other BMP characters like U+feff.
       
   519    * This function compares Unicode strings in code point order.
       
   520    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   521    *
       
   522    * @param start The start offset in this string at which the compare operation begins.
       
   523    * @param length The number of code units from this string to compare.
       
   524    * @param srcChars A pointer to another string to compare this one to.
       
   525    * @param srcStart The start offset in that string at which the compare operation begins.
       
   526    * @param srcLength The number of code units from that string to compare.
       
   527    * @return a negative/zero/positive integer corresponding to whether
       
   528    * this string is less than/equal to/greater than the second one
       
   529    * in code point order
       
   530    * @stable ICU 2.0
       
   531    */
       
   532   inline int8_t compareCodePointOrder(int32_t start,
       
   533                                       int32_t length,
       
   534                                       const UChar *srcChars,
       
   535                                       int32_t srcStart,
       
   536                                       int32_t srcLength) const;
       
   537 
       
   538   /**
       
   539    * Compare two Unicode strings in code point order.
       
   540    * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
       
   541    * if supplementary characters are present:
       
   542    *
       
   543    * In UTF-16, supplementary characters (with code points U+10000 and above) are
       
   544    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
       
   545    * which means that they compare as less than some other BMP characters like U+feff.
       
   546    * This function compares Unicode strings in code point order.
       
   547    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
       
   548    *
       
   549    * @param start The start offset in this string at which the compare operation begins.
       
   550    * @param limit The offset after the last code unit from this string to compare.
       
   551    * @param srcText Another string to compare this one to.
       
   552    * @param srcStart The start offset in that string at which the compare operation begins.
       
   553    * @param srcLimit The offset after the last code unit from that string to compare.
       
   554    * @return a negative/zero/positive integer corresponding to whether
       
   555    * this string is less than/equal to/greater than the second one
       
   556    * in code point order
       
   557    * @stable ICU 2.0
       
   558    */
       
   559   inline int8_t compareCodePointOrderBetween(int32_t start,
       
   560                                              int32_t limit,
       
   561                                              const UnicodeString& srcText,
       
   562                                              int32_t srcStart,
       
   563                                              int32_t srcLimit) const;
       
   564 
       
   565   /**
       
   566    * Compare two strings case-insensitively using full case folding.
       
   567    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
       
   568    *
       
   569    * @param text Another string to compare this one to.
       
   570    * @param options A bit set of options:
       
   571    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   572    *     Comparison in code unit order with default case folding.
       
   573    *
       
   574    *   - U_COMPARE_CODE_POINT_ORDER
       
   575    *     Set to choose code point order instead of code unit order
       
   576    *     (see u_strCompare for details).
       
   577    *
       
   578    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   579    *
       
   580    * @return A negative, zero, or positive integer indicating the comparison result.
       
   581    * @stable ICU 2.0
       
   582    */
       
   583   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
       
   584 
       
   585   /**
       
   586    * Compare two strings case-insensitively using full case folding.
       
   587    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
       
   588    *
       
   589    * @param start The start offset in this string at which the compare operation begins.
       
   590    * @param length The number of code units from this string to compare.
       
   591    * @param srcText Another string to compare this one to.
       
   592    * @param options A bit set of options:
       
   593    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   594    *     Comparison in code unit order with default case folding.
       
   595    *
       
   596    *   - U_COMPARE_CODE_POINT_ORDER
       
   597    *     Set to choose code point order instead of code unit order
       
   598    *     (see u_strCompare for details).
       
   599    *
       
   600    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   601    *
       
   602    * @return A negative, zero, or positive integer indicating the comparison result.
       
   603    * @stable ICU 2.0
       
   604    */
       
   605   inline int8_t caseCompare(int32_t start,
       
   606          int32_t length,
       
   607          const UnicodeString& srcText,
       
   608          uint32_t options) const;
       
   609 
       
   610   /**
       
   611    * Compare two strings case-insensitively using full case folding.
       
   612    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
       
   613    *
       
   614    * @param start The start offset in this string at which the compare operation begins.
       
   615    * @param length The number of code units from this string to compare.
       
   616    * @param srcText Another string to compare this one to.
       
   617    * @param srcStart The start offset in that string at which the compare operation begins.
       
   618    * @param srcLength The number of code units from that string to compare.
       
   619    * @param options A bit set of options:
       
   620    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   621    *     Comparison in code unit order with default case folding.
       
   622    *
       
   623    *   - U_COMPARE_CODE_POINT_ORDER
       
   624    *     Set to choose code point order instead of code unit order
       
   625    *     (see u_strCompare for details).
       
   626    *
       
   627    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   628    *
       
   629    * @return A negative, zero, or positive integer indicating the comparison result.
       
   630    * @stable ICU 2.0
       
   631    */
       
   632   inline int8_t caseCompare(int32_t start,
       
   633          int32_t length,
       
   634          const UnicodeString& srcText,
       
   635          int32_t srcStart,
       
   636          int32_t srcLength,
       
   637          uint32_t options) const;
       
   638 
       
   639   /**
       
   640    * Compare two strings case-insensitively using full case folding.
       
   641    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
       
   642    *
       
   643    * @param srcChars A pointer to another string to compare this one to.
       
   644    * @param srcLength The number of code units from that string to compare.
       
   645    * @param options A bit set of options:
       
   646    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   647    *     Comparison in code unit order with default case folding.
       
   648    *
       
   649    *   - U_COMPARE_CODE_POINT_ORDER
       
   650    *     Set to choose code point order instead of code unit order
       
   651    *     (see u_strCompare for details).
       
   652    *
       
   653    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   654    *
       
   655    * @return A negative, zero, or positive integer indicating the comparison result.
       
   656    * @stable ICU 2.0
       
   657    */
       
   658   inline int8_t caseCompare(const UChar *srcChars,
       
   659          int32_t srcLength,
       
   660          uint32_t options) const;
       
   661 
       
   662   /**
       
   663    * Compare two strings case-insensitively using full case folding.
       
   664    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
       
   665    *
       
   666    * @param start The start offset in this string at which the compare operation begins.
       
   667    * @param length The number of code units from this string to compare.
       
   668    * @param srcChars A pointer to another string to compare this one to.
       
   669    * @param options A bit set of options:
       
   670    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   671    *     Comparison in code unit order with default case folding.
       
   672    *
       
   673    *   - U_COMPARE_CODE_POINT_ORDER
       
   674    *     Set to choose code point order instead of code unit order
       
   675    *     (see u_strCompare for details).
       
   676    *
       
   677    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   678    *
       
   679    * @return A negative, zero, or positive integer indicating the comparison result.
       
   680    * @stable ICU 2.0
       
   681    */
       
   682   inline int8_t caseCompare(int32_t start,
       
   683          int32_t length,
       
   684          const UChar *srcChars,
       
   685          uint32_t options) const;
       
   686 
       
   687   /**
       
   688    * Compare two strings case-insensitively using full case folding.
       
   689    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
       
   690    *
       
   691    * @param start The start offset in this string at which the compare operation begins.
       
   692    * @param length The number of code units from this string to compare.
       
   693    * @param srcChars A pointer to another string to compare this one to.
       
   694    * @param srcStart The start offset in that string at which the compare operation begins.
       
   695    * @param srcLength The number of code units from that string to compare.
       
   696    * @param options A bit set of options:
       
   697    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   698    *     Comparison in code unit order with default case folding.
       
   699    *
       
   700    *   - U_COMPARE_CODE_POINT_ORDER
       
   701    *     Set to choose code point order instead of code unit order
       
   702    *     (see u_strCompare for details).
       
   703    *
       
   704    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   705    *
       
   706    * @return A negative, zero, or positive integer indicating the comparison result.
       
   707    * @stable ICU 2.0
       
   708    */
       
   709   inline int8_t caseCompare(int32_t start,
       
   710          int32_t length,
       
   711          const UChar *srcChars,
       
   712          int32_t srcStart,
       
   713          int32_t srcLength,
       
   714          uint32_t options) const;
       
   715 
       
   716   /**
       
   717    * Compare two strings case-insensitively using full case folding.
       
   718    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
       
   719    *
       
   720    * @param start The start offset in this string at which the compare operation begins.
       
   721    * @param limit The offset after the last code unit from this string to compare.
       
   722    * @param srcText Another string to compare this one to.
       
   723    * @param srcStart The start offset in that string at which the compare operation begins.
       
   724    * @param srcLimit The offset after the last code unit from that string to compare.
       
   725    * @param options A bit set of options:
       
   726    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   727    *     Comparison in code unit order with default case folding.
       
   728    *
       
   729    *   - U_COMPARE_CODE_POINT_ORDER
       
   730    *     Set to choose code point order instead of code unit order
       
   731    *     (see u_strCompare for details).
       
   732    *
       
   733    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   734    *
       
   735    * @return A negative, zero, or positive integer indicating the comparison result.
       
   736    * @stable ICU 2.0
       
   737    */
       
   738   inline int8_t caseCompareBetween(int32_t start,
       
   739             int32_t limit,
       
   740             const UnicodeString& srcText,
       
   741             int32_t srcStart,
       
   742             int32_t srcLimit,
       
   743             uint32_t options) const;
       
   744 
       
   745   /**
       
   746    * Determine if this starts with the characters in <TT>text</TT>
       
   747    * @param text The text to match.
       
   748    * @return TRUE if this starts with the characters in <TT>text</TT>,
       
   749    * FALSE otherwise
       
   750    * @stable ICU 2.0
       
   751    */
       
   752   inline UBool startsWith(const UnicodeString& text) const;
       
   753 
       
   754   /**
       
   755    * Determine if this starts with the characters in <TT>srcText</TT>
       
   756    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   757    * @param srcText The text to match.
       
   758    * @param srcStart the offset into <TT>srcText</TT> to start matching
       
   759    * @param srcLength the number of characters in <TT>srcText</TT> to match
       
   760    * @return TRUE if this starts with the characters in <TT>srcText</TT>,
       
   761    * FALSE otherwise
       
   762    * @stable ICU 2.0
       
   763    */
       
   764   inline UBool startsWith(const UnicodeString& srcText,
       
   765             int32_t srcStart,
       
   766             int32_t srcLength) const;
       
   767 
       
   768   /**
       
   769    * Determine if this starts with the characters in <TT>srcChars</TT>
       
   770    * @param srcChars The characters to match.
       
   771    * @param srcLength the number of characters in <TT>srcChars</TT>
       
   772    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
       
   773    * FALSE otherwise
       
   774    * @stable ICU 2.0
       
   775    */
       
   776   inline UBool startsWith(const UChar *srcChars,
       
   777             int32_t srcLength) const;
       
   778 
       
   779   /**
       
   780    * Determine if this starts with the characters in <TT>srcChars</TT>
       
   781    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   782    * @param srcChars The characters to match.
       
   783    * @param srcStart the offset into <TT>srcChars</TT> to start matching
       
   784    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
   785    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
       
   786    * @stable ICU 2.0
       
   787    */
       
   788   inline UBool startsWith(const UChar *srcChars,
       
   789             int32_t srcStart,
       
   790             int32_t srcLength) const;
       
   791 
       
   792   /**
       
   793    * Determine if this ends with the characters in <TT>text</TT>
       
   794    * @param text The text to match.
       
   795    * @return TRUE if this ends with the characters in <TT>text</TT>,
       
   796    * FALSE otherwise
       
   797    * @stable ICU 2.0
       
   798    */
       
   799   inline UBool endsWith(const UnicodeString& text) const;
       
   800 
       
   801   /**
       
   802    * Determine if this ends with the characters in <TT>srcText</TT>
       
   803    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   804    * @param srcText The text to match.
       
   805    * @param srcStart the offset into <TT>srcText</TT> to start matching
       
   806    * @param srcLength the number of characters in <TT>srcText</TT> to match
       
   807    * @return TRUE if this ends with the characters in <TT>srcText</TT>,
       
   808    * FALSE otherwise
       
   809    * @stable ICU 2.0
       
   810    */
       
   811   inline UBool endsWith(const UnicodeString& srcText,
       
   812           int32_t srcStart,
       
   813           int32_t srcLength) const;
       
   814 
       
   815   /**
       
   816    * Determine if this ends with the characters in <TT>srcChars</TT>
       
   817    * @param srcChars The characters to match.
       
   818    * @param srcLength the number of characters in <TT>srcChars</TT>
       
   819    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
       
   820    * FALSE otherwise
       
   821    * @stable ICU 2.0
       
   822    */
       
   823   inline UBool endsWith(const UChar *srcChars,
       
   824           int32_t srcLength) const;
       
   825 
       
   826   /**
       
   827    * Determine if this ends with the characters in <TT>srcChars</TT>
       
   828    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
   829    * @param srcChars The characters to match.
       
   830    * @param srcStart the offset into <TT>srcChars</TT> to start matching
       
   831    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
   832    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
       
   833    * FALSE otherwise
       
   834    * @stable ICU 2.0
       
   835    */
       
   836   inline UBool endsWith(const UChar *srcChars,
       
   837           int32_t srcStart,
       
   838           int32_t srcLength) const;
       
   839 
       
   840 
       
   841   /* Searching - bitwise only */
       
   842 
       
   843   /**
       
   844    * Locate in this the first occurrence of the characters in <TT>text</TT>,
       
   845    * using bitwise comparison.
       
   846    * @param text The text to search for.
       
   847    * @return The offset into this of the start of <TT>text</TT>,
       
   848    * or -1 if not found.
       
   849    * @stable ICU 2.0
       
   850    */
       
   851   inline int32_t indexOf(const UnicodeString& text) const;
       
   852 
       
   853   /**
       
   854    * Locate in this the first occurrence of the characters in <TT>text</TT>
       
   855    * starting at offset <TT>start</TT>, using bitwise comparison.
       
   856    * @param text The text to search for.
       
   857    * @param start The offset at which searching will start.
       
   858    * @return The offset into this of the start of <TT>text</TT>,
       
   859    * or -1 if not found.
       
   860    * @stable ICU 2.0
       
   861    */
       
   862   inline int32_t indexOf(const UnicodeString& text,
       
   863               int32_t start) const;
       
   864 
       
   865   /**
       
   866    * Locate in this the first occurrence in the range
       
   867    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
   868    * in <TT>text</TT>, using bitwise comparison.
       
   869    * @param text The text to search for.
       
   870    * @param start The offset at which searching will start.
       
   871    * @param length The number of characters to search
       
   872    * @return The offset into this of the start of <TT>text</TT>,
       
   873    * or -1 if not found.
       
   874    * @stable ICU 2.0
       
   875    */
       
   876   inline int32_t indexOf(const UnicodeString& text,
       
   877               int32_t start,
       
   878               int32_t length) const;
       
   879 
       
   880   /**
       
   881    * Locate in this the first occurrence in the range
       
   882    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
   883    *  in <TT>srcText</TT> in the range
       
   884    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
       
   885    * using bitwise comparison.
       
   886    * @param srcText The text to search for.
       
   887    * @param srcStart the offset into <TT>srcText</TT> at which
       
   888    * to start matching
       
   889    * @param srcLength the number of characters in <TT>srcText</TT> to match
       
   890    * @param start the offset into this at which to start matching
       
   891    * @param length the number of characters in this to search
       
   892    * @return The offset into this of the start of <TT>srcText</TT>,
       
   893    * or -1 if not found.
       
   894    * @stable ICU 2.0
       
   895    */
       
   896   inline int32_t indexOf(const UnicodeString& srcText,
       
   897               int32_t srcStart,
       
   898               int32_t srcLength,
       
   899               int32_t start,
       
   900               int32_t length) const;
       
   901 
       
   902   /**
       
   903    * Locate in this the first occurrence of the characters in
       
   904    * <TT>srcChars</TT>
       
   905    * starting at offset <TT>start</TT>, using bitwise comparison.
       
   906    * @param srcChars The text to search for.
       
   907    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
   908    * @param start the offset into this at which to start matching
       
   909    * @return The offset into this of the start of <TT>srcChars</TT>,
       
   910    * or -1 if not found.
       
   911    * @stable ICU 2.0
       
   912    */
       
   913   inline int32_t indexOf(const UChar *srcChars,
       
   914               int32_t srcLength,
       
   915               int32_t start) const;
       
   916 
       
   917   /**
       
   918    * Locate in this the first occurrence in the range
       
   919    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
   920    * in <TT>srcChars</TT>, using bitwise comparison.
       
   921    * @param srcChars The text to search for.
       
   922    * @param srcLength the number of characters in <TT>srcChars</TT>
       
   923    * @param start The offset at which searching will start.
       
   924    * @param length The number of characters to search
       
   925    * @return The offset into this of the start of <TT>srcChars</TT>,
       
   926    * or -1 if not found.
       
   927    * @stable ICU 2.0
       
   928    */
       
   929   inline int32_t indexOf(const UChar *srcChars,
       
   930               int32_t srcLength,
       
   931               int32_t start,
       
   932               int32_t length) const;
       
   933 
       
   934   /**
       
   935    * Locate in this the first occurrence in the range
       
   936    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
   937    * in <TT>srcChars</TT> in the range
       
   938    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
       
   939    * using bitwise comparison.
       
   940    * @param srcChars The text to search for.
       
   941    * @param srcStart the offset into <TT>srcChars</TT> at which
       
   942    * to start matching
       
   943    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
   944    * @param start the offset into this at which to start matching
       
   945    * @param length the number of characters in this to search
       
   946    * @return The offset into this of the start of <TT>srcChars</TT>,
       
   947    * or -1 if not found.
       
   948    * @stable ICU 2.0
       
   949    */
       
   950   int32_t indexOf(const UChar *srcChars,
       
   951               int32_t srcStart,
       
   952               int32_t srcLength,
       
   953               int32_t start,
       
   954               int32_t length) const;
       
   955 
       
   956   /**
       
   957    * Locate in this the first occurrence of the BMP code point <code>c</code>,
       
   958    * using bitwise comparison.
       
   959    * @param c The code unit to search for.
       
   960    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
   961    * @stable ICU 2.0
       
   962    */
       
   963   inline int32_t indexOf(UChar c) const;
       
   964 
       
   965   /**
       
   966    * Locate in this the first occurrence of the code point <TT>c</TT>,
       
   967    * using bitwise comparison.
       
   968    *
       
   969    * @param c The code point to search for.
       
   970    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
   971    * @stable ICU 2.0
       
   972    */
       
   973   inline int32_t indexOf(UChar32 c) const;
       
   974 
       
   975   /**
       
   976    * Locate in this the first occurrence of the BMP code point <code>c</code>,
       
   977    * starting at offset <TT>start</TT>, using bitwise comparison.
       
   978    * @param c The code unit to search for.
       
   979    * @param start The offset at which searching will start.
       
   980    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
   981    * @stable ICU 2.0
       
   982    */
       
   983   inline int32_t indexOf(UChar c,
       
   984               int32_t start) const;
       
   985 
       
   986   /**
       
   987    * Locate in this the first occurrence of the code point <TT>c</TT>
       
   988    * starting at offset <TT>start</TT>, using bitwise comparison.
       
   989    *
       
   990    * @param c The code point to search for.
       
   991    * @param start The offset at which searching will start.
       
   992    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
   993    * @stable ICU 2.0
       
   994    */
       
   995   inline int32_t indexOf(UChar32 c,
       
   996               int32_t start) const;
       
   997 
       
   998   /**
       
   999    * Locate in this the first occurrence of the BMP code point <code>c</code>
       
  1000    * in the range [<TT>start</TT>, <TT>start + length</TT>),
       
  1001    * using bitwise comparison.
       
  1002    * @param c The code unit to search for.
       
  1003    * @param start the offset into this at which to start matching
       
  1004    * @param length the number of characters in this to search
       
  1005    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1006    * @stable ICU 2.0
       
  1007    */
       
  1008   inline int32_t indexOf(UChar c,
       
  1009               int32_t start,
       
  1010               int32_t length) const;
       
  1011 
       
  1012   /**
       
  1013    * Locate in this the first occurrence of the code point <TT>c</TT>
       
  1014    * in the range [<TT>start</TT>, <TT>start + length</TT>),
       
  1015    * using bitwise comparison.
       
  1016    *
       
  1017    * @param c The code point to search for.
       
  1018    * @param start the offset into this at which to start matching
       
  1019    * @param length the number of characters in this to search
       
  1020    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1021    * @stable ICU 2.0
       
  1022    */
       
  1023   inline int32_t indexOf(UChar32 c,
       
  1024               int32_t start,
       
  1025               int32_t length) const;
       
  1026 
       
  1027   /**
       
  1028    * Locate in this the last occurrence of the characters in <TT>text</TT>,
       
  1029    * using bitwise comparison.
       
  1030    * @param text The text to search for.
       
  1031    * @return The offset into this of the start of <TT>text</TT>,
       
  1032    * or -1 if not found.
       
  1033    * @stable ICU 2.0
       
  1034    */
       
  1035   inline int32_t lastIndexOf(const UnicodeString& text) const;
       
  1036 
       
  1037   /**
       
  1038    * Locate in this the last occurrence of the characters in <TT>text</TT>
       
  1039    * starting at offset <TT>start</TT>, using bitwise comparison.
       
  1040    * @param text The text to search for.
       
  1041    * @param start The offset at which searching will start.
       
  1042    * @return The offset into this of the start of <TT>text</TT>,
       
  1043    * or -1 if not found.
       
  1044    * @stable ICU 2.0
       
  1045    */
       
  1046   inline int32_t lastIndexOf(const UnicodeString& text,
       
  1047               int32_t start) const;
       
  1048 
       
  1049   /**
       
  1050    * Locate in this the last occurrence in the range
       
  1051    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
  1052    * in <TT>text</TT>, using bitwise comparison.
       
  1053    * @param text The text to search for.
       
  1054    * @param start The offset at which searching will start.
       
  1055    * @param length The number of characters to search
       
  1056    * @return The offset into this of the start of <TT>text</TT>,
       
  1057    * or -1 if not found.
       
  1058    * @stable ICU 2.0
       
  1059    */
       
  1060   inline int32_t lastIndexOf(const UnicodeString& text,
       
  1061               int32_t start,
       
  1062               int32_t length) const;
       
  1063 
       
  1064   /**
       
  1065    * Locate in this the last occurrence in the range
       
  1066    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
  1067    * in <TT>srcText</TT> in the range
       
  1068    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
       
  1069    * using bitwise comparison.
       
  1070    * @param srcText The text to search for.
       
  1071    * @param srcStart the offset into <TT>srcText</TT> at which
       
  1072    * to start matching
       
  1073    * @param srcLength the number of characters in <TT>srcText</TT> to match
       
  1074    * @param start the offset into this at which to start matching
       
  1075    * @param length the number of characters in this to search
       
  1076    * @return The offset into this of the start of <TT>srcText</TT>,
       
  1077    * or -1 if not found.
       
  1078    * @stable ICU 2.0
       
  1079    */
       
  1080   inline int32_t lastIndexOf(const UnicodeString& srcText,
       
  1081               int32_t srcStart,
       
  1082               int32_t srcLength,
       
  1083               int32_t start,
       
  1084               int32_t length) const;
       
  1085 
       
  1086   /**
       
  1087    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
       
  1088    * starting at offset <TT>start</TT>, using bitwise comparison.
       
  1089    * @param srcChars The text to search for.
       
  1090    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
  1091    * @param start the offset into this at which to start matching
       
  1092    * @return The offset into this of the start of <TT>srcChars</TT>,
       
  1093    * or -1 if not found.
       
  1094    * @stable ICU 2.0
       
  1095    */
       
  1096   inline int32_t lastIndexOf(const UChar *srcChars,
       
  1097               int32_t srcLength,
       
  1098               int32_t start) const;
       
  1099 
       
  1100   /**
       
  1101    * Locate in this the last occurrence in the range
       
  1102    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
  1103    * in <TT>srcChars</TT>, using bitwise comparison.
       
  1104    * @param srcChars The text to search for.
       
  1105    * @param srcLength the number of characters in <TT>srcChars</TT>
       
  1106    * @param start The offset at which searching will start.
       
  1107    * @param length The number of characters to search
       
  1108    * @return The offset into this of the start of <TT>srcChars</TT>,
       
  1109    * or -1 if not found.
       
  1110    * @stable ICU 2.0
       
  1111    */
       
  1112   inline int32_t lastIndexOf(const UChar *srcChars,
       
  1113               int32_t srcLength,
       
  1114               int32_t start,
       
  1115               int32_t length) const;
       
  1116 
       
  1117   /**
       
  1118    * Locate in this the last occurrence in the range
       
  1119    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
       
  1120    * in <TT>srcChars</TT> in the range
       
  1121    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
       
  1122    * using bitwise comparison.
       
  1123    * @param srcChars The text to search for.
       
  1124    * @param srcStart the offset into <TT>srcChars</TT> at which
       
  1125    * to start matching
       
  1126    * @param srcLength the number of characters in <TT>srcChars</TT> to match
       
  1127    * @param start the offset into this at which to start matching
       
  1128    * @param length the number of characters in this to search
       
  1129    * @return The offset into this of the start of <TT>srcChars</TT>,
       
  1130    * or -1 if not found.
       
  1131    * @stable ICU 2.0
       
  1132    */
       
  1133   int32_t lastIndexOf(const UChar *srcChars,
       
  1134               int32_t srcStart,
       
  1135               int32_t srcLength,
       
  1136               int32_t start,
       
  1137               int32_t length) const;
       
  1138 
       
  1139   /**
       
  1140    * Locate in this the last occurrence of the BMP code point <code>c</code>,
       
  1141    * using bitwise comparison.
       
  1142    * @param c The code unit to search for.
       
  1143    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1144    * @stable ICU 2.0
       
  1145    */
       
  1146   inline int32_t lastIndexOf(UChar c) const;
       
  1147 
       
  1148   /**
       
  1149    * Locate in this the last occurrence of the code point <TT>c</TT>,
       
  1150    * using bitwise comparison.
       
  1151    *
       
  1152    * @param c The code point to search for.
       
  1153    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1154    * @stable ICU 2.0
       
  1155    */
       
  1156   inline int32_t lastIndexOf(UChar32 c) const;
       
  1157 
       
  1158   /**
       
  1159    * Locate in this the last occurrence of the BMP code point <code>c</code>
       
  1160    * starting at offset <TT>start</TT>, using bitwise comparison.
       
  1161    * @param c The code unit to search for.
       
  1162    * @param start The offset at which searching will start.
       
  1163    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1164    * @stable ICU 2.0
       
  1165    */
       
  1166   inline int32_t lastIndexOf(UChar c,
       
  1167               int32_t start) const;
       
  1168 
       
  1169   /**
       
  1170    * Locate in this the last occurrence of the code point <TT>c</TT>
       
  1171    * starting at offset <TT>start</TT>, using bitwise comparison.
       
  1172    *
       
  1173    * @param c The code point to search for.
       
  1174    * @param start The offset at which searching will start.
       
  1175    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1176    * @stable ICU 2.0
       
  1177    */
       
  1178   inline int32_t lastIndexOf(UChar32 c,
       
  1179               int32_t start) const;
       
  1180 
       
  1181   /**
       
  1182    * Locate in this the last occurrence of the BMP code point <code>c</code>
       
  1183    * in the range [<TT>start</TT>, <TT>start + length</TT>),
       
  1184    * using bitwise comparison.
       
  1185    * @param c The code unit to search for.
       
  1186    * @param start the offset into this at which to start matching
       
  1187    * @param length the number of characters in this to search
       
  1188    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1189    * @stable ICU 2.0
       
  1190    */
       
  1191   inline int32_t lastIndexOf(UChar c,
       
  1192               int32_t start,
       
  1193               int32_t length) const;
       
  1194 
       
  1195   /**
       
  1196    * Locate in this the last occurrence of the code point <TT>c</TT>
       
  1197    * in the range [<TT>start</TT>, <TT>start + length</TT>),
       
  1198    * using bitwise comparison.
       
  1199    *
       
  1200    * @param c The code point to search for.
       
  1201    * @param start the offset into this at which to start matching
       
  1202    * @param length the number of characters in this to search
       
  1203    * @return The offset into this of <TT>c</TT>, or -1 if not found.
       
  1204    * @stable ICU 2.0
       
  1205    */
       
  1206   inline int32_t lastIndexOf(UChar32 c,
       
  1207               int32_t start,
       
  1208               int32_t length) const;
       
  1209 
       
  1210 
       
  1211   /* Character access */
       
  1212 
       
  1213   /**
       
  1214    * Return the code unit at offset <tt>offset</tt>.
       
  1215    * If the offset is not valid (0..length()-1) then U+ffff is returned.
       
  1216    * @param offset a valid offset into the text
       
  1217    * @return the code unit at offset <tt>offset</tt>
       
  1218    *         or 0xffff if the offset is not valid for this string
       
  1219    * @stable ICU 2.0
       
  1220    */
       
  1221   inline UChar charAt(int32_t offset) const;
       
  1222 
       
  1223   /**
       
  1224    * Return the code unit at offset <tt>offset</tt>.
       
  1225    * If the offset is not valid (0..length()-1) then U+ffff is returned.
       
  1226    * @param offset a valid offset into the text
       
  1227    * @return the code unit at offset <tt>offset</tt>
       
  1228    * @stable ICU 2.0
       
  1229    */
       
  1230   inline UChar operator[] (int32_t offset) const;
       
  1231 
       
  1232   /**
       
  1233    * Return the code point that contains the code unit
       
  1234    * at offset <tt>offset</tt>.
       
  1235    * If the offset is not valid (0..length()-1) then U+ffff is returned.
       
  1236    * @param offset a valid offset into the text
       
  1237    * that indicates the text offset of any of the code units
       
  1238    * that will be assembled into a code point (21-bit value) and returned
       
  1239    * @return the code point of text at <tt>offset</tt>
       
  1240    *         or 0xffff if the offset is not valid for this string
       
  1241    * @stable ICU 2.0
       
  1242    */
       
  1243   inline UChar32 char32At(int32_t offset) const;
       
  1244 
       
  1245   /**
       
  1246    * Adjust a random-access offset so that
       
  1247    * it points to the beginning of a Unicode character.
       
  1248    * The offset that is passed in points to
       
  1249    * any code unit of a code point,
       
  1250    * while the returned offset will point to the first code unit
       
  1251    * of the same code point.
       
  1252    * In UTF-16, if the input offset points to a second surrogate
       
  1253    * of a surrogate pair, then the returned offset will point
       
  1254    * to the first surrogate.
       
  1255    * @param offset a valid offset into one code point of the text
       
  1256    * @return offset of the first code unit of the same code point
       
  1257    * @see U16_SET_CP_START
       
  1258    * @stable ICU 2.0
       
  1259    */
       
  1260   inline int32_t getChar32Start(int32_t offset) const;
       
  1261 
       
  1262   /**
       
  1263    * Adjust a random-access offset so that
       
  1264    * it points behind a Unicode character.
       
  1265    * The offset that is passed in points behind
       
  1266    * any code unit of a code point,
       
  1267    * while the returned offset will point behind the last code unit
       
  1268    * of the same code point.
       
  1269    * In UTF-16, if the input offset points behind the first surrogate
       
  1270    * (i.e., to the second surrogate)
       
  1271    * of a surrogate pair, then the returned offset will point
       
  1272    * behind the second surrogate (i.e., to the code unit following the pair).
       
  1273    * @param offset a valid offset after any code unit of a code point of the text
       
  1274    * @return offset of the first code unit after the same code point
       
  1275    * @see U16_SET_CP_LIMIT
       
  1276    * @stable ICU 2.0
       
  1277    */
       
  1278   inline int32_t getChar32Limit(int32_t offset) const;
       
  1279 
       
  1280   /**
       
  1281    * Move the code unit index along the string by delta code points.
       
  1282    * Interpret the input index as a code unit-based offset into the string,
       
  1283    * move the index forward or backward by delta code points, and
       
  1284    * return the resulting index.
       
  1285    * The input index should point to the first code unit of a code point,
       
  1286    * if there is more than one.
       
  1287    *
       
  1288    * Both input and output indexes are code unit-based as for all
       
  1289    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
       
  1290    * If delta<0 then the index is moved backward (toward the start of the string).
       
  1291    * If delta>0 then the index is moved forward (toward the end of the string).
       
  1292    *
       
  1293    * This behaves like CharacterIterator::move32(delta, kCurrent).
       
  1294    *
       
  1295    * Behavior for out-of-bounds indexes:
       
  1296    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
       
  1297    * if the input index<0 then it is pinned to 0;
       
  1298    * if it is index>length() then it is pinned to length().
       
  1299    * Afterwards, the index is moved by <code>delta</code> code points
       
  1300    * forward or backward,
       
  1301    * but no further backward than to 0 and no further forward than to length().
       
  1302    * The resulting index return value will be in between 0 and length(), inclusively.
       
  1303    *
       
  1304    * Examples:
       
  1305    * <pre>
       
  1306    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
       
  1307    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
       
  1308    *
       
  1309    * // initial index: position of U+10000
       
  1310    * int32_t index=1;
       
  1311    *
       
  1312    * // the following examples will all result in index==4, position of U+10ffff
       
  1313    *
       
  1314    * // skip 2 code points from some position in the string
       
  1315    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
       
  1316    *
       
  1317    * // go to the 3rd code point from the start of s (0-based)
       
  1318    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
       
  1319    *
       
  1320    * // go to the next-to-last code point of s
       
  1321    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
       
  1322    * </pre>
       
  1323    *
       
  1324    * @param index input code unit index
       
  1325    * @param delta (signed) code point count to move the index forward or backward
       
  1326    *        in the string
       
  1327    * @return the resulting code unit index
       
  1328    * @stable ICU 2.0
       
  1329    */
       
  1330   int32_t moveIndex32(int32_t index, int32_t delta) const;
       
  1331 
       
  1332   /* Substring extraction */
       
  1333 
       
  1334   /**
       
  1335    * Copy the characters in the range
       
  1336    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
       
  1337    * beginning at <tt>dstStart</tt>.
       
  1338    * If the string aliases to <code>dst</code> itself as an external buffer,
       
  1339    * then extract() will not copy the contents.
       
  1340    *
       
  1341    * @param start offset of first character which will be copied into the array
       
  1342    * @param length the number of characters to extract
       
  1343    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
       
  1344    * must be at least (<tt>dstStart + length</tt>).
       
  1345    * @param dstStart the offset in <TT>dst</TT> where the first character
       
  1346    * will be extracted
       
  1347    * @stable ICU 2.0
       
  1348    */
       
  1349   inline void extract(int32_t start,
       
  1350            int32_t length,
       
  1351            UChar *dst,
       
  1352            int32_t dstStart = 0) const;
       
  1353 
       
  1354   /**
       
  1355    * Copy the contents of the string into dest.
       
  1356    * This is a convenience function that
       
  1357    * checks if there is enough space in dest,
       
  1358    * extracts the entire string if possible,
       
  1359    * and NUL-terminates dest if possible.
       
  1360    *
       
  1361    * If the string fits into dest but cannot be NUL-terminated
       
  1362    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
       
  1363    * If the string itself does not fit into dest
       
  1364    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
       
  1365    *
       
  1366    * If the string aliases to <code>dest</code> itself as an external buffer,
       
  1367    * then extract() will not copy the contents.
       
  1368    *
       
  1369    * @param dest Destination string buffer.
       
  1370    * @param destCapacity Number of UChars available at dest.
       
  1371    * @param errorCode ICU error code.
       
  1372    * @return length()
       
  1373    * @stable ICU 2.0
       
  1374    */
       
  1375   int32_t
       
  1376   extract(UChar *dest, int32_t destCapacity,
       
  1377           UErrorCode &errorCode) const;
       
  1378 
       
  1379   /**
       
  1380    * Copy the characters in the range
       
  1381    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
       
  1382    * <tt>target</tt>.
       
  1383    * @param start offset of first character which will be copied
       
  1384    * @param length the number of characters to extract
       
  1385    * @param target UnicodeString into which to copy characters.
       
  1386    * No value is returned; the extracted characters are placed in <TT>target</TT>.
       
  1387    * @stable ICU 2.0
       
  1388    */
       
  1389   inline void extract(int32_t start,
       
  1390            int32_t length,
       
  1391            UnicodeString& target) const;
       
  1392 
       
  1393   /**
       
  1394    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
       
  1395    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
       
  1396    * @param start offset of first character which will be copied into the array
       
  1397    * @param limit offset immediately following the last character to be copied
       
  1398    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
       
  1399    * must be at least (<tt>dstStart + (limit - start)</tt>).
       
  1400    * @param dstStart the offset in <TT>dst</TT> where the first character
       
  1401    * will be extracted
       
  1402    * @stable ICU 2.0
       
  1403    */
       
  1404   inline void extractBetween(int32_t start,
       
  1405               int32_t limit,
       
  1406               UChar *dst,
       
  1407               int32_t dstStart = 0) const;
       
  1408 
       
  1409   /**
       
  1410    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
       
  1411    * into the UnicodeString <tt>target</tt>.  Replaceable API.
       
  1412    * @param start offset of first character which will be copied
       
  1413    * @param limit offset immediately following the last character to be copied
       
  1414    * @param target UnicodeString into which to copy characters.
       
  1415    * No value is returned; the extracted characters are placed in <TT>target</TT>.
       
  1416    * @stable ICU 2.0
       
  1417    */
       
  1418   virtual void extractBetween(int32_t start,
       
  1419               int32_t limit,
       
  1420               UnicodeString& target) const;
       
  1421 
       
  1422   /**
       
  1423    * Copy the characters in the range 
       
  1424    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
       
  1425    * All characters must be invariant (see utypes.h).
       
  1426    * Use US_INV as the last, signature-distinguishing parameter.
       
  1427    *
       
  1428    * This function does not write any more than <code>targetCapacity</code>
       
  1429    * characters but returns the length of the entire output string
       
  1430    * so that one can allocate a larger buffer and call the function again
       
  1431    * if necessary.
       
  1432    * The output string is NUL-terminated if possible.
       
  1433    *
       
  1434    * @param start offset of first character which will be copied
       
  1435    * @param startLength the number of characters to extract
       
  1436    * @param target the target buffer for extraction, can be NULL
       
  1437    *               if targetCapacity is 0
       
  1438    * @param targetCapacity the length of the target buffer
       
  1439    * @param inv Signature-distinguishing parameter, use US_INV.
       
  1440    * @return the output string length, not including the terminating NUL
       
  1441    * @draft ICU 3.2
       
  1442    */
       
  1443   int32_t extract(int32_t start,
       
  1444            int32_t startLength,
       
  1445            char *target,
       
  1446            int32_t targetCapacity,
       
  1447            enum EInvariant inv) const;
       
  1448 
       
  1449 #if !UCONFIG_NO_CONVERSION
       
  1450 
       
  1451   /**
       
  1452    * Copy the characters in the range
       
  1453    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
       
  1454    * in a specified codepage.
       
  1455    * The output string is NUL-terminated.
       
  1456    *
       
  1457    * Recommendation: For invariant-character strings use
       
  1458    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
       
  1459    * because it avoids object code dependencies of UnicodeString on
       
  1460    * the conversion code.
       
  1461    *
       
  1462    * @param start offset of first character which will be copied
       
  1463    * @param startLength the number of characters to extract
       
  1464    * @param target the target buffer for extraction
       
  1465    * @param codepage the desired codepage for the characters.  0 has
       
  1466    * the special meaning of the default codepage
       
  1467    * If <code>codepage</code> is an empty string (<code>""</code>),
       
  1468    * then a simple conversion is performed on the codepage-invariant
       
  1469    * subset ("invariant characters") of the platform encoding. See utypes.h.
       
  1470    * If <TT>target</TT> is NULL, then the number of bytes required for
       
  1471    * <TT>target</TT> is returned. It is assumed that the target is big enough
       
  1472    * to fit all of the characters.
       
  1473    * @return the output string length, not including the terminating NUL
       
  1474    * @stable ICU 2.0
       
  1475    */
       
  1476   inline int32_t extract(int32_t start,
       
  1477                  int32_t startLength,
       
  1478                  char *target,
       
  1479                  const char *codepage = 0) const;
       
  1480 
       
  1481   /**
       
  1482    * Copy the characters in the range
       
  1483    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
       
  1484    * in a specified codepage.
       
  1485    * This function does not write any more than <code>targetLength</code>
       
  1486    * characters but returns the length of the entire output string
       
  1487    * so that one can allocate a larger buffer and call the function again
       
  1488    * if necessary.
       
  1489    * The output string is NUL-terminated if possible.
       
  1490    *
       
  1491    * Recommendation: For invariant-character strings use
       
  1492    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
       
  1493    * because it avoids object code dependencies of UnicodeString on
       
  1494    * the conversion code.
       
  1495    *
       
  1496    * @param start offset of first character which will be copied
       
  1497    * @param startLength the number of characters to extract
       
  1498    * @param target the target buffer for extraction
       
  1499    * @param targetLength the length of the target buffer
       
  1500    * @param codepage the desired codepage for the characters.  0 has
       
  1501    * the special meaning of the default codepage
       
  1502    * If <code>codepage</code> is an empty string (<code>""</code>),
       
  1503    * then a simple conversion is performed on the codepage-invariant
       
  1504    * subset ("invariant characters") of the platform encoding. See utypes.h.
       
  1505    * If <TT>target</TT> is NULL, then the number of bytes required for
       
  1506    * <TT>target</TT> is returned.
       
  1507    * @return the output string length, not including the terminating NUL
       
  1508    * @stable ICU 2.0
       
  1509    */
       
  1510   int32_t extract(int32_t start,
       
  1511            int32_t startLength,
       
  1512            char *target,
       
  1513            uint32_t targetLength,
       
  1514            const char *codepage = 0) const;
       
  1515 
       
  1516   /**
       
  1517    * Convert the UnicodeString into a codepage string using an existing UConverter.
       
  1518    * The output string is NUL-terminated if possible.
       
  1519    *
       
  1520    * This function avoids the overhead of opening and closing a converter if
       
  1521    * multiple strings are extracted.
       
  1522    *
       
  1523    * @param dest destination string buffer, can be NULL if destCapacity==0
       
  1524    * @param destCapacity the number of chars available at dest
       
  1525    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
       
  1526    *        or NULL for the default converter
       
  1527    * @param errorCode normal ICU error code
       
  1528    * @return the length of the output string, not counting the terminating NUL;
       
  1529    *         if the length is greater than destCapacity, then the string will not fit
       
  1530    *         and a buffer of the indicated length would need to be passed in
       
  1531    * @stable ICU 2.0
       
  1532    */
       
  1533   int32_t extract(char *dest, int32_t destCapacity,
       
  1534                   UConverter *cnv,
       
  1535                   UErrorCode &errorCode) const;
       
  1536 
       
  1537 #endif
       
  1538 
       
  1539   /* Length operations */
       
  1540 
       
  1541   /**
       
  1542    * Return the length of the UnicodeString object.
       
  1543    * The length is the number of UChar code units that are in the UnicodeString.
       
  1544    * If you want the number of code points, please use countChar32().
       
  1545    * @return the length of the UnicodeString object
       
  1546    * @see countChar32
       
  1547    * @stable ICU 2.0
       
  1548    */
       
  1549   inline int32_t length(void) const;
       
  1550 
       
  1551   /**
       
  1552    * Count Unicode code points in the length UChar code units of the string.
       
  1553    * A code point may occupy either one or two UChar code units.
       
  1554    * Counting code points involves reading all code units.
       
  1555    *
       
  1556    * This function is basically the inverse of moveIndex32().
       
  1557    *
       
  1558    * @param start the index of the first code unit to check
       
  1559    * @param length the number of UChar code units to check
       
  1560    * @return the number of code points in the specified code units
       
  1561    * @see length
       
  1562    * @stable ICU 2.0
       
  1563    */
       
  1564   int32_t
       
  1565   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
       
  1566 
       
  1567   /**
       
  1568    * Check if the length UChar code units of the string
       
  1569    * contain more Unicode code points than a certain number.
       
  1570    * This is more efficient than counting all code points in this part of the string
       
  1571    * and comparing that number with a threshold.
       
  1572    * This function may not need to scan the string at all if the length
       
  1573    * falls within a certain range, and
       
  1574    * never needs to count more than 'number+1' code points.
       
  1575    * Logically equivalent to (countChar32(start, length)>number).
       
  1576    * A Unicode code point may occupy either one or two UChar code units.
       
  1577    *
       
  1578    * @param start the index of the first code unit to check (0 for the entire string)
       
  1579    * @param length the number of UChar code units to check
       
  1580    *               (use INT32_MAX for the entire string; remember that start/length
       
  1581    *                values are pinned)
       
  1582    * @param number The number of code points in the (sub)string is compared against
       
  1583    *               the 'number' parameter.
       
  1584    * @return Boolean value for whether the string contains more Unicode code points
       
  1585    *         than 'number'. Same as (u_countChar32(s, length)>number).
       
  1586    * @see countChar32
       
  1587    * @see u_strHasMoreChar32Than
       
  1588    * @stable ICU 2.4
       
  1589    */
       
  1590   UBool
       
  1591   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
       
  1592 
       
  1593   /**
       
  1594    * Determine if this string is empty.
       
  1595    * @return TRUE if this string contains 0 characters, FALSE otherwise.
       
  1596    * @stable ICU 2.0
       
  1597    */
       
  1598   inline UBool isEmpty(void) const;
       
  1599 
       
  1600   /**
       
  1601    * Return the capacity of the internal buffer of the UnicodeString object.
       
  1602    * This is useful together with the getBuffer functions.
       
  1603    * See there for details.
       
  1604    *
       
  1605    * @return the number of UChars available in the internal buffer
       
  1606    * @see getBuffer
       
  1607    * @stable ICU 2.0
       
  1608    */
       
  1609   inline int32_t getCapacity(void) const;
       
  1610 
       
  1611   /* Other operations */
       
  1612 
       
  1613   /**
       
  1614    * Generate a hash code for this object.
       
  1615    * @return The hash code of this UnicodeString.
       
  1616    * @stable ICU 2.0
       
  1617    */
       
  1618   inline int32_t hashCode(void) const;
       
  1619 
       
  1620   /**
       
  1621    * Determine if this object contains a valid string.
       
  1622    * A bogus string has no value. It is different from an empty string.
       
  1623    * It can be used to indicate that no string value is available.
       
  1624    * getBuffer() and getTerminatedBuffer() return NULL, and
       
  1625    * length() returns 0.
       
  1626    *
       
  1627    * @return TRUE if the string is valid, FALSE otherwise
       
  1628    * @see setToBogus()
       
  1629    * @stable ICU 2.0
       
  1630    */
       
  1631   inline UBool isBogus(void) const;
       
  1632 
       
  1633 
       
  1634   //========================================
       
  1635   // Write operations
       
  1636   //========================================
       
  1637 
       
  1638   /* Assignment operations */
       
  1639 
       
  1640   /**
       
  1641    * Assignment operator.  Replace the characters in this UnicodeString
       
  1642    * with the characters from <TT>srcText</TT>.
       
  1643    * @param srcText The text containing the characters to replace
       
  1644    * @return a reference to this
       
  1645    * @stable ICU 2.0
       
  1646    */
       
  1647   UnicodeString &operator=(const UnicodeString &srcText);
       
  1648 
       
  1649   /**
       
  1650    * Almost the same as the assignment operator.
       
  1651    * Replace the characters in this UnicodeString
       
  1652    * with the characters from <code>srcText</code>.
       
  1653    *
       
  1654    * This function works the same for all strings except for ones that
       
  1655    * are readonly aliases.
       
  1656    * Starting with ICU 2.4, the assignment operator and the copy constructor
       
  1657    * allocate a new buffer and copy the buffer contents even for readonly aliases.
       
  1658    * This function implements the old, more efficient but less safe behavior
       
  1659    * of making this string also a readonly alias to the same buffer.
       
  1660    * The fastCopyFrom function must be used only if it is known that the lifetime of
       
  1661    * this UnicodeString is at least as long as the lifetime of the aliased buffer
       
  1662    * including its contents, for example for strings from resource bundles
       
  1663    * or aliases to string contents.
       
  1664    *
       
  1665    * @param src The text containing the characters to replace.
       
  1666    * @return a reference to this
       
  1667    * @stable ICU 2.4
       
  1668    */
       
  1669   UnicodeString &fastCopyFrom(const UnicodeString &src);
       
  1670 
       
  1671   /**
       
  1672    * Assignment operator.  Replace the characters in this UnicodeString
       
  1673    * with the code unit <TT>ch</TT>.
       
  1674    * @param ch the code unit to replace
       
  1675    * @return a reference to this
       
  1676    * @stable ICU 2.0
       
  1677    */
       
  1678   inline UnicodeString& operator= (UChar ch);
       
  1679 
       
  1680   /**
       
  1681    * Assignment operator.  Replace the characters in this UnicodeString
       
  1682    * with the code point <TT>ch</TT>.
       
  1683    * @param ch the code point to replace
       
  1684    * @return a reference to this
       
  1685    * @stable ICU 2.0
       
  1686    */
       
  1687   inline UnicodeString& operator= (UChar32 ch);
       
  1688 
       
  1689   /**
       
  1690    * Set the text in the UnicodeString object to the characters
       
  1691    * in <TT>srcText</TT> in the range
       
  1692    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
       
  1693    * <TT>srcText</TT> is not modified.
       
  1694    * @param srcText the source for the new characters
       
  1695    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  1696    * will be obtained
       
  1697    * @return a reference to this
       
  1698    * @stable ICU 2.2
       
  1699    */
       
  1700   inline UnicodeString& setTo(const UnicodeString& srcText,
       
  1701                int32_t srcStart);
       
  1702 
       
  1703   /**
       
  1704    * Set the text in the UnicodeString object to the characters
       
  1705    * in <TT>srcText</TT> in the range
       
  1706    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
  1707    * <TT>srcText</TT> is not modified.
       
  1708    * @param srcText the source for the new characters
       
  1709    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  1710    * will be obtained
       
  1711    * @param srcLength the number of characters in <TT>srcText</TT> in the
       
  1712    * replace string.
       
  1713    * @return a reference to this
       
  1714    * @stable ICU 2.0
       
  1715    */
       
  1716   inline UnicodeString& setTo(const UnicodeString& srcText,
       
  1717                int32_t srcStart,
       
  1718                int32_t srcLength);
       
  1719 
       
  1720   /**
       
  1721    * Set the text in the UnicodeString object to the characters in
       
  1722    * <TT>srcText</TT>.
       
  1723    * <TT>srcText</TT> is not modified.
       
  1724    * @param srcText the source for the new characters
       
  1725    * @return a reference to this
       
  1726    * @stable ICU 2.0
       
  1727    */
       
  1728   inline UnicodeString& setTo(const UnicodeString& srcText);
       
  1729 
       
  1730   /**
       
  1731    * Set the characters in the UnicodeString object to the characters
       
  1732    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
       
  1733    * @param srcChars the source for the new characters
       
  1734    * @param srcLength the number of Unicode characters in srcChars.
       
  1735    * @return a reference to this
       
  1736    * @stable ICU 2.0
       
  1737    */
       
  1738   inline UnicodeString& setTo(const UChar *srcChars,
       
  1739                int32_t srcLength);
       
  1740 
       
  1741   /**
       
  1742    * Set the characters in the UnicodeString object to the code unit
       
  1743    * <TT>srcChar</TT>.
       
  1744    * @param srcChar the code unit which becomes the UnicodeString's character
       
  1745    * content
       
  1746    * @return a reference to this
       
  1747    * @stable ICU 2.0
       
  1748    */
       
  1749   UnicodeString& setTo(UChar srcChar);
       
  1750 
       
  1751   /**
       
  1752    * Set the characters in the UnicodeString object to the code point
       
  1753    * <TT>srcChar</TT>.
       
  1754    * @param srcChar the code point which becomes the UnicodeString's character
       
  1755    * content
       
  1756    * @return a reference to this
       
  1757    * @stable ICU 2.0
       
  1758    */
       
  1759   UnicodeString& setTo(UChar32 srcChar);
       
  1760 
       
  1761   /**
       
  1762    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
       
  1763    * The text will be used for the UnicodeString object, but
       
  1764    * it will not be released when the UnicodeString is destroyed.
       
  1765    * This has copy-on-write semantics:
       
  1766    * When the string is modified, then the buffer is first copied into
       
  1767    * newly allocated memory.
       
  1768    * The aliased buffer is never modified.
       
  1769    * In an assignment to another UnicodeString, the text will be aliased again,
       
  1770    * so that both strings then alias the same readonly-text.
       
  1771    *
       
  1772    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
       
  1773    *                     This must be true if <code>textLength==-1</code>.
       
  1774    * @param text The characters to alias for the UnicodeString.
       
  1775    * @param textLength The number of Unicode characters in <code>text</code> to alias.
       
  1776    *                   If -1, then this function will determine the length
       
  1777    *                   by calling <code>u_strlen()</code>.
       
  1778    * @return a reference to this
       
  1779    * @stable ICU 2.0
       
  1780    */
       
  1781   UnicodeString &setTo(UBool isTerminated,
       
  1782                        const UChar *text,
       
  1783                        int32_t textLength);
       
  1784 
       
  1785   /**
       
  1786    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
       
  1787    * The text will be used for the UnicodeString object, but
       
  1788    * it will not be released when the UnicodeString is destroyed.
       
  1789    * This has write-through semantics:
       
  1790    * For as long as the capacity of the buffer is sufficient, write operations
       
  1791    * will directly affect the buffer. When more capacity is necessary, then
       
  1792    * a new buffer will be allocated and the contents copied as with regularly
       
  1793    * constructed strings.
       
  1794    * In an assignment to another UnicodeString, the buffer will be copied.
       
  1795    * The extract(UChar *dst) function detects whether the dst pointer is the same
       
  1796    * as the string buffer itself and will in this case not copy the contents.
       
  1797    *
       
  1798    * @param buffer The characters to alias for the UnicodeString.
       
  1799    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
       
  1800    * @param buffCapacity The size of <code>buffer</code> in UChars.
       
  1801    * @return a reference to this
       
  1802    * @stable ICU 2.0
       
  1803    */
       
  1804   UnicodeString &setTo(UChar *buffer,
       
  1805                        int32_t buffLength,
       
  1806                        int32_t buffCapacity);
       
  1807 
       
  1808   /**
       
  1809    * Make this UnicodeString object invalid.
       
  1810    * The string will test TRUE with isBogus().
       
  1811    *
       
  1812    * A bogus string has no value. It is different from an empty string.
       
  1813    * It can be used to indicate that no string value is available.
       
  1814    * getBuffer() and getTerminatedBuffer() return NULL, and
       
  1815    * length() returns 0.
       
  1816    *
       
  1817    * This utility function is used throughout the UnicodeString
       
  1818    * implementation to indicate that a UnicodeString operation failed,
       
  1819    * and may be used in other functions,
       
  1820    * especially but not exclusively when such functions do not
       
  1821    * take a UErrorCode for simplicity.
       
  1822    *
       
  1823    * The following methods, and no others, will clear a string object's bogus flag:
       
  1824    * - remove()
       
  1825    * - remove(0, INT32_MAX)
       
  1826    * - truncate(0)
       
  1827    * - operator=() (assignment operator)
       
  1828    * - setTo(...)
       
  1829    *
       
  1830    * The simplest way to turn a bogus string into an empty one
       
  1831    * is to use the remove() function.
       
  1832    * Examples for other functions that are equivalent to "set to empty string":
       
  1833    * \code
       
  1834    * if(s.isBogus()) {
       
  1835    *   s.remove();           // set to an empty string (remove all), or
       
  1836    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
       
  1837    *   s.truncate(0);        // set to an empty string (complete truncation), or
       
  1838    *   s=UnicodeString();    // assign an empty string, or
       
  1839    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
       
  1840    *   static const UChar nul=0;
       
  1841    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
       
  1842    * }
       
  1843    * \endcode
       
  1844    *
       
  1845    * @see isBogus()
       
  1846    * @stable ICU 2.0
       
  1847    */
       
  1848   void setToBogus();
       
  1849 
       
  1850   /**
       
  1851    * Set the character at the specified offset to the specified character.
       
  1852    * @param offset A valid offset into the text of the character to set
       
  1853    * @param ch The new character
       
  1854    * @return A reference to this
       
  1855    * @stable ICU 2.0
       
  1856    */
       
  1857   UnicodeString& setCharAt(int32_t offset,
       
  1858                UChar ch);
       
  1859 
       
  1860 
       
  1861   /* Append operations */
       
  1862 
       
  1863   /**
       
  1864    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
       
  1865    * object.
       
  1866    * @param ch the code unit to be appended
       
  1867    * @return a reference to this
       
  1868    * @stable ICU 2.0
       
  1869    */
       
  1870  inline  UnicodeString& operator+= (UChar ch);
       
  1871 
       
  1872   /**
       
  1873    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
       
  1874    * object.
       
  1875    * @param ch the code point to be appended
       
  1876    * @return a reference to this
       
  1877    * @stable ICU 2.0
       
  1878    */
       
  1879  inline  UnicodeString& operator+= (UChar32 ch);
       
  1880 
       
  1881   /**
       
  1882    * Append operator. Append the characters in <TT>srcText</TT> to the
       
  1883    * UnicodeString object. <TT>srcText</TT> is
       
  1884    * not modified.
       
  1885    * @param srcText the source for the new characters
       
  1886    * @return a reference to this
       
  1887    * @stable ICU 2.0
       
  1888    */
       
  1889   inline UnicodeString& operator+= (const UnicodeString& srcText);
       
  1890 
       
  1891   /**
       
  1892    * Append the characters
       
  1893    * in <TT>srcText</TT> in the range
       
  1894    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
       
  1895    * UnicodeString object. <TT>srcText</TT>
       
  1896    * is not modified.
       
  1897    * @param srcText the source for the new characters
       
  1898    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  1899    * will be obtained
       
  1900    * @param srcLength the number of characters in <TT>srcText</TT> in
       
  1901    * the append string
       
  1902    * @return a reference to this
       
  1903    * @stable ICU 2.0
       
  1904    */
       
  1905   inline UnicodeString& append(const UnicodeString& srcText,
       
  1906             int32_t srcStart,
       
  1907             int32_t srcLength);
       
  1908 
       
  1909   /**
       
  1910    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
       
  1911    * <TT>srcText</TT> is not modified.
       
  1912    * @param srcText the source for the new characters
       
  1913    * @return a reference to this
       
  1914    * @stable ICU 2.0
       
  1915    */
       
  1916   inline UnicodeString& append(const UnicodeString& srcText);
       
  1917 
       
  1918   /**
       
  1919    * Append the characters in <TT>srcChars</TT> in the range
       
  1920    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
       
  1921    * object.
       
  1922    * <TT>srcChars</TT> is not modified.
       
  1923    * @param srcChars the source for the new characters
       
  1924    * @param srcStart the offset into <TT>srcChars</TT> where new characters
       
  1925    * will be obtained
       
  1926    * @param srcLength the number of characters in <TT>srcChars</TT> in
       
  1927    * the append string
       
  1928    * @return a reference to this
       
  1929    * @stable ICU 2.0
       
  1930    */
       
  1931   inline UnicodeString& append(const UChar *srcChars,
       
  1932             int32_t srcStart,
       
  1933             int32_t srcLength);
       
  1934 
       
  1935   /**
       
  1936    * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
       
  1937    * <TT>srcChars</TT> is not modified.
       
  1938    * @param srcChars the source for the new characters
       
  1939    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
       
  1940    * @return a reference to this
       
  1941    * @stable ICU 2.0
       
  1942    */
       
  1943   inline UnicodeString& append(const UChar *srcChars,
       
  1944             int32_t srcLength);
       
  1945 
       
  1946   /**
       
  1947    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
       
  1948    * @param srcChar the code unit to append
       
  1949    * @return a reference to this
       
  1950    * @stable ICU 2.0
       
  1951    */
       
  1952   inline UnicodeString& append(UChar srcChar);
       
  1953 
       
  1954   /**
       
  1955    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
       
  1956    * @param srcChar the code point to append
       
  1957    * @return a reference to this
       
  1958    * @stable ICU 2.0
       
  1959    */
       
  1960   inline UnicodeString& append(UChar32 srcChar);
       
  1961 
       
  1962 
       
  1963   /* Insert operations */
       
  1964 
       
  1965   /**
       
  1966    * Insert the characters in <TT>srcText</TT> in the range
       
  1967    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
       
  1968    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
       
  1969    * @param start the offset where the insertion begins
       
  1970    * @param srcText the source for the new characters
       
  1971    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  1972    * will be obtained
       
  1973    * @param srcLength the number of characters in <TT>srcText</TT> in
       
  1974    * the insert string
       
  1975    * @return a reference to this
       
  1976    * @stable ICU 2.0
       
  1977    */
       
  1978   inline UnicodeString& insert(int32_t start,
       
  1979             const UnicodeString& srcText,
       
  1980             int32_t srcStart,
       
  1981             int32_t srcLength);
       
  1982 
       
  1983   /**
       
  1984    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
       
  1985    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
       
  1986    * @param start the offset where the insertion begins
       
  1987    * @param srcText the source for the new characters
       
  1988    * @return a reference to this
       
  1989    * @stable ICU 2.0
       
  1990    */
       
  1991   inline UnicodeString& insert(int32_t start,
       
  1992             const UnicodeString& srcText);
       
  1993 
       
  1994   /**
       
  1995    * Insert the characters in <TT>srcChars</TT> in the range
       
  1996    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
       
  1997    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
       
  1998    * @param start the offset at which the insertion begins
       
  1999    * @param srcChars the source for the new characters
       
  2000    * @param srcStart the offset into <TT>srcChars</TT> where new characters
       
  2001    * will be obtained
       
  2002    * @param srcLength the number of characters in <TT>srcChars</TT>
       
  2003    * in the insert string
       
  2004    * @return a reference to this
       
  2005    * @stable ICU 2.0
       
  2006    */
       
  2007   inline UnicodeString& insert(int32_t start,
       
  2008             const UChar *srcChars,
       
  2009             int32_t srcStart,
       
  2010             int32_t srcLength);
       
  2011 
       
  2012   /**
       
  2013    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
       
  2014    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
       
  2015    * @param start the offset where the insertion begins
       
  2016    * @param srcChars the source for the new characters
       
  2017    * @param srcLength the number of Unicode characters in srcChars.
       
  2018    * @return a reference to this
       
  2019    * @stable ICU 2.0
       
  2020    */
       
  2021   inline UnicodeString& insert(int32_t start,
       
  2022             const UChar *srcChars,
       
  2023             int32_t srcLength);
       
  2024 
       
  2025   /**
       
  2026    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
       
  2027    * offset <TT>start</TT>.
       
  2028    * @param start the offset at which the insertion occurs
       
  2029    * @param srcChar the code unit to insert
       
  2030    * @return a reference to this
       
  2031    * @stable ICU 2.0
       
  2032    */
       
  2033   inline UnicodeString& insert(int32_t start,
       
  2034             UChar srcChar);
       
  2035 
       
  2036   /**
       
  2037    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
       
  2038    * offset <TT>start</TT>.
       
  2039    * @param start the offset at which the insertion occurs
       
  2040    * @param srcChar the code point to insert
       
  2041    * @return a reference to this
       
  2042    * @stable ICU 2.0
       
  2043    */
       
  2044   inline UnicodeString& insert(int32_t start,
       
  2045             UChar32 srcChar);
       
  2046 
       
  2047 
       
  2048   /* Replace operations */
       
  2049 
       
  2050   /**
       
  2051    * Replace the characters in the range
       
  2052    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
       
  2053    * <TT>srcText</TT> in the range
       
  2054    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
       
  2055    * <TT>srcText</TT> is not modified.
       
  2056    * @param start the offset at which the replace operation begins
       
  2057    * @param length the number of characters to replace. The character at
       
  2058    * <TT>start + length</TT> is not modified.
       
  2059    * @param srcText the source for the new characters
       
  2060    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  2061    * will be obtained
       
  2062    * @param srcLength the number of characters in <TT>srcText</TT> in
       
  2063    * the replace string
       
  2064    * @return a reference to this
       
  2065    * @stable ICU 2.0
       
  2066    */
       
  2067   UnicodeString& replace(int32_t start,
       
  2068              int32_t length,
       
  2069              const UnicodeString& srcText,
       
  2070              int32_t srcStart,
       
  2071              int32_t srcLength);
       
  2072 
       
  2073   /**
       
  2074    * Replace the characters in the range
       
  2075    * [<TT>start</TT>, <TT>start + length</TT>)
       
  2076    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
       
  2077    *  not modified.
       
  2078    * @param start the offset at which the replace operation begins
       
  2079    * @param length the number of characters to replace. The character at
       
  2080    * <TT>start + length</TT> is not modified.
       
  2081    * @param srcText the source for the new characters
       
  2082    * @return a reference to this
       
  2083    * @stable ICU 2.0
       
  2084    */
       
  2085   UnicodeString& replace(int32_t start,
       
  2086              int32_t length,
       
  2087              const UnicodeString& srcText);
       
  2088 
       
  2089   /**
       
  2090    * Replace the characters in the range
       
  2091    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
       
  2092    * <TT>srcChars</TT> in the range
       
  2093    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
       
  2094    * is not modified.
       
  2095    * @param start the offset at which the replace operation begins
       
  2096    * @param length the number of characters to replace.  The character at
       
  2097    * <TT>start + length</TT> is not modified.
       
  2098    * @param srcChars the source for the new characters
       
  2099    * @param srcStart the offset into <TT>srcChars</TT> where new characters
       
  2100    * will be obtained
       
  2101    * @param srcLength the number of characters in <TT>srcChars</TT>
       
  2102    * in the replace string
       
  2103    * @return a reference to this
       
  2104    * @stable ICU 2.0
       
  2105    */
       
  2106   UnicodeString& replace(int32_t start,
       
  2107              int32_t length,
       
  2108              const UChar *srcChars,
       
  2109              int32_t srcStart,
       
  2110              int32_t srcLength);
       
  2111 
       
  2112   /**
       
  2113    * Replace the characters in the range
       
  2114    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
       
  2115    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
       
  2116    * @param start the offset at which the replace operation begins
       
  2117    * @param length number of characters to replace.  The character at
       
  2118    * <TT>start + length</TT> is not modified.
       
  2119    * @param srcChars the source for the new characters
       
  2120    * @param srcLength the number of Unicode characters in srcChars
       
  2121    * @return a reference to this
       
  2122    * @stable ICU 2.0
       
  2123    */
       
  2124   inline UnicodeString& replace(int32_t start,
       
  2125              int32_t length,
       
  2126              const UChar *srcChars,
       
  2127              int32_t srcLength);
       
  2128 
       
  2129   /**
       
  2130    * Replace the characters in the range
       
  2131    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
       
  2132    * <TT>srcChar</TT>.
       
  2133    * @param start the offset at which the replace operation begins
       
  2134    * @param length the number of characters to replace.  The character at
       
  2135    * <TT>start + length</TT> is not modified.
       
  2136    * @param srcChar the new code unit
       
  2137    * @return a reference to this
       
  2138    * @stable ICU 2.0
       
  2139    */
       
  2140   inline UnicodeString& replace(int32_t start,
       
  2141              int32_t length,
       
  2142              UChar srcChar);
       
  2143 
       
  2144   /**
       
  2145    * Replace the characters in the range
       
  2146    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
       
  2147    * <TT>srcChar</TT>.
       
  2148    * @param start the offset at which the replace operation begins
       
  2149    * @param length the number of characters to replace.  The character at
       
  2150    * <TT>start + length</TT> is not modified.
       
  2151    * @param srcChar the new code point
       
  2152    * @return a reference to this
       
  2153    * @stable ICU 2.0
       
  2154    */
       
  2155   inline UnicodeString& replace(int32_t start,
       
  2156              int32_t length,
       
  2157              UChar32 srcChar);
       
  2158 
       
  2159   /**
       
  2160    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
       
  2161    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
       
  2162    * @param start the offset at which the replace operation begins
       
  2163    * @param limit the offset immediately following the replace range
       
  2164    * @param srcText the source for the new characters
       
  2165    * @return a reference to this
       
  2166    * @stable ICU 2.0
       
  2167    */
       
  2168   inline UnicodeString& replaceBetween(int32_t start,
       
  2169                 int32_t limit,
       
  2170                 const UnicodeString& srcText);
       
  2171 
       
  2172   /**
       
  2173    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
       
  2174    * with the characters in <TT>srcText</TT> in the range
       
  2175    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
       
  2176    * @param start the offset at which the replace operation begins
       
  2177    * @param limit the offset immediately following the replace range
       
  2178    * @param srcText the source for the new characters
       
  2179    * @param srcStart the offset into <TT>srcText</TT> where new characters
       
  2180    * will be obtained
       
  2181    * @param srcLimit the offset immediately following the range to copy
       
  2182    * in <TT>srcText</TT>
       
  2183    * @return a reference to this
       
  2184    * @stable ICU 2.0
       
  2185    */
       
  2186   inline UnicodeString& replaceBetween(int32_t start,
       
  2187                 int32_t limit,
       
  2188                 const UnicodeString& srcText,
       
  2189                 int32_t srcStart,
       
  2190                 int32_t srcLimit);
       
  2191 
       
  2192   /**
       
  2193    * Replace a substring of this object with the given text.
       
  2194    * @param start the beginning index, inclusive; <code>0 <= start
       
  2195    * <= limit</code>.
       
  2196    * @param limit the ending index, exclusive; <code>start <= limit
       
  2197    * <= length()</code>.
       
  2198    * @param text the text to replace characters <code>start</code>
       
  2199    * to <code>limit - 1</code>
       
  2200    * @stable ICU 2.0
       
  2201    */
       
  2202   virtual void handleReplaceBetween(int32_t start,
       
  2203                                     int32_t limit,
       
  2204                                     const UnicodeString& text);
       
  2205 
       
  2206   /**
       
  2207    * Replaceable API
       
  2208    * @return TRUE if it has MetaData
       
  2209    * @stable ICU 2.4
       
  2210    */
       
  2211   virtual UBool hasMetaData() const;
       
  2212 
       
  2213   /**
       
  2214    * Copy a substring of this object, retaining attribute (out-of-band)
       
  2215    * information.  This method is used to duplicate or reorder substrings.
       
  2216    * The destination index must not overlap the source range.
       
  2217    *
       
  2218    * @param start the beginning index, inclusive; <code>0 <= start <=
       
  2219    * limit</code>.
       
  2220    * @param limit the ending index, exclusive; <code>start <= limit <=
       
  2221    * length()</code>.
       
  2222    * @param dest the destination index.  The characters from
       
  2223    * <code>start..limit-1</code> will be copied to <code>dest</code>.
       
  2224    * Implementations of this method may assume that <code>dest <= start ||
       
  2225    * dest >= limit</code>.
       
  2226    * @stable ICU 2.0
       
  2227    */
       
  2228   virtual void copy(int32_t start, int32_t limit, int32_t dest);
       
  2229 
       
  2230   /* Search and replace operations */
       
  2231 
       
  2232   /**
       
  2233    * Replace all occurrences of characters in oldText with the characters
       
  2234    * in newText
       
  2235    * @param oldText the text containing the search text
       
  2236    * @param newText the text containing the replacement text
       
  2237    * @return a reference to this
       
  2238    * @stable ICU 2.0
       
  2239    */
       
  2240   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
       
  2241                 const UnicodeString& newText);
       
  2242 
       
  2243   /**
       
  2244    * Replace all occurrences of characters in oldText with characters
       
  2245    * in newText
       
  2246    * in the range [<TT>start</TT>, <TT>start + length</TT>).
       
  2247    * @param start the start of the range in which replace will be performed
       
  2248    * @param length the length of the range in which replace will be performed
       
  2249    * @param oldText the text containing the search text
       
  2250    * @param newText the text containing the replacement text
       
  2251    * @return a reference to this
       
  2252    * @stable ICU 2.0
       
  2253    */
       
  2254   inline UnicodeString& findAndReplace(int32_t start,
       
  2255                 int32_t length,
       
  2256                 const UnicodeString& oldText,
       
  2257                 const UnicodeString& newText);
       
  2258 
       
  2259   /**
       
  2260    * Replace all occurrences of characters in oldText in the range
       
  2261    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
       
  2262    * in newText in the range
       
  2263    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
       
  2264    * in the range [<TT>start</TT>, <TT>start + length</TT>).
       
  2265    * @param start the start of the range in which replace will be performed
       
  2266    * @param length the length of the range in which replace will be performed
       
  2267    * @param oldText the text containing the search text
       
  2268    * @param oldStart the start of the search range in <TT>oldText</TT>
       
  2269    * @param oldLength the length of the search range in <TT>oldText</TT>
       
  2270    * @param newText the text containing the replacement text
       
  2271    * @param newStart the start of the replacement range in <TT>newText</TT>
       
  2272    * @param newLength the length of the replacement range in <TT>newText</TT>
       
  2273    * @return a reference to this
       
  2274    * @stable ICU 2.0
       
  2275    */
       
  2276   UnicodeString& findAndReplace(int32_t start,
       
  2277                 int32_t length,
       
  2278                 const UnicodeString& oldText,
       
  2279                 int32_t oldStart,
       
  2280                 int32_t oldLength,
       
  2281                 const UnicodeString& newText,
       
  2282                 int32_t newStart,
       
  2283                 int32_t newLength);
       
  2284 
       
  2285 
       
  2286   /* Remove operations */
       
  2287 
       
  2288   /**
       
  2289    * Remove all characters from the UnicodeString object.
       
  2290    * @return a reference to this
       
  2291    * @stable ICU 2.0
       
  2292    */
       
  2293   inline UnicodeString& remove(void);
       
  2294 
       
  2295   /**
       
  2296    * Remove the characters in the range
       
  2297    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
       
  2298    * @param start the offset of the first character to remove
       
  2299    * @param length the number of characters to remove
       
  2300    * @return a reference to this
       
  2301    * @stable ICU 2.0
       
  2302    */
       
  2303   inline UnicodeString& remove(int32_t start,
       
  2304                                int32_t length = (int32_t)INT32_MAX);
       
  2305 
       
  2306   /**
       
  2307    * Remove the characters in the range
       
  2308    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
       
  2309    * @param start the offset of the first character to remove
       
  2310    * @param limit the offset immediately following the range to remove
       
  2311    * @return a reference to this
       
  2312    * @stable ICU 2.0
       
  2313    */
       
  2314   inline UnicodeString& removeBetween(int32_t start,
       
  2315                                       int32_t limit = (int32_t)INT32_MAX);
       
  2316 
       
  2317 
       
  2318   /* Length operations */
       
  2319 
       
  2320   /**
       
  2321    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
       
  2322    * If the length of this UnicodeString is less than targetLength,
       
  2323    * targetLength - length() copies of padChar will be added to the
       
  2324    * beginning of this UnicodeString.
       
  2325    * @param targetLength the desired length of the string
       
  2326    * @param padChar the character to use for padding. Defaults to
       
  2327    * space (U+0020)
       
  2328    * @return TRUE if the text was padded, FALSE otherwise.
       
  2329    * @stable ICU 2.0
       
  2330    */
       
  2331   UBool padLeading(int32_t targetLength,
       
  2332                     UChar padChar = 0x0020);
       
  2333 
       
  2334   /**
       
  2335    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
       
  2336    * If the length of this UnicodeString is less than targetLength,
       
  2337    * targetLength - length() copies of padChar will be added to the
       
  2338    * end of this UnicodeString.
       
  2339    * @param targetLength the desired length of the string
       
  2340    * @param padChar the character to use for padding. Defaults to
       
  2341    * space (U+0020)
       
  2342    * @return TRUE if the text was padded, FALSE otherwise.
       
  2343    * @stable ICU 2.0
       
  2344    */
       
  2345   UBool padTrailing(int32_t targetLength,
       
  2346                      UChar padChar = 0x0020);
       
  2347 
       
  2348   /**
       
  2349    * Truncate this UnicodeString to the <TT>targetLength</TT>.
       
  2350    * @param targetLength the desired length of this UnicodeString.
       
  2351    * @return TRUE if the text was truncated, FALSE otherwise
       
  2352    * @stable ICU 2.0
       
  2353    */
       
  2354   inline UBool truncate(int32_t targetLength);
       
  2355 
       
  2356   /**
       
  2357    * Trims leading and trailing whitespace from this UnicodeString.
       
  2358    * @return a reference to this
       
  2359    * @stable ICU 2.0
       
  2360    */
       
  2361   UnicodeString& trim(void);
       
  2362 
       
  2363 
       
  2364   /* Miscellaneous operations */
       
  2365 
       
  2366   /**
       
  2367    * Reverse this UnicodeString in place.
       
  2368    * @return a reference to this
       
  2369    * @stable ICU 2.0
       
  2370    */
       
  2371   inline UnicodeString& reverse(void);
       
  2372 
       
  2373   /**
       
  2374    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
       
  2375    * this UnicodeString.
       
  2376    * @param start the start of the range to reverse
       
  2377    * @param length the number of characters to reverse
       
  2378    * @return a reference to this
       
  2379    * @stable ICU 2.0
       
  2380    */
       
  2381   inline UnicodeString& reverse(int32_t start,
       
  2382              int32_t length);
       
  2383 
       
  2384   /**
       
  2385    * Convert the characters in this to UPPER CASE following the conventions of
       
  2386    * the default locale.
       
  2387    * @return A reference to this.
       
  2388    * @stable ICU 2.0
       
  2389    */
       
  2390   UnicodeString& toUpper(void);
       
  2391 
       
  2392   /**
       
  2393    * Convert the characters in this to UPPER CASE following the conventions of
       
  2394    * a specific locale.
       
  2395    * @param locale The locale containing the conventions to use.
       
  2396    * @return A reference to this.
       
  2397    * @stable ICU 2.0
       
  2398    */
       
  2399   UnicodeString& toUpper(const Locale& locale);
       
  2400 
       
  2401   /**
       
  2402    * Convert the characters in this to lower case following the conventions of
       
  2403    * the default locale.
       
  2404    * @return A reference to this.
       
  2405    * @stable ICU 2.0
       
  2406    */
       
  2407   UnicodeString& toLower(void);
       
  2408 
       
  2409   /**
       
  2410    * Convert the characters in this to lower case following the conventions of
       
  2411    * a specific locale.
       
  2412    * @param locale The locale containing the conventions to use.
       
  2413    * @return A reference to this.
       
  2414    * @stable ICU 2.0
       
  2415    */
       
  2416   UnicodeString& toLower(const Locale& locale);
       
  2417 
       
  2418 #if !UCONFIG_NO_BREAK_ITERATION
       
  2419 
       
  2420   /**
       
  2421    * Titlecase this string, convenience function using the default locale.
       
  2422    *
       
  2423    * Casing is locale-dependent and context-sensitive.
       
  2424    * Titlecasing uses a break iterator to find the first characters of words
       
  2425    * that are to be titlecased. It titlecases those characters and lowercases
       
  2426    * all others.
       
  2427    *
       
  2428    * The titlecase break iterator can be provided to customize for arbitrary
       
  2429    * styles, using rules and dictionaries beyond the standard iterators.
       
  2430    * It may be more efficient to always provide an iterator to avoid
       
  2431    * opening and closing one for each string.
       
  2432    * The standard titlecase iterator for the root locale implements the
       
  2433    * algorithm of Unicode TR 21.
       
  2434    *
       
  2435    * This function uses only the first() and next() methods of the
       
  2436    * provided break iterator.
       
  2437    *
       
  2438    * @param titleIter A break iterator to find the first characters of words
       
  2439    *                  that are to be titlecased.
       
  2440    *                  If none is provided (0), then a standard titlecase
       
  2441    *                  break iterator is opened.
       
  2442    *                  Otherwise the provided iterator is set to the string's text.
       
  2443    * @return A reference to this.
       
  2444    * @stable ICU 2.1
       
  2445    */
       
  2446   UnicodeString &toTitle(BreakIterator *titleIter);
       
  2447 
       
  2448   /**
       
  2449    * Titlecase this string.
       
  2450    *
       
  2451    * Casing is locale-dependent and context-sensitive.
       
  2452    * Titlecasing uses a break iterator to find the first characters of words
       
  2453    * that are to be titlecased. It titlecases those characters and lowercases
       
  2454    * all others.
       
  2455    *
       
  2456    * The titlecase break iterator can be provided to customize for arbitrary
       
  2457    * styles, using rules and dictionaries beyond the standard iterators.
       
  2458    * It may be more efficient to always provide an iterator to avoid
       
  2459    * opening and closing one for each string.
       
  2460    * The standard titlecase iterator for the root locale implements the
       
  2461    * algorithm of Unicode TR 21.
       
  2462    *
       
  2463    * This function uses only the first() and next() methods of the
       
  2464    * provided break iterator.
       
  2465    *
       
  2466    * @param titleIter A break iterator to find the first characters of words
       
  2467    *                  that are to be titlecased.
       
  2468    *                  If none is provided (0), then a standard titlecase
       
  2469    *                  break iterator is opened.
       
  2470    *                  Otherwise the provided iterator is set to the string's text.
       
  2471    * @param locale    The locale to consider.
       
  2472    * @return A reference to this.
       
  2473    * @stable ICU 2.1
       
  2474    */
       
  2475   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
       
  2476 
       
  2477 #endif
       
  2478 
       
  2479   /**
       
  2480    * Case-fold the characters in this string.
       
  2481    * Case-folding is locale-independent and not context-sensitive,
       
  2482    * but there is an option for whether to include or exclude mappings for dotted I
       
  2483    * and dotless i that are marked with 'I' in CaseFolding.txt.
       
  2484    * The result may be longer or shorter than the original.
       
  2485    *
       
  2486    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
  2487    * @return A reference to this.
       
  2488    * @stable ICU 2.0
       
  2489    */
       
  2490   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
       
  2491 
       
  2492   //========================================
       
  2493   // Access to the internal buffer
       
  2494   //========================================
       
  2495 
       
  2496   /**
       
  2497    * Get a read/write pointer to the internal buffer.
       
  2498    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
       
  2499    * writable, and is still owned by the UnicodeString object.
       
  2500    * Calls to getBuffer(minCapacity) must not be nested, and
       
  2501    * must be matched with calls to releaseBuffer(newLength).
       
  2502    * If the string buffer was read-only or shared,
       
  2503    * then it will be reallocated and copied.
       
  2504    *
       
  2505    * An attempted nested call will return 0, and will not further modify the
       
  2506    * state of the UnicodeString object.
       
  2507    * It also returns 0 if the string is bogus.
       
  2508    *
       
  2509    * The actual capacity of the string buffer may be larger than minCapacity.
       
  2510    * getCapacity() returns the actual capacity.
       
  2511    * For many operations, the full capacity should be used to avoid reallocations.
       
  2512    *
       
  2513    * While the buffer is "open" between getBuffer(minCapacity)
       
  2514    * and releaseBuffer(newLength), the following applies:
       
  2515    * - The string length is set to 0.
       
  2516    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
       
  2517    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
       
  2518    * - You can read from and write to the returned buffer.
       
  2519    * - The previous string contents will still be in the buffer;
       
  2520    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
       
  2521    *   If the length() was greater than minCapacity, then any contents after minCapacity
       
  2522    *   may be lost.
       
  2523    *   The buffer contents is not NUL-terminated by getBuffer().
       
  2524    *   If length()<getCapacity() then you can terminate it by writing a NUL
       
  2525    *   at index length().
       
  2526    * - You must call releaseBuffer(newLength) before and in order to
       
  2527    *   return to normal UnicodeString operation.
       
  2528    *
       
  2529    * @param minCapacity the minimum number of UChars that are to be available
       
  2530    *        in the buffer, starting at the returned pointer;
       
  2531    *        default to the current string capacity if minCapacity==-1
       
  2532    * @return a writable pointer to the internal string buffer,
       
  2533    *         or 0 if an error occurs (nested calls, out of memory)
       
  2534    *
       
  2535    * @see releaseBuffer
       
  2536    * @see getTerminatedBuffer()
       
  2537    * @stable ICU 2.0
       
  2538    */
       
  2539   UChar *getBuffer(int32_t minCapacity);
       
  2540 
       
  2541   /**
       
  2542    * Release a read/write buffer on a UnicodeString object with an
       
  2543    * "open" getBuffer(minCapacity).
       
  2544    * This function must be called in a matched pair with getBuffer(minCapacity).
       
  2545    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
       
  2546    *
       
  2547    * It will set the string length to newLength, at most to the current capacity.
       
  2548    * If newLength==-1 then it will set the length according to the
       
  2549    * first NUL in the buffer, or to the capacity if there is no NUL.
       
  2550    *
       
  2551    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
       
  2552    *
       
  2553    * @param newLength the new length of the UnicodeString object;
       
  2554    *        defaults to the current capacity if newLength is greater than that;
       
  2555    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
       
  2556    *        the current capacity of the string
       
  2557    *
       
  2558    * @see getBuffer(int32_t minCapacity)
       
  2559    * @stable ICU 2.0
       
  2560    */
       
  2561   void releaseBuffer(int32_t newLength=-1);
       
  2562 
       
  2563   /**
       
  2564    * Get a read-only pointer to the internal buffer.
       
  2565    * This can be called at any time on a valid UnicodeString.
       
  2566    *
       
  2567    * It returns 0 if the string is bogus, or
       
  2568    * during an "open" getBuffer(minCapacity).
       
  2569    *
       
  2570    * It can be called as many times as desired.
       
  2571    * The pointer that it returns will remain valid until the UnicodeString object is modified,
       
  2572    * at which time the pointer is semantically invalidated and must not be used any more.
       
  2573    *
       
  2574    * The capacity of the buffer can be determined with getCapacity().
       
  2575    * The part after length() may or may not be initialized and valid,
       
  2576    * depending on the history of the UnicodeString object.
       
  2577    *
       
  2578    * The buffer contents is (probably) not NUL-terminated.
       
  2579    * You can check if it is with
       
  2580    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
       
  2581    * (See getTerminatedBuffer().)
       
  2582    *
       
  2583    * The buffer may reside in read-only memory. Its contents must not
       
  2584    * be modified.
       
  2585    *
       
  2586    * @return a read-only pointer to the internal string buffer,
       
  2587    *         or 0 if the string is bogus or during an "open" getBuffer(minCapacity)
       
  2588    *
       
  2589    * @see getBuffer(int32_t minCapacity)
       
  2590    * @see getTerminatedBuffer()
       
  2591    * @stable ICU 2.0
       
  2592    */
       
  2593   inline const UChar *getBuffer() const;
       
  2594 
       
  2595   /**
       
  2596    * Get a read-only pointer to the internal buffer,
       
  2597    * making sure that it is NUL-terminated.
       
  2598    * This can be called at any time on a valid UnicodeString.
       
  2599    *
       
  2600    * It returns 0 if the string is bogus, or
       
  2601    * during an "open" getBuffer(minCapacity), or if the buffer cannot
       
  2602    * be NUL-terminated (because memory allocation failed).
       
  2603    *
       
  2604    * It can be called as many times as desired.
       
  2605    * The pointer that it returns will remain valid until the UnicodeString object is modified,
       
  2606    * at which time the pointer is semantically invalidated and must not be used any more.
       
  2607    *
       
  2608    * The capacity of the buffer can be determined with getCapacity().
       
  2609    * The part after length()+1 may or may not be initialized and valid,
       
  2610    * depending on the history of the UnicodeString object.
       
  2611    *
       
  2612    * The buffer contents is guaranteed to be NUL-terminated.
       
  2613    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
       
  2614    * is written.
       
  2615    * For this reason, this function is not const, unlike getBuffer().
       
  2616    * Note that a UnicodeString may also contain NUL characters as part of its contents.
       
  2617    *
       
  2618    * The buffer may reside in read-only memory. Its contents must not
       
  2619    * be modified.
       
  2620    *
       
  2621    * @return a read-only pointer to the internal string buffer,
       
  2622    *         or 0 if the string is bogus, during an "open" getBuffer(minCapacity), or if the buffer cannot be NUL-terminated
       
  2623    *
       
  2624    * @see getBuffer(int32_t minCapacity)
       
  2625    * @see getBuffer()
       
  2626    * @stable ICU 2.2
       
  2627    */
       
  2628   inline const UChar *getTerminatedBuffer();
       
  2629 
       
  2630   //========================================
       
  2631   // Constructors
       
  2632   //========================================
       
  2633 
       
  2634   /** Construct an empty UnicodeString.
       
  2635    * @stable ICU 2.0
       
  2636    */
       
  2637   UnicodeString();
       
  2638 
       
  2639   /**
       
  2640    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
       
  2641    * @param capacity the number of UChars this UnicodeString should hold
       
  2642    * before a resize is necessary; if count is greater than 0 and count
       
  2643    * code points c take up more space than capacity, then capacity is adjusted
       
  2644    * accordingly.
       
  2645    * @param c is used to initially fill the string
       
  2646    * @param count specifies how many code points c are to be written in the
       
  2647    *              string
       
  2648    * @stable ICU 2.0
       
  2649    */
       
  2650   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
       
  2651 
       
  2652   /**
       
  2653    * Single UChar (code unit) constructor.
       
  2654    * @param ch the character to place in the UnicodeString
       
  2655    * @stable ICU 2.0
       
  2656    */
       
  2657   UnicodeString(UChar ch);
       
  2658 
       
  2659   /**
       
  2660    * Single UChar32 (code point) constructor.
       
  2661    * @param ch the character to place in the UnicodeString
       
  2662    * @stable ICU 2.0
       
  2663    */
       
  2664   UnicodeString(UChar32 ch);
       
  2665 
       
  2666   /**
       
  2667    * UChar* constructor.
       
  2668    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
       
  2669    * must be NUL (U+0000) terminated.
       
  2670    * @stable ICU 2.0
       
  2671    */
       
  2672   UnicodeString(const UChar *text);
       
  2673 
       
  2674   /**
       
  2675    * UChar* constructor.
       
  2676    * @param text The characters to place in the UnicodeString.
       
  2677    * @param textLength The number of Unicode characters in <TT>text</TT>
       
  2678    * to copy.
       
  2679    * @stable ICU 2.0
       
  2680    */
       
  2681   UnicodeString(const UChar *text,
       
  2682         int32_t textLength);
       
  2683 
       
  2684   /**
       
  2685    * Readonly-aliasing UChar* constructor.
       
  2686    * The text will be used for the UnicodeString object, but
       
  2687    * it will not be released when the UnicodeString is destroyed.
       
  2688    * This has copy-on-write semantics:
       
  2689    * When the string is modified, then the buffer is first copied into
       
  2690    * newly allocated memory.
       
  2691    * The aliased buffer is never modified.
       
  2692    * In an assignment to another UnicodeString, the text will be aliased again,
       
  2693    * so that both strings then alias the same readonly-text.
       
  2694    *
       
  2695    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
       
  2696    *                     This must be true if <code>textLength==-1</code>.
       
  2697    * @param text The characters to alias for the UnicodeString.
       
  2698    * @param textLength The number of Unicode characters in <code>text</code> to alias.
       
  2699    *                   If -1, then this constructor will determine the length
       
  2700    *                   by calling <code>u_strlen()</code>.
       
  2701    * @stable ICU 2.0
       
  2702    */
       
  2703   UnicodeString(UBool isTerminated,
       
  2704                 const UChar *text,
       
  2705                 int32_t textLength);
       
  2706 
       
  2707   /**
       
  2708    * Writable-aliasing UChar* constructor.
       
  2709    * The text will be used for the UnicodeString object, but
       
  2710    * it will not be released when the UnicodeString is destroyed.
       
  2711    * This has write-through semantics:
       
  2712    * For as long as the capacity of the buffer is sufficient, write operations
       
  2713    * will directly affect the buffer. When more capacity is necessary, then
       
  2714    * a new buffer will be allocated and the contents copied as with regularly
       
  2715    * constructed strings.
       
  2716    * In an assignment to another UnicodeString, the buffer will be copied.
       
  2717    * The extract(UChar *dst) function detects whether the dst pointer is the same
       
  2718    * as the string buffer itself and will in this case not copy the contents.
       
  2719    *
       
  2720    * @param buffer The characters to alias for the UnicodeString.
       
  2721    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
       
  2722    * @param buffCapacity The size of <code>buffer</code> in UChars.
       
  2723    * @stable ICU 2.0
       
  2724    */
       
  2725   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
       
  2726 
       
  2727 #if !UCONFIG_NO_CONVERSION
       
  2728 
       
  2729   /**
       
  2730    * char* constructor.
       
  2731    * @param codepageData an array of bytes, null-terminated
       
  2732    * @param codepage the encoding of <TT>codepageData</TT>.  The special
       
  2733    * value 0 for <TT>codepage</TT> indicates that the text is in the
       
  2734    * platform's default codepage.
       
  2735    *
       
  2736    * If <code>codepage</code> is an empty string (<code>""</code>),
       
  2737    * then a simple conversion is performed on the codepage-invariant
       
  2738    * subset ("invariant characters") of the platform encoding. See utypes.h.
       
  2739    * Recommendation: For invariant-character strings use the constructor
       
  2740    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
       
  2741    * because it avoids object code dependencies of UnicodeString on
       
  2742    * the conversion code.
       
  2743    *
       
  2744    * @stable ICU 2.0
       
  2745    */
       
  2746   UnicodeString(const char *codepageData,
       
  2747         const char *codepage = 0);
       
  2748 
       
  2749   /**
       
  2750    * char* constructor.
       
  2751    * @param codepageData an array of bytes.
       
  2752    * @param dataLength The number of bytes in <TT>codepageData</TT>.
       
  2753    * @param codepage the encoding of <TT>codepageData</TT>.  The special
       
  2754    * value 0 for <TT>codepage</TT> indicates that the text is in the
       
  2755    * platform's default codepage.
       
  2756    * If <code>codepage</code> is an empty string (<code>""</code>),
       
  2757    * then a simple conversion is performed on the codepage-invariant
       
  2758    * subset ("invariant characters") of the platform encoding. See utypes.h.
       
  2759    * Recommendation: For invariant-character strings use the constructor
       
  2760    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
       
  2761    * because it avoids object code dependencies of UnicodeString on
       
  2762    * the conversion code.
       
  2763    *
       
  2764    * @stable ICU 2.0
       
  2765    */
       
  2766   UnicodeString(const char *codepageData,
       
  2767         int32_t dataLength,
       
  2768         const char *codepage = 0);
       
  2769 
       
  2770   /**
       
  2771    * char * / UConverter constructor.
       
  2772    * This constructor uses an existing UConverter object to
       
  2773    * convert the codepage string to Unicode and construct a UnicodeString
       
  2774    * from that.
       
  2775    *
       
  2776    * The converter is reset at first.
       
  2777    * If the error code indicates a failure before this constructor is called,
       
  2778    * or if an error occurs during conversion or construction,
       
  2779    * then the string will be bogus.
       
  2780    *
       
  2781    * This function avoids the overhead of opening and closing a converter if
       
  2782    * multiple strings are constructed.
       
  2783    *
       
  2784    * @param src input codepage string
       
  2785    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
       
  2786    * @param cnv converter object (ucnv_resetToUnicode() will be called),
       
  2787    *        can be NULL for the default converter
       
  2788    * @param errorCode normal ICU error code
       
  2789    * @stable ICU 2.0
       
  2790    */
       
  2791   UnicodeString(
       
  2792         const char *src, int32_t srcLength,
       
  2793         UConverter *cnv,
       
  2794         UErrorCode &errorCode);
       
  2795 
       
  2796 #endif
       
  2797 
       
  2798   /**
       
  2799    * Constructs a Unicode string from an invariant-character char * string.
       
  2800    * About invariant characters see utypes.h.
       
  2801    * This constructor has no runtime dependency on conversion code and is
       
  2802    * therefore recommended over ones taking a charset name string
       
  2803    * (where the empty string "" indicates invariant-character conversion).
       
  2804    *
       
  2805    * Use the macro US_INV as the third, signature-distinguishing parameter.
       
  2806    *
       
  2807    * For example:
       
  2808    * \code
       
  2809    * void fn(const char *s) {
       
  2810    *   UnicodeString ustr(s, -1, US_INV);
       
  2811    *   // use ustr ...
       
  2812    * }
       
  2813    * \endcode
       
  2814    *
       
  2815    * @param src String using only invariant characters.
       
  2816    * @param length Length of src, or -1 if NUL-terminated.
       
  2817    * @param inv Signature-distinguishing parameter, use US_INV.
       
  2818    *
       
  2819    * @see US_INV
       
  2820    * @draft ICU 3.2
       
  2821    */
       
  2822   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
       
  2823 
       
  2824 
       
  2825   /**
       
  2826    * Copy constructor.
       
  2827    * @param that The UnicodeString object to copy.
       
  2828    * @stable ICU 2.0
       
  2829    */
       
  2830   UnicodeString(const UnicodeString& that);
       
  2831 
       
  2832   /**
       
  2833    * 'Substring' constructor from tail of source string.
       
  2834    * @param src The UnicodeString object to copy.
       
  2835    * @param srcStart The offset into <tt>src</tt> at which to start copying.
       
  2836    * @stable ICU 2.2
       
  2837    */
       
  2838   UnicodeString(const UnicodeString& src, int32_t srcStart);
       
  2839 
       
  2840   /**
       
  2841    * 'Substring' constructor from subrange of source string.
       
  2842    * @param src The UnicodeString object to copy.
       
  2843    * @param srcStart The offset into <tt>src</tt> at which to start copying.
       
  2844    * @param srcLength The number of characters from <tt>src</tt> to copy.
       
  2845    * @stable ICU 2.2
       
  2846    */
       
  2847   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
       
  2848 
       
  2849   /**
       
  2850    * Clone this object, an instance of a subclass of Replaceable.
       
  2851    * Clones can be used concurrently in multiple threads.
       
  2852    * If a subclass does not implement clone(), or if an error occurs,
       
  2853    * then NULL is returned.
       
  2854    * The clone functions in all subclasses return a pointer to a Replaceable
       
  2855    * because some compilers do not support covariant (same-as-this)
       
  2856    * return types; cast to the appropriate subclass if necessary.
       
  2857    * The caller must delete the clone.
       
  2858    *
       
  2859    * @return a clone of this object
       
  2860    *
       
  2861    * @see Replaceable::clone
       
  2862    * @see getDynamicClassID
       
  2863    * @stable ICU 2.6
       
  2864    */
       
  2865   virtual Replaceable *clone() const;
       
  2866 
       
  2867   /** Destructor.
       
  2868    * @stable ICU 2.0
       
  2869    */
       
  2870   virtual ~UnicodeString();
       
  2871 
       
  2872 
       
  2873   /* Miscellaneous operations */
       
  2874 
       
  2875   /**
       
  2876    * Unescape a string of characters and return a string containing
       
  2877    * the result.  The following escape sequences are recognized:
       
  2878    *
       
  2879    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
       
  2880    * \\Uhhhhhhhh   8 hex digits
       
  2881    * \\xhh         1-2 hex digits
       
  2882    * \\ooo         1-3 octal digits; o in [0-7]
       
  2883    * \\cX          control-X; X is masked with 0x1F
       
  2884    *
       
  2885    * as well as the standard ANSI C escapes:
       
  2886    *
       
  2887    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
       
  2888    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
       
  2889    * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
       
  2890    *
       
  2891    * Anything else following a backslash is generically escaped.  For
       
  2892    * example, "[a\\-z]" returns "[a-z]".
       
  2893    *
       
  2894    * If an escape sequence is ill-formed, this method returns an empty
       
  2895    * string.  An example of an ill-formed sequence is "\\u" followed by
       
  2896    * fewer than 4 hex digits.
       
  2897    *
       
  2898    * This function is similar to u_unescape() but not identical to it.
       
  2899    * The latter takes a source char*, so it does escape recognition
       
  2900    * and also invariant conversion.
       
  2901    *
       
  2902    * @return a string with backslash escapes interpreted, or an
       
  2903    * empty string on error.
       
  2904    * @see UnicodeString#unescapeAt()
       
  2905    * @see u_unescape()
       
  2906    * @see u_unescapeAt()
       
  2907    * @stable ICU 2.0
       
  2908    */
       
  2909   UnicodeString unescape() const;
       
  2910 
       
  2911   /**
       
  2912    * Unescape a single escape sequence and return the represented
       
  2913    * character.  See unescape() for a listing of the recognized escape
       
  2914    * sequences.  The character at offset-1 is assumed (without
       
  2915    * checking) to be a backslash.  If the escape sequence is
       
  2916    * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
       
  2917    * returned.
       
  2918    *
       
  2919    * @param offset an input output parameter.  On input, it is the
       
  2920    * offset into this string where the escape sequence is located,
       
  2921    * after the initial backslash.  On output, it is advanced after the
       
  2922    * last character parsed.  On error, it is not advanced at all.
       
  2923    * @return the character represented by the escape sequence at
       
  2924    * offset, or (UChar32)0xFFFFFFFF on error.
       
  2925    * @see UnicodeString#unescape()
       
  2926    * @see u_unescape()
       
  2927    * @see u_unescapeAt()
       
  2928    * @stable ICU 2.0
       
  2929    */
       
  2930   UChar32 unescapeAt(int32_t &offset) const;
       
  2931 
       
  2932   /**
       
  2933    * ICU "poor man's RTTI", returns a UClassID for this class.
       
  2934    *
       
  2935    * @stable ICU 2.2
       
  2936    */
       
  2937   static UClassID U_EXPORT2 getStaticClassID();
       
  2938 
       
  2939   /**
       
  2940    * ICU "poor man's RTTI", returns a UClassID for the actual class.
       
  2941    *
       
  2942    * @stable ICU 2.2
       
  2943    */
       
  2944   virtual UClassID getDynamicClassID() const;
       
  2945 
       
  2946   //========================================
       
  2947   // Implementation methods
       
  2948   //========================================
       
  2949 
       
  2950 protected:
       
  2951   /**
       
  2952    * Implement Replaceable::getLength() (see jitterbug 1027).
       
  2953    * @stable ICU 2.4
       
  2954    */
       
  2955   virtual int32_t getLength() const;
       
  2956 
       
  2957   /**
       
  2958    * The change in Replaceable to use virtual getCharAt() allows
       
  2959    * UnicodeString::charAt() to be inline again (see jitterbug 709).
       
  2960    * @stable ICU 2.4
       
  2961    */
       
  2962   virtual UChar getCharAt(int32_t offset) const;
       
  2963 
       
  2964   /**
       
  2965    * The change in Replaceable to use virtual getChar32At() allows
       
  2966    * UnicodeString::char32At() to be inline again (see jitterbug 709).
       
  2967    * @stable ICU 2.4
       
  2968    */
       
  2969   virtual UChar32 getChar32At(int32_t offset) const;
       
  2970 
       
  2971 private:
       
  2972 
       
  2973   inline int8_t
       
  2974   doCompare(int32_t start,
       
  2975            int32_t length,
       
  2976            const UnicodeString& srcText,
       
  2977            int32_t srcStart,
       
  2978            int32_t srcLength) const;
       
  2979 
       
  2980   int8_t doCompare(int32_t start,
       
  2981            int32_t length,
       
  2982            const UChar *srcChars,
       
  2983            int32_t srcStart,
       
  2984            int32_t srcLength) const;
       
  2985 
       
  2986   inline int8_t
       
  2987   doCompareCodePointOrder(int32_t start,
       
  2988                           int32_t length,
       
  2989                           const UnicodeString& srcText,
       
  2990                           int32_t srcStart,
       
  2991                           int32_t srcLength) const;
       
  2992 
       
  2993   int8_t doCompareCodePointOrder(int32_t start,
       
  2994                                  int32_t length,
       
  2995                                  const UChar *srcChars,
       
  2996                                  int32_t srcStart,
       
  2997                                  int32_t srcLength) const;
       
  2998 
       
  2999   inline int8_t
       
  3000   doCaseCompare(int32_t start,
       
  3001                 int32_t length,
       
  3002                 const UnicodeString &srcText,
       
  3003                 int32_t srcStart,
       
  3004                 int32_t srcLength,
       
  3005                 uint32_t options) const;
       
  3006 
       
  3007   int8_t
       
  3008   doCaseCompare(int32_t start,
       
  3009                 int32_t length,
       
  3010                 const UChar *srcChars,
       
  3011                 int32_t srcStart,
       
  3012                 int32_t srcLength,
       
  3013                 uint32_t options) const;
       
  3014 
       
  3015   int32_t doIndexOf(UChar c,
       
  3016             int32_t start,
       
  3017             int32_t length) const;
       
  3018 
       
  3019   int32_t doIndexOf(UChar32 c,
       
  3020                         int32_t start,
       
  3021                         int32_t length) const;
       
  3022 
       
  3023   int32_t doLastIndexOf(UChar c,
       
  3024                 int32_t start,
       
  3025                 int32_t length) const;
       
  3026 
       
  3027   int32_t doLastIndexOf(UChar32 c,
       
  3028                             int32_t start,
       
  3029                             int32_t length) const;
       
  3030 
       
  3031   void doExtract(int32_t start,
       
  3032          int32_t length,
       
  3033          UChar *dst,
       
  3034          int32_t dstStart) const;
       
  3035 
       
  3036   inline void doExtract(int32_t start,
       
  3037          int32_t length,
       
  3038          UnicodeString& target) const;
       
  3039 
       
  3040   inline UChar doCharAt(int32_t offset)  const;
       
  3041 
       
  3042   UnicodeString& doReplace(int32_t start,
       
  3043                int32_t length,
       
  3044                const UnicodeString& srcText,
       
  3045                int32_t srcStart,
       
  3046                int32_t srcLength);
       
  3047 
       
  3048   UnicodeString& doReplace(int32_t start,
       
  3049                int32_t length,
       
  3050                const UChar *srcChars,
       
  3051                int32_t srcStart,
       
  3052                int32_t srcLength);
       
  3053 
       
  3054   UnicodeString& doReverse(int32_t start,
       
  3055                int32_t length);
       
  3056 
       
  3057   // calculate hash code
       
  3058   int32_t doHashCode(void) const;
       
  3059 
       
  3060   // get pointer to start of array
       
  3061   inline UChar* getArrayStart(void);
       
  3062   inline const UChar* getArrayStart(void) const;
       
  3063 
       
  3064   // allocate the array; result may be fStackBuffer
       
  3065   // sets refCount to 1 if appropriate
       
  3066   // sets fArray, fCapacity, and fFlags
       
  3067   // returns boolean for success or failure
       
  3068   UBool allocate(int32_t capacity);
       
  3069 
       
  3070   // release the array if owned
       
  3071   void releaseArray(void);
       
  3072 
       
  3073   // turn a bogus string into an empty one
       
  3074   void unBogus();
       
  3075 
       
  3076   // implements assignment operator, copy constructor, and fastCopyFrom()
       
  3077   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
       
  3078 
       
  3079   // Pin start and limit to acceptable values.
       
  3080   inline void pinIndex(int32_t& start) const;
       
  3081   inline void pinIndices(int32_t& start,
       
  3082                          int32_t& length) const;
       
  3083 
       
  3084 #if !UCONFIG_NO_CONVERSION
       
  3085 
       
  3086   /* Internal extract() using UConverter. */
       
  3087   int32_t doExtract(int32_t start, int32_t length,
       
  3088                     char *dest, int32_t destCapacity,
       
  3089                     UConverter *cnv,
       
  3090                     UErrorCode &errorCode) const;
       
  3091 
       
  3092   /*
       
  3093    * Real constructor for converting from codepage data.
       
  3094    * It assumes that it is called with !fRefCounted.
       
  3095    *
       
  3096    * If <code>codepage==0</code>, then the default converter
       
  3097    * is used for the platform encoding.
       
  3098    * If <code>codepage</code> is an empty string (<code>""</code>),
       
  3099    * then a simple conversion is performed on the codepage-invariant
       
  3100    * subset ("invariant characters") of the platform encoding. See utypes.h.
       
  3101    */
       
  3102   void doCodepageCreate(const char *codepageData,
       
  3103                         int32_t dataLength,
       
  3104                         const char *codepage);
       
  3105 
       
  3106   /*
       
  3107    * Worker function for creating a UnicodeString from
       
  3108    * a codepage string using a UConverter.
       
  3109    */
       
  3110   void
       
  3111   doCodepageCreate(const char *codepageData,
       
  3112                    int32_t dataLength,
       
  3113                    UConverter *converter,
       
  3114                    UErrorCode &status);
       
  3115 
       
  3116 #endif
       
  3117 
       
  3118   /*
       
  3119    * This function is called when write access to the array
       
  3120    * is necessary.
       
  3121    *
       
  3122    * We need to make a copy of the array if
       
  3123    * the buffer is read-only, or
       
  3124    * the buffer is refCounted (shared), and refCount>1, or
       
  3125    * the buffer is too small.
       
  3126    *
       
  3127    * Return FALSE if memory could not be allocated.
       
  3128    */
       
  3129   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
       
  3130                             int32_t growCapacity = -1,
       
  3131                             UBool doCopyArray = TRUE,
       
  3132                             int32_t **pBufferToDelete = 0,
       
  3133                             UBool forceClone = FALSE);
       
  3134 
       
  3135   // common function for case mappings
       
  3136   UnicodeString &
       
  3137   caseMap(BreakIterator *titleIter,
       
  3138           const char *locale,
       
  3139           uint32_t options,
       
  3140           int32_t toWhichCase);
       
  3141 
       
  3142   // ref counting
       
  3143   void addRef(void);
       
  3144   int32_t removeRef(void);
       
  3145   int32_t refCount(void) const;
       
  3146 
       
  3147   // constants
       
  3148   enum {
       
  3149     US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
       
  3150     kInvalidUChar=0xffff, // invalid UChar index
       
  3151     kGrowSize=128, // grow size for this buffer
       
  3152     kInvalidHashCode=0, // invalid hash code
       
  3153     kEmptyHashCode=1, // hash code for empty string
       
  3154 
       
  3155     // bit flag values for fFlags
       
  3156     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
       
  3157     kUsingStackBuffer=2,// fArray==fStackBuffer
       
  3158     kRefCounted=4,      // there is a refCount field before the characters in fArray
       
  3159     kBufferIsReadonly=8,// do not write to this buffer
       
  3160     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
       
  3161                         // and releaseBuffer(newLength) must be called
       
  3162 
       
  3163     // combined values for convenience
       
  3164     kShortString=kUsingStackBuffer,
       
  3165     kLongString=kRefCounted,
       
  3166     kReadonlyAlias=kBufferIsReadonly,
       
  3167     kWritableAlias=0
       
  3168   };
       
  3169 
       
  3170   friend class StringCharacterIterator;
       
  3171   friend class StringThreadTest;
       
  3172 
       
  3173   /*
       
  3174    * The following are all the class fields that are stored
       
  3175    * in each UnicodeString object.
       
  3176    * Note that UnicodeString has virtual functions,
       
  3177    * therefore there is an implicit vtable pointer
       
  3178    * as the first real field.
       
  3179    * The fields should be aligned such that no padding is
       
  3180    * necessary, mostly by having larger types first.
       
  3181    * On 32-bit machines, the size should be 32 bytes,
       
  3182    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
       
  3183    */
       
  3184   // (implicit) *vtable;
       
  3185   int32_t   fLength;        // number of characters in fArray
       
  3186   int32_t   fCapacity;      // sizeof fArray
       
  3187   UChar     *fArray;        // the Unicode data
       
  3188   uint16_t  fFlags;         // bit flags: see constants above
       
  3189   UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
       
  3190 
       
  3191 };
       
  3192 
       
  3193 /**
       
  3194  * Create a new UnicodeString with the concatenation of two others.
       
  3195  *
       
  3196  * @param s1 The first string to be copied to the new one.
       
  3197  * @param s2 The second string to be copied to the new one, after s1.
       
  3198  * @return UnicodeString(s1).append(s2)
       
  3199  * @stable ICU 2.8
       
  3200  */
       
  3201 U_COMMON_API UnicodeString U_EXPORT2
       
  3202 operator+ (const UnicodeString &s1, const UnicodeString &s2);
       
  3203 
       
  3204 U_NAMESPACE_END
       
  3205 
       
  3206 // inline implementations -------------------------------------------------- ***
       
  3207 
       
  3208 //========================================
       
  3209 // Array copying
       
  3210 //========================================
       
  3211 /**
       
  3212  * Copy an array of UnicodeString OBJECTS (not pointers).
       
  3213  * @internal
       
  3214  */
       
  3215 inline void
       
  3216 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
       
  3217 { while(count-- > 0) *dst++ = *src++; }
       
  3218 
       
  3219 /**
       
  3220  * Copy an array of UnicodeString OBJECTS (not pointers).
       
  3221  * @internal
       
  3222  */
       
  3223 inline void
       
  3224 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
       
  3225         U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
       
  3226 { uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
       
  3227 
       
  3228 U_NAMESPACE_BEGIN
       
  3229 
       
  3230 //========================================
       
  3231 // Inline members
       
  3232 //========================================
       
  3233 
       
  3234 //========================================
       
  3235 // Privates
       
  3236 //========================================
       
  3237 
       
  3238 inline void
       
  3239 UnicodeString::pinIndex(int32_t& start) const
       
  3240 {
       
  3241   // pin index
       
  3242   if(start < 0) {
       
  3243     start = 0;
       
  3244   } else if(start > fLength) {
       
  3245     start = fLength;
       
  3246   }
       
  3247 }
       
  3248 
       
  3249 inline void
       
  3250 UnicodeString::pinIndices(int32_t& start,
       
  3251                           int32_t& _length) const
       
  3252 {
       
  3253   // pin indices
       
  3254   if(start < 0) {
       
  3255     start = 0;
       
  3256   } else if(start > fLength) {
       
  3257     start = fLength;
       
  3258   }
       
  3259   if(_length < 0) {
       
  3260     _length = 0;
       
  3261   } else if(_length > (fLength - start)) {
       
  3262     _length = (fLength - start);
       
  3263   }
       
  3264 }
       
  3265 
       
  3266 inline UChar*
       
  3267 UnicodeString::getArrayStart()
       
  3268 { return fArray; }
       
  3269 
       
  3270 inline const UChar*
       
  3271 UnicodeString::getArrayStart() const
       
  3272 { return fArray; }
       
  3273 
       
  3274 //========================================
       
  3275 // Read-only implementation methods
       
  3276 //========================================
       
  3277 inline int32_t
       
  3278 UnicodeString::length() const
       
  3279 { return fLength; }
       
  3280 
       
  3281 inline int32_t
       
  3282 UnicodeString::getCapacity() const
       
  3283 { return fCapacity; }
       
  3284 
       
  3285 inline int32_t
       
  3286 UnicodeString::hashCode() const
       
  3287 { return doHashCode(); }
       
  3288 
       
  3289 inline UBool
       
  3290 UnicodeString::isBogus() const
       
  3291 { return (UBool)(fFlags & kIsBogus); }
       
  3292 
       
  3293 inline const UChar *
       
  3294 UnicodeString::getBuffer() const {
       
  3295   if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
       
  3296     return fArray;
       
  3297   } else {
       
  3298     return 0;
       
  3299   }
       
  3300 }
       
  3301 
       
  3302 //========================================
       
  3303 // Read-only alias methods
       
  3304 //========================================
       
  3305 inline int8_t
       
  3306 UnicodeString::doCompare(int32_t start,
       
  3307               int32_t length,
       
  3308               const UnicodeString& srcText,
       
  3309               int32_t srcStart,
       
  3310               int32_t srcLength) const
       
  3311 {
       
  3312   if(srcText.isBogus()) {
       
  3313     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
       
  3314   } else {
       
  3315     srcText.pinIndices(srcStart, srcLength);
       
  3316     return doCompare(start, length, srcText.fArray, srcStart, srcLength);
       
  3317   }
       
  3318 }
       
  3319 
       
  3320 inline UBool
       
  3321 UnicodeString::operator== (const UnicodeString& text) const
       
  3322 {
       
  3323   if(isBogus()) {
       
  3324     return text.isBogus();
       
  3325   } else {
       
  3326     return
       
  3327       !text.isBogus() &&
       
  3328       fLength == text.fLength &&
       
  3329       doCompare(0, fLength, text, 0, text.fLength) == 0;
       
  3330   }
       
  3331 }
       
  3332 
       
  3333 inline UBool
       
  3334 UnicodeString::operator!= (const UnicodeString& text) const
       
  3335 { return (! operator==(text)); }
       
  3336 
       
  3337 inline UBool
       
  3338 UnicodeString::operator> (const UnicodeString& text) const
       
  3339 { return doCompare(0, fLength, text, 0, text.fLength) == 1; }
       
  3340 
       
  3341 inline UBool
       
  3342 UnicodeString::operator< (const UnicodeString& text) const
       
  3343 { return doCompare(0, fLength, text, 0, text.fLength) == -1; }
       
  3344 
       
  3345 inline UBool
       
  3346 UnicodeString::operator>= (const UnicodeString& text) const
       
  3347 { return doCompare(0, fLength, text, 0, text.fLength) != -1; }
       
  3348 
       
  3349 inline UBool
       
  3350 UnicodeString::operator<= (const UnicodeString& text) const
       
  3351 { return doCompare(0, fLength, text, 0, text.fLength) != 1; }
       
  3352 
       
  3353 inline int8_t
       
  3354 UnicodeString::compare(const UnicodeString& text) const
       
  3355 { return doCompare(0, fLength, text, 0, text.fLength); }
       
  3356 
       
  3357 inline int8_t
       
  3358 UnicodeString::compare(int32_t start,
       
  3359                int32_t _length,
       
  3360                const UnicodeString& srcText) const
       
  3361 { return doCompare(start, _length, srcText, 0, srcText.fLength); }
       
  3362 
       
  3363 inline int8_t
       
  3364 UnicodeString::compare(const UChar *srcChars,
       
  3365                int32_t srcLength) const
       
  3366 { return doCompare(0, fLength, srcChars, 0, srcLength); }
       
  3367 
       
  3368 inline int8_t
       
  3369 UnicodeString::compare(int32_t start,
       
  3370                int32_t _length,
       
  3371                const UnicodeString& srcText,
       
  3372                int32_t srcStart,
       
  3373                int32_t srcLength) const
       
  3374 { return doCompare(start, _length, srcText, srcStart, srcLength); }
       
  3375 
       
  3376 inline int8_t
       
  3377 UnicodeString::compare(int32_t start,
       
  3378                int32_t _length,
       
  3379                const UChar *srcChars) const
       
  3380 { return doCompare(start, _length, srcChars, 0, _length); }
       
  3381 
       
  3382 inline int8_t
       
  3383 UnicodeString::compare(int32_t start,
       
  3384                int32_t _length,
       
  3385                const UChar *srcChars,
       
  3386                int32_t srcStart,
       
  3387                int32_t srcLength) const
       
  3388 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
       
  3389 
       
  3390 inline int8_t
       
  3391 UnicodeString::compareBetween(int32_t start,
       
  3392                   int32_t limit,
       
  3393                   const UnicodeString& srcText,
       
  3394                   int32_t srcStart,
       
  3395                   int32_t srcLimit) const
       
  3396 { return doCompare(start, limit - start,
       
  3397            srcText, srcStart, srcLimit - srcStart); }
       
  3398 
       
  3399 inline int8_t
       
  3400 UnicodeString::doCompareCodePointOrder(int32_t start,
       
  3401                                        int32_t length,
       
  3402                                        const UnicodeString& srcText,
       
  3403                                        int32_t srcStart,
       
  3404                                        int32_t srcLength) const
       
  3405 {
       
  3406   if(srcText.isBogus()) {
       
  3407     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
       
  3408   } else {
       
  3409     srcText.pinIndices(srcStart, srcLength);
       
  3410     return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
       
  3411   }
       
  3412 }
       
  3413 
       
  3414 inline int8_t
       
  3415 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
       
  3416 { return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
       
  3417 
       
  3418 inline int8_t
       
  3419 UnicodeString::compareCodePointOrder(int32_t start,
       
  3420                                      int32_t _length,
       
  3421                                      const UnicodeString& srcText) const
       
  3422 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
       
  3423 
       
  3424 inline int8_t
       
  3425 UnicodeString::compareCodePointOrder(const UChar *srcChars,
       
  3426                                      int32_t srcLength) const
       
  3427 { return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
       
  3428 
       
  3429 inline int8_t
       
  3430 UnicodeString::compareCodePointOrder(int32_t start,
       
  3431                                      int32_t _length,
       
  3432                                      const UnicodeString& srcText,
       
  3433                                      int32_t srcStart,
       
  3434                                      int32_t srcLength) const
       
  3435 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
       
  3436 
       
  3437 inline int8_t
       
  3438 UnicodeString::compareCodePointOrder(int32_t start,
       
  3439                                      int32_t _length,
       
  3440                                      const UChar *srcChars) const
       
  3441 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
       
  3442 
       
  3443 inline int8_t
       
  3444 UnicodeString::compareCodePointOrder(int32_t start,
       
  3445                                      int32_t _length,
       
  3446                                      const UChar *srcChars,
       
  3447                                      int32_t srcStart,
       
  3448                                      int32_t srcLength) const
       
  3449 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
       
  3450 
       
  3451 inline int8_t
       
  3452 UnicodeString::compareCodePointOrderBetween(int32_t start,
       
  3453                                             int32_t limit,
       
  3454                                             const UnicodeString& srcText,
       
  3455                                             int32_t srcStart,
       
  3456                                             int32_t srcLimit) const
       
  3457 { return doCompareCodePointOrder(start, limit - start,
       
  3458            srcText, srcStart, srcLimit - srcStart); }
       
  3459 
       
  3460 inline int8_t
       
  3461 UnicodeString::doCaseCompare(int32_t start,
       
  3462                              int32_t length,
       
  3463                              const UnicodeString &srcText,
       
  3464                              int32_t srcStart,
       
  3465                              int32_t srcLength,
       
  3466                              uint32_t options) const
       
  3467 {
       
  3468   if(srcText.isBogus()) {
       
  3469     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
       
  3470   } else {
       
  3471     srcText.pinIndices(srcStart, srcLength);
       
  3472     return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
       
  3473   }
       
  3474 }
       
  3475 
       
  3476 inline int8_t
       
  3477 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
       
  3478   return doCaseCompare(0, fLength, text, 0, text.fLength, options);
       
  3479 }
       
  3480 
       
  3481 inline int8_t
       
  3482 UnicodeString::caseCompare(int32_t start,
       
  3483                            int32_t _length,
       
  3484                            const UnicodeString &srcText,
       
  3485                            uint32_t options) const {
       
  3486   return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
       
  3487 }
       
  3488 
       
  3489 inline int8_t
       
  3490 UnicodeString::caseCompare(const UChar *srcChars,
       
  3491                            int32_t srcLength,
       
  3492                            uint32_t options) const {
       
  3493   return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
       
  3494 }
       
  3495 
       
  3496 inline int8_t
       
  3497 UnicodeString::caseCompare(int32_t start,
       
  3498                            int32_t _length,
       
  3499                            const UnicodeString &srcText,
       
  3500                            int32_t srcStart,
       
  3501                            int32_t srcLength,
       
  3502                            uint32_t options) const {
       
  3503   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
       
  3504 }
       
  3505 
       
  3506 inline int8_t
       
  3507 UnicodeString::caseCompare(int32_t start,
       
  3508                            int32_t _length,
       
  3509                            const UChar *srcChars,
       
  3510                            uint32_t options) const {
       
  3511   return doCaseCompare(start, _length, srcChars, 0, _length, options);
       
  3512 }
       
  3513 
       
  3514 inline int8_t
       
  3515 UnicodeString::caseCompare(int32_t start,
       
  3516                            int32_t _length,
       
  3517                            const UChar *srcChars,
       
  3518                            int32_t srcStart,
       
  3519                            int32_t srcLength,
       
  3520                            uint32_t options) const {
       
  3521   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
       
  3522 }
       
  3523 
       
  3524 inline int8_t
       
  3525 UnicodeString::caseCompareBetween(int32_t start,
       
  3526                                   int32_t limit,
       
  3527                                   const UnicodeString &srcText,
       
  3528                                   int32_t srcStart,
       
  3529                                   int32_t srcLimit,
       
  3530                                   uint32_t options) const {
       
  3531   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
       
  3532 }
       
  3533 
       
  3534 inline int32_t
       
  3535 UnicodeString::indexOf(const UnicodeString& srcText,
       
  3536                int32_t srcStart,
       
  3537                int32_t srcLength,
       
  3538                int32_t start,
       
  3539                int32_t _length) const
       
  3540 {
       
  3541   if(!srcText.isBogus()) {
       
  3542     srcText.pinIndices(srcStart, srcLength);
       
  3543     if(srcLength > 0) {
       
  3544       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
       
  3545     }
       
  3546   }
       
  3547   return -1;
       
  3548 }
       
  3549 
       
  3550 inline int32_t
       
  3551 UnicodeString::indexOf(const UnicodeString& text) const
       
  3552 { return indexOf(text, 0, text.fLength, 0, fLength); }
       
  3553 
       
  3554 inline int32_t
       
  3555 UnicodeString::indexOf(const UnicodeString& text,
       
  3556                int32_t start) const {
       
  3557   pinIndex(start);
       
  3558   return indexOf(text, 0, text.fLength, start, fLength - start);
       
  3559 }
       
  3560 
       
  3561 inline int32_t
       
  3562 UnicodeString::indexOf(const UnicodeString& text,
       
  3563                int32_t start,
       
  3564                int32_t _length) const
       
  3565 { return indexOf(text, 0, text.fLength, start, _length); }
       
  3566 
       
  3567 inline int32_t
       
  3568 UnicodeString::indexOf(const UChar *srcChars,
       
  3569                int32_t srcLength,
       
  3570                int32_t start) const {
       
  3571   pinIndex(start);
       
  3572   return indexOf(srcChars, 0, srcLength, start, fLength - start);
       
  3573 }
       
  3574 
       
  3575 inline int32_t
       
  3576 UnicodeString::indexOf(const UChar *srcChars,
       
  3577                int32_t srcLength,
       
  3578                int32_t start,
       
  3579                int32_t _length) const
       
  3580 { return indexOf(srcChars, 0, srcLength, start, _length); }
       
  3581 
       
  3582 inline int32_t
       
  3583 UnicodeString::indexOf(UChar c,
       
  3584                int32_t start,
       
  3585                int32_t _length) const
       
  3586 { return doIndexOf(c, start, _length); }
       
  3587 
       
  3588 inline int32_t
       
  3589 UnicodeString::indexOf(UChar32 c,
       
  3590                int32_t start,
       
  3591                int32_t _length) const
       
  3592 { return doIndexOf(c, start, _length); }
       
  3593 
       
  3594 inline int32_t
       
  3595 UnicodeString::indexOf(UChar c) const
       
  3596 { return doIndexOf(c, 0, fLength); }
       
  3597 
       
  3598 inline int32_t
       
  3599 UnicodeString::indexOf(UChar32 c) const
       
  3600 { return indexOf(c, 0, fLength); }
       
  3601 
       
  3602 inline int32_t
       
  3603 UnicodeString::indexOf(UChar c,
       
  3604                int32_t start) const {
       
  3605   pinIndex(start);
       
  3606   return doIndexOf(c, start, fLength - start);
       
  3607 }
       
  3608 
       
  3609 inline int32_t
       
  3610 UnicodeString::indexOf(UChar32 c,
       
  3611                int32_t start) const {
       
  3612   pinIndex(start);
       
  3613   return indexOf(c, start, fLength - start);
       
  3614 }
       
  3615 
       
  3616 inline int32_t
       
  3617 UnicodeString::lastIndexOf(const UChar *srcChars,
       
  3618                int32_t srcLength,
       
  3619                int32_t start,
       
  3620                int32_t _length) const
       
  3621 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
       
  3622 
       
  3623 inline int32_t
       
  3624 UnicodeString::lastIndexOf(const UChar *srcChars,
       
  3625                int32_t srcLength,
       
  3626                int32_t start) const {
       
  3627   pinIndex(start);
       
  3628   return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
       
  3629 }
       
  3630 
       
  3631 inline int32_t
       
  3632 UnicodeString::lastIndexOf(const UnicodeString& srcText,
       
  3633                int32_t srcStart,
       
  3634                int32_t srcLength,
       
  3635                int32_t start,
       
  3636                int32_t _length) const
       
  3637 {
       
  3638   if(!srcText.isBogus()) {
       
  3639     srcText.pinIndices(srcStart, srcLength);
       
  3640     if(srcLength > 0) {
       
  3641       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
       
  3642     }
       
  3643   }
       
  3644   return -1;
       
  3645 }
       
  3646 
       
  3647 inline int32_t
       
  3648 UnicodeString::lastIndexOf(const UnicodeString& text,
       
  3649                int32_t start,
       
  3650                int32_t _length) const
       
  3651 { return lastIndexOf(text, 0, text.fLength, start, _length); }
       
  3652 
       
  3653 inline int32_t
       
  3654 UnicodeString::lastIndexOf(const UnicodeString& text,
       
  3655                int32_t start) const {
       
  3656   pinIndex(start);
       
  3657   return lastIndexOf(text, 0, text.fLength, start, fLength - start);
       
  3658 }
       
  3659 
       
  3660 inline int32_t
       
  3661 UnicodeString::lastIndexOf(const UnicodeString& text) const
       
  3662 { return lastIndexOf(text, 0, text.fLength, 0, fLength); }
       
  3663 
       
  3664 inline int32_t
       
  3665 UnicodeString::lastIndexOf(UChar c,
       
  3666                int32_t start,
       
  3667                int32_t _length) const
       
  3668 { return doLastIndexOf(c, start, _length); }
       
  3669 
       
  3670 inline int32_t
       
  3671 UnicodeString::lastIndexOf(UChar32 c,
       
  3672                int32_t start,
       
  3673                int32_t _length) const {
       
  3674   return doLastIndexOf(c, start, _length);
       
  3675 }
       
  3676 
       
  3677 inline int32_t
       
  3678 UnicodeString::lastIndexOf(UChar c) const
       
  3679 { return doLastIndexOf(c, 0, fLength); }
       
  3680 
       
  3681 inline int32_t
       
  3682 UnicodeString::lastIndexOf(UChar32 c) const {
       
  3683   return lastIndexOf(c, 0, fLength);
       
  3684 }
       
  3685 
       
  3686 inline int32_t
       
  3687 UnicodeString::lastIndexOf(UChar c,
       
  3688                int32_t start) const {
       
  3689   pinIndex(start);
       
  3690   return doLastIndexOf(c, start, fLength - start);
       
  3691 }
       
  3692 
       
  3693 inline int32_t
       
  3694 UnicodeString::lastIndexOf(UChar32 c,
       
  3695                int32_t start) const {
       
  3696   pinIndex(start);
       
  3697   return lastIndexOf(c, start, fLength - start);
       
  3698 }
       
  3699 
       
  3700 inline UBool
       
  3701 UnicodeString::startsWith(const UnicodeString& text) const
       
  3702 { return compare(0, text.fLength, text, 0, text.fLength) == 0; }
       
  3703 
       
  3704 inline UBool
       
  3705 UnicodeString::startsWith(const UnicodeString& srcText,
       
  3706               int32_t srcStart,
       
  3707               int32_t srcLength) const
       
  3708 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
       
  3709 
       
  3710 inline UBool
       
  3711 UnicodeString::startsWith(const UChar *srcChars,
       
  3712               int32_t srcLength) const
       
  3713 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
       
  3714 
       
  3715 inline UBool
       
  3716 UnicodeString::startsWith(const UChar *srcChars,
       
  3717               int32_t srcStart,
       
  3718               int32_t srcLength) const
       
  3719 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
       
  3720 
       
  3721 inline UBool
       
  3722 UnicodeString::endsWith(const UnicodeString& text) const
       
  3723 { return doCompare(fLength - text.fLength, text.fLength,
       
  3724            text, 0, text.fLength) == 0; }
       
  3725 
       
  3726 inline UBool
       
  3727 UnicodeString::endsWith(const UnicodeString& srcText,
       
  3728             int32_t srcStart,
       
  3729             int32_t srcLength) const {
       
  3730   srcText.pinIndices(srcStart, srcLength);
       
  3731   return doCompare(fLength - srcLength, srcLength,
       
  3732                    srcText, srcStart, srcLength) == 0;
       
  3733 }
       
  3734 
       
  3735 inline UBool
       
  3736 UnicodeString::endsWith(const UChar *srcChars,
       
  3737             int32_t srcLength) const {
       
  3738   if(srcLength < 0) {
       
  3739     srcLength = u_strlen(srcChars);
       
  3740   }
       
  3741   return doCompare(fLength - srcLength, srcLength,
       
  3742                    srcChars, 0, srcLength) == 0;
       
  3743 }
       
  3744 
       
  3745 inline UBool
       
  3746 UnicodeString::endsWith(const UChar *srcChars,
       
  3747             int32_t srcStart,
       
  3748             int32_t srcLength) const {
       
  3749   if(srcLength < 0) {
       
  3750     srcLength = u_strlen(srcChars + srcStart);
       
  3751   }
       
  3752   return doCompare(fLength - srcLength, srcLength,
       
  3753                    srcChars, srcStart, srcLength) == 0;
       
  3754 }
       
  3755 
       
  3756 //========================================
       
  3757 // replace
       
  3758 //========================================
       
  3759 inline UnicodeString&
       
  3760 UnicodeString::replace(int32_t start,
       
  3761                int32_t _length,
       
  3762                const UnicodeString& srcText)
       
  3763 { return doReplace(start, _length, srcText, 0, srcText.fLength); }
       
  3764 
       
  3765 inline UnicodeString&
       
  3766 UnicodeString::replace(int32_t start,
       
  3767                int32_t _length,
       
  3768                const UnicodeString& srcText,
       
  3769                int32_t srcStart,
       
  3770                int32_t srcLength)
       
  3771 { return doReplace(start, _length, srcText, srcStart, srcLength); }
       
  3772 
       
  3773 inline UnicodeString&
       
  3774 UnicodeString::replace(int32_t start,
       
  3775                int32_t _length,
       
  3776                const UChar *srcChars,
       
  3777                int32_t srcLength)
       
  3778 { return doReplace(start, _length, srcChars, 0, srcLength); }
       
  3779 
       
  3780 inline UnicodeString&
       
  3781 UnicodeString::replace(int32_t start,
       
  3782                int32_t _length,
       
  3783                const UChar *srcChars,
       
  3784                int32_t srcStart,
       
  3785                int32_t srcLength)
       
  3786 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
       
  3787 
       
  3788 inline UnicodeString&
       
  3789 UnicodeString::replace(int32_t start,
       
  3790                int32_t _length,
       
  3791                UChar srcChar)
       
  3792 { return doReplace(start, _length, &srcChar, 0, 1); }
       
  3793 
       
  3794 inline UnicodeString&
       
  3795 UnicodeString::replace(int32_t start,
       
  3796                int32_t _length,
       
  3797                UChar32 srcChar) {
       
  3798   UChar buffer[U16_MAX_LENGTH];
       
  3799   int32_t count = 0;
       
  3800   UBool isError = FALSE;
       
  3801   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
       
  3802   return doReplace(start, _length, buffer, 0, count);
       
  3803 }
       
  3804 
       
  3805 inline UnicodeString&
       
  3806 UnicodeString::replaceBetween(int32_t start,
       
  3807                   int32_t limit,
       
  3808                   const UnicodeString& srcText)
       
  3809 { return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
       
  3810 
       
  3811 inline UnicodeString&
       
  3812 UnicodeString::replaceBetween(int32_t start,
       
  3813                   int32_t limit,
       
  3814                   const UnicodeString& srcText,
       
  3815                   int32_t srcStart,
       
  3816                   int32_t srcLimit)
       
  3817 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
       
  3818 
       
  3819 inline UnicodeString&
       
  3820 UnicodeString::findAndReplace(const UnicodeString& oldText,
       
  3821                   const UnicodeString& newText)
       
  3822 { return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
       
  3823             newText, 0, newText.fLength); }
       
  3824 
       
  3825 inline UnicodeString&
       
  3826 UnicodeString::findAndReplace(int32_t start,
       
  3827                   int32_t _length,
       
  3828                   const UnicodeString& oldText,
       
  3829                   const UnicodeString& newText)
       
  3830 { return findAndReplace(start, _length, oldText, 0, oldText.fLength,
       
  3831             newText, 0, newText.fLength); }
       
  3832 
       
  3833 // ============================
       
  3834 // extract
       
  3835 // ============================
       
  3836 inline void
       
  3837 UnicodeString::doExtract(int32_t start,
       
  3838              int32_t _length,
       
  3839              UnicodeString& target) const
       
  3840 { target.replace(0, target.fLength, *this, start, _length); }
       
  3841 
       
  3842 inline void
       
  3843 UnicodeString::extract(int32_t start,
       
  3844                int32_t _length,
       
  3845                UChar *target,
       
  3846                int32_t targetStart) const
       
  3847 { doExtract(start, _length, target, targetStart); }
       
  3848 
       
  3849 inline void
       
  3850 UnicodeString::extract(int32_t start,
       
  3851                int32_t _length,
       
  3852                UnicodeString& target) const
       
  3853 { doExtract(start, _length, target); }
       
  3854 
       
  3855 #if !UCONFIG_NO_CONVERSION
       
  3856 
       
  3857 inline int32_t
       
  3858 UnicodeString::extract(int32_t start,
       
  3859                int32_t _length,
       
  3860                char *dst,
       
  3861                const char *codepage) const
       
  3862 
       
  3863 {
       
  3864   // This dstSize value will be checked explicitly
       
  3865   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
       
  3866 }
       
  3867 
       
  3868 #endif
       
  3869 
       
  3870 inline void
       
  3871 UnicodeString::extractBetween(int32_t start,
       
  3872                   int32_t limit,
       
  3873                   UChar *dst,
       
  3874                   int32_t dstStart) const {
       
  3875   pinIndex(start);
       
  3876   pinIndex(limit);
       
  3877   doExtract(start, limit - start, dst, dstStart);
       
  3878 }
       
  3879 
       
  3880 inline UChar
       
  3881 UnicodeString::doCharAt(int32_t offset) const
       
  3882 {
       
  3883   if((uint32_t)offset < (uint32_t)fLength) {
       
  3884     return fArray[offset];
       
  3885   } else {
       
  3886     return kInvalidUChar;
       
  3887   }
       
  3888 }
       
  3889 
       
  3890 inline UChar
       
  3891 UnicodeString::charAt(int32_t offset) const
       
  3892 { return doCharAt(offset); }
       
  3893 
       
  3894 inline UChar
       
  3895 UnicodeString::operator[] (int32_t offset) const
       
  3896 { return doCharAt(offset); }
       
  3897 
       
  3898 inline UChar32
       
  3899 UnicodeString::char32At(int32_t offset) const
       
  3900 {
       
  3901   if((uint32_t)offset < (uint32_t)fLength) {
       
  3902     UChar32 c;
       
  3903     U16_GET(fArray, 0, offset, fLength, c);
       
  3904     return c;
       
  3905   } else {
       
  3906     return kInvalidUChar;
       
  3907   }
       
  3908 }
       
  3909 
       
  3910 inline int32_t
       
  3911 UnicodeString::getChar32Start(int32_t offset) const {
       
  3912   if((uint32_t)offset < (uint32_t)fLength) {
       
  3913     U16_SET_CP_START(fArray, 0, offset);
       
  3914     return offset;
       
  3915   } else {
       
  3916     return 0;
       
  3917   }
       
  3918 }
       
  3919 
       
  3920 inline int32_t
       
  3921 UnicodeString::getChar32Limit(int32_t offset) const {
       
  3922   if((uint32_t)offset < (uint32_t)fLength) {
       
  3923     U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
       
  3924     return offset;
       
  3925   } else {
       
  3926     return fLength;
       
  3927   }
       
  3928 }
       
  3929 
       
  3930 inline UBool
       
  3931 UnicodeString::isEmpty() const {
       
  3932   return fLength == 0;
       
  3933 }
       
  3934 
       
  3935 //========================================
       
  3936 // Write implementation methods
       
  3937 //========================================
       
  3938 inline const UChar *
       
  3939 UnicodeString::getTerminatedBuffer() {
       
  3940   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
       
  3941     return 0;
       
  3942   } else if(fLength<fCapacity && fArray[fLength]==0) {
       
  3943     return fArray;
       
  3944   } else if(cloneArrayIfNeeded(fLength+1)) {
       
  3945     fArray[fLength]=0;
       
  3946     return fArray;
       
  3947   } else {
       
  3948     return 0;
       
  3949   }
       
  3950 }
       
  3951 
       
  3952 inline UnicodeString&
       
  3953 UnicodeString::operator= (UChar ch)
       
  3954 { return doReplace(0, fLength, &ch, 0, 1); }
       
  3955 
       
  3956 inline UnicodeString&
       
  3957 UnicodeString::operator= (UChar32 ch)
       
  3958 { return replace(0, fLength, ch); }
       
  3959 
       
  3960 inline UnicodeString&
       
  3961 UnicodeString::setTo(const UnicodeString& srcText,
       
  3962              int32_t srcStart,
       
  3963              int32_t srcLength)
       
  3964 {
       
  3965   unBogus();
       
  3966   return doReplace(0, fLength, srcText, srcStart, srcLength);
       
  3967 }
       
  3968 
       
  3969 inline UnicodeString&
       
  3970 UnicodeString::setTo(const UnicodeString& srcText,
       
  3971              int32_t srcStart)
       
  3972 {
       
  3973   unBogus();
       
  3974   srcText.pinIndex(srcStart);
       
  3975   return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
       
  3976 }
       
  3977 
       
  3978 inline UnicodeString&
       
  3979 UnicodeString::setTo(const UnicodeString& srcText)
       
  3980 {
       
  3981   unBogus();
       
  3982   return doReplace(0, fLength, srcText, 0, srcText.fLength);
       
  3983 }
       
  3984 
       
  3985 inline UnicodeString&
       
  3986 UnicodeString::setTo(const UChar *srcChars,
       
  3987              int32_t srcLength)
       
  3988 {
       
  3989   unBogus();
       
  3990   return doReplace(0, fLength, srcChars, 0, srcLength);
       
  3991 }
       
  3992 
       
  3993 inline UnicodeString&
       
  3994 UnicodeString::setTo(UChar srcChar)
       
  3995 {
       
  3996   unBogus();
       
  3997   return doReplace(0, fLength, &srcChar, 0, 1);
       
  3998 }
       
  3999 
       
  4000 inline UnicodeString&
       
  4001 UnicodeString::setTo(UChar32 srcChar)
       
  4002 {
       
  4003   unBogus();
       
  4004   return replace(0, fLength, srcChar);
       
  4005 }
       
  4006 
       
  4007 inline UnicodeString&
       
  4008 UnicodeString::operator+= (UChar ch)
       
  4009 { return doReplace(fLength, 0, &ch, 0, 1); }
       
  4010 
       
  4011 inline UnicodeString&
       
  4012 UnicodeString::operator+= (UChar32 ch) {
       
  4013   UChar buffer[U16_MAX_LENGTH];
       
  4014   int32_t _length = 0;
       
  4015   UBool isError = FALSE;
       
  4016   U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
       
  4017   return doReplace(fLength, 0, buffer, 0, _length);
       
  4018 }
       
  4019 
       
  4020 inline UnicodeString&
       
  4021 UnicodeString::operator+= (const UnicodeString& srcText)
       
  4022 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
       
  4023 
       
  4024 inline UnicodeString&
       
  4025 UnicodeString::append(const UnicodeString& srcText,
       
  4026               int32_t srcStart,
       
  4027               int32_t srcLength)
       
  4028 { return doReplace(fLength, 0, srcText, srcStart, srcLength); }
       
  4029 
       
  4030 inline UnicodeString&
       
  4031 UnicodeString::append(const UnicodeString& srcText)
       
  4032 { return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
       
  4033 
       
  4034 inline UnicodeString&
       
  4035 UnicodeString::append(const UChar *srcChars,
       
  4036               int32_t srcStart,
       
  4037               int32_t srcLength)
       
  4038 { return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
       
  4039 
       
  4040 inline UnicodeString&
       
  4041 UnicodeString::append(const UChar *srcChars,
       
  4042               int32_t srcLength)
       
  4043 { return doReplace(fLength, 0, srcChars, 0, srcLength); }
       
  4044 
       
  4045 inline UnicodeString&
       
  4046 UnicodeString::append(UChar srcChar)
       
  4047 { return doReplace(fLength, 0, &srcChar, 0, 1); }
       
  4048 
       
  4049 inline UnicodeString&
       
  4050 UnicodeString::append(UChar32 srcChar) {
       
  4051   UChar buffer[U16_MAX_LENGTH];
       
  4052   int32_t _length = 0;
       
  4053   UBool isError = FALSE;
       
  4054   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
       
  4055   return doReplace(fLength, 0, buffer, 0, _length);
       
  4056 }
       
  4057 
       
  4058 inline UnicodeString&
       
  4059 UnicodeString::insert(int32_t start,
       
  4060               const UnicodeString& srcText,
       
  4061               int32_t srcStart,
       
  4062               int32_t srcLength)
       
  4063 { return doReplace(start, 0, srcText, srcStart, srcLength); }
       
  4064 
       
  4065 inline UnicodeString&
       
  4066 UnicodeString::insert(int32_t start,
       
  4067               const UnicodeString& srcText)
       
  4068 { return doReplace(start, 0, srcText, 0, srcText.fLength); }
       
  4069 
       
  4070 inline UnicodeString&
       
  4071 UnicodeString::insert(int32_t start,
       
  4072               const UChar *srcChars,
       
  4073               int32_t srcStart,
       
  4074               int32_t srcLength)
       
  4075 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
       
  4076 
       
  4077 inline UnicodeString&
       
  4078 UnicodeString::insert(int32_t start,
       
  4079               const UChar *srcChars,
       
  4080               int32_t srcLength)
       
  4081 { return doReplace(start, 0, srcChars, 0, srcLength); }
       
  4082 
       
  4083 inline UnicodeString&
       
  4084 UnicodeString::insert(int32_t start,
       
  4085               UChar srcChar)
       
  4086 { return doReplace(start, 0, &srcChar, 0, 1); }
       
  4087 
       
  4088 inline UnicodeString&
       
  4089 UnicodeString::insert(int32_t start,
       
  4090               UChar32 srcChar)
       
  4091 { return replace(start, 0, srcChar); }
       
  4092 
       
  4093 
       
  4094 inline UnicodeString&
       
  4095 UnicodeString::remove()
       
  4096 {
       
  4097   // remove() of a bogus string makes the string empty and non-bogus
       
  4098   if(isBogus()) {
       
  4099     unBogus();
       
  4100   } else {
       
  4101     fLength = 0;
       
  4102   }
       
  4103   return *this;
       
  4104 }
       
  4105 
       
  4106 inline UnicodeString&
       
  4107 UnicodeString::remove(int32_t start,
       
  4108              int32_t _length)
       
  4109 {
       
  4110   if(start <= 0 && _length == INT32_MAX) {
       
  4111     // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
       
  4112     return remove();
       
  4113   } else {
       
  4114     return doReplace(start, _length, NULL, 0, 0);
       
  4115   }
       
  4116 }
       
  4117 
       
  4118 inline UnicodeString&
       
  4119 UnicodeString::removeBetween(int32_t start,
       
  4120                 int32_t limit)
       
  4121 { return doReplace(start, limit - start, NULL, 0, 0); }
       
  4122 
       
  4123 inline UBool
       
  4124 UnicodeString::truncate(int32_t targetLength)
       
  4125 {
       
  4126   if(isBogus() && targetLength == 0) {
       
  4127     // truncate(0) of a bogus string makes the string empty and non-bogus
       
  4128     unBogus();
       
  4129     return FALSE;
       
  4130   } else if((uint32_t)targetLength < (uint32_t)fLength) {
       
  4131     fLength = targetLength;
       
  4132     return TRUE;
       
  4133   } else {
       
  4134     return FALSE;
       
  4135   }
       
  4136 }
       
  4137 
       
  4138 inline UnicodeString&
       
  4139 UnicodeString::reverse()
       
  4140 { return doReverse(0, fLength); }
       
  4141 
       
  4142 inline UnicodeString&
       
  4143 UnicodeString::reverse(int32_t start,
       
  4144                int32_t _length)
       
  4145 { return doReverse(start, _length); }
       
  4146 
       
  4147 U_NAMESPACE_END
       
  4148 
       
  4149 #endif