webengine/osswebengine/WebKit/icu/unicode/ustring.h
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 **********************************************************************
       
     3 *   Copyright (C) 1998-2004, International Business Machines
       
     4 *   Corporation and others.  All Rights Reserved.
       
     5 **********************************************************************
       
     6 *
       
     7 * File ustring.h
       
     8 *
       
     9 * Modification History:
       
    10 *
       
    11 *   Date        Name        Description
       
    12 *   12/07/98    bertrand    Creation.
       
    13 ******************************************************************************
       
    14 */
       
    15 
       
    16 #ifndef USTRING_H
       
    17 #define USTRING_H
       
    18 
       
    19 #include "unicode/utypes.h"
       
    20 #include "unicode/putil.h"
       
    21 #include "unicode/uiter.h"
       
    22 
       
    23 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
       
    24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
       
    25 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
       
    26     typedef void UBreakIterator;
       
    27 #endif
       
    28 
       
    29 /**
       
    30  * \file
       
    31  * \brief C API: Unicode string handling functions
       
    32  *
       
    33  * These C API functions provide general Unicode string handling.
       
    34  *
       
    35  * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
       
    36  * functions. (For example, they do not check for bad arguments like NULL string pointers.)
       
    37  * In some cases, only the thread-safe variant of such a function is implemented here
       
    38  * (see u_strtok_r()).
       
    39  *
       
    40  * Other functions provide more Unicode-specific functionality like locale-specific
       
    41  * upper/lower-casing and string comparison in code point order.
       
    42  *
       
    43  * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
       
    44  * UTF-16 encodes each Unicode code point with either one or two UChar code units.
       
    45  * (This is the default form of Unicode, and a forward-compatible extension of the original,
       
    46  * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
       
    47  * in 1996.)
       
    48  *
       
    49  * Some APIs accept a 32-bit UChar32 value for a single code point.
       
    50  *
       
    51  * ICU also handles 16-bit Unicode text with unpaired surrogates.
       
    52  * Such text is not well-formed UTF-16.
       
    53  * Code-point-related functions treat unpaired surrogates as surrogate code points,
       
    54  * i.e., as separate units.
       
    55  *
       
    56  * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
       
    57  * it is much more efficient even for random access because the code unit values
       
    58  * for single-unit characters vs. lead units vs. trail units are completely disjoint.
       
    59  * This means that it is easy to determine character (code point) boundaries from
       
    60  * random offsets in the string.
       
    61  *
       
    62  * Unicode (UTF-16) string processing is optimized for the single-unit case.
       
    63  * Although it is important to support supplementary characters
       
    64  * (which use pairs of lead/trail code units called "surrogates"),
       
    65  * their occurrence is rare. Almost all characters in modern use require only
       
    66  * a single UChar code unit (i.e., their code point values are <=0xffff).
       
    67  *
       
    68  * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html).
       
    69  * For a discussion of the handling of unpaired surrogates see also
       
    70  * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
       
    71  */
       
    72 
       
    73 /**
       
    74  * Determine the length of an array of UChar.
       
    75  *
       
     76  * @param s The array of UChars, NUL (U+0000) terminated.
       
     77  * @return The number of UChars in <code>s</code>, minus the terminator.
       
    78  * @stable ICU 2.0
       
    79  */
       
    80 U_STABLE int32_t U_EXPORT2
       
    81 u_strlen(const UChar *s);
       
    82 
       
    83 /**
       
    84  * Count Unicode code points in the length UChar code units of the string.
       
    85  * A code point may occupy either one or two UChar code units.
       
    86  * Counting code points involves reading all code units.
       
    87  *
       
     88  * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
       
    89  *
       
    90  * @param s The input string.
       
    91  * @param length The number of UChar code units to be checked, or -1 to count all
       
    92  *               code points before the first NUL (U+0000).
       
    93  * @return The number of code points in the specified code units.
       
    94  * @stable ICU 2.0
       
    95  */
       
    96 U_STABLE int32_t U_EXPORT2
       
    97 u_countChar32(const UChar *s, int32_t length);
       
    98 
       
    99 /**
       
   100  * Check if the string contains more Unicode code points than a certain number.
       
   101  * This is more efficient than counting all code points in the entire string
       
   102  * and comparing that number with a threshold.
       
   103  * This function may not need to scan the string at all if the length is known
       
   104  * (not -1 for NUL-termination) and falls within a certain range, and
       
   105  * never needs to count more than 'number+1' code points.
       
   106  * Logically equivalent to (u_countChar32(s, length)>number).
       
   107  * A Unicode code point may occupy either one or two UChar code units.
       
   108  *
       
   109  * @param s The input string.
       
   110  * @param length The length of the string, or -1 if it is NUL-terminated.
       
   111  * @param number The number of code points in the string is compared against
       
   112  *               the 'number' parameter.
       
   113  * @return Boolean value for whether the string contains more Unicode code points
       
   114  *         than 'number'. Same as (u_countChar32(s, length)>number).
       
   115  * @stable ICU 2.4
       
   116  */
       
   117 U_STABLE UBool U_EXPORT2
       
   118 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
       
   119 
       
   120 /**
       
   121  * Concatenate two ustrings.  Appends a copy of <code>src</code>,
       
   122  * including the null terminator, to <code>dst</code>. The initial copied
       
   123  * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
       
   124  *
       
   125  * @param dst The destination string.
       
   126  * @param src The source string.
       
   127  * @return A pointer to <code>dst</code>.
       
   128  * @stable ICU 2.0
       
   129  */
       
   130 U_STABLE UChar* U_EXPORT2
       
   131 u_strcat(UChar     *dst, 
       
   132     const UChar     *src);
       
   133 
       
   134 /**
       
   135  * Concatenate two ustrings.  
       
   136  * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
       
   137  * Adds a terminating NUL.
       
   138  * If src is too long, then only <code>n-1</code> characters will be copied
       
   139  * before the terminating NUL.
       
   140  * If <code>n&lt;=0</code> then dst is not modified.
       
   141  *
       
   142  * @param dst The destination string.
       
   143  * @param src The source string.
       
    144  * @param n The maximum number of characters to append.
       
   145  * @return A pointer to <code>dst</code>.
       
   146  * @stable ICU 2.0
       
   147  */
       
   148 U_STABLE UChar* U_EXPORT2
       
   149 u_strncat(UChar     *dst, 
       
   150      const UChar     *src, 
       
   151      int32_t     n);
       
   152 
       
   153 /**
       
   154  * Find the first occurrence of a substring in a string.
       
   155  * The substring is found at code point boundaries.
       
   156  * That means that if the substring begins with
       
   157  * a trail surrogate or ends with a lead surrogate,
       
   158  * then it is found only if these surrogates stand alone in the text.
       
   159  * Otherwise, the substring edge units would be matched against
       
   160  * halves of surrogate pairs.
       
   161  *
       
   162  * @param s The string to search (NUL-terminated).
       
   163  * @param substring The substring to find (NUL-terminated).
       
   164  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
       
   165  *         or <code>s</code> itself if the <code>substring</code> is empty,
       
   166  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
       
   167  * @stable ICU 2.0
       
   168  *
       
   169  * @see u_strrstr
       
   170  * @see u_strFindFirst
       
   171  * @see u_strFindLast
       
   172  */
       
   173 U_STABLE UChar * U_EXPORT2
       
   174 u_strstr(const UChar *s, const UChar *substring);
       
   175 
       
   176 /**
       
   177  * Find the first occurrence of a substring in a string.
       
   178  * The substring is found at code point boundaries.
       
   179  * That means that if the substring begins with
       
   180  * a trail surrogate or ends with a lead surrogate,
       
   181  * then it is found only if these surrogates stand alone in the text.
       
   182  * Otherwise, the substring edge units would be matched against
       
   183  * halves of surrogate pairs.
       
   184  *
       
   185  * @param s The string to search.
       
   186  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
       
   187  * @param substring The substring to find (NUL-terminated).
       
   188  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
       
   189  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
       
   190  *         or <code>s</code> itself if the <code>substring</code> is empty,
       
   191  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
       
   192  * @stable ICU 2.4
       
   193  *
       
   194  * @see u_strstr
       
   195  * @see u_strFindLast
       
   196  */
       
   197 U_STABLE UChar * U_EXPORT2
       
   198 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
       
   199 
       
   200 /**
       
   201  * Find the first occurrence of a BMP code point in a string.
       
   202  * A surrogate code point is found only if its match in the text is not
       
   203  * part of a surrogate pair.
       
   204  * A NUL character is found at the string terminator.
       
   205  *
       
   206  * @param s The string to search (NUL-terminated).
       
   207  * @param c The BMP code point to find.
       
   208  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
       
   209  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   210  * @stable ICU 2.0
       
   211  *
       
   212  * @see u_strchr32
       
   213  * @see u_memchr
       
   214  * @see u_strstr
       
   215  * @see u_strFindFirst
       
   216  */
       
   217 U_STABLE UChar * U_EXPORT2
       
   218 u_strchr(const UChar *s, UChar c);
       
   219 
       
   220 /**
       
   221  * Find the first occurrence of a code point in a string.
       
   222  * A surrogate code point is found only if its match in the text is not
       
   223  * part of a surrogate pair.
       
   224  * A NUL character is found at the string terminator.
       
   225  *
       
   226  * @param s The string to search (NUL-terminated).
       
   227  * @param c The code point to find.
       
   228  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
       
   229  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   230  * @stable ICU 2.0
       
   231  *
       
   232  * @see u_strchr
       
   233  * @see u_memchr32
       
   234  * @see u_strstr
       
   235  * @see u_strFindFirst
       
   236  */
       
   237 U_STABLE UChar * U_EXPORT2
       
   238 u_strchr32(const UChar *s, UChar32 c);
       
   239 
       
   240 /**
       
   241  * Find the last occurrence of a substring in a string.
       
   242  * The substring is found at code point boundaries.
       
   243  * That means that if the substring begins with
       
   244  * a trail surrogate or ends with a lead surrogate,
       
   245  * then it is found only if these surrogates stand alone in the text.
       
   246  * Otherwise, the substring edge units would be matched against
       
   247  * halves of surrogate pairs.
       
   248  *
       
   249  * @param s The string to search (NUL-terminated).
       
   250  * @param substring The substring to find (NUL-terminated).
       
   251  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
       
   252  *         or <code>s</code> itself if the <code>substring</code> is empty,
       
   253  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
       
   254  * @stable ICU 2.4
       
   255  *
       
   256  * @see u_strstr
       
   257  * @see u_strFindFirst
       
   258  * @see u_strFindLast
       
   259  */
       
   260 U_STABLE UChar * U_EXPORT2
       
   261 u_strrstr(const UChar *s, const UChar *substring);
       
   262 
       
   263 /**
       
   264  * Find the last occurrence of a substring in a string.
       
   265  * The substring is found at code point boundaries.
       
   266  * That means that if the substring begins with
       
   267  * a trail surrogate or ends with a lead surrogate,
       
   268  * then it is found only if these surrogates stand alone in the text.
       
   269  * Otherwise, the substring edge units would be matched against
       
   270  * halves of surrogate pairs.
       
   271  *
       
   272  * @param s The string to search.
       
   273  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
       
   274  * @param substring The substring to find (NUL-terminated).
       
   275  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
       
   276  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
       
   277  *         or <code>s</code> itself if the <code>substring</code> is empty,
       
   278  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
       
   279  * @stable ICU 2.4
       
   280  *
       
   281  * @see u_strstr
       
    282  * @see u_strFindFirst
       
   283  */
       
   284 U_STABLE UChar * U_EXPORT2
       
   285 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
       
   286 
       
   287 /**
       
   288  * Find the last occurrence of a BMP code point in a string.
       
   289  * A surrogate code point is found only if its match in the text is not
       
   290  * part of a surrogate pair.
       
   291  * A NUL character is found at the string terminator.
       
   292  *
       
   293  * @param s The string to search (NUL-terminated).
       
   294  * @param c The BMP code point to find.
       
   295  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
       
   296  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   297  * @stable ICU 2.4
       
   298  *
       
   299  * @see u_strrchr32
       
   300  * @see u_memrchr
       
   301  * @see u_strrstr
       
   302  * @see u_strFindLast
       
   303  */
       
   304 U_STABLE UChar * U_EXPORT2
       
   305 u_strrchr(const UChar *s, UChar c);
       
   306 
       
   307 /**
       
   308  * Find the last occurrence of a code point in a string.
       
   309  * A surrogate code point is found only if its match in the text is not
       
   310  * part of a surrogate pair.
       
   311  * A NUL character is found at the string terminator.
       
   312  *
       
   313  * @param s The string to search (NUL-terminated).
       
   314  * @param c The code point to find.
       
   315  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
       
   316  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   317  * @stable ICU 2.4
       
   318  *
       
   319  * @see u_strrchr
       
    320  * @see u_memrchr32
       
   321  * @see u_strrstr
       
   322  * @see u_strFindLast
       
   323  */
       
   324 U_STABLE UChar * U_EXPORT2
       
   325 u_strrchr32(const UChar *s, UChar32 c);
       
   326 
       
   327 /**
       
   328  * Locates the first occurrence in the string <code>string</code> of any of the characters
       
   329  * in the string <code>matchSet</code>.
       
   330  * Works just like C's strpbrk but with Unicode.
       
   331  *
       
   332  * @param string The string in which to search, NUL-terminated.
       
   333  * @param matchSet A NUL-terminated string defining a set of code points
       
   334  *                 for which to search in the text string.
       
    335  * @return A pointer to the character in <code>string</code> that matches one of the
       
   336  *         characters in <code>matchSet</code>, or NULL if no such character is found.
       
   337  * @stable ICU 2.0
       
   338  */
       
   339 U_STABLE UChar * U_EXPORT2
       
   340 u_strpbrk(const UChar *string, const UChar *matchSet);
       
   341 
       
   342 /**
       
   343  * Returns the number of consecutive characters in <code>string</code>,
       
   344  * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
       
   345  * Works just like C's strcspn but with Unicode.
       
   346  *
       
   347  * @param string The string in which to search, NUL-terminated.
       
   348  * @param matchSet A NUL-terminated string defining a set of code points
       
   349  *                 for which to search in the text string.
       
   350  * @return The number of initial characters in <code>string</code> that do not
       
   351  *         occur in <code>matchSet</code>.
       
   352  * @see u_strspn
       
   353  * @stable ICU 2.0
       
   354  */
       
   355 U_STABLE int32_t U_EXPORT2
       
   356 u_strcspn(const UChar *string, const UChar *matchSet);
       
   357 
       
   358 /**
       
   359  * Returns the number of consecutive characters in <code>string</code>,
       
   360  * beginning with the first, that occur somewhere in <code>matchSet</code>.
       
   361  * Works just like C's strspn but with Unicode.
       
   362  *
       
   363  * @param string The string in which to search, NUL-terminated.
       
   364  * @param matchSet A NUL-terminated string defining a set of code points
       
   365  *                 for which to search in the text string.
       
   366  * @return The number of initial characters in <code>string</code> that do
       
   367  *         occur in <code>matchSet</code>.
       
   368  * @see u_strcspn
       
   369  * @stable ICU 2.0
       
   370  */
       
   371 U_STABLE int32_t U_EXPORT2
       
   372 u_strspn(const UChar *string, const UChar *matchSet);
       
   373 
       
   374 /**
       
   375  * The string tokenizer API allows an application to break a string into
       
   376  * tokens. Unlike strtok(), the saveState (the current pointer within the
       
   377  * original string) is maintained in saveState. In the first call, the
       
   378  * argument src is a pointer to the string. In subsequent calls to
       
   379  * return successive tokens of that string, src must be specified as
       
   380  * NULL. The value saveState is set by this function to maintain the
       
   381  * function's position within the string, and on each subsequent call
       
   382  * you must give this argument the same variable. This function does
       
   383  * handle surrogate pairs. This function is similar to the strtok_r()
       
    384  * of the POSIX Threads Extension (1003.1c-1995).
       
   385  *
       
   386  * @param src String containing token(s). This string will be modified.
       
   387  *            After the first call to u_strtok_r(), this argument must
       
   388  *            be NULL to get to the next token.
       
   389  * @param delim Set of delimiter characters (Unicode code points).
       
   390  * @param saveState The current pointer within the original string,
       
   391  *              which is set by this function. The saveState
       
    392  *              parameter should be the address of a local variable of type
       
    393  *              UChar *. (i.e. define "UChar *myLocalSaveState" and use
       
   394  *              &myLocalSaveState for this parameter).
       
   395  * @return A pointer to the next token found in src, or NULL
       
   396  *         when there are no more tokens.
       
   397  * @stable ICU 2.0
       
   398  */
       
   399 U_STABLE UChar * U_EXPORT2
       
   400 u_strtok_r(UChar    *src, 
       
   401      const UChar    *delim,
       
   402            UChar   **saveState);
       
   403 
       
   404 /**
       
   405  * Compare two Unicode strings for bitwise equality (code unit order).
       
   406  *
       
   407  * @param s1 A string to compare.
       
   408  * @param s2 A string to compare.
       
   409  * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
       
    410  * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
       
   411  * value if <code>s1</code> is bitwise greater than <code>s2</code>.
       
   412  * @stable ICU 2.0
       
   413  */
       
   414 U_STABLE int32_t  U_EXPORT2
       
   415 u_strcmp(const UChar     *s1, 
       
   416          const UChar     *s2);
       
   417 
       
   418 /**
       
   419  * Compare two Unicode strings in code point order.
       
   420  * See u_strCompare for details.
       
   421  *
       
   422  * @param s1 A string to compare.
       
   423  * @param s2 A string to compare.
       
   424  * @return a negative/zero/positive integer corresponding to whether
       
   425  * the first string is less than/equal to/greater than the second one
       
   426  * in code point order
       
   427  * @stable ICU 2.0
       
   428  */
       
   429 U_STABLE int32_t U_EXPORT2
       
   430 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
       
   431 
       
   432 /**
       
   433  * Compare two Unicode strings (binary order).
       
   434  *
       
   435  * The comparison can be done in code unit order or in code point order.
       
   436  * They differ only in UTF-16 when
       
   437  * comparing supplementary code points (U+10000..U+10ffff)
       
   438  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
       
   439  * In code unit order, high BMP code points sort after supplementary code points
       
   440  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
       
   441  *
       
    442  * This function works with strings of different explicitly specified lengths
       
   443  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
       
   444  * NUL-terminated strings are possible with length arguments of -1.
       
   445  *
       
   446  * @param s1 First source string.
       
   447  * @param length1 Length of first source string, or -1 if NUL-terminated.
       
   448  *
       
   449  * @param s2 Second source string.
       
   450  * @param length2 Length of second source string, or -1 if NUL-terminated.
       
   451  *
       
   452  * @param codePointOrder Choose between code unit order (FALSE)
       
   453  *                       and code point order (TRUE).
       
   454  *
       
   455  * @return <0 or 0 or >0 as usual for string comparisons
       
   456  *
       
   457  * @stable ICU 2.2
       
   458  */
       
   459 U_STABLE int32_t U_EXPORT2
       
   460 u_strCompare(const UChar *s1, int32_t length1,
       
   461              const UChar *s2, int32_t length2,
       
   462              UBool codePointOrder);
       
   463 
       
   464 /**
       
   465  * Compare two Unicode strings (binary order)
       
   466  * as presented by UCharIterator objects.
       
   467  * Works otherwise just like u_strCompare().
       
   468  *
       
   469  * Both iterators are reset to their start positions.
       
   470  * When the function returns, it is undefined where the iterators
       
   471  * have stopped.
       
   472  *
       
   473  * @param iter1 First source string iterator.
       
   474  * @param iter2 Second source string iterator.
       
   475  * @param codePointOrder Choose between code unit order (FALSE)
       
   476  *                       and code point order (TRUE).
       
   477  *
       
   478  * @return <0 or 0 or >0 as usual for string comparisons
       
   479  *
       
   480  * @see u_strCompare
       
   481  *
       
   482  * @stable ICU 2.6
       
   483  */
       
   484 U_STABLE int32_t U_EXPORT2
       
   485 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
       
   486 
       
   487 #ifndef U_COMPARE_CODE_POINT_ORDER
       
   488 /* see also unistr.h and unorm.h */
       
   489 /**
       
   490  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
       
   491  * Compare strings in code point order instead of code unit order.
       
   492  * @stable ICU 2.2
       
   493  */
       
   494 #define U_COMPARE_CODE_POINT_ORDER  0x8000
       
   495 #endif
       
   496 
       
   497 /**
       
   498  * Compare two strings case-insensitively using full case folding.
       
   499  * This is equivalent to
       
   500  *   u_strCompare(u_strFoldCase(s1, options),
       
   501  *                u_strFoldCase(s2, options),
       
   502  *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
       
   503  *
       
   504  * The comparison can be done in UTF-16 code unit order or in code point order.
       
   505  * They differ only when comparing supplementary code points (U+10000..U+10ffff)
       
   506  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
       
   507  * In code unit order, high BMP code points sort after supplementary code points
       
   508  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
       
   509  *
       
    510  * This function works with strings of different explicitly specified lengths
       
   511  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
       
   512  * NUL-terminated strings are possible with length arguments of -1.
       
   513  *
       
   514  * @param s1 First source string.
       
   515  * @param length1 Length of first source string, or -1 if NUL-terminated.
       
   516  *
       
   517  * @param s2 Second source string.
       
   518  * @param length2 Length of second source string, or -1 if NUL-terminated.
       
   519  *
       
   520  * @param options A bit set of options:
       
   521  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   522  *     Comparison in code unit order with default case folding.
       
   523  *
       
   524  *   - U_COMPARE_CODE_POINT_ORDER
       
   525  *     Set to choose code point order instead of code unit order
       
   526  *     (see u_strCompare for details).
       
   527  *
       
   528  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   529  *
       
   530  * @param pErrorCode Must be a valid pointer to an error code value,
       
   531  *                  which must not indicate a failure before the function call.
       
   532  *
       
   533  * @return <0 or 0 or >0 as usual for string comparisons
       
   534  *
       
   535  * @stable ICU 2.2
       
   536  */
       
   537 U_STABLE int32_t U_EXPORT2
       
   538 u_strCaseCompare(const UChar *s1, int32_t length1,
       
   539                  const UChar *s2, int32_t length2,
       
   540                  uint32_t options,
       
   541                  UErrorCode *pErrorCode);
       
   542 
       
   543 /**
       
   544  * Compare two ustrings for bitwise equality. 
       
   545  * Compares at most <code>n</code> characters.
       
   546  *
       
   547  * @param ucs1 A string to compare.
       
   548  * @param ucs2 A string to compare.
       
   549  * @param n The maximum number of characters to compare.
       
    550  * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
       
    551  * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
       
    552  * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
       
   553  * @stable ICU 2.0
       
   554  */
       
   555 U_STABLE int32_t U_EXPORT2
       
   556 u_strncmp(const UChar     *ucs1, 
       
   557      const UChar     *ucs2, 
       
   558      int32_t     n);
       
   559 
       
   560 /**
       
   561  * Compare two Unicode strings in code point order.
       
   562  * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
       
   563  * For details, see u_strCompare().
       
   564  *
       
   565  * @param s1 A string to compare.
       
   566  * @param s2 A string to compare.
       
   567  * @param n The maximum number of characters to compare.
       
   568  * @return a negative/zero/positive integer corresponding to whether
       
   569  * the first string is less than/equal to/greater than the second one
       
   570  * in code point order
       
   571  * @stable ICU 2.0
       
   572  */
       
   573 U_STABLE int32_t U_EXPORT2
       
   574 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
       
   575 
       
   576 /**
       
   577  * Compare two strings case-insensitively using full case folding.
       
   578  * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
       
   579  *
       
   580  * @param s1 A string to compare.
       
   581  * @param s2 A string to compare.
       
   582  * @param options A bit set of options:
       
   583  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   584  *     Comparison in code unit order with default case folding.
       
   585  *
       
   586  *   - U_COMPARE_CODE_POINT_ORDER
       
   587  *     Set to choose code point order instead of code unit order
       
   588  *     (see u_strCompare for details).
       
   589  *
       
   590  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   591  *
       
   592  * @return A negative, zero, or positive integer indicating the comparison result.
       
   593  * @stable ICU 2.0
       
   594  */
       
   595 U_STABLE int32_t U_EXPORT2
       
   596 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
       
   597 
       
   598 /**
       
   599  * Compare two strings case-insensitively using full case folding.
       
   600  * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
       
   601  * u_strFoldCase(s2, at most n, options)).
       
   602  *
       
   603  * @param s1 A string to compare.
       
   604  * @param s2 A string to compare.
       
   605  * @param n The maximum number of characters in each string to case-fold and then compare.
       
   606  * @param options A bit set of options:
       
   607  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   608  *     Comparison in code unit order with default case folding.
       
   609  *
       
   610  *   - U_COMPARE_CODE_POINT_ORDER
       
   611  *     Set to choose code point order instead of code unit order
       
   612  *     (see u_strCompare for details).
       
   613  *
       
   614  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   615  *
       
   616  * @return A negative, zero, or positive integer indicating the comparison result.
       
   617  * @stable ICU 2.0
       
   618  */
       
   619 U_STABLE int32_t U_EXPORT2
       
   620 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
       
   621 
       
   622 /**
       
   623  * Compare two strings case-insensitively using full case folding.
       
   624  * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
       
   625  * u_strFoldCase(s2, length, options)).
       
   626  *
       
   627  * @param s1 A string to compare.
       
   628  * @param s2 A string to compare.
       
   629  * @param length The number of characters in each string to case-fold and then compare.
       
   630  * @param options A bit set of options:
       
   631  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
       
   632  *     Comparison in code unit order with default case folding.
       
   633  *
       
   634  *   - U_COMPARE_CODE_POINT_ORDER
       
   635  *     Set to choose code point order instead of code unit order
       
   636  *     (see u_strCompare for details).
       
   637  *
       
   638  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
   639  *
       
   640  * @return A negative, zero, or positive integer indicating the comparison result.
       
   641  * @stable ICU 2.0
       
   642  */
       
   643 U_STABLE int32_t U_EXPORT2
       
   644 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
       
   645 
       
   646 /**
       
   647  * Copy a ustring. Adds a null terminator.
       
   648  *
       
   649  * @param dst The destination string.
       
   650  * @param src The source string.
       
   651  * @return A pointer to <code>dst</code>.
       
   652  * @stable ICU 2.0
       
   653  */
       
   654 U_STABLE UChar* U_EXPORT2
       
   655 u_strcpy(UChar     *dst, 
       
   656     const UChar     *src);
       
   657 
       
   658 /**
       
   659  * Copy a ustring.
       
   660  * Copies at most <code>n</code> characters.  The result will be null terminated
       
   661  * if the length of <code>src</code> is less than <code>n</code>.
       
   662  *
       
   663  * @param dst The destination string.
       
   664  * @param src The source string.
       
   665  * @param n The maximum number of characters to copy.
       
   666  * @return A pointer to <code>dst</code>.
       
   667  * @stable ICU 2.0
       
   668  */
       
   669 U_STABLE UChar* U_EXPORT2
       
   670 u_strncpy(UChar     *dst, 
       
   671      const UChar     *src, 
       
   672      int32_t     n);
       
   673 
       
   674 #if !UCONFIG_NO_CONVERSION
       
   675 
       
   676 /**
       
   677  * Copy a byte string encoded in the default codepage to a ustring.
       
   678  * Adds a null terminator.
       
   679  * Performs a host byte to UChar conversion
       
   680  *
       
   681  * @param dst The destination string.
       
   682  * @param src The source string.
       
   683  * @return A pointer to <code>dst</code>.
       
   684  * @stable ICU 2.0
       
   685  */
       
   686 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
       
   687                const char *src );
       
   688 
       
   689 /**
       
   690  * Copy a byte string encoded in the default codepage to a ustring.
       
   691  * Copies at most <code>n</code> characters.  The result will be null terminated
       
   692  * if the length of <code>src</code> is less than <code>n</code>.
       
   693  * Performs a host byte to UChar conversion
       
   694  *
       
   695  * @param dst The destination string.
       
   696  * @param src The source string.
       
   697  * @param n The maximum number of characters to copy.
       
   698  * @return A pointer to <code>dst</code>.
       
   699  * @stable ICU 2.0
       
   700  */
       
   701 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
       
   702             const char *src,
       
   703             int32_t n);
       
   704 
       
   705 /**
       
   706  * Copy ustring to a byte string encoded in the default codepage.
       
   707  * Adds a null terminator.
       
   708  * Performs a UChar to host byte conversion
       
   709  *
       
   710  * @param dst The destination string.
       
   711  * @param src The source string.
       
   712  * @return A pointer to <code>dst</code>.
       
   713  * @stable ICU 2.0
       
   714  */
       
   715 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
       
   716             const UChar *src );
       
   717 
       
   718 /**
       
   719  * Copy ustring to a byte string encoded in the default codepage.
       
   720  * Copies at most <code>n</code> characters.  The result will be null terminated
       
   721  * if the length of <code>src</code> is less than <code>n</code>.
       
   722  * Performs a UChar to host byte conversion
       
   723  *
       
   724  * @param dst The destination string.
       
   725  * @param src The source string.
       
   726  * @param n The maximum number of characters to copy.
       
   727  * @return A pointer to <code>dst</code>.
       
   728  * @stable ICU 2.0
       
   729  */
       
   730 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
       
   731             const UChar *src,
       
   732             int32_t n );
       
   733 
       
   734 #endif
       
   735 
       
   736 /**
       
   737  * Synonym for memcpy(), but with UChars only.
       
   738  * @param dest The destination string
       
   739  * @param src The source string
       
   740  * @param count The number of characters to copy
       
   741  * @return A pointer to <code>dest</code>
       
   742  * @stable ICU 2.0
       
   743  */
       
   744 U_STABLE UChar* U_EXPORT2
       
   745 u_memcpy(UChar *dest, const UChar *src, int32_t count);
       
   746 
       
   747 /**
       
   748  * Synonym for memmove(), but with UChars only.
       
   749  * @param dest The destination string
       
   750  * @param src The source string
       
   751  * @param count The number of characters to move
       
   752  * @return A pointer to <code>dest</code>
       
   753  * @stable ICU 2.0
       
   754  */
       
   755 U_STABLE UChar* U_EXPORT2
       
   756 u_memmove(UChar *dest, const UChar *src, int32_t count);
       
   757 
       
   758 /**
       
   759  * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
       
   760  *
       
   761  * @param dest The destination string.
       
   762  * @param c The character to initialize the string.
       
   763  * @param count The maximum number of characters to set.
       
   764  * @return A pointer to <code>dest</code>.
       
   765  * @stable ICU 2.0
       
   766  */
       
   767 U_STABLE UChar* U_EXPORT2
       
   768 u_memset(UChar *dest, UChar c, int32_t count);
       
   769 
       
   770 /**
       
   771  * Compare the first <code>count</code> UChars of each buffer.
       
   772  *
       
   773  * @param buf1 The first string to compare.
       
   774  * @param buf2 The second string to compare.
       
   775  * @param count The maximum number of UChars to compare.
       
   776  * @return When buf1 < buf2, a negative number is returned.
       
   777  *      When buf1 == buf2, 0 is returned.
       
   778  *      When buf1 > buf2, a positive number is returned.
       
   779  * @stable ICU 2.0
       
   780  */
       
   781 U_STABLE int32_t U_EXPORT2
       
   782 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
       
   783 
       
   784 /**
       
   785  * Compare two Unicode strings in code point order.
       
   786  * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
       
   787  * For details, see u_strCompare().
       
   788  *
       
   789  * @param s1 A string to compare.
       
   790  * @param s2 A string to compare.
       
   791  * @param count The maximum number of characters to compare.
       
   792  * @return a negative/zero/positive integer corresponding to whether
       
   793  * the first string is less than/equal to/greater than the second one
       
   794  * in code point order
       
   795  * @stable ICU 2.0
       
   796  */
       
   797 U_STABLE int32_t U_EXPORT2
       
   798 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
       
   799 
       
   800 /**
       
   801  * Find the first occurrence of a BMP code point in a string.
       
   802  * A surrogate code point is found only if its match in the text is not
       
   803  * part of a surrogate pair.
       
   804  * A NUL character is found at the string terminator.
       
   805  *
       
   806  * @param s The string to search (contains <code>count</code> UChars).
       
   807  * @param c The BMP code point to find.
       
   808  * @param count The length of the string.
       
   809  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
       
   810  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   811  * @stable ICU 2.0
       
   812  *
       
   813  * @see u_strchr
       
   814  * @see u_memchr32
       
   815  * @see u_strFindFirst
       
   816  */
       
   817 U_STABLE UChar* U_EXPORT2
       
   818 u_memchr(const UChar *s, UChar c, int32_t count);
       
   819 
       
   820 /**
       
   821  * Find the first occurrence of a code point in a string.
       
   822  * A surrogate code point is found only if its match in the text is not
       
   823  * part of a surrogate pair.
       
   824  * A NUL character is found at the string terminator.
       
   825  *
       
   826  * @param s The string to search (contains <code>count</code> UChars).
       
   827  * @param c The code point to find.
       
   828  * @param count The length of the string.
       
   829  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
       
   830  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   831  * @stable ICU 2.0
       
   832  *
       
   833  * @see u_strchr32
       
   834  * @see u_memchr
       
   835  * @see u_strFindFirst
       
   836  */
       
   837 U_STABLE UChar* U_EXPORT2
       
   838 u_memchr32(const UChar *s, UChar32 c, int32_t count);
       
   839 
       
   840 /**
       
   841  * Find the last occurrence of a BMP code point in a string.
       
   842  * A surrogate code point is found only if its match in the text is not
       
   843  * part of a surrogate pair.
       
   844  * A NUL character is found at the string terminator.
       
   845  *
       
   846  * @param s The string to search (contains <code>count</code> UChars).
       
   847  * @param c The BMP code point to find.
       
   848  * @param count The length of the string.
       
   849  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
       
   850  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   851  * @stable ICU 2.4
       
   852  *
       
   853  * @see u_strrchr
       
   854  * @see u_memrchr32
       
   855  * @see u_strFindLast
       
   856  */
       
   857 U_STABLE UChar* U_EXPORT2
       
   858 u_memrchr(const UChar *s, UChar c, int32_t count);
       
   859 
       
   860 /**
       
   861  * Find the last occurrence of a code point in a string.
       
   862  * A surrogate code point is found only if its match in the text is not
       
   863  * part of a surrogate pair.
       
   864  * A NUL character is found at the string terminator.
       
   865  *
       
   866  * @param s The string to search (contains <code>count</code> UChars).
       
   867  * @param c The code point to find.
       
   868  * @param count The length of the string.
       
   869  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
       
   870  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
       
   871  * @stable ICU 2.4
       
   872  *
       
   873  * @see u_strrchr32
       
   874  * @see u_memrchr
       
   875  * @see u_strFindLast
       
   876  */
       
   877 U_STABLE UChar* U_EXPORT2
       
   878 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
       
   879 
       
   880 /**
       
   881  * Unicode String literals in C.
       
   882  * We need one macro to declare a variable for the string
       
   883  * and to statically preinitialize it if possible,
       
   884  * and a second macro to dynamically initialize such a string variable if necessary.
       
   885  *
       
   886  * The macros are defined for maximum performance.
       
   887  * They work only for strings that contain "invariant characters", i.e.,
       
   888  * only latin letters, digits, and some punctuation.
       
   889  * See utypes.h for details.
       
   890  *
       
   891  * A pair of macros for a single string must be used with the same
       
   892  * parameters.
       
   893  * The string parameter must be a C string literal.
       
   894  * The length of the string, not including the terminating
       
   895  * <code>NUL</code>, must be specified as a constant.
       
   896  * The U_STRING_DECL macro should be invoked exactly once for one
       
   897  * such string variable before it is used.
       
   898  *
       
   899  * Usage:
       
   900  * <pre>
       
   901  * &#32;   U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
       
   902  * &#32;   U_STRING_DECL(ustringVar2, "jumps 5%", 8);
       
   903  * &#32;   static UBool didInit=FALSE;
       
   904  * &#32;
       
   905  * &#32;   int32_t function() {
       
   906  * &#32;       if(!didInit) {
       
   907  * &#32;           U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
       
   908  * &#32;           U_STRING_INIT(ustringVar2, "jumps 5%", 8);
       
   909  * &#32;           didInit=TRUE;
       
   910  * &#32;       }
       
   911  * &#32;       return u_strcmp(ustringVar1, ustringVar2);
       
   912  * &#32;   }
       
   913  * </pre>
       
   914  * @stable ICU 2.0
       
   915  */
       
   916 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY /* wide literal has UChar-sized units: preinitialize statically */
       
   917 #   define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }
       
   918     /**@stable ICU 2.0 */
       
   919 #   define U_STRING_INIT(var, cs, length)
       
   920 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY /* 8-bit UChar: the narrow literal itself is the initializer */
       
   921 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
       
   922     /**@stable ICU 2.0 */
       
   923 #   define U_STRING_INIT(var, cs, length)
       
   924 #else /* no static initializer available: declare only and fill in with U_STRING_INIT */
       
   925 #   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
       
   926     /**@stable ICU 2.0 */
       
   927 #   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1) /* +1 also converts the terminating NUL */
       
   928 #endif
       
   929 
       
   930 /**
       
   931  * Unescape a string of characters and write the resulting
       
   932  * Unicode characters to the destination buffer.  The following escape
       
   933  * sequences are recognized:
       
   934  *
       
   935  * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
       
   936  * \\Uhhhhhhhh   8 hex digits
       
   937  * \\xhh         1-2 hex digits
       
   938  * \\x{h...}     1-8 hex digits
       
   939  * \\ooo         1-3 octal digits; o in [0-7]
       
   940  * \\cX          control-X; X is masked with 0x1F
       
   941  *
       
   942  * as well as the standard ANSI C escapes:
       
   943  *
       
   944  * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
       
   945  * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
       
   946  * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
       
   947  *
       
   948  * Anything else following a backslash is generically escaped.  For
       
   949  * example, "[a\\-z]" returns "[a-z]".
       
   950  *
       
   951  * If an escape sequence is ill-formed, this method returns an empty
       
   952  * string.  An example of an ill-formed sequence is "\\u" followed by
       
   953  * fewer than 4 hex digits.
       
   954  *
       
   955  * The above characters are recognized in the compiler's codepage,
       
   956  * that is, they are coded as 'u', '\\', etc.  Characters that are
       
   957  * not parts of escape sequences are converted using u_charsToUChars().
       
   958  *
       
   959  * This function is similar to UnicodeString::unescape() but not
       
   960  * identical to it.  The latter takes a source UnicodeString, so it
       
   961  * does escape recognition but no conversion.
       
   962  *
       
   963  * @param src a zero-terminated string of invariant characters
       
   964  * @param dest pointer to buffer to receive converted and unescaped
       
   965  * text and, if there is room, a zero terminator.  May be NULL for
       
   966  * preflighting, in which case no UChars will be written, but the
       
   967  * return value will still be valid.  On error, an empty string is
       
   968  * stored here (if possible).
       
   969  * @param destCapacity the number of UChars that may be written at
       
   970  * dest.  Ignored if dest == NULL.
       
   971  * @return the length of unescaped string.
       
   972  * @see u_unescapeAt
       
   973  * @see UnicodeString#unescape()
       
   974  * @see UnicodeString#unescapeAt()
       
   975  * @stable ICU 2.0
       
   976  */
       
   977 U_STABLE int32_t U_EXPORT2
       
   978 u_unescape(const char *src,
       
   979            UChar *dest, int32_t destCapacity);
       
   980 
       
   981 U_CDECL_BEGIN
       
   982 /**
       
   983  * Callback function for u_unescapeAt() that returns a character of
       
   984  * the source text given an offset and a context pointer.  The context
       
   985  * pointer will be whatever is passed into u_unescapeAt().
       
   986  *
       
   987  * @param offset the offset into the source text of the character to return
       
   988  * @param context an opaque pointer passed directly into u_unescapeAt()
       
   989  * @return the character of the source text at
       
   990  * offset
       
   991  * @see u_unescapeAt
       
   992  * @stable ICU 2.0
       
   993  */
       
   994 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
       
   995 U_CDECL_END
       
   996 
       
   997 /**
       
   998  * Unescape a single sequence. The character at offset-1 is assumed
       
   999  * (without checking) to be a backslash.  This method takes a callback
       
  1000  * pointer to a function that returns the UChar at a given offset.  By
       
  1001  * varying this callback, ICU functions are able to unescape char*
       
  1002  * strings, UnicodeString objects, and UFILE pointers.
       
  1003  *
       
  1004  * If offset is out of range, or if the escape sequence is ill-formed,
       
  1005  * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
       
  1006  * for a list of recognized sequences.
       
  1007  *
       
  1008  * @param charAt callback function that returns a UChar of the source
       
  1009  * text given an offset and a context pointer.
       
  1010  * @param offset pointer to the offset that will be passed to charAt.
       
  1011  * The offset value will be updated upon return to point after the
       
  1012  * last parsed character of the escape sequence.  On error the offset
       
  1013  * is unchanged.
       
  1014  * @param length the number of characters in the source text.  The
       
  1015  * last character of the source text is considered to be at offset
       
  1016  * length-1.
       
  1017  * @param context an opaque pointer passed directly into charAt.
       
  1018  * @return the character represented by the escape sequence at
       
  1019  * offset, or (UChar32)0xFFFFFFFF on error.
       
  1020  * @see u_unescape()
       
  1021  * @see UnicodeString#unescape()
       
  1022  * @see UnicodeString#unescapeAt()
       
  1023  * @stable ICU 2.0
       
  1024  */
       
  1025 U_STABLE UChar32 U_EXPORT2
       
  1026 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
       
  1027              int32_t *offset,
       
  1028              int32_t length,
       
  1029              void *context);
       
  1030 
       
  1031 /**
       
  1032  * Uppercase the characters in a string.
       
  1033  * Casing is locale-dependent and context-sensitive.
       
  1034  * The result may be longer or shorter than the original.
       
  1035  * The source string and the destination buffer are allowed to overlap.
       
  1036  *
       
  1037  * @param dest      A buffer for the result string. The result will be zero-terminated if
       
  1038  *                  the buffer is large enough.
       
  1039  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
       
  1040  *                  dest may be NULL and the function will only return the length of the result
       
  1041  *                  without writing any of the result string.
       
  1042  * @param src       The original string
       
  1043  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
       
  1044  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
       
  1045  * @param pErrorCode Must be a valid pointer to an error code value,
       
  1046  *                  which must not indicate a failure before the function call.
       
  1047  * @return The length of the result string. It may be greater than destCapacity. In that case,
       
  1048  *         only some of the result was written to the destination buffer.
       
  1049  * @stable ICU 2.0
       
  1050  */
       
  1051 U_STABLE int32_t U_EXPORT2
       
  1052 u_strToUpper(UChar *dest, int32_t destCapacity,
       
  1053              const UChar *src, int32_t srcLength,
       
  1054              const char *locale,
       
  1055              UErrorCode *pErrorCode);
       
  1056 
       
  1057 /**
       
  1058  * Lowercase the characters in a string.
       
  1059  * Casing is locale-dependent and context-sensitive.
       
  1060  * The result may be longer or shorter than the original.
       
  1061  * The source string and the destination buffer are allowed to overlap.
       
  1062  *
       
  1063  * @param dest      A buffer for the result string. The result will be zero-terminated if
       
  1064  *                  the buffer is large enough.
       
  1065  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
       
  1066  *                  dest may be NULL and the function will only return the length of the result
       
  1067  *                  without writing any of the result string.
       
  1068  * @param src       The original string
       
  1069  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
       
  1070  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
       
  1071  * @param pErrorCode Must be a valid pointer to an error code value,
       
  1072  *                  which must not indicate a failure before the function call.
       
  1073  * @return The length of the result string. It may be greater than destCapacity. In that case,
       
  1074  *         only some of the result was written to the destination buffer.
       
  1075  * @stable ICU 2.0
       
  1076  */
       
  1077 U_STABLE int32_t U_EXPORT2
       
  1078 u_strToLower(UChar *dest, int32_t destCapacity,
       
  1079              const UChar *src, int32_t srcLength,
       
  1080              const char *locale,
       
  1081              UErrorCode *pErrorCode);
       
  1082 
       
  1083 #if !UCONFIG_NO_BREAK_ITERATION
       
  1084 
       
  1085 /**
       
  1086  * Titlecase a string.
       
  1087  * Casing is locale-dependent and context-sensitive.
       
  1088  * Titlecasing uses a break iterator to find the first characters of words
       
  1089  * that are to be titlecased. It titlecases those characters and lowercases
       
  1090  * all others.
       
  1091  *
       
  1092  * The titlecase break iterator can be provided to customize for arbitrary
       
  1093  * styles, using rules and dictionaries beyond the standard iterators.
       
  1094  * It may be more efficient to always provide an iterator to avoid
       
  1095  * opening and closing one for each string.
       
  1096  * The standard titlecase iterator for the root locale implements the
       
  1097  * algorithm of Unicode TR 21.
       
  1098  *
       
  1099  * This function uses only the first() and next() methods of the
       
  1100  * provided break iterator.
       
  1101  *
       
  1102  * The result may be longer or shorter than the original.
       
  1103  * The source string and the destination buffer are allowed to overlap.
       
  1104  *
       
  1105  * @param dest      A buffer for the result string. The result will be zero-terminated if
       
  1106  *                  the buffer is large enough.
       
  1107  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
       
  1108  *                  dest may be NULL and the function will only return the length of the result
       
  1109  *                  without writing any of the result string.
       
  1110  * @param src       The original string
       
  1111  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
       
  1112  * @param titleIter A break iterator to find the first characters of words
       
  1113  *                  that are to be titlecased.
       
  1114  *                  If none is provided (NULL), then a standard titlecase
       
  1115  *                  break iterator is opened.
       
  1116  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
       
  1117  * @param pErrorCode Must be a valid pointer to an error code value,
       
  1118  *                  which must not indicate a failure before the function call.
       
  1119  * @return The length of the result string. It may be greater than destCapacity. In that case,
       
  1120  *         only some of the result was written to the destination buffer.
       
  1121  * @stable ICU 2.1
       
  1122  */
       
  1123 U_STABLE int32_t U_EXPORT2
       
  1124 u_strToTitle(UChar *dest, int32_t destCapacity,
       
  1125              const UChar *src, int32_t srcLength,
       
  1126              UBreakIterator *titleIter,
       
  1127              const char *locale,
       
  1128              UErrorCode *pErrorCode);
       
  1129 
       
  1130 #endif
       
  1131 
       
  1132 /**
       
  1133  * Case-fold the characters in a string.
       
  1134  * Case-folding is locale-independent and not context-sensitive,
       
  1135  * but there is an option for whether to include or exclude mappings for dotted I
       
  1136  * and dotless i that are marked with 'I' in CaseFolding.txt.
       
  1137  * The result may be longer or shorter than the original.
       
  1138  * The source string and the destination buffer are allowed to overlap.
       
  1139  *
       
  1140  * @param dest      A buffer for the result string. The result will be zero-terminated if
       
  1141  *                  the buffer is large enough.
       
  1142  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
       
  1143  *                  dest may be NULL and the function will only return the length of the result
       
  1144  *                  without writing any of the result string.
       
  1145  * @param src       The original string
       
  1146  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
       
  1147  * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
       
  1148  * @param pErrorCode Must be a valid pointer to an error code value,
       
  1149  *                  which must not indicate a failure before the function call.
       
  1150  * @return The length of the result string. It may be greater than destCapacity. In that case,
       
  1151  *         only some of the result was written to the destination buffer.
       
  1152  * @stable ICU 2.0
       
  1153  */
       
  1154 U_STABLE int32_t U_EXPORT2
       
  1155 u_strFoldCase(UChar *dest, int32_t destCapacity,
       
  1156               const UChar *src, int32_t srcLength,
       
  1157               uint32_t options,
       
  1158               UErrorCode *pErrorCode);
       
  1159 
       
  1160 /**
       
  1161  * Converts a sequence of UChars (UTF-16) to wchar_t units.
       
  1162  *
       
  1163  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1164  *                      the buffer is large enough.
       
  1165  * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
       
  1166  *                      dest may be NULL and the function will only report the length of the
       
  1167  *                      result (via pDestLength) without writing any of the result string (pre-flighting).
       
  1168  * @param pDestLength   A pointer to receive the length of the result string, in wchar_t units.
       
  1169  *                      If pDestLength!=NULL then *pDestLength is always set to the
       
  1170  *                      number of output units corresponding to the transformation of
       
  1171  *                      all the input units, even in case of a buffer overflow (so it may exceed destCapacity).
       
  1172  * @param src           The original source string.
       
  1173  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1174  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1175  *                      which must not indicate a failure before the function call.
       
  1176  * @return The pointer to the destination buffer.
       
  1177  * @stable ICU 2.0
       
  1178  */
       
  1179 U_STABLE wchar_t* U_EXPORT2
       
  1180 u_strToWCS(wchar_t *dest, 
       
  1181            int32_t destCapacity,
       
  1182            int32_t *pDestLength,
       
  1183            const UChar *src, 
       
  1184            int32_t srcLength,
       
  1185            UErrorCode *pErrorCode);
       
  1186 /**
       
  1187  * Converts a sequence of wchar_t units to UChars (UTF-16).
       
  1188  *
       
  1189  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1190  *                      the buffer is large enough.
       
  1191  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
       
  1192  *                      dest may be NULL and the function will only report the length of the
       
  1193  *                      result (via pDestLength) without writing any of the result string (pre-flighting).
       
  1194  * @param pDestLength   A pointer to receive the length of the result string, in UChars.
       
  1195  *                      If pDestLength!=NULL then *pDestLength is always set to the
       
  1196  *                      number of output units corresponding to the transformation of
       
  1197  *                      all the input units, even in case of a buffer overflow (so it may exceed destCapacity).
       
  1198  * @param src           The original source string.
       
  1199  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1200  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1201  *                      which must not indicate a failure before the function call.
       
  1202  * @return The pointer to the destination buffer.
       
  1203  * @stable ICU 2.0
       
  1204  */
       
  1205 U_STABLE UChar* U_EXPORT2
       
  1206 u_strFromWCS(UChar   *dest,
       
  1207              int32_t destCapacity, 
       
  1208              int32_t *pDestLength,
       
  1209              const wchar_t *src,
       
  1210              int32_t srcLength,
       
  1211              UErrorCode *pErrorCode);
       
  1212 /**
       
  1213  * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
       
  1214  *
       
  1215  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1216  *                      the buffer is large enough.
       
  1217  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
       
  1218  *                      dest may be NULL and the function will only report the length of the
       
  1219  *                      result (via pDestLength) without writing any of the result string (pre-flighting).
       
  1220  * @param pDestLength   A pointer to receive the length of the result string, in chars.
       
  1221  *                      If pDestLength!=NULL then *pDestLength is always set to the
       
  1222  *                      number of output units corresponding to the transformation of
       
  1223  *                      all the input units, even in case of a buffer overflow (so it may exceed destCapacity).
       
  1224  * @param src           The original source string.
       
  1225  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1226  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1227  *                      which must not indicate a failure before the function call.
       
  1228  * @return The pointer to the destination buffer.
       
  1229  * @stable ICU 2.0
       
  1230  */
       
  1231 U_STABLE char* U_EXPORT2 
       
  1232 u_strToUTF8(char *dest,           
       
  1233             int32_t destCapacity,
       
  1234             int32_t *pDestLength,
       
  1235             const UChar *src, 
       
  1236             int32_t srcLength,
       
  1237             UErrorCode *pErrorCode);
       
  1238 
       
  1239 /**
       
  1240  * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
       
  1241  *
       
  1242  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1243  *                      the buffer is large enough.
       
  1244  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
       
  1245  *                      dest may be NULL and the function will only report the length of the
       
  1246  *                      result (via pDestLength) without writing any of the result string (pre-flighting).
       
  1247  * @param pDestLength   A pointer to receive the length of the result string, in UChars.
       
  1248  *                      If pDestLength!=NULL then *pDestLength is always set to the
       
  1249  *                      number of output units corresponding to the transformation of
       
  1250  *                      all the input units, even in case of a buffer overflow (so it may exceed destCapacity).
       
  1251  * @param src           The original source string.
       
  1252  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1253  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1254  *                      which must not indicate a failure before the function call.
       
  1255  * @return The pointer to the destination buffer.
       
  1256  * @stable ICU 2.0
       
  1257  */
       
  1258 U_STABLE UChar* U_EXPORT2
       
  1259 u_strFromUTF8(UChar *dest,             
       
  1260               int32_t destCapacity,
       
  1261               int32_t *pDestLength,
       
  1262               const char *src, 
       
  1263               int32_t srcLength,
       
  1264               UErrorCode *pErrorCode);
       
  1265 
       
  1266 /**
       
  1267  * Converts a sequence of UChars (UTF-16) to UTF32 units.
       
  1268  *
       
  1269  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1270  *                      the buffer is large enough.
       
  1271  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
       
  1272  *                      dest may be NULL and the function will only return the length of the 
       
  1273  *                      result without writing any of the result string (pre-flighting).
       
  1274  * @param pDestLength   A pointer to receive the number of units written to the destination. If 
       
  1275  *                      pDestLength!=NULL then *pDestLength is always set to the 
       
  1276  *                      number of output units corresponding to the transformation of 
       
  1277  *                      all the input units, even in case of a buffer overflow.
       
  1278  * @param src           The original source string
       
  1279  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1280  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1281  *                      which must not indicate a failure before the function call.
       
  1282  * @return The pointer to destination buffer.
       
  1283  * @stable ICU 2.0
       
  1284  */
       
  1285 U_STABLE UChar32* U_EXPORT2 
       
  1286 u_strToUTF32(UChar32 *dest, 
       
  1287              int32_t  destCapacity,
       
  1288              int32_t  *pDestLength,
       
  1289              const UChar *src, 
       
  1290              int32_t  srcLength,
       
  1291              UErrorCode *pErrorCode);
       
  1292 
       
  1293 /**
       
  1294  * Converts a sequence of UTF-32 units to UChars (UTF-16).
       
  1295  *
       
  1296  * @param dest          A buffer for the result string. The result will be zero-terminated if
       
  1297  *                      the buffer is large enough.
       
  1298  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
       
  1299  *                      dest may be NULL and the function will only report the length of the
       
  1300  *                      result (via pDestLength) without writing any of the result string (pre-flighting).
       
  1301  * @param pDestLength   A pointer to receive the length of the result string, in UChars.
       
  1302  *                      If pDestLength!=NULL then *pDestLength is always set to the
       
  1303  *                      number of output units corresponding to the transformation of
       
  1304  *                      all the input units, even in case of a buffer overflow (so it may exceed destCapacity).
       
  1305  * @param src           The original source string.
       
  1306  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
       
  1307  * @param pErrorCode    Must be a valid pointer to an error code value,
       
  1308  *                      which must not indicate a failure before the function call.
       
  1309  * @return The pointer to the destination buffer.
       
  1310  * @stable ICU 2.0
       
  1311  */
       
  1312 U_STABLE UChar* U_EXPORT2 
       
  1313 u_strFromUTF32(UChar   *dest,
       
  1314                int32_t destCapacity, 
       
  1315                int32_t *pDestLength,
       
  1316                const UChar32 *src,
       
  1317                int32_t srcLength,
       
  1318                UErrorCode *pErrorCode);
       
  1319 
       
  1320 #endif