JavaScriptGlue/icu/unicode/uiter.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2 *******************************************************************************
       
     3 *
       
     4 *   Copyright (C) 2002-2004, International Business Machines
       
     5 *   Corporation and others.  All Rights Reserved.
       
     6 *
       
     7 *******************************************************************************
       
     8 *   file name:  uiter.h
       
     9 *   encoding:   US-ASCII
       
    10 *   tab size:   8 (not used)
       
    11 *   indentation:4
       
    12 *
       
    13 *   created on: 2002jan18
       
    14 *   created by: Markus W. Scherer
       
    15 */
       
    16 
       
    17 #ifndef __UITER_H__
       
    18 #define __UITER_H__
       
    19 
       
    20 /**
       
    21  * \file
       
    22  * \brief C API: Unicode Character Iteration
       
    23  *
       
    24  * @see UCharIterator
       
    25  */
       
    26 
       
    27 #include "unicode/utypes.h"
       
    28 
       
    29 #ifdef XP_CPLUSPLUS
       
    30     U_NAMESPACE_BEGIN
       
    31 
       
    32     class CharacterIterator;
       
    33     class Replaceable;
       
    34 
       
    35     U_NAMESPACE_END
       
    36 #endif
       
    37 
       
    38 U_CDECL_BEGIN
       
    39 
       
    40 struct UCharIterator;
       
    41 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
       
    42 
       
    43 /**
       
    44  * Origin constants for UCharIterator.getIndex() and UCharIterator.move(): they select the start, current, limit, zero, or length index.
       
    45  * @see UCharIteratorMove
       
    46  * @see UCharIterator
       
    47  * @stable ICU 2.1
       
    48  */
       
    49 typedef enum UCharIteratorOrigin {
       
    50     UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
       
    51 } UCharIteratorOrigin;
       
    52 
       
    53 /** Constants for UCharIterator. @stable ICU 2.6 */
       
    54 enum {
       
    55     /**
       
    56      * Constant value that may be returned by UCharIteratorMove
       
    57      * indicating that the final UTF-16 index is not known, but that the move succeeded.
       
    58      * This can occur when moving relative to limit or length, or
       
    59      * when moving relative to the current index after a setState()
       
    60      * when the current UTF-16 index is not known.
       
    61      *
       
    62      * It would be very inefficient to have to count from the beginning of the text
       
    63      * just to get the current/limit/length index after moving relative to it.
       
    64      * The actual index can be determined with getIndex(UITER_CURRENT)
       
    65      * which will count the UChars if necessary.
       
    66      *
       
    67      * @stable ICU 2.6
       
    68      */
       
    69     UITER_UNKNOWN_INDEX=-2
       
    70 };
       
    71 
       
    72 
       
    73 /**
       
    74  * Constant for UCharIterator getState() indicating an error or
       
    75  * an unknown state.
       
    76  * Returned by uiter_getState()/UCharIteratorGetState
       
    77  * when an error occurs.
       
    78  * Also, some UCharIterator implementations may not be able to return
       
    79  * a valid state for each position. This will be clearly documented
       
    80  * for each such iterator (none of the public ones here).
       
    81  *
       
    82  * @stable ICU 2.6
       
    83  */
       
    84 #define UITER_NO_STATE ((uint32_t)0xffffffff)
       
    85 
       
    86 /**
       
    87  * Function type declaration for UCharIterator.getIndex().
       
    88  *
       
    89  * Gets the current position, or the start or limit of the
       
    90  * iteration range.
       
    91  *
       
    92  * This function may perform slowly for UITER_CURRENT after setState() was called,
       
    93  * or for UITER_LENGTH, because an iterator implementation may have to count
       
    94  * UChars if the underlying storage is not UTF-16.
       
    95  *
       
    96  * @param iter the UCharIterator structure ("this pointer")
       
    97  * @param origin get the 0, start, limit, length, or current index
       
    98  * @return the requested index, or U_SENTINEL in an error condition
       
    99  *
       
   100  * @see UCharIteratorOrigin
       
   101  * @see UCharIterator
       
   102  * @stable ICU 2.1
       
   103  */
       
   104 typedef int32_t U_CALLCONV
       
   105 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
       
   106 
       
   107 /**
       
   108  * Function type declaration for UCharIterator.move().
       
   109  *
       
   110  * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
       
   111  *
       
   112  * Moves the current position relative to the start or limit of the
       
   113  * iteration range, or relative to the current position itself.
       
   114  * The movement is expressed in numbers of code units forward
       
   115  * or backward by specifying a positive or negative delta.
       
   116  * Out of bounds movement will be pinned to the start or limit.
       
   117  *
       
   118  * This function may perform slowly for moving relative to UITER_LENGTH
       
   119  * because an iterator implementation may have to count the rest of the
       
   120  * UChars if the native storage is not UTF-16.
       
   121  *
       
   122  * When moving relative to the limit or length, or
       
   123  * relative to the current position after setState() was called,
       
   124  * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
       
   125  * determination of the actual UTF-16 index.
       
   126  * The actual index can be determined with getIndex(UITER_CURRENT)
       
   127  * which will count the UChars if necessary.
       
   128  * See UITER_UNKNOWN_INDEX for details.
       
   129  *
       
   130  * @param iter the UCharIterator structure ("this pointer")
       
   131  * @param delta can be positive, zero, or negative
       
   132  * @param origin move relative to the 0, start, limit, length, or current index
       
   133  * @return the new index, or U_SENTINEL on an error condition,
       
   134  *         or UITER_UNKNOWN_INDEX when the index is not known.
       
   135  *
       
   136  * @see UCharIteratorOrigin
       
   137  * @see UCharIterator
       
   138  * @see UITER_UNKNOWN_INDEX
       
   139  * @stable ICU 2.1
       
   140  */
       
   141 typedef int32_t U_CALLCONV
       
   142 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
       
   143 
       
   144 /**
       
   145  * Function type declaration for UCharIterator.hasNext().
       
   146  *
       
   147  * Check if current() and next() can still
       
   148  * return another code unit.
       
   149  *
       
   150  * @param iter the UCharIterator structure ("this pointer")
       
   151  * @return boolean value for whether current() and next() can still return another code unit
       
   152  *
       
   153  * @see UCharIterator
       
   154  * @stable ICU 2.1
       
   155  */
       
   156 typedef UBool U_CALLCONV
       
   157 UCharIteratorHasNext(UCharIterator *iter);
       
   158 
       
   159 /**
       
   160  * Function type declaration for UCharIterator.hasPrevious().
       
   161  *
       
   162  * Check if previous() can still return another code unit.
       
   163  *
       
   164  * @param iter the UCharIterator structure ("this pointer")
       
   165  * @return boolean value for whether previous() can still return another code unit
       
   166  *
       
   167  * @see UCharIterator
       
   168  * @stable ICU 2.1
       
   169  */
       
   170 typedef UBool U_CALLCONV
       
   171 UCharIteratorHasPrevious(UCharIterator *iter);
       
   172  
       
   173 /**
       
   174  * Function type declaration for UCharIterator.current().
       
   175  *
       
   176  * Return the code unit at the current position,
       
   177  * or U_SENTINEL if there is none (index is at the limit).
       
   178  *
       
   179  * @param iter the UCharIterator structure ("this pointer")
       
   180  * @return the current code unit, or U_SENTINEL if the index is at the limit
       
   181  *
       
   182  * @see UCharIterator
       
   183  * @stable ICU 2.1
       
   184  */
       
   185 typedef UChar32 U_CALLCONV
       
   186 UCharIteratorCurrent(UCharIterator *iter);
       
   187 
       
   188 /**
       
   189  * Function type declaration for UCharIterator.next().
       
   190  *
       
   191  * Return the code unit at the current index and increment
       
   192  * the index (post-increment, like s[i++]),
       
   193  * or return U_SENTINEL if there is none (index is at the limit).
       
   194  *
       
   195  * @param iter the UCharIterator structure ("this pointer")
       
   196  * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit
       
   197  *
       
   198  * @see UCharIterator
       
   199  * @stable ICU 2.1
       
   200  */
       
   201 typedef UChar32 U_CALLCONV
       
   202 UCharIteratorNext(UCharIterator *iter);
       
   203 
       
   204 /**
       
   205  * Function type declaration for UCharIterator.previous().
       
   206  *
       
   207  * Decrement the index and return the code unit from there
       
   208  * (pre-decrement, like s[--i]),
       
   209  * or return U_SENTINEL if there is none (index is at the start).
       
   210  *
       
   211  * @param iter the UCharIterator structure ("this pointer")
       
   212  * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
       
   213  *
       
   214  * @see UCharIterator
       
   215  * @stable ICU 2.1
       
   216  */
       
   217 typedef UChar32 U_CALLCONV
       
   218 UCharIteratorPrevious(UCharIterator *iter);
       
   219 
       
   220 /**
       
   221  * Function type declaration for UCharIterator.reservedFn().
       
   222  * Reserved for future use.
       
   223  *
       
   224  * @param iter the UCharIterator structure ("this pointer")
       
   225  * @param something some integer argument
       
   226  * @return some integer
       
   227  *
       
   228  * @see UCharIterator
       
   229  * @stable ICU 2.1
       
   230  */
       
   231 typedef int32_t U_CALLCONV
       
   232 UCharIteratorReserved(UCharIterator *iter, int32_t something);
       
   233 
       
   234 /**
       
   235  * Function type declaration for UCharIterator.getState().
       
   236  *
       
   237  * Get the "state" of the iterator in the form of a single 32-bit word.
       
   238  * It is recommended that the state value be calculated to be as small as
       
   239  * is feasible. For strings with limited lengths, fewer than 32 bits may
       
   240  * be sufficient.
       
   241  *
       
   242  * This is used together with setState()/UCharIteratorSetState
       
   243  * to save and restore the iterator position more efficiently than with
       
   244  * getIndex()/move().
       
   245  *
       
   246  * The iterator state is defined as a uint32_t value because it is designed
       
   247  * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
       
   248  * of the character iterator.
       
   249  *
       
   250  * With some UCharIterator implementations (e.g., UTF-8),
       
   251  * getting and setting the UTF-16 index with existing functions
       
   252  * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
       
   253  * relatively slow because the iterator has to "walk" from a known index
       
   254  * to the requested one.
       
   255  * This takes more time the farther it needs to go.
       
   256  *
       
   257  * An opaque state value allows an iterator implementation to provide
       
   258  * an internal index (UTF-8: the source byte array index) for
       
   259  * fast, constant-time restoration.
       
   260  *
       
   261  * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
       
   262  * the UTF-16 index may not be restored as well, but the iterator can deliver
       
   263  * the correct text contents and move relative to the current position
       
   264  * without performance degradation.
       
   265  *
       
   266  * Some UCharIterator implementations may not be able to return
       
   267  * a valid state for each position, in which case they return UITER_NO_STATE instead.
       
   268  * This will be clearly documented for each such iterator (none of the public ones here).
       
   269  *
       
   270  * @param iter the UCharIterator structure ("this pointer")
       
   271  * @return the state word, or UITER_NO_STATE if no valid state is available
       
   272  *
       
   273  * @see UCharIterator
       
   274  * @see UCharIteratorSetState
       
   275  * @see UITER_NO_STATE
       
   276  * @stable ICU 2.6
       
   277  */
       
   278 typedef uint32_t U_CALLCONV
       
   279 UCharIteratorGetState(const UCharIterator *iter);
       
   280 
       
   281 /**
       
   282  * Function type declaration for UCharIterator.setState().
       
   283  *
       
   284  * Restore the "state" of the iterator using a state word from a getState() call.
       
   285  * The iterator object need not be the same one as for which getState() was called,
       
   286  * but it must be of the same type (set up using the same uiter_setXYZ function)
       
   287  * and it must iterate over the same string
       
   288  * (binary identical regardless of memory address).
       
   289  * For more about the state word see UCharIteratorGetState.
       
   290  *
       
   291  * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
       
   292  * the UTF-16 index may not be restored as well, but the iterator can deliver
       
   293  * the correct text contents and move relative to the current position
       
   294  * without performance degradation.
       
   295  *
       
   296  * @param iter the UCharIterator structure ("this pointer")
       
   297  * @param state the state word from a getState() call
       
   298  *              on a same-type, same-string iterator
       
   299  * @param pErrorCode Must be a valid pointer to an error code value,
       
   300  *                   which must not indicate a failure before the function call.
       
   301  *
       
   302  * @see UCharIterator
       
   303  * @see UCharIteratorGetState
       
   304  * @stable ICU 2.6
       
   305  */
       
   306 typedef void U_CALLCONV
       
   307 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
       
   308 
       
   309 
       
   310 /**
       
   311  * C API for code unit iteration.
       
   312  * This can be used as a C wrapper around
       
   313  * CharacterIterator, Replaceable, or implemented using simple strings, etc.
       
   314  *
       
   315  * There are two roles for using UCharIterator:
       
   316  *
       
   317  * A "provider" sets the necessary function pointers and controls the "protected"
       
   318  * fields of the UCharIterator structure. A "provider" passes a UCharIterator
       
   319  * into C APIs that need a UCharIterator as an abstract, flexible string interface.
       
   320  *
       
   321  * Implementations of such C APIs are "callers" of UCharIterator functions;
       
   322  * they only use the "public" function pointers and never access the "protected"
       
   323  * fields directly.
       
   324  *
       
   325  * The current() and next() functions only check the current index against the
       
   326  * limit, and previous() only checks the current index against the start,
       
   327  * to see if the iterator already reached the end of the iteration range.
       
   328  *
       
   329  * The assumption - in all iterators - is that the index is moved via the API,
       
   330  * which means it won't go out of bounds, or the index is modified by
       
   331  * user code that knows enough about the iterator implementation to set valid
       
   332  * index values.
       
   333  *
       
   334  * UCharIterator functions return code unit values 0..0xffff,
       
   335  * or U_SENTINEL if the iteration bounds are reached.
       
   336  *
       
   337  * @stable ICU 2.1
       
   338  */
       
   339 struct UCharIterator {
       
   340     /**
       
   341      * (protected) Pointer to string or wrapped object or similar.
       
   342      * Not used by caller.
       
   343      * @stable ICU 2.1
       
   344      */
       
   345     const void *context;
       
   346 
       
   347     /**
       
   348      * (protected) Length of string or similar.
       
   349      * Not used by caller.
       
   350      * @stable ICU 2.1
       
   351      */
       
   352     int32_t length;
       
   353 
       
   354     /**
       
   355      * (protected) Start index or similar.
       
   356      * Not used by caller.
       
   357      * @stable ICU 2.1
       
   358      */
       
   359     int32_t start;
       
   360 
       
   361     /**
       
   362      * (protected) Current index or similar.
       
   363      * Not used by caller.
       
   364      * @stable ICU 2.1
       
   365      */
       
   366     int32_t index;
       
   367 
       
   368     /**
       
   369      * (protected) Limit index or similar.
       
   370      * Not used by caller.
       
   371      * @stable ICU 2.1
       
   372      */
       
   373     int32_t limit;
       
   374 
       
   375     /**
       
   376      * (protected) Used by UTF-8 iterators and possibly others. Not used by caller.
       
   377      * @stable ICU 2.1
       
   378      */
       
   379     int32_t reservedField;
       
   380 
       
   381     /**
       
   382      * (public) Returns the current position or the
       
   383      * start or limit index of the iteration range.
       
   384      *
       
   385      * @see UCharIteratorGetIndex
       
   386      * @stable ICU 2.1
       
   387      */
       
   388     UCharIteratorGetIndex *getIndex;
       
   389 
       
   390     /**
       
   391      * (public) Moves the current position relative to the start or limit of the
       
   392      * iteration range, or relative to the current position itself.
       
   393      * The movement is expressed in numbers of code units forward
       
   394      * or backward by specifying a positive or negative delta.
       
   395      *
       
   396      * @see UCharIteratorMove
       
   397      * @stable ICU 2.1
       
   398      */
       
   399     UCharIteratorMove *move;
       
   400 
       
   401     /**
       
   402      * (public) Check if current() and next() can still
       
   403      * return another code unit.
       
   404      *
       
   405      * @see UCharIteratorHasNext
       
   406      * @stable ICU 2.1
       
   407      */
       
   408     UCharIteratorHasNext *hasNext;
       
   409 
       
   410     /**
       
   411      * (public) Check if previous() can still return another code unit.
       
   412      *
       
   413      * @see UCharIteratorHasPrevious
       
   414      * @stable ICU 2.1
       
   415      */
       
   416     UCharIteratorHasPrevious *hasPrevious;
       
   417 
       
   418     /**
       
   419      * (public) Return the code unit at the current position,
       
   420      * or U_SENTINEL if there is none (index is at the limit).
       
   421      *
       
   422      * @see UCharIteratorCurrent
       
   423      * @stable ICU 2.1
       
   424      */
       
   425     UCharIteratorCurrent *current;
       
   426 
       
   427     /**
       
   428      * (public) Return the code unit at the current index and increment
       
   429      * the index (post-increment, like s[i++]),
       
   430      * or return U_SENTINEL if there is none (index is at the limit).
       
   431      *
       
   432      * @see UCharIteratorNext
       
   433      * @stable ICU 2.1
       
   434      */
       
   435     UCharIteratorNext *next;
       
   436 
       
   437     /**
       
   438      * (public) Decrement the index and return the code unit from there
       
   439      * (pre-decrement, like s[--i]),
       
   440      * or return U_SENTINEL if there is none (index is at the start).
       
   441      *
       
   442      * @see UCharIteratorPrevious
       
   443      * @stable ICU 2.1
       
   444      */
       
   445     UCharIteratorPrevious *previous;
       
   446 
       
   447     /**
       
   448      * (public) Reserved for future use. Currently NULL.
       
   449      *
       
   450      * @see UCharIteratorReserved
       
   451      * @stable ICU 2.1
       
   452      */
       
   453     UCharIteratorReserved *reservedFn;
       
   454 
       
   455     /**
       
   456      * (public) Return the state of the iterator, to be restored later with setState().
       
   457      * This function pointer is NULL if the iterator does not implement it.
       
   458      *
       
   459      * @see UCharIteratorGetState
       
   460      * @stable ICU 2.6
       
   461      */
       
   462     UCharIteratorGetState *getState;
       
   463 
       
   464     /**
       
   465      * (public) Restore the iterator state from the state word from a call
       
   466      * to getState().
       
   467      * This function pointer is NULL if the iterator does not implement it.
       
   468      *
       
   469      * @see UCharIteratorSetState
       
   470      * @stable ICU 2.6
       
   471      */
       
   472     UCharIteratorSetState *setState;
       
   473 };
       
   474 
       
   475 /**
       
   476  * Helper function for UCharIterator to get the code point
       
   477  * at the current index.
       
   478  *
       
   479  * Return the code point that includes the code unit at the current position,
       
   480  * or U_SENTINEL if there is none (index is at the limit).
       
   481  * If the current code unit is a lead or trail surrogate,
       
   482  * then the following or preceding surrogate is used to form
       
   483  * the code point value.
       
   484  *
       
   485  * @param iter the UCharIterator structure ("this pointer")
       
   486  * @return the current code point, or U_SENTINEL if the index is at the limit
       
   487  *
       
   488  * @see UCharIterator
       
   489  * @see U16_GET
       
   490  * @see UnicodeString::char32At()
       
   491  * @stable ICU 2.1
       
   492  */
       
   493 U_STABLE UChar32 U_EXPORT2
       
   494 uiter_current32(UCharIterator *iter);
       
   495 
       
   496 /**
       
   497  * Helper function for UCharIterator to get the next code point.
       
   498  *
       
   499  * Return the code point at the current index and increment
       
   500  * the index (post-increment, like s[i++]),
       
   501  * or return U_SENTINEL if there is none (index is at the limit).
       
   502  *
       
   503  * @param iter the UCharIterator structure ("this pointer")
       
   504  * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit
       
   505  *
       
   506  * @see UCharIterator
       
   507  * @see U16_NEXT
       
   508  * @stable ICU 2.1
       
   509  */
       
   510 U_STABLE UChar32 U_EXPORT2
       
   511 uiter_next32(UCharIterator *iter);
       
   512 
       
   513 /**
       
   514  * Helper function for UCharIterator to get the previous code point.
       
   515  *
       
   516  * Decrement the index and return the code point from there
       
   517  * (pre-decrement, like s[--i]),
       
   518  * or return U_SENTINEL if there is none (index is at the start).
       
   519  *
       
   520  * @param iter the UCharIterator structure ("this pointer")
       
   521  * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
       
   522  *
       
   523  * @see UCharIterator
       
   524  * @see U16_PREV
       
   525  * @stable ICU 2.1
       
   526  */
       
   527 U_STABLE UChar32 U_EXPORT2
       
   528 uiter_previous32(UCharIterator *iter);
       
   529 
       
   530 /**
       
   531  * Get the "state" of the iterator in the form of a single 32-bit word.
       
   532  * This is a convenience function that calls iter->getState(iter)
       
   533  * if iter->getState is not NULL;
       
   534  * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
       
   535  *
       
   536  * Some UCharIterator implementations may not be able to return
       
   537  * a valid state for each position, in which case they return UITER_NO_STATE instead.
       
   538  * This will be clearly documented for each such iterator (none of the public ones here).
       
   539  *
       
   540  * @param iter the UCharIterator structure ("this pointer")
       
   541  * @return the state word, or UITER_NO_STATE if iter->getState is NULL or an error occurs
       
   542  *
       
   543  * @see UCharIterator
       
   544  * @see UCharIteratorGetState
       
   545  * @see UITER_NO_STATE
       
   546  * @stable ICU 2.6
       
   547  */
       
   548 U_STABLE uint32_t U_EXPORT2
       
   549 uiter_getState(const UCharIterator *iter);
       
   550 
       
   551 /**
       
   552  * Restore the "state" of the iterator using a state word from a getState() call.
       
   553  * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
       
   554  * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
       
   555  *
       
   556  * @param iter the UCharIterator structure ("this pointer")
       
   557  * @param state the state word from a getState() call
       
   558  *              on a same-type, same-string iterator
       
   559  * @param pErrorCode Must be a valid pointer to an error code value,
       
   560  *                   which must not indicate a failure before the function call.
       
   561  *
       
   562  * @see UCharIterator
       
   563  * @see UCharIteratorSetState
       
   564  * @stable ICU 2.6
       
   565  */
       
   566 U_STABLE void U_EXPORT2
       
   567 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
       
   568 
       
   569 /**
       
   570  * Set up a UCharIterator to iterate over a string.
       
   571  *
       
   572  * Sets the UCharIterator function pointers for iteration over the string s
       
   573  * with iteration boundaries start=index=0 and length=limit=string length.
       
   574  * The "provider" may set the start, index, and limit values at any time
       
   575  * within the range 0..length.
       
   576  * The length field will be ignored.
       
   577  *
       
   578  * The string pointer s is set into UCharIterator.context without copying
       
   579  * or reallocating the string contents.
       
   580  *
       
   581  * getState() simply returns the current index.
       
   582  * move() will always return the final index.
       
   583  *
       
   584  * @param iter UCharIterator structure to be set for iteration
       
   585  * @param s String to iterate over
       
   586  * @param length Length of s, or -1 if NUL-terminated
       
   587  *
       
   588  * @see UCharIterator
       
   589  * @stable ICU 2.1
       
   590  */
       
   591 U_STABLE void U_EXPORT2
       
   592 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
       
   593 
       
   594 /**
       
   595  * Set up a UCharIterator to iterate over a UTF-16BE string
       
   596  * (byte vector with a big-endian pair of bytes per UChar).
       
   597  *
       
   598  * Everything works just like with a normal UChar iterator (uiter_setString),
       
   599  * except that UChars are assembled from byte pairs,
       
   600  * and that the length argument here indicates an even number of bytes.
       
   601  *
       
   602  * getState() simply returns the current index.
       
   603  * move() will always return the final index.
       
   604  *
       
   605  * @param iter UCharIterator structure to be set for iteration
       
   606  * @param s UTF-16BE string to iterate over
       
   607  * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
       
   608  *               (NUL means pair of 0 bytes at even index from s)
       
   609  *
       
   610  * @see UCharIterator
       
   611  * @see uiter_setString
       
   612  * @stable ICU 2.6
       
   613  */
       
   614 U_STABLE void U_EXPORT2
       
   615 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
       
   616 
       
   617 /**
       
   618  * Set up a UCharIterator to iterate over a UTF-8 string.
       
   619  *
       
   620  * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
       
   621  * with UTF-8 iteration boundaries 0 and length.
       
   622  * The implementation counts the UTF-16 index on the fly and
       
   623  * lazily evaluates the UTF-16 length of the text.
       
   624  *
       
   625  * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
       
   626  * When the reservedField is not 0, then it contains a supplementary code point
       
   627  * and the UTF-16 index is between the two corresponding surrogates.
       
   628  * At that point, the UTF-8 index is behind that code point.
       
   629  *
       
   630  * The UTF-8 string pointer s is set into UCharIterator.context without copying
       
   631  * or reallocating the string contents.
       
   632  *
       
   633  * getState() returns a state value consisting of
       
   634  * - the current UTF-8 source byte index (bits 31..1)
       
   635  * - a flag (bit 0) that indicates whether the UChar position is in the middle
       
   636  *   of a surrogate pair
       
   637  *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
       
   638  *
       
   639  * getState() cannot also encode the UTF-16 index in the state value.
       
   640  * move(relative to limit or length), or
       
   641  * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
       
   642  *
       
   643  * @param iter UCharIterator structure to be set for iteration
       
   644  * @param s UTF-8 string to iterate over
       
   645  * @param length Length of s in bytes, or -1 if NUL-terminated
       
   646  *
       
   647  * @see UCharIterator
       
   648  * @stable ICU 2.6
       
   649  */
       
   650 U_STABLE void U_EXPORT2
       
   651 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
       
   652 
       
   653 #ifdef XP_CPLUSPLUS
       
   654 
       
   655 /**
       
   656  * Set up a UCharIterator to wrap around a C++ CharacterIterator.
       
   657  *
       
   658  * Sets the UCharIterator function pointers for iteration using the
       
   659  * CharacterIterator charIter.
       
   660  *
       
   661  * The CharacterIterator pointer charIter is set into UCharIterator.context
       
   662  * without copying or cloning the CharacterIterator object.
       
   663  * The other "protected" UCharIterator fields are set to 0 and will be ignored.
       
   664  * The iteration index and boundaries are controlled by the CharacterIterator.
       
   665  *
       
   666  * getState() simply returns the current index.
       
   667  * move() will always return the final index.
       
   668  *
       
   669  * @param iter UCharIterator structure to be set for iteration
       
   670  * @param charIter CharacterIterator to wrap
       
   671  *
       
   672  * @see UCharIterator
       
   673  * @stable ICU 2.1
       
   674  */
       
   675 U_STABLE void U_EXPORT2
       
   676 uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter);
       
   677 
       
   678 /**
       
   679  * Set up a UCharIterator to iterate over a C++ Replaceable.
       
   680  *
       
   681  * Sets the UCharIterator function pointers for iteration over the
       
   682  * Replaceable rep with iteration boundaries start=index=0 and
       
   683  * length=limit=rep->length().
       
   684  * The "provider" may set the start, index, and limit values at any time
       
   685  * within the range 0..length=rep->length().
       
   686  * The length field will be ignored.
       
   687  *
       
   688  * The Replaceable pointer rep is set into UCharIterator.context without copying
       
   689  * or cloning/reallocating the Replaceable object.
       
   690  *
       
   691  * getState() simply returns the current index.
       
   692  * move() will always return the final index.
       
   693  *
       
   694  * @param iter UCharIterator structure to be set for iteration
       
   695  * @param rep Replaceable to iterate over
       
   696  *
       
   697  * @see UCharIterator
       
   698  * @stable ICU 2.1
       
   699  */
       
   700 U_STABLE void U_EXPORT2
       
   701 uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep);
       
   702 
       
   703 #endif
       
   704 
       
   705 U_CDECL_END
       
   706 
       
   707 #endif