|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2002-2004, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: uiter.h |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2002jan18 |
|
14 * created by: Markus W. Scherer |
|
15 */ |
|
16 |
|
17 #ifndef __UITER_H__ |
|
18 #define __UITER_H__ |
|
19 |
|
20 /** |
|
21 * \file |
|
22 * \brief C API: Unicode Character Iteration |
|
23 * |
|
24 * @see UCharIterator |
|
25 */ |
|
26 |
|
27 #include "unicode/utypes.h" |
|
28 |
|
29 #ifdef XP_CPLUSPLUS |
|
30 U_NAMESPACE_BEGIN |
|
31 |
|
32 class CharacterIterator; |
|
33 class Replaceable; |
|
34 |
|
35 U_NAMESPACE_END |
|
36 #endif |
|
37 |
|
38 U_CDECL_BEGIN |
|
39 |
|
40 struct UCharIterator; |
|
41 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ |
|
42 |
|
43 /** |
|
44 * Origin constants for UCharIterator.getIndex() and UCharIterator.move(): they select index 0 (UITER_ZERO), the start (UITER_START), the current position (UITER_CURRENT), the limit (UITER_LIMIT), or the length (UITER_LENGTH) as the reference point. |
|
45 * @see UCharIteratorMove |
|
46 * @see UCharIterator |
|
47 * @stable ICU 2.1 |
|
48 */ |
|
49 typedef enum UCharIteratorOrigin { |
|
50 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH |
|
51 } UCharIteratorOrigin; |
|
52 |
|
53 /** Constants for UCharIterator. @stable ICU 2.6 */ |
|
54 enum { |
|
55 /** |
|
56 * Constant value that may be returned by UCharIteratorMove |
|
57 * indicating that the final UTF-16 index is not known, but that the move succeeded. |
|
58 * This can occur when moving relative to limit or length, or |
|
59 * when moving relative to the current index after a setState() |
|
60 * when the current UTF-16 index is not known. |
|
61 * |
|
62 * It would be very inefficient to have to count from the beginning of the text |
|
63 * just to get the current/limit/length index after moving relative to it. |
|
64 * The actual index can be determined with getIndex(UITER_CURRENT) |
|
65 * which will count the UChars if necessary. |
|
66 * |
|
67 * @stable ICU 2.6 |
|
68 */ |
|
69 UITER_UNKNOWN_INDEX=-2 |
|
70 }; |
|
71 |
|
72 |
|
73 /** |
|
74 * Constant for UCharIterator getState() indicating an error or |
|
75 * an unknown state. |
|
76 * Returned by uiter_getState()/UCharIteratorGetState |
|
77 * when an error occurs. |
|
78 * Also, some UCharIterator implementations may not be able to return |
|
79 * a valid state for each position. This will be clearly documented |
|
80 * for each such iterator (none of the public ones here). |
|
81 * |
|
82 * @stable ICU 2.6 |
|
83 */ |
|
84 #define UITER_NO_STATE ((uint32_t)0xffffffff) |
|
85 |
|
86 /** |
|
87 * Function type declaration for UCharIterator.getIndex(). |
|
88 * |
|
89 * Gets the current position, or the start or limit of the |
|
90 * iteration range. |
|
91 * |
|
92 * This function may perform slowly for UITER_CURRENT after setState() was called, |
|
93 * or for UITER_LENGTH, because an iterator implementation may have to count |
|
94 * UChars if the underlying storage is not UTF-16. |
|
95 * |
|
96 * @param iter the UCharIterator structure ("this pointer") |
|
97 * @param origin get the 0, start, limit, length, or current index (UITER_ZERO, UITER_START, UITER_LIMIT, UITER_LENGTH, or UITER_CURRENT) |
|
98 * @return the requested index, or U_SENTINEL on an error condition |
|
99 * |
|
100 * @see UCharIteratorOrigin |
|
101 * @see UCharIterator |
|
102 * @stable ICU 2.1 |
|
103 */ |
|
104 typedef int32_t U_CALLCONV |
|
105 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); |
|
106 |
|
107 /** |
|
108 * Function type declaration for UCharIterator.move(). |
|
109 * |
|
110 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). |
|
111 * |
|
112 * Moves the current position relative to the start or limit of the |
|
113 * iteration range, or relative to the current position itself. |
|
114 * The movement is expressed in numbers of code units forward |
|
115 * or backward by specifying a positive or negative delta. |
|
116 * Out of bounds movement will be pinned to the start or limit. |
|
117 * |
|
118 * This function may perform slowly for moving relative to UITER_LENGTH |
|
119 * because an iterator implementation may have to count the rest of the |
|
120 * UChars if the native storage is not UTF-16. |
|
121 * |
|
122 * When moving relative to the limit or length, or |
|
123 * relative to the current position after setState() was called, |
|
124 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient |
|
125 * determination of the actual UTF-16 index. |
|
126 * The actual index can be determined with getIndex(UITER_CURRENT) |
|
127 * which will count the UChars if necessary. |
|
128 * See UITER_UNKNOWN_INDEX for details. |
|
129 * |
|
130 * @param iter the UCharIterator structure ("this pointer") |
|
131 * @param delta can be positive, zero, or negative |
|
132 * @param origin move relative to the 0, start, limit, length, or current index |
|
133 * @return the new index, or U_SENTINEL on an error condition, |
|
134 * or UITER_UNKNOWN_INDEX when the index is not known. |
|
135 * |
|
136 * @see UCharIteratorOrigin |
|
137 * @see UCharIterator |
|
138 * @see UITER_UNKNOWN_INDEX |
|
139 * @stable ICU 2.1 |
|
140 */ |
|
141 typedef int32_t U_CALLCONV |
|
142 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); |
|
143 |
|
144 /** |
|
145 * Function type declaration for UCharIterator.hasNext(). |
|
146 * |
|
147 * Check if current() and next() can still |
|
148 * return another code unit. |
|
149 * |
|
150 * @param iter the UCharIterator structure ("this pointer") |
|
151 * @return boolean value for whether current() and next() can still return another code unit |
|
152 * |
|
153 * @see UCharIterator |
|
154 * @stable ICU 2.1 |
|
155 */ |
|
156 typedef UBool U_CALLCONV |
|
157 UCharIteratorHasNext(UCharIterator *iter); |
|
158 |
|
159 /** |
|
160 * Function type declaration for UCharIterator.hasPrevious(). |
|
161 * |
|
162 * Check if previous() can still return another code unit. |
|
163 * |
|
164 * @param iter the UCharIterator structure ("this pointer") |
|
165 * @return boolean value for whether previous() can still return another code unit |
|
166 * |
|
167 * @see UCharIterator |
|
168 * @stable ICU 2.1 |
|
169 */ |
|
170 typedef UBool U_CALLCONV |
|
171 UCharIteratorHasPrevious(UCharIterator *iter); |
|
172 |
|
173 /** |
|
174 * Function type declaration for UCharIterator.current(). |
|
175 * |
|
176 * Return the code unit at the current position, |
|
177 * or U_SENTINEL if there is none (index is at the limit). |
|
178 * |
|
179 * @param iter the UCharIterator structure ("this pointer") |
|
180 * @return the current code unit |
|
181 * |
|
182 * @see UCharIterator |
|
183 * @stable ICU 2.1 |
|
184 */ |
|
185 typedef UChar32 U_CALLCONV |
|
186 UCharIteratorCurrent(UCharIterator *iter); |
|
187 |
|
188 /** |
|
189 * Function type declaration for UCharIterator.next(). |
|
190 * |
|
191 * Return the code unit at the current index and increment |
|
192 * the index (post-increment, like s[i++]), |
|
193 * or return U_SENTINEL if there is none (index is at the limit). |
|
194 * |
|
195 * @param iter the UCharIterator structure ("this pointer") |
|
196 * @return the current code unit (and post-increment the current index) |
|
197 * |
|
198 * @see UCharIterator |
|
199 * @stable ICU 2.1 |
|
200 */ |
|
201 typedef UChar32 U_CALLCONV |
|
202 UCharIteratorNext(UCharIterator *iter); |
|
203 |
|
204 /** |
|
205 * Function type declaration for UCharIterator.previous(). |
|
206 * |
|
207 * Decrement the index and return the code unit from there |
|
208 * (pre-decrement, like s[--i]), |
|
209 * or return U_SENTINEL if there is none (index is at the start). |
|
210 * |
|
211 * @param iter the UCharIterator structure ("this pointer") |
|
212 * @return the previous code unit (after pre-decrementing the current index) |
|
213 * |
|
214 * @see UCharIterator |
|
215 * @stable ICU 2.1 |
|
216 */ |
|
217 typedef UChar32 U_CALLCONV |
|
218 UCharIteratorPrevious(UCharIterator *iter); |
|
219 |
|
220 /** |
|
221 * Function type declaration for UCharIterator.reservedFn(). |
|
222 * Reserved for future use. |
|
223 * |
|
224 * @param iter the UCharIterator structure ("this pointer") |
|
225 * @param something some integer argument |
|
226 * @return some integer |
|
227 * |
|
228 * @see UCharIterator |
|
229 * @stable ICU 2.1 |
|
230 */ |
|
231 typedef int32_t U_CALLCONV |
|
232 UCharIteratorReserved(UCharIterator *iter, int32_t something); |
|
233 |
|
234 /** |
|
235 * Function type declaration for UCharIterator.getState(). |
|
236 * |
|
237 * Get the "state" of the iterator in the form of a single 32-bit word. |
|
238 * It is recommended that the state value be calculated to be as small as |
|
239 * is feasible. For strings with limited lengths, fewer than 32 bits may |
|
240 * be sufficient. |
|
241 * |
|
242 * This is used together with setState()/UCharIteratorSetState |
|
243 * to save and restore the iterator position more efficiently than with |
|
244 * getIndex()/move(). |
|
245 * |
|
246 * The iterator state is defined as a uint32_t value because it is designed |
|
247 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state |
|
248 * of the character iterator. |
|
249 * |
|
250 * With some UCharIterator implementations (e.g., UTF-8), |
|
251 * getting and setting the UTF-16 index with existing functions |
|
252 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but |
|
253 * relatively slow because the iterator has to "walk" from a known index |
|
254 * to the requested one. |
|
255 * This takes more time the farther it needs to go. |
|
256 * |
|
257 * An opaque state value allows an iterator implementation to provide |
|
258 * an internal index (UTF-8: the source byte array index) for |
|
259 * fast, constant-time restoration. |
|
260 * |
|
261 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
|
262 * the UTF-16 index may not be restored as well, but the iterator can deliver |
|
263 * the correct text contents and move relative to the current position |
|
264 * without performance degradation. |
|
265 * |
|
266 * Some UCharIterator implementations may not be able to return |
|
267 * a valid state for each position, in which case they return UITER_NO_STATE instead. |
|
268 * This will be clearly documented for each such iterator (none of the public ones here). |
|
269 * |
|
270 * @param iter the UCharIterator structure ("this pointer") |
|
271 * @return the state word |
|
272 * |
|
273 * @see UCharIterator |
|
274 * @see UCharIteratorSetState |
|
275 * @see UITER_NO_STATE |
|
276 * @stable ICU 2.6 |
|
277 */ |
|
278 typedef uint32_t U_CALLCONV |
|
279 UCharIteratorGetState(const UCharIterator *iter); |
|
280 |
|
281 /** |
|
282 * Function type declaration for UCharIterator.setState(). |
|
283 * |
|
284 * Restore the "state" of the iterator using a state word from a getState() call. |
|
285 * The iterator object need not be the same one on which getState() was called, |
|
286 * but it must be of the same type (set up using the same uiter_setXYZ function) |
|
287 * and it must iterate over the same string |
|
288 * (binary identical regardless of memory address). |
|
289 * For more about the state word see UCharIteratorGetState. |
|
290 * |
|
291 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
|
292 * the UTF-16 index may not be restored as well, but the iterator can deliver |
|
293 * the correct text contents and move relative to the current position |
|
294 * without performance degradation. |
|
295 * |
|
296 * @param iter the UCharIterator structure ("this pointer") |
|
297 * @param state the state word from a getState() call |
|
298 * on a same-type, same-string iterator |
|
299 * @param pErrorCode Must be a valid pointer to an error code value, |
|
300 * which must not indicate a failure before the function call. |
|
301 * |
|
302 * @see UCharIterator |
|
303 * @see UCharIteratorGetState |
|
304 * @stable ICU 2.6 |
|
305 */ |
|
306 typedef void U_CALLCONV |
|
307 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
|
308 |
|
309 |
|
310 /** |
|
311 * C API for code unit iteration. |
|
312 * This can be used as a C wrapper around |
|
313 * CharacterIterator, Replaceable, or implemented using simple strings, etc. |
|
314 * |
|
315 * There are two roles for using UCharIterator: |
|
316 * |
|
317 * A "provider" sets the necessary function pointers and controls the "protected" |
|
318 * fields of the UCharIterator structure. A "provider" passes a UCharIterator |
|
319 * into C APIs that need a UCharIterator as an abstract, flexible string interface. |
|
320 * |
|
321 * Implementations of such C APIs are "callers" of UCharIterator functions; |
|
322 * they only use the "public" function pointers and never access the "protected" |
|
323 * fields directly. |
|
324 * |
|
325 * The current() and next() functions only check the current index against the |
|
326 * limit, and previous() only checks the current index against the start, |
|
327 * to see if the iterator already reached the end of the iteration range. |
|
328 * |
|
329 * The assumption - in all iterators - is that the index is moved via the API, |
|
330 * which means it won't go out of bounds, or the index is modified by |
|
331 * user code that knows enough about the iterator implementation to set valid |
|
332 * index values. |
|
333 * |
|
334 * UCharIterator functions return code unit values 0..0xffff, |
|
335 * or U_SENTINEL if the iteration bounds are reached. |
|
336 * |
|
337 * @stable ICU 2.1 |
|
338 */ |
|
339 struct UCharIterator { |
|
340 /** |
|
341 * (protected) Pointer to string or wrapped object or similar. |
|
342 * Not used by caller. |
|
343 * @stable ICU 2.1 |
|
344 */ |
|
345 const void *context; |
|
346 |
|
347 /** |
|
348 * (protected) Length of string or similar. |
|
349 * Not used by caller. |
|
350 * @stable ICU 2.1 |
|
351 */ |
|
352 int32_t length; |
|
353 |
|
354 /** |
|
355 * (protected) Start index or similar. |
|
356 * Not used by caller. |
|
357 * @stable ICU 2.1 |
|
358 */ |
|
359 int32_t start; |
|
360 |
|
361 /** |
|
362 * (protected) Current index or similar. |
|
363 * Not used by caller. |
|
364 * @stable ICU 2.1 |
|
365 */ |
|
366 int32_t index; |
|
367 |
|
368 /** |
|
369 * (protected) Limit index or similar. |
|
370 * Not used by caller. |
|
371 * @stable ICU 2.1 |
|
372 */ |
|
373 int32_t limit; |
|
374 |
|
375 /** |
|
376 * (protected) Used by UTF-8 iterators (see uiter_setUTF8) and possibly others. Not used by caller. |
|
377 * @stable ICU 2.1 |
|
378 */ |
|
379 int32_t reservedField; |
|
380 |
|
381 /** |
|
382 * (public) Returns the current position or the |
|
383 * start or limit index of the iteration range. |
|
384 * |
|
385 * @see UCharIteratorGetIndex |
|
386 * @stable ICU 2.1 |
|
387 */ |
|
388 UCharIteratorGetIndex *getIndex; |
|
389 |
|
390 /** |
|
391 * (public) Moves the current position relative to the start or limit of the |
|
392 * iteration range, or relative to the current position itself. |
|
393 * The movement is expressed in numbers of code units forward |
|
394 * or backward by specifying a positive or negative delta. |
|
395 * |
|
396 * @see UCharIteratorMove |
|
397 * @stable ICU 2.1 |
|
398 */ |
|
399 UCharIteratorMove *move; |
|
400 |
|
401 /** |
|
402 * (public) Check if current() and next() can still |
|
403 * return another code unit. |
|
404 * |
|
405 * @see UCharIteratorHasNext |
|
406 * @stable ICU 2.1 |
|
407 */ |
|
408 UCharIteratorHasNext *hasNext; |
|
409 |
|
410 /** |
|
411 * (public) Check if previous() can still return another code unit. |
|
412 * |
|
413 * @see UCharIteratorHasPrevious |
|
414 * @stable ICU 2.1 |
|
415 */ |
|
416 UCharIteratorHasPrevious *hasPrevious; |
|
417 |
|
418 /** |
|
419 * (public) Return the code unit at the current position, |
|
420 * or U_SENTINEL if there is none (index is at the limit). |
|
421 * |
|
422 * @see UCharIteratorCurrent |
|
423 * @stable ICU 2.1 |
|
424 */ |
|
425 UCharIteratorCurrent *current; |
|
426 |
|
427 /** |
|
428 * (public) Return the code unit at the current index and increment |
|
429 * the index (post-increment, like s[i++]), |
|
430 * or return U_SENTINEL if there is none (index is at the limit). |
|
431 * |
|
432 * @see UCharIteratorNext |
|
433 * @stable ICU 2.1 |
|
434 */ |
|
435 UCharIteratorNext *next; |
|
436 |
|
437 /** |
|
438 * (public) Decrement the index and return the code unit from there |
|
439 * (pre-decrement, like s[--i]), |
|
440 * or return U_SENTINEL if there is none (index is at the start). |
|
441 * |
|
442 * @see UCharIteratorPrevious |
|
443 * @stable ICU 2.1 |
|
444 */ |
|
445 UCharIteratorPrevious *previous; |
|
446 |
|
447 /** |
|
448 * (public) Reserved for future use. Currently NULL. |
|
449 * |
|
450 * @see UCharIteratorReserved |
|
451 * @stable ICU 2.1 |
|
452 */ |
|
453 UCharIteratorReserved *reservedFn; |
|
454 |
|
455 /** |
|
456 * (public) Return the state of the iterator, to be restored later with setState(). |
|
457 * This function pointer is NULL if the iterator does not implement it. |
|
458 * |
|
459 * @see UCharIteratorGetState |
|
460 * @stable ICU 2.6 |
|
461 */ |
|
462 UCharIteratorGetState *getState; |
|
463 |
|
464 /** |
|
465 * (public) Restore the iterator state from the state word from a call |
|
466 * to getState(). |
|
467 * This function pointer is NULL if the iterator does not implement it. |
|
468 * |
|
469 * @see UCharIteratorSetState |
|
470 * @stable ICU 2.6 |
|
471 */ |
|
472 UCharIteratorSetState *setState; |
|
473 }; |
|
474 |
|
475 /** |
|
476 * Helper function for UCharIterator to get the code point |
|
477 * at the current index. |
|
478 * |
|
479 * Return the code point that includes the code unit at the current position, |
|
480 * or U_SENTINEL if there is none (index is at the limit). |
|
481 * If the current code unit is a lead or trail surrogate, |
|
482 * then the following or preceding surrogate is used to form |
|
483 * the code point value. |
|
484 * |
|
485 * @param iter the UCharIterator structure ("this pointer") |
|
486 * @return the current code point |
|
487 * |
|
488 * @see UCharIterator |
|
489 * @see U16_GET |
|
490 * @see UnicodeString::char32At() |
|
491 * @stable ICU 2.1 |
|
492 */ |
|
493 U_STABLE UChar32 U_EXPORT2 |
|
494 uiter_current32(UCharIterator *iter); |
|
495 |
|
496 /** |
|
497 * Helper function for UCharIterator to get the next code point. |
|
498 * |
|
499 * Return the code point at the current index and increment |
|
500 * the index (post-increment, like s[i++]), |
|
501 * or return U_SENTINEL if there is none (index is at the limit). |
|
502 * |
|
503 * @param iter the UCharIterator structure ("this pointer") |
|
504 * @return the current code point (and post-increment the current index) |
|
505 * |
|
506 * @see UCharIterator |
|
507 * @see U16_NEXT |
|
508 * @stable ICU 2.1 |
|
509 */ |
|
510 U_STABLE UChar32 U_EXPORT2 |
|
511 uiter_next32(UCharIterator *iter); |
|
512 |
|
513 /** |
|
514 * Helper function for UCharIterator to get the previous code point. |
|
515 * |
|
516 * Decrement the index and return the code point from there |
|
517 * (pre-decrement, like s[--i]), |
|
518 * or return U_SENTINEL if there is none (index is at the start). |
|
519 * |
|
520 * @param iter the UCharIterator structure ("this pointer") |
|
521 * @return the previous code point (after pre-decrementing the current index) |
|
522 * |
|
523 * @see UCharIterator |
|
524 * @see U16_PREV |
|
525 * @stable ICU 2.1 |
|
526 */ |
|
527 U_STABLE UChar32 U_EXPORT2 |
|
528 uiter_previous32(UCharIterator *iter); |
|
529 |
|
530 /** |
|
531 * Get the "state" of the iterator in the form of a single 32-bit word. |
|
532 * This is a convenience function that calls iter->getState(iter) |
|
533 * if iter->getState is not NULL; |
|
534 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. |
|
535 * |
|
536 * Some UCharIterator implementations may not be able to return |
|
537 * a valid state for each position, in which case they return UITER_NO_STATE instead. |
|
538 * This will be clearly documented for each such iterator (none of the public ones here). |
|
539 * |
|
540 * @param iter the UCharIterator structure ("this pointer") |
|
541 * @return the state word |
|
542 * |
|
543 * @see UCharIterator |
|
544 * @see UCharIteratorGetState |
|
545 * @see UITER_NO_STATE |
|
546 * @stable ICU 2.6 |
|
547 */ |
|
548 U_STABLE uint32_t U_EXPORT2 |
|
549 uiter_getState(const UCharIterator *iter); |
|
550 |
|
551 /** |
|
552 * Restore the "state" of the iterator using a state word from a getState() call. |
|
553 * This is a convenience function that calls iter->setState(iter, state, pErrorCode) |
|
554 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. |
|
555 * |
|
556 * @param iter the UCharIterator structure ("this pointer") |
|
557 * @param state the state word from a getState() call |
|
558 * on a same-type, same-string iterator |
|
559 * @param pErrorCode Must be a valid pointer to an error code value, |
|
560 * which must not indicate a failure before the function call. |
|
561 * |
|
562 * @see UCharIterator |
|
563 * @see UCharIteratorSetState |
|
564 * @stable ICU 2.6 |
|
565 */ |
|
566 U_STABLE void U_EXPORT2 |
|
567 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
|
568 |
|
569 /** |
|
570 * Set up a UCharIterator to iterate over a string. |
|
571 * |
|
572 * Sets the UCharIterator function pointers for iteration over the string s |
|
573 * with iteration boundaries start=index=0 and length=limit=string length. |
|
574 * The "provider" may set the start, index, and limit values at any time |
|
575 * within the range 0..length. |
|
576 * The length field will be ignored. |
|
577 * |
|
578 * The string pointer s is set into UCharIterator.context without copying |
|
579 * or reallocating the string contents. |
|
580 * |
|
581 * getState() simply returns the current index. |
|
582 * move() will always return the final index. |
|
583 * |
|
584 * @param iter UCharIterator structure to be set for iteration |
|
585 * @param s String to iterate over |
|
586 * @param length Length of s, or -1 if NUL-terminated |
|
587 * |
|
588 * @see UCharIterator |
|
589 * @stable ICU 2.1 |
|
590 */ |
|
591 U_STABLE void U_EXPORT2 |
|
592 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); |
|
593 |
|
594 /** |
|
595 * Set up a UCharIterator to iterate over a UTF-16BE string |
|
596 * (byte vector with a big-endian pair of bytes per UChar). |
|
597 * |
|
598 * Everything works just like with a normal UChar iterator (uiter_setString), |
|
599 * except that UChars are assembled from byte pairs, |
|
600 * and that the length argument here indicates an even number of bytes. |
|
601 * |
|
602 * getState() simply returns the current index. |
|
603 * move() will always return the final index. |
|
604 * |
|
605 * @param iter UCharIterator structure to be set for iteration |
|
606 * @param s UTF-16BE string to iterate over |
|
607 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated |
|
608 * (NUL means pair of 0 bytes at even index from s) |
|
609 * |
|
610 * @see UCharIterator |
|
611 * @see uiter_setString |
|
612 * @stable ICU 2.6 |
|
613 */ |
|
614 U_STABLE void U_EXPORT2 |
|
615 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); |
|
616 |
|
617 /** |
|
618 * Set up a UCharIterator to iterate over a UTF-8 string. |
|
619 * |
|
620 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s |
|
621 * with UTF-8 iteration boundaries 0 and length. |
|
622 * The implementation counts the UTF-16 index on the fly and |
|
623 * lazily evaluates the UTF-16 length of the text. |
|
624 * |
|
625 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. |
|
626 * When the reservedField is not 0, then it contains a supplementary code point |
|
627 * and the UTF-16 index is between the two corresponding surrogates. |
|
628 * At that point, the UTF-8 index is behind that code point. |
|
629 * |
|
630 * The UTF-8 string pointer s is set into UCharIterator.context without copying |
|
631 * or reallocating the string contents. |
|
632 * |
|
633 * getState() returns a state value consisting of |
|
634 * - the current UTF-8 source byte index (bits 31..1) |
|
635 * - a flag (bit 0) that indicates whether the UChar position is in the middle |
|
636 * of a surrogate pair |
|
637 * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) |
|
638 * |
|
639 * getState() cannot also encode the UTF-16 index in the state value. |
|
640 * move(relative to limit or length), or |
|
641 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. |
|
642 * |
|
643 * @param iter UCharIterator structure to be set for iteration |
|
644 * @param s UTF-8 string to iterate over |
|
645 * @param length Length of s in bytes, or -1 if NUL-terminated |
|
646 * |
|
647 * @see UCharIterator |
|
648 * @stable ICU 2.6 |
|
649 */ |
|
650 U_STABLE void U_EXPORT2 |
|
651 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); |
|
652 |
|
653 #ifdef XP_CPLUSPLUS |
|
654 |
|
655 /** |
|
656 * Set up a UCharIterator to wrap around a C++ CharacterIterator. |
|
657 * |
|
658 * Sets the UCharIterator function pointers for iteration using the |
|
659 * CharacterIterator charIter. |
|
660 * |
|
661 * The CharacterIterator pointer charIter is set into UCharIterator.context |
|
662 * without copying or cloning the CharacterIterator object. |
|
663 * The other "protected" UCharIterator fields are set to 0 and will be ignored. |
|
664 * The iteration index and boundaries are controlled by the CharacterIterator. |
|
665 * |
|
666 * getState() simply returns the current index. |
|
667 * move() will always return the final index. |
|
668 * |
|
669 * @param iter UCharIterator structure to be set for iteration |
|
670 * @param charIter CharacterIterator to wrap |
|
671 * |
|
672 * @see UCharIterator |
|
673 * @stable ICU 2.1 |
|
674 */ |
|
675 U_STABLE void U_EXPORT2 |
|
676 uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter); |
|
677 |
|
678 /** |
|
679 * Set up a UCharIterator to iterate over a C++ Replaceable. |
|
680 * |
|
681 * Sets the UCharIterator function pointers for iteration over the |
|
682 * Replaceable rep with iteration boundaries start=index=0 and |
|
683 * length=limit=rep->length(). |
|
684 * The "provider" may set the start, index, and limit values at any time |
|
685 * within the range 0..length=rep->length(). |
|
686 * The length field will be ignored. |
|
687 * |
|
688 * The Replaceable pointer rep is set into UCharIterator.context without copying |
|
689 * or cloning/reallocating the Replaceable object. |
|
690 * |
|
691 * getState() simply returns the current index. |
|
692 * move() will always return the final index. |
|
693 * |
|
694 * @param iter UCharIterator structure to be set for iteration |
|
695 * @param rep Replaceable to iterate over |
|
696 * |
|
697 * @see UCharIterator |
|
698 * @stable ICU 2.1 |
|
699 */ |
|
700 U_STABLE void U_EXPORT2 |
|
701 uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep); |
|
702 |
|
703 #endif |
|
704 |
|
705 U_CDECL_END |
|
706 |
|
707 #endif |