|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2003, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: unorm_it.h |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2003jan21 |
|
14 * created by: Markus W. Scherer |
|
15 */ |
|
16 |
|
17 #ifndef __UNORM_IT_H__ |
|
18 #define __UNORM_IT_H__ |
|
19 |
|
20 #include "unicode/utypes.h" |
|
21 |
|
22 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION |
|
23 |
|
24 #include "unicode/uiter.h" |
|
25 #include "unicode/unorm.h" |
|
26 |
|
27 /** |
|
28 * Normalizing UCharIterator wrapper. |
|
29 * This internal API basically duplicates the functionality of the C++ Normalizer |
|
30 * but |
|
31 * - it actually implements a character iterator (UCharIterator) |
|
32 * with few restrictions (see unorm_setIter()) |
|
33 * - it supports UCharIterator getState()/setState() |
|
34 * - it uses lower-level APIs and buffers more text and states, |
|
35 * hopefully resulting in higher performance |
|
36 * |
|
37 * Usage example: |
|
38 * \code |
|
39 * function(UCharIterator *srcIter) { |
|
40 * UNormIterator *uni; |
|
41 * UCharIterator *iter; |
|
42 * UErrorCode errorCode; |
|
43 * |
|
44 * errorCode=U_ZERO_ERROR; |
|
45 * uni=unorm_openIter(NULL, 0, &errorCode); |
|
46 * if(U_FAILURE(errorCode)) { |
|
47 * // report error |
|
48 * return; |
|
49 * } |
|
50 * |
|
51 * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); |
|
52 * if(U_FAILURE(errorCode)) { |
|
53 * // report error |
|
54 * } else { |
|
55 * // use iter to iterate over the canonically ordered |
|
56 * // version of srcIter's text |
|
57 * uint32_t state; |
|
58 * |
|
59 * ... |
|
60 * |
|
61 * state=uiter_getState(iter); |
|
62 * if(state!=UITER_NO_STATE) { |
|
63 * // use valid state, store it, use iter some more |
|
64 * ... |
|
65 * |
|
66 * // later restore iter to the saved state: |
|
67 * uiter_setState(iter, state, &errorCode); |
|
68 * |
|
69 * ... |
|
70 * } |
|
71 * |
|
72 * ... |
|
73 * } |
|
74 * unorm_closeIter(uni); |
|
75 * } |
|
76 * \endcode |
|
77 * |
|
78 * See also the ICU test suites. |
|
79 * |
|
80 * @internal |
|
81 */ |
|
82 struct UNormIterator; |
|
83 typedef struct UNormIterator UNormIterator; |
|
84 |
|
85 /** |
|
86 * Size in bytes of a stack buffer to hold a UNormIterator, see the stackMem |
|
87 * and stackMemSize parameters of unorm_openIter(). |
|
88 * |
|
89 * @internal |
|
90 */ |
|
91 #define UNORM_ITER_SIZE 1024 |
|
92 |
|
93 /** |
|
94 * Open a normalizing iterator. Must be closed later with unorm_closeIter(). |
|
95 * Use unorm_setIter() to set the source iterator and the normalization mode. |
|
96 * |
|
97 * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold |
|
98 * the UNormIterator if possible; can be NULL. |
|
99 * @param stackMemSize Number of bytes at stackMem; can be 0, |
|
100 * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. |
|
101 * @param pErrorCode ICU error code |
|
102 * @return an allocated and pre-initialized UNormIterator |
|
103 * @internal |
|
104 */ |
|
105 U_CAPI UNormIterator * U_EXPORT2 |
|
106 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); |
|
107 |
|
108 /** |
|
109 * Close a normalizing iterator that was opened with unorm_openIter(). |
|
110 * |
|
111 * @param uni UNormIterator from unorm_openIter() |
|
112 * @internal |
|
113 */ |
|
114 U_CAPI void U_EXPORT2 |
|
115 unorm_closeIter(UNormIterator *uni); |
|
116 |
|
117 /** |
|
118 * Set a UCharIterator and a normalization mode for the normalizing iterator |
|
119 * to wrap. The normalizing iterator will read from the character iterator, |
|
120 * normalize the text, and in turn deliver it with its own wrapper UCharIterator |
|
121 * interface which it returns. |
|
122 * |
|
123 * The source iterator remains at its current position through the unorm_setIter() |
|
124 * call but will be used and moved as soon as the |
|
125 * returned normalizing iterator is. |
|
126 * |
|
127 * The returned interface pointer is valid for as long as the normalizing iterator |
|
128 * is open and until another unorm_setIter() call is made on it. |
|
129 * |
|
130 * The normalizing iterator's UCharIterator interface has the following properties: |
|
131 * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX |
|
132 * - getState() will return UITER_NO_STATE for unknown states for positions |
|
133 * that are not at normalization boundaries |
|
134 * |
|
135 * @param uni UNormIterator from unorm_openIter() |
|
136 * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator. |
|
137 * Must support getState() and setState(). |
|
138 * @param mode The normalization mode. |
|
139 * @param pErrorCode ICU error code |
|
140 * @return an alias to the normalizing iterator's UCharIterator interface |
|
141 * @internal |
|
142 */ |
|
143 U_CAPI UCharIterator * U_EXPORT2 |
|
144 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode); |
|
145 |
|
146 #endif /* uconfig.h switches */ |
|
147 |
|
148 #endif |