|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 1997-2004, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * |
|
7 * File UCHAR.H |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 04/02/97 aliu Creation. |
|
13 * 03/29/99 helena Updated for C APIs. |
|
14 * 4/15/99 Madhu Updated for C Implementation and Javadoc |
|
15 * 5/20/99 Madhu Added the function u_getVersion() |
|
16 * 8/19/1999 srl Upgraded scripts to Unicode 3.0 |
|
17 * 8/27/1999 schererm UCharDirection constants: U_... |
|
18 * 11/11/1999 weiv added u_isalnum(), cleaned comments |
|
19 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). |
|
20 ****************************************************************************** |
|
21 */ |
|
22 |
|
23 #ifndef UCHAR_H |
|
24 #define UCHAR_H |
|
25 |
|
26 #include "unicode/utypes.h" |
|
27 |
|
28 U_CDECL_BEGIN |
|
29 |
|
30 /*==========================================================================*/ |
|
31 /* Unicode version number */ |
|
32 /*==========================================================================*/ |
|
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION "4.0.1"
|
43 |
|
/**
 * \file
 * \brief C API: Unicode Properties
 *
 * This C API provides low-level access to the Unicode Character Database.
 * In addition to raw property values, some convenience functions calculate
 * derived properties, for example for Java-style programming.
 *
 * Unicode assigns each code point (not just assigned characters) values for
 * many properties.
 * Most of them are simple boolean flags, or constants from a small enumerated list.
 * For some properties, values are strings or other relatively more complex types.
 *
 * For more information see
 * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
 * and the ICU User Guide chapter on Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
 *
 * Many functions are designed to match java.lang.Character functions.
 * See the individual function documentation,
 * and see the JDK 1.4.1 java.lang.Character documentation
 * at http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
 *
 * There are also functions that provide easy migration from C/POSIX functions
 * like isblank(). Their use is generally discouraged because the C/POSIX
 * standards do not define their semantics beyond the ASCII range, which means
 * that different implementations exhibit very different behavior.
 * Instead, Unicode properties should be used directly.
 *
 * There are also only a few, broad C/POSIX character classes, and they tend
 * to be used for conflicting purposes. For example, the "isalpha()" class
 * is sometimes used to determine word boundaries, while a more sophisticated
 * approach would at least distinguish initial letters from continuation
 * characters (the latter including combining marks).
 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 * Another example: There is no "istitle()" class for titlecase characters.
 *
 * A summary of the behavior of some C/POSIX character classification implementations
 * for Unicode is available at http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
 *
 * <strong>Important</strong>:
 * The behavior of the ICU C/POSIX-style character classification
 * functions is subject to change according to discussion of the above summary.
 *
 * Note: There are several ICU whitespace functions.
 * Comparison:
 * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
 *       most of general categories "Z" (separators) + most whitespace ISO controls
 *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
 * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
 * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
 * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
 * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
 */
|
97 |
|
/**
 * Constants.
 */

/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
#define UCHAR_MIN_VALUE 0

/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff

/**
 * Get a single-bit bit set (a flag) from a bit number 0..31.
 * The result has type uint32_t and the argument is evaluated exactly once.
 * The bit number must be in the range 0..31: shifting a 32-bit value by a
 * negative amount or by 32 or more is undefined behavior in C.
 * @stable ICU 2.1
 */
#define U_MASK(x) ((uint32_t)1<<(x))
|
120 |
|
/*
 * !! Note: Several comments in this file are machine-read by the
 * genpname tool.  These comments describe the correspondence between
 * icu enum constants and UCD entities.  Do not delete them.  Update
 * these comments as needed.
 *
 * Any comment of the form "/ *[name]* /" (spaces added) is such
 * a comment.
 *
 * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
 * name, which must match PropertyValueAliases.txt.
 */
|
133 |
|
/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * The constants are grouped into value ranges by property type:
 * binary properties start at 0, enumerated/integer properties at 0x1000,
 * bit-mask properties at 0x2000, double properties at 0x3000 and
 * string properties at 0x4000. Each range has a *_START alias for its
 * first constant and a *_LIMIT constant one past its last constant.
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
typedef enum UProperty {
    /*  See note !!.  Comments of the form "Binary property Dash",
        "Enumerated property Script", "Double property Numeric_Value",
        and "String property Age" are read by genpname. */

    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
        debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
        rather than UCHAR_BINARY_START.  Likewise for other *_START
        identifiers. */

    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
    UCHAR_ALPHABETIC=0,
    /** First constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
    UCHAR_ASCII_HEX_DIGIT,
    /** Binary property Bidi_Control.
        Format controls which have specific functions
        in the Bidi Algorithm. @stable ICU 2.1 */
    UCHAR_BIDI_CONTROL,
    /** Binary property Bidi_Mirrored.
        Characters that may change display in RTL text.
        Same as u_isMirrored.
        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
    UCHAR_BIDI_MIRRORED,
    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
    UCHAR_DASH,
    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
        Ignorable in most processing.
        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
    UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
    /** Binary property Deprecated (new in Unicode 3.2).
        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
    UCHAR_DEPRECATED,
    /** Binary property Diacritic. Characters that linguistically modify
        the meaning of another character to which they apply. @stable ICU 2.1 */
    UCHAR_DIACRITIC,
    /** Binary property Extender.
        Extend the value or shape of a preceding alphabetic character,
        e.g., length and iteration marks. @stable ICU 2.1 */
    UCHAR_EXTENDER,
    /** Binary property Full_Composition_Exclusion.
        CompositionExclusions.txt+Singleton Decompositions+
        Non-Starter Decompositions. @stable ICU 2.1 */
    UCHAR_FULL_COMPOSITION_EXCLUSION,
    /** Binary property Grapheme_Base (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_BASE,
    /** Binary property Grapheme_Extend (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_EXTEND,
    /** Binary property Grapheme_Link (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
    UCHAR_GRAPHEME_LINK,
    /** Binary property Hex_Digit.
        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
    UCHAR_HEX_DIGIT,
    /** Binary property Hyphen. Dashes used to mark connections
        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
    UCHAR_HYPHEN,
    /** Binary property ID_Continue.
        Characters that can continue an identifier.
        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
    UCHAR_ID_CONTINUE,
    /** Binary property ID_Start.
        Characters that can start an identifier.
        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
    UCHAR_ID_START,
    /** Binary property Ideographic.
        CJKV ideographs. @stable ICU 2.1 */
    UCHAR_IDEOGRAPHIC,
    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_BINARY_OPERATOR,
    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_TRINARY_OPERATOR,
    /** Binary property Join_Control.
        Format controls for cursive joining and ligation. @stable ICU 2.1 */
    UCHAR_JOIN_CONTROL,
    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
        Characters that do not use logical order and
        require special handling in most processing. @stable ICU 2.1 */
    UCHAR_LOGICAL_ORDER_EXCEPTION,
    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
        Ll+Other_Lowercase @stable ICU 2.1 */
    UCHAR_LOWERCASE,
    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
    UCHAR_MATH,
    /** Binary property Noncharacter_Code_Point.
        Code points that are explicitly defined as illegal
        for the encoding of characters. @stable ICU 2.1 */
    UCHAR_NONCHARACTER_CODE_POINT,
    /** Binary property Quotation_Mark. @stable ICU 2.1 */
    UCHAR_QUOTATION_MARK,
    /** Binary property Radical (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_RADICAL,
    /** Binary property Soft_Dotted (new in Unicode 3.2).
        Characters with a "soft dot", like i or j.
        An accent placed on these characters causes
        the dot to disappear. @stable ICU 2.1 */
    UCHAR_SOFT_DOTTED,
    /** Binary property Terminal_Punctuation.
        Punctuation characters that generally mark
        the end of textual units. @stable ICU 2.1 */
    UCHAR_TERMINAL_PUNCTUATION,
    /** Binary property Unified_Ideograph (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_UNIFIED_IDEOGRAPH,
    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
        Lu+Other_Uppercase @stable ICU 2.1 */
    UCHAR_UPPERCASE,
    /** Binary property White_Space.
        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
    UCHAR_WHITE_SPACE,
    /** Binary property XID_Continue.
        ID_Continue modified to allow closure under
        normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_CONTINUE,
    /** Binary property XID_Start. ID_Start modified to allow
        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_START,
    /** Binary property Case_Sensitive. Either the source of a case
        mapping or _in_ the target of a case mapping. Not the same as
        the general category Cased_Letter. @stable ICU 2.6 */
    UCHAR_CASE_SENSITIVE,
    /** Binary property STerm (new in Unicode 4.0.1).
        Sentence Terminal. Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        @draft ICU 3.0 */
    UCHAR_S_TERM,
    /** Binary property Variation_Selector (new in Unicode 4.0.1).
        Indicates all those characters that qualify as Variation Selectors.
        For details on the behavior of these characters,
        see StandardizedVariants.html and 15.6 Variation Selectors.
        @draft ICU 3.0 */
    UCHAR_VARIATION_SELECTOR,
    /** Binary property NFD_Inert.
        ICU-specific property for characters that are inert under NFD,
        i.e., they do not interact with adjacent characters.
        Used for example in normalizing transforms in incremental mode
        to find the boundary of safely normalizable text despite possible
        text additions.

        There is one such property per normalization form.
        These properties are computed as follows - an inert character is:
        a) unassigned, or ALL of the following:
        b) of combining class 0.
        c) not decomposed by this normalization form.
        AND if NFC or NFKC,
        d) can never compose with a previous character.
        e) can never compose with a following character.
        f) can never change if another character is added.
           Example: a-breve might satisfy all but f, but if you
           add an ogonek it changes to a-ogonek + breve

        See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
        and icu/source/common/unormimp.h .
        @draft ICU 3.0 */
    UCHAR_NFD_INERT,
    /** Binary property NFKD_Inert.
        ICU-specific property for characters that are inert under NFKD,
        i.e., they do not interact with adjacent characters.
        Used for example in normalizing transforms in incremental mode
        to find the boundary of safely normalizable text despite possible
        text additions.
        @see UCHAR_NFD_INERT
        @draft ICU 3.0 */
    UCHAR_NFKD_INERT,
    /** Binary property NFC_Inert.
        ICU-specific property for characters that are inert under NFC,
        i.e., they do not interact with adjacent characters.
        Used for example in normalizing transforms in incremental mode
        to find the boundary of safely normalizable text despite possible
        text additions.
        @see UCHAR_NFD_INERT
        @draft ICU 3.0 */
    UCHAR_NFC_INERT,
    /** Binary property NFKC_Inert.
        ICU-specific property for characters that are inert under NFKC,
        i.e., they do not interact with adjacent characters.
        Used for example in normalizing transforms in incremental mode
        to find the boundary of safely normalizable text despite possible
        text additions.
        @see UCHAR_NFD_INERT
        @draft ICU 3.0 */
    UCHAR_NFKC_INERT,
    /** Binary property Segment_Starter.
        ICU-specific property for characters that are starters in terms of
        Unicode normalization and combining character sequences.
        They have ccc=0 and do not occur in non-initial position of the
        canonical decomposition of any character
        (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
        ICU uses this property for segmenting a string for generating a set of
        canonically equivalent strings, e.g. for canonical closure while
        processing collation tailoring rules.
        @draft ICU 3.0 */
    UCHAR_SEGMENT_STARTER,
    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_LIMIT,

    /** Enumerated property Bidi_Class.
        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
    UCHAR_BIDI_CLASS=0x1000,
    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
    UCHAR_INT_START=UCHAR_BIDI_CLASS,
    /** Enumerated property Block.
        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
    UCHAR_BLOCK,
    /** Enumerated property Canonical_Combining_Class.
        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
    UCHAR_CANONICAL_COMBINING_CLASS,
    /** Enumerated property Decomposition_Type.
        Returns UDecompositionType values. @stable ICU 2.2 */
    UCHAR_DECOMPOSITION_TYPE,
    /** Enumerated property East_Asian_Width.
        See http://www.unicode.org/reports/tr11/
        Returns UEastAsianWidth values. @stable ICU 2.2 */
    UCHAR_EAST_ASIAN_WIDTH,
    /** Enumerated property General_Category.
        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
    UCHAR_GENERAL_CATEGORY,
    /** Enumerated property Joining_Group.
        Returns UJoiningGroup values. @stable ICU 2.2 */
    UCHAR_JOINING_GROUP,
    /** Enumerated property Joining_Type.
        Returns UJoiningType values. @stable ICU 2.2 */
    UCHAR_JOINING_TYPE,
    /** Enumerated property Line_Break.
        Returns ULineBreak values. @stable ICU 2.2 */
    UCHAR_LINE_BREAK,
    /** Enumerated property Numeric_Type.
        Returns UNumericType values. @stable ICU 2.2 */
    UCHAR_NUMERIC_TYPE,
    /** Enumerated property Script.
        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
    UCHAR_SCRIPT,
    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
        Returns UHangulSyllableType values. @stable ICU 2.6 */
    UCHAR_HANGUL_SYLLABLE_TYPE,
    /** Enumerated property NFD_Quick_Check.
        Returns UNormalizationCheckResult values. @draft ICU 3.0 */
    UCHAR_NFD_QUICK_CHECK,
    /** Enumerated property NFKD_Quick_Check.
        Returns UNormalizationCheckResult values. @draft ICU 3.0 */
    UCHAR_NFKD_QUICK_CHECK,
    /** Enumerated property NFC_Quick_Check.
        Returns UNormalizationCheckResult values. @draft ICU 3.0 */
    UCHAR_NFC_QUICK_CHECK,
    /** Enumerated property NFKC_Quick_Check.
        Returns UNormalizationCheckResult values. @draft ICU 3.0 */
    UCHAR_NFKC_QUICK_CHECK,
    /** Enumerated property Lead_Canonical_Combining_Class.
        ICU-specific property for the ccc of the first code point
        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
    UCHAR_LEAD_CANONICAL_COMBINING_CLASS,
    /** Enumerated property Trail_Canonical_Combining_Class.
        ICU-specific property for the ccc of the last code point
        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS,
    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
    UCHAR_INT_LIMIT,

    /** Bitmask property General_Category_Mask.
        This is the General_Category property returned as a bit mask.
        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
        returns bit masks for UCharCategory values where exactly one bit is set.
        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
        a multi-bit mask is used for sets of categories like "Letters".
        Mask values should be cast to uint32_t.
        @stable ICU 2.4 */
    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
    /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
    UCHAR_MASK_LIMIT,

    /** Double property Numeric_Value.
        Corresponds to u_getNumericValue. @stable ICU 2.4 */
    UCHAR_NUMERIC_VALUE=0x3000,
    /** First constant for double Unicode properties. @stable ICU 2.4 */
    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
    /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
    UCHAR_DOUBLE_LIMIT,

    /** String property Age.
        Corresponds to u_charAge. @stable ICU 2.4 */
    UCHAR_AGE=0x4000,
    /** First constant for string Unicode properties. @stable ICU 2.4 */
    UCHAR_STRING_START=UCHAR_AGE,
    /** String property Bidi_Mirroring_Glyph.
        Corresponds to u_charMirror. @stable ICU 2.4 */
    UCHAR_BIDI_MIRRORING_GLYPH,
    /** String property Case_Folding.
        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
    UCHAR_CASE_FOLDING,
    /** String property ISO_Comment.
        Corresponds to u_getISOComment. @stable ICU 2.4 */
    UCHAR_ISO_COMMENT,
    /** String property Lowercase_Mapping.
        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
    UCHAR_LOWERCASE_MAPPING,
    /** String property Name.
        Corresponds to u_charName. @stable ICU 2.4 */
    UCHAR_NAME,
    /** String property Simple_Case_Folding.
        Corresponds to u_foldCase. @stable ICU 2.4 */
    UCHAR_SIMPLE_CASE_FOLDING,
    /** String property Simple_Lowercase_Mapping.
        Corresponds to u_tolower. @stable ICU 2.4 */
    UCHAR_SIMPLE_LOWERCASE_MAPPING,
    /** String property Simple_Titlecase_Mapping.
        Corresponds to u_totitle. @stable ICU 2.4 */
    UCHAR_SIMPLE_TITLECASE_MAPPING,
    /** String property Simple_Uppercase_Mapping.
        Corresponds to u_toupper. @stable ICU 2.4 */
    UCHAR_SIMPLE_UPPERCASE_MAPPING,
    /** String property Titlecase_Mapping.
        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
    UCHAR_TITLECASE_MAPPING,
    /** String property Unicode_1_Name.
        Corresponds to u_charName. @stable ICU 2.4 */
    UCHAR_UNICODE_1_NAME,
    /** String property Uppercase_Mapping.
        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
    UCHAR_UPPERCASE_MAPPING,
    /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
    UCHAR_STRING_LIMIT,

    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
    UCHAR_INVALID_CODE = -1
} UProperty;
|
501 |
|
/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 *
 * The constants are numbered consecutively from 0, so each value can be
 * used as a bit number with U_MASK (see the U_GC_XX_MASK constants).
 * U_UNASSIGNED and U_GENERAL_OTHER_TYPES are two names for the value 0.
 * @stable ICU 2.0
 */
typedef enum UCharCategory
{
    /** See note !!.  Comments of the form "Cn" are read by genpname. */

    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
    U_UNASSIGNED              = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
    U_GENERAL_OTHER_TYPES     = 0,
    /** Lu @stable ICU 2.0 */
    U_UPPERCASE_LETTER        = 1,
    /** Ll @stable ICU 2.0 */
    U_LOWERCASE_LETTER        = 2,
    /** Lt @stable ICU 2.0 */
    U_TITLECASE_LETTER        = 3,
    /** Lm @stable ICU 2.0 */
    U_MODIFIER_LETTER         = 4,
    /** Lo @stable ICU 2.0 */
    U_OTHER_LETTER            = 5,
    /** Mn @stable ICU 2.0 */
    U_NON_SPACING_MARK        = 6,
    /** Me @stable ICU 2.0 */
    U_ENCLOSING_MARK          = 7,
    /** Mc @stable ICU 2.0 */
    U_COMBINING_SPACING_MARK  = 8,
    /** Nd @stable ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER    = 9,
    /** Nl @stable ICU 2.0 */
    U_LETTER_NUMBER           = 10,
    /** No @stable ICU 2.0 */
    U_OTHER_NUMBER            = 11,
    /** Zs @stable ICU 2.0 */
    U_SPACE_SEPARATOR         = 12,
    /** Zl @stable ICU 2.0 */
    U_LINE_SEPARATOR          = 13,
    /** Zp @stable ICU 2.0 */
    U_PARAGRAPH_SEPARATOR     = 14,
    /** Cc @stable ICU 2.0 */
    U_CONTROL_CHAR            = 15,
    /** Cf @stable ICU 2.0 */
    U_FORMAT_CHAR             = 16,
    /** Co @stable ICU 2.0 */
    U_PRIVATE_USE_CHAR        = 17,
    /** Cs @stable ICU 2.0 */
    U_SURROGATE               = 18,
    /** Pd @stable ICU 2.0 */
    U_DASH_PUNCTUATION        = 19,
    /** Ps @stable ICU 2.0 */
    U_START_PUNCTUATION       = 20,
    /** Pe @stable ICU 2.0 */
    U_END_PUNCTUATION         = 21,
    /** Pc @stable ICU 2.0 */
    U_CONNECTOR_PUNCTUATION   = 22,
    /** Po @stable ICU 2.0 */
    U_OTHER_PUNCTUATION       = 23,
    /** Sm @stable ICU 2.0 */
    U_MATH_SYMBOL             = 24,
    /** Sc @stable ICU 2.0 */
    U_CURRENCY_SYMBOL         = 25,
    /** Sk @stable ICU 2.0 */
    U_MODIFIER_SYMBOL         = 26,
    /** So @stable ICU 2.0 */
    U_OTHER_SYMBOL            = 27,
    /** Pi @stable ICU 2.0 */
    U_INITIAL_PUNCTUATION     = 28,
    /** Pf @stable ICU 2.0 */
    U_FINAL_PUNCTUATION       = 29,
    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;
|
576 |
|
577 /** |
|
578 * U_GC_XX_MASK constants are bit flags corresponding to Unicode |
|
579 * general category values. |
|
580 * For each category, the nth bit is set if the numeric value of the |
|
581 * corresponding UCharCategory constant is n. |
|
582 * |
|
583 * There are also some U_GC_Y_MASK constants for groups of general categories |
|
584 * like L for all letter categories. |
|
585 * |
|
586 * @see u_charType |
|
587 * @see U_GET_GC_MASK |
|
588 * @see UCharCategory |
|
589 * @stable ICU 2.1 |
|
590 */ |
|
591 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) |
|
592 |
|
593 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
594 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) |
|
595 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
596 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) |
|
597 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
598 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) |
|
599 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
600 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) |
|
601 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
602 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) |
|
603 |
|
604 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
605 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) |
|
606 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
607 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) |
|
608 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
609 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) |
|
610 |
|
611 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
612 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) |
|
613 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
614 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) |
|
615 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
616 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) |
|
617 |
|
618 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
619 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) |
|
620 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
621 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) |
|
622 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
623 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) |
|
624 |
|
625 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
626 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) |
|
627 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
628 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) |
|
629 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
630 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) |
|
631 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
632 #define U_GC_CS_MASK U_MASK(U_SURROGATE) |
|
633 |
|
634 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
635 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) |
|
636 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
637 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) |
|
638 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
639 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) |
|
640 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
641 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) |
|
642 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
643 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) |
|
644 |
|
645 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
646 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) |
|
647 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
648 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) |
|
649 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
650 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) |
|
651 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
652 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) |
|
653 |
|
654 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
655 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) |
|
656 /** Mask constant for a UCharCategory. @stable ICU 2.1 */ |
|
657 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) |
|
658 |
|
659 |
|
660 /** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ |
|
661 #define U_GC_L_MASK \ |
|
662 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) |
|
663 |
|
664 /** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ |
|
665 #define U_GC_LC_MASK \ |
|
666 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) |
|
667 |
|
668 /** Mask constant for multiple UCharCategory bits (M Marks: Mn, Me, Mc). @stable ICU 2.1 */ |
|
669 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) |
|
670 |
|
671 /** Mask constant for multiple UCharCategory bits (N Numbers: Nd, Nl, No). @stable ICU 2.1 */ |
|
672 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) |
|
673 |
|
674 /** Mask constant for multiple UCharCategory bits (Z Separators: Zs, Zl, Zp). @stable ICU 2.1 */ |
|
675 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) |
|
676 |
|
677 /** Mask constant for multiple UCharCategory bits (C Others: Cn, Cc, Cf, Co, Cs). @stable ICU 2.1 */ |
|
678 #define U_GC_C_MASK \ |
|
679 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) |
|
680 |
|
681 /** Mask constant for multiple UCharCategory bits (P Punctuation: Pd, Ps, Pe, Pc, Po, Pi, Pf). @stable ICU 2.1 */ |
|
682 #define U_GC_P_MASK \ |
|
683 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ |
|
684 U_GC_PI_MASK|U_GC_PF_MASK) |
|
685 |
|
686 /** Mask constant for multiple UCharCategory bits (S Symbols: Sm, Sc, Sk, So). @stable ICU 2.1 */ |
|
687 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) |
|
688 |
|
689 /** |
|
690 * Constants for the bidirectional class of a code point (values of the UCHAR_BIDI_CLASS property). |
|
691 * @stable ICU 2.0 |
|
692 */ |
|
693 typedef enum UCharDirection { |
|
694 /** See note !!. Comments of the form "EN" are read by genpname. */ |
|
695 |
|
696 /** L @stable ICU 2.0 */ |
|
697 U_LEFT_TO_RIGHT = 0, |
|
698 /** R @stable ICU 2.0 */ |
|
699 U_RIGHT_TO_LEFT = 1, |
|
700 /** EN @stable ICU 2.0 */ |
|
701 U_EUROPEAN_NUMBER = 2, |
|
702 /** ES @stable ICU 2.0 */ |
|
703 U_EUROPEAN_NUMBER_SEPARATOR = 3, |
|
704 /** ET @stable ICU 2.0 */ |
|
705 U_EUROPEAN_NUMBER_TERMINATOR = 4, |
|
706 /** AN @stable ICU 2.0 */ |
|
707 U_ARABIC_NUMBER = 5, |
|
708 /** CS @stable ICU 2.0 */ |
|
709 U_COMMON_NUMBER_SEPARATOR = 6, |
|
710 /** B @stable ICU 2.0 */ |
|
711 U_BLOCK_SEPARATOR = 7, |
|
712 /** S @stable ICU 2.0 */ |
|
713 U_SEGMENT_SEPARATOR = 8, |
|
714 /** WS @stable ICU 2.0 */ |
|
715 U_WHITE_SPACE_NEUTRAL = 9, |
|
716 /** ON @stable ICU 2.0 */ |
|
717 U_OTHER_NEUTRAL = 10, |
|
718 /** LRE @stable ICU 2.0 */ |
|
719 U_LEFT_TO_RIGHT_EMBEDDING = 11, |
|
720 /** LRO @stable ICU 2.0 */ |
|
721 U_LEFT_TO_RIGHT_OVERRIDE = 12, |
|
722 /** AL @stable ICU 2.0 */ |
|
723 U_RIGHT_TO_LEFT_ARABIC = 13, |
|
724 /** RLE @stable ICU 2.0 */ |
|
725 U_RIGHT_TO_LEFT_EMBEDDING = 14, |
|
726 /** RLO @stable ICU 2.0 */ |
|
727 U_RIGHT_TO_LEFT_OVERRIDE = 15, |
|
728 /** PDF @stable ICU 2.0 */ |
|
729 U_POP_DIRECTIONAL_FORMAT = 16, |
|
730 /** NSM @stable ICU 2.0 */ |
|
731 U_DIR_NON_SPACING_MARK = 17, |
|
732 /** BN @stable ICU 2.0 */ |
|
733 U_BOUNDARY_NEUTRAL = 18, |
|
734 /** @stable ICU 2.0 */ |
|
735 U_CHAR_DIRECTION_COUNT |
|
736 } UCharDirection; |
|
737 |
|
738 /** |
|
739 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt |
|
740 * @stable ICU 2.0 |
|
741 */ |
|
742 enum UBlockCode { |
|
743 |
|
744 /** New No_Block value in Unicode 4. @stable ICU 2.6 */ |
|
745 UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ |
|
746 |
|
747 /** @stable ICU 2.0 */ |
|
748 UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/ |
|
749 |
|
750 /** @stable ICU 2.0 */ |
|
751 UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ |
|
752 |
|
753 /** @stable ICU 2.0 */ |
|
754 UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ |
|
755 |
|
756 /** @stable ICU 2.0 */ |
|
757 UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ |
|
758 |
|
759 /** @stable ICU 2.0 */ |
|
760 UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ |
|
761 |
|
762 /** @stable ICU 2.0 */ |
|
763 UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ |
|
764 |
|
765 /** @stable ICU 2.0 */ |
|
766 UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ |
|
767 |
|
768 /** |
|
769 * Unicode 3.2 renames this block to "Greek and Coptic". |
|
770 * @stable ICU 2.0 |
|
771 */ |
|
772 UBLOCK_GREEK =8, /*[0370]*/ |
|
773 |
|
774 /** @stable ICU 2.0 */ |
|
775 UBLOCK_CYRILLIC =9, /*[0400]*/ |
|
776 |
|
777 /** @stable ICU 2.0 */ |
|
778 UBLOCK_ARMENIAN =10, /*[0530]*/ |
|
779 |
|
780 /** @stable ICU 2.0 */ |
|
781 UBLOCK_HEBREW =11, /*[0590]*/ |
|
782 |
|
783 /** @stable ICU 2.0 */ |
|
784 UBLOCK_ARABIC =12, /*[0600]*/ |
|
785 |
|
786 /** @stable ICU 2.0 */ |
|
787 UBLOCK_SYRIAC =13, /*[0700]*/ |
|
788 |
|
789 /** @stable ICU 2.0 */ |
|
790 UBLOCK_THAANA =14, /*[0780]*/ |
|
791 |
|
792 /** @stable ICU 2.0 */ |
|
793 UBLOCK_DEVANAGARI =15, /*[0900]*/ |
|
794 |
|
795 /** @stable ICU 2.0 */ |
|
796 UBLOCK_BENGALI =16, /*[0980]*/ |
|
797 |
|
798 /** @stable ICU 2.0 */ |
|
799 UBLOCK_GURMUKHI =17, /*[0A00]*/ |
|
800 |
|
801 /** @stable ICU 2.0 */ |
|
802 UBLOCK_GUJARATI =18, /*[0A80]*/ |
|
803 |
|
804 /** @stable ICU 2.0 */ |
|
805 UBLOCK_ORIYA =19, /*[0B00]*/ |
|
806 |
|
807 /** @stable ICU 2.0 */ |
|
808 UBLOCK_TAMIL =20, /*[0B80]*/ |
|
809 |
|
810 /** @stable ICU 2.0 */ |
|
811 UBLOCK_TELUGU =21, /*[0C00]*/ |
|
812 |
|
813 /** @stable ICU 2.0 */ |
|
814 UBLOCK_KANNADA =22, /*[0C80]*/ |
|
815 |
|
816 /** @stable ICU 2.0 */ |
|
817 UBLOCK_MALAYALAM =23, /*[0D00]*/ |
|
818 |
|
819 /** @stable ICU 2.0 */ |
|
820 UBLOCK_SINHALA =24, /*[0D80]*/ |
|
821 |
|
822 /** @stable ICU 2.0 */ |
|
823 UBLOCK_THAI =25, /*[0E00]*/ |
|
824 |
|
825 /** @stable ICU 2.0 */ |
|
826 UBLOCK_LAO =26, /*[0E80]*/ |
|
827 |
|
828 /** @stable ICU 2.0 */ |
|
829 UBLOCK_TIBETAN =27, /*[0F00]*/ |
|
830 |
|
831 /** @stable ICU 2.0 */ |
|
832 UBLOCK_MYANMAR =28, /*[1000]*/ |
|
833 |
|
834 /** @stable ICU 2.0 */ |
|
835 UBLOCK_GEORGIAN =29, /*[10A0]*/ |
|
836 |
|
837 /** @stable ICU 2.0 */ |
|
838 UBLOCK_HANGUL_JAMO =30, /*[1100]*/ |
|
839 |
|
840 /** @stable ICU 2.0 */ |
|
841 UBLOCK_ETHIOPIC =31, /*[1200]*/ |
|
842 |
|
843 /** @stable ICU 2.0 */ |
|
844 UBLOCK_CHEROKEE =32, /*[13A0]*/ |
|
845 |
|
846 /** @stable ICU 2.0 */ |
|
847 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ |
|
848 |
|
849 /** @stable ICU 2.0 */ |
|
850 UBLOCK_OGHAM =34, /*[1680]*/ |
|
851 |
|
852 /** @stable ICU 2.0 */ |
|
853 UBLOCK_RUNIC =35, /*[16A0]*/ |
|
854 |
|
855 /** @stable ICU 2.0 */ |
|
856 UBLOCK_KHMER =36, /*[1780]*/ |
|
857 |
|
858 /** @stable ICU 2.0 */ |
|
859 UBLOCK_MONGOLIAN =37, /*[1800]*/ |
|
860 |
|
861 /** @stable ICU 2.0 */ |
|
862 UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ |
|
863 |
|
864 /** @stable ICU 2.0 */ |
|
865 UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ |
|
866 |
|
867 /** @stable ICU 2.0 */ |
|
868 UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ |
|
869 |
|
870 /** @stable ICU 2.0 */ |
|
871 UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ |
|
872 |
|
873 /** @stable ICU 2.0 */ |
|
874 UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ |
|
875 |
|
876 /** |
|
877 * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". |
|
878 * @stable ICU 2.0 |
|
879 */ |
|
880 UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ |
|
881 |
|
882 /** @stable ICU 2.0 */ |
|
883 UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ |
|
884 |
|
885 /** @stable ICU 2.0 */ |
|
886 UBLOCK_NUMBER_FORMS =45, /*[2150]*/ |
|
887 |
|
888 /** @stable ICU 2.0 */ |
|
889 UBLOCK_ARROWS =46, /*[2190]*/ |
|
890 |
|
891 /** @stable ICU 2.0 */ |
|
892 UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ |
|
893 |
|
894 /** @stable ICU 2.0 */ |
|
895 UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ |
|
896 |
|
897 /** @stable ICU 2.0 */ |
|
898 UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ |
|
899 |
|
900 /** @stable ICU 2.0 */ |
|
901 UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ |
|
902 |
|
903 /** @stable ICU 2.0 */ |
|
904 UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ |
|
905 |
|
906 /** @stable ICU 2.0 */ |
|
907 UBLOCK_BOX_DRAWING =52, /*[2500]*/ |
|
908 |
|
909 /** @stable ICU 2.0 */ |
|
910 UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ |
|
911 |
|
912 /** @stable ICU 2.0 */ |
|
913 UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ |
|
914 |
|
915 /** @stable ICU 2.0 */ |
|
916 UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ |
|
917 |
|
918 /** @stable ICU 2.0 */ |
|
919 UBLOCK_DINGBATS =56, /*[2700]*/ |
|
920 |
|
921 /** @stable ICU 2.0 */ |
|
922 UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ |
|
923 |
|
924 /** @stable ICU 2.0 */ |
|
925 UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ |
|
926 |
|
927 /** @stable ICU 2.0 */ |
|
928 UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ |
|
929 |
|
930 /** @stable ICU 2.0 */ |
|
931 UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ |
|
932 |
|
933 /** @stable ICU 2.0 */ |
|
934 UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ |
|
935 |
|
936 /** @stable ICU 2.0 */ |
|
937 UBLOCK_HIRAGANA =62, /*[3040]*/ |
|
938 |
|
939 /** @stable ICU 2.0 */ |
|
940 UBLOCK_KATAKANA =63, /*[30A0]*/ |
|
941 |
|
942 /** @stable ICU 2.0 */ |
|
943 UBLOCK_BOPOMOFO =64, /*[3100]*/ |
|
944 |
|
945 /** @stable ICU 2.0 */ |
|
946 UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ |
|
947 |
|
948 /** @stable ICU 2.0 */ |
|
949 UBLOCK_KANBUN =66, /*[3190]*/ |
|
950 |
|
951 /** @stable ICU 2.0 */ |
|
952 UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ |
|
953 |
|
954 /** @stable ICU 2.0 */ |
|
955 UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ |
|
956 |
|
957 /** @stable ICU 2.0 */ |
|
958 UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ |
|
959 |
|
960 /** @stable ICU 2.0 */ |
|
961 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ |
|
962 |
|
963 /** @stable ICU 2.0 */ |
|
964 UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ |
|
965 |
|
966 /** @stable ICU 2.0 */ |
|
967 UBLOCK_YI_SYLLABLES =72, /*[A000]*/ |
|
968 |
|
969 /** @stable ICU 2.0 */ |
|
970 UBLOCK_YI_RADICALS =73, /*[A490]*/ |
|
971 |
|
972 /** @stable ICU 2.0 */ |
|
973 UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ |
|
974 |
|
975 /** @stable ICU 2.0 */ |
|
976 UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ |
|
977 |
|
978 /** @stable ICU 2.0 */ |
|
979 UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ |
|
980 |
|
981 /** @stable ICU 2.0 */ |
|
982 UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ |
|
983 |
|
984 /** |
|
985 * Same as UBLOCK_PRIVATE_USE_AREA. |
|
986 * Until Unicode 3.1.1, the corresponding block name was "Private Use", |
|
987 * and multiple code point ranges had this block. |
|
988 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and |
|
989 * adds separate blocks for the supplementary PUAs. |
|
990 * |
|
991 * @stable ICU 2.0 |
|
992 */ |
|
993 UBLOCK_PRIVATE_USE = 78, |
|
994 /** |
|
995 * Same as UBLOCK_PRIVATE_USE. |
|
996 * Until Unicode 3.1.1, the corresponding block name was "Private Use", |
|
997 * and multiple code point ranges had this block. |
|
998 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and |
|
999 * adds separate blocks for the supplementary PUAs. |
|
1000 * |
|
1001 * @stable ICU 2.0 |
|
1002 */ |
|
1003 UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/ |
|
1004 |
|
1005 /** @stable ICU 2.0 */ |
|
1006 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ |
|
1007 |
|
1008 /** @stable ICU 2.0 */ |
|
1009 UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ |
|
1010 |
|
1011 /** @stable ICU 2.0 */ |
|
1012 UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ |
|
1013 |
|
1014 /** @stable ICU 2.0 */ |
|
1015 UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ |
|
1016 |
|
1017 /** @stable ICU 2.0 */ |
|
1018 UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ |
|
1019 |
|
1020 /** @stable ICU 2.0 */ |
|
1021 UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ |
|
1022 |
|
1023 /** @stable ICU 2.0 */ |
|
1024 UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ |
|
1025 |
|
1026 /** @stable ICU 2.0 */ |
|
1027 UBLOCK_SPECIALS =86, /*[FFF0]*/ |
|
1028 |
|
1029 /** @stable ICU 2.0 */ |
|
1030 UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ |
|
1031 |
|
1032 /* New blocks in Unicode 3.1 */ |
|
1033 |
|
1034 /** @stable ICU 2.0 */ |
|
1035 UBLOCK_OLD_ITALIC = 88 , /*[10300]*/ |
|
1036 /** @stable ICU 2.0 */ |
|
1037 UBLOCK_GOTHIC = 89 , /*[10330]*/ |
|
1038 /** @stable ICU 2.0 */ |
|
1039 UBLOCK_DESERET = 90 , /*[10400]*/ |
|
1040 /** @stable ICU 2.0 */ |
|
1041 UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/ |
|
1042 /** @stable ICU 2.0 */ |
|
1043 UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/ |
|
1044 /** @stable ICU 2.0 */ |
|
1045 UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/ |
|
1046 /** @stable ICU 2.0 */ |
|
1047 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/ |
|
1048 /** @stable ICU 2.0 */ |
|
1049 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/ |
|
1050 /** @stable ICU 2.0 */ |
|
1051 UBLOCK_TAGS = 96, /*[E0000]*/ |
|
1052 |
|
1053 /* New blocks in Unicode 3.2 */ |
|
1054 |
|
1055 /** |
|
1056 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". |
|
1057 * @stable ICU 2.2 |
|
1058 */ |
|
1059 UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, |
|
1060 /** Same value as UBLOCK_CYRILLIC_SUPPLEMENTARY, named after the Unicode 4.0.1 block name. @draft ICU 3.0 */ |
|
1061 UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/ |
|
1062 /** @stable ICU 2.2 */ |
|
1063 UBLOCK_TAGALOG = 98, /*[1700]*/ |
|
1064 /** @stable ICU 2.2 */ |
|
1065 UBLOCK_HANUNOO = 99, /*[1720]*/ |
|
1066 /** @stable ICU 2.2 */ |
|
1067 UBLOCK_BUHID = 100, /*[1740]*/ |
|
1068 /** @stable ICU 2.2 */ |
|
1069 UBLOCK_TAGBANWA = 101, /*[1760]*/ |
|
1070 /** @stable ICU 2.2 */ |
|
1071 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ |
|
1072 /** @stable ICU 2.2 */ |
|
1073 UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ |
|
1074 /** @stable ICU 2.2 */ |
|
1075 UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ |
|
1076 /** @stable ICU 2.2 */ |
|
1077 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ |
|
1078 /** @stable ICU 2.2 */ |
|
1079 UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ |
|
1080 /** @stable ICU 2.2 */ |
|
1081 UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ |
|
1082 /** @stable ICU 2.2 */ |
|
1083 UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ |
|
1084 /** @stable ICU 2.2 */ |
|
1085 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ |
|
1086 /** @stable ICU 2.2 */ |
|
1087 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ |
|
1088 |
|
1089 /* New blocks in Unicode 4 */ |
|
1090 |
|
1091 /** @stable ICU 2.6 */ |
|
1092 UBLOCK_LIMBU = 111, /*[1900]*/ |
|
1093 /** @stable ICU 2.6 */ |
|
1094 UBLOCK_TAI_LE = 112, /*[1950]*/ |
|
1095 /** @stable ICU 2.6 */ |
|
1096 UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ |
|
1097 /** @stable ICU 2.6 */ |
|
1098 UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ |
|
1099 /** @stable ICU 2.6 */ |
|
1100 UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ |
|
1101 /** @stable ICU 2.6 */ |
|
1102 UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ |
|
1103 /** @stable ICU 2.6 */ |
|
1104 UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ |
|
1105 /** @stable ICU 2.6 */ |
|
1106 UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ |
|
1107 /** @stable ICU 2.6 */ |
|
1108 UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ |
|
1109 /** @stable ICU 2.6 */ |
|
1110 UBLOCK_UGARITIC = 120, /*[10380]*/ |
|
1111 /** @stable ICU 2.6 */ |
|
1112 UBLOCK_SHAVIAN = 121, /*[10450]*/ |
|
1113 /** @stable ICU 2.6 */ |
|
1114 UBLOCK_OSMANYA = 122, /*[10480]*/ |
|
1115 /** @stable ICU 2.6 */ |
|
1116 UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ |
|
1117 /** @stable ICU 2.6 */ |
|
1118 UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ |
|
1119 /** @stable ICU 2.6 */ |
|
1120 UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ |
|
1121 |
|
1122 /** Number of block codes; one more than the highest UBLOCK_ value above. @stable ICU 2.0 */ |
|
1123 UBLOCK_COUNT, |
|
1124 |
|
1125 /** Marker value (-1) that is not a valid block code. @stable ICU 2.0 */ |
|
1126 UBLOCK_INVALID_CODE=-1 |
|
1127 }; |
|
1128 |
|
1129 /** Typedef so that the enum can be referred to as plain UBlockCode. @stable ICU 2.0 */ |
|
1130 typedef enum UBlockCode UBlockCode; |
|
1131 |
|
1132 /** |
|
1133 * East Asian Width constants, the value enumeration for UCHAR_EAST_ASIAN_WIDTH. |
|
1134 * |
|
1135 * @see UCHAR_EAST_ASIAN_WIDTH |
|
1136 * @see u_getIntPropertyValue |
|
1137 * @stable ICU 2.2 |
|
1138 */ |
|
1139 typedef enum UEastAsianWidth { |
|
1140 U_EA_NEUTRAL, /*[N]*/ /*See note !!*/ |
|
1141 U_EA_AMBIGUOUS, /*[A]*/ |
|
1142 U_EA_HALFWIDTH, /*[H]*/ |
|
1143 U_EA_FULLWIDTH, /*[F]*/ |
|
1144 U_EA_NARROW, /*[Na]*/ |
|
1145 U_EA_WIDE, /*[W]*/ |
|
1146 U_EA_COUNT /* 6 */ |
|
1147 } UEastAsianWidth; |
|
1148 /* |
|
1149 * Implementation note: |
|
1150 * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c. |
|
1151 */ |
|
1152 |
|
1153 /** |
|
1154 * Selector constants for u_charName(). |
|
1155 * u_charName() returns the "modern" name of a |
|
1156 * Unicode character; or the name that was defined in |
|
1157 * Unicode version 1.0, before the Unicode standard merged |
|
1158 * with ISO-10646; or an "extended" name that gives each |
|
1159 * Unicode code point a unique name. |
|
1160 * |
|
1161 * @see u_charName |
|
1162 * @stable ICU 2.0 |
|
1163 */ |
|
1164 typedef enum UCharNameChoice { |
|
1165 U_UNICODE_CHAR_NAME, /**< Selects the "modern" character name. */ |
|
1166 U_UNICODE_10_CHAR_NAME, /**< Selects the name defined in Unicode version 1.0. */ |
|
1167 U_EXTENDED_CHAR_NAME, /**< Selects the "extended" name that is unique per code point. */ |
|
1168 U_CHAR_NAME_CHOICE_COUNT /**< Number of selector constants above (3). */ |
|
1169 } UCharNameChoice; |
|
1170 |
|
1171 /** |
|
1172 * Selector constants for u_getPropertyName() and |
|
1173 * u_getPropertyValueName(). These selectors are used to choose which |
|
1174 * name is returned for a given property or value. All properties and |
|
1175 * values have a long name. Most have a short name, but some do not. |
|
1176 * Unicode allows for additional names, beyond the long and short |
|
1177 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where |
|
1178 * i=1, 2,... |
|
1179 * |
|
1180 * @see u_getPropertyName() |
|
1181 * @see u_getPropertyValueName() |
|
1182 * @stable ICU 2.4 |
|
1183 */ |
|
1184 typedef enum UPropertyNameChoice { |
|
1185 U_SHORT_PROPERTY_NAME, /**< Selects the short name; some properties and values have none. */ |
|
1186 U_LONG_PROPERTY_NAME, /**< Selects the long name; all properties and values have one. */ |
|
1187 U_PROPERTY_NAME_CHOICE_COUNT /**< Number of selector constants above (2). */ |
|
1188 } UPropertyNameChoice; |
|
1189 |
|
1190 /** |
|
1191 * Decomposition Type constants, the value enumeration for UCHAR_DECOMPOSITION_TYPE. |
|
1192 * |
|
1193 * @see UCHAR_DECOMPOSITION_TYPE |
|
1194 * @stable ICU 2.2 |
|
1195 */ |
|
1196 typedef enum UDecompositionType { |
|
1197 U_DT_NONE, /*[none]*/ /*See note !!*/ |
|
1198 U_DT_CANONICAL, /*[can]*/ |
|
1199 U_DT_COMPAT, /*[com]*/ |
|
1200 U_DT_CIRCLE, /*[enc]*/ |
|
1201 U_DT_FINAL, /*[fin]*/ |
|
1202 U_DT_FONT, /*[font]*/ |
|
1203 U_DT_FRACTION, /*[fra]*/ |
|
1204 U_DT_INITIAL, /*[init]*/ |
|
1205 U_DT_ISOLATED, /*[iso]*/ |
|
1206 U_DT_MEDIAL, /*[med]*/ |
|
1207 U_DT_NARROW, /*[nar]*/ |
|
1208 U_DT_NOBREAK, /*[nb]*/ |
|
1209 U_DT_SMALL, /*[sml]*/ |
|
1210 U_DT_SQUARE, /*[sqr]*/ |
|
1211 U_DT_SUB, /*[sub]*/ |
|
1212 U_DT_SUPER, /*[sup]*/ |
|
1213 U_DT_VERTICAL, /*[vert]*/ |
|
1214 U_DT_WIDE, /*[wide]*/ |
|
1215 U_DT_COUNT /* 18 */ |
|
1216 } UDecompositionType; |
|
1217 |
|
1218 /** |
|
1219 * Joining Type constants, the value enumeration for UCHAR_JOINING_TYPE. |
|
1220 * |
|
1221 * @see UCHAR_JOINING_TYPE |
|
1222 * @stable ICU 2.2 |
|
1223 */ |
|
1224 typedef enum UJoiningType { |
|
1225 U_JT_NON_JOINING, /*[U]*/ /*See note !!*/ |
|
1226 U_JT_JOIN_CAUSING, /*[C]*/ |
|
1227 U_JT_DUAL_JOINING, /*[D]*/ |
|
1228 U_JT_LEFT_JOINING, /*[L]*/ |
|
1229 U_JT_RIGHT_JOINING, /*[R]*/ |
|
1230 U_JT_TRANSPARENT, /*[T]*/ |
|
1231 U_JT_COUNT /* 6 */ |
|
1232 } UJoiningType; |
|
1233 |
|
1234 /** |
|
1235 * Joining Group constants, the value enumeration for UCHAR_JOINING_GROUP. |
|
1236 * |
|
1237 * @see UCHAR_JOINING_GROUP |
|
1238 * @stable ICU 2.2 |
|
1239 */ |
|
1240 typedef enum UJoiningGroup { |
|
1241 U_JG_NO_JOINING_GROUP, |
|
1242 U_JG_AIN, |
|
1243 U_JG_ALAPH, |
|
1244 U_JG_ALEF, |
|
1245 U_JG_BEH, |
|
1246 U_JG_BETH, |
|
1247 U_JG_DAL, |
|
1248 U_JG_DALATH_RISH, |
|
1249 U_JG_E, |
|
1250 U_JG_FEH, |
|
1251 U_JG_FINAL_SEMKATH, |
|
1252 U_JG_GAF, |
|
1253 U_JG_GAMAL, |
|
1254 U_JG_HAH, |
|
1255 U_JG_HAMZA_ON_HEH_GOAL, |
|
1256 U_JG_HE, |
|
1257 U_JG_HEH, |
|
1258 U_JG_HEH_GOAL, |
|
1259 U_JG_HETH, |
|
1260 U_JG_KAF, |
|
1261 U_JG_KAPH, |
|
1262 U_JG_KNOTTED_HEH, |
|
1263 U_JG_LAM, |
|
1264 U_JG_LAMADH, |
|
1265 U_JG_MEEM, |
|
1266 U_JG_MIM, |
|
1267 U_JG_NOON, |
|
1268 U_JG_NUN, |
|
1269 U_JG_PE, |
|
1270 U_JG_QAF, |
|
1271 U_JG_QAPH, |
|
1272 U_JG_REH, |
|
1273 U_JG_REVERSED_PE, |
|
1274 U_JG_SAD, |
|
1275 U_JG_SADHE, |
|
1276 U_JG_SEEN, |
|
1277 U_JG_SEMKATH, |
|
1278 U_JG_SHIN, |
|
1279 U_JG_SWASH_KAF, |
|
1280 U_JG_SYRIAC_WAW, |
|
1281 U_JG_TAH, |
|
1282 U_JG_TAW, |
|
1283 U_JG_TEH_MARBUTA, |
|
1284 U_JG_TETH, |
|
1285 U_JG_WAW, |
|
1286 U_JG_YEH, |
|
1287 U_JG_YEH_BARREE, |
|
1288 U_JG_YEH_WITH_TAIL, |
|
1289 U_JG_YUDH, |
|
1290 U_JG_YUDH_HE, |
|
1291 U_JG_ZAIN, |
|
1292 U_JG_FE, /**< @stable ICU 2.6 */ |
|
1293 U_JG_KHAPH, /**< @stable ICU 2.6 */ |
|
1294 U_JG_ZHAIN, /**< @stable ICU 2.6 */ |
|
1295 U_JG_COUNT |
|
1296 } UJoiningGroup; |
|
1297 |
|
1298 /** |
|
1299 * Line Break constants, the value enumeration for UCHAR_LINE_BREAK. |
|
1300 * |
|
1301 * @see UCHAR_LINE_BREAK |
|
1302 * @stable ICU 2.2 |
|
1303 */ |
|
1304 typedef enum ULineBreak { |
|
1305 U_LB_UNKNOWN, /*[XX]*/ /*See note !!*/ |
|
1306 U_LB_AMBIGUOUS, /*[AI]*/ |
|
1307 U_LB_ALPHABETIC, /*[AL]*/ |
|
1308 U_LB_BREAK_BOTH, /*[B2]*/ |
|
1309 U_LB_BREAK_AFTER, /*[BA]*/ |
|
1310 U_LB_BREAK_BEFORE, /*[BB]*/ |
|
1311 U_LB_MANDATORY_BREAK, /*[BK]*/ |
|
1312 U_LB_CONTINGENT_BREAK, /*[CB]*/ |
|
1313 U_LB_CLOSE_PUNCTUATION, /*[CL]*/ |
|
1314 U_LB_COMBINING_MARK, /*[CM]*/ |
|
1315 U_LB_CARRIAGE_RETURN, /*[CR]*/ |
|
1316 U_LB_EXCLAMATION, /*[EX]*/ |
|
1317 U_LB_GLUE, /*[GL]*/ |
|
1318 U_LB_HYPHEN, /*[HY]*/ |
|
1319 U_LB_IDEOGRAPHIC, /*[ID]*/ |
|
1320 U_LB_INSEPERABLE, |
|
1321 /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @draft ICU 3.0 */ |
|
1322 U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/ |
|
1323 U_LB_INFIX_NUMERIC, /*[IS]*/ |
|
1324 U_LB_LINE_FEED, /*[LF]*/ |
|
1325 U_LB_NONSTARTER, /*[NS]*/ |
|
1326 U_LB_NUMERIC, /*[NU]*/ |
|
1327 U_LB_OPEN_PUNCTUATION, /*[OP]*/ |
|
1328 U_LB_POSTFIX_NUMERIC, /*[PO]*/ |
|
1329 U_LB_PREFIX_NUMERIC, /*[PR]*/ |
|
1330 U_LB_QUOTATION, /*[QU]*/ |
|
1331 U_LB_COMPLEX_CONTEXT, /*[SA]*/ |
|
1332 U_LB_SURROGATE, /*[SG]*/ |
|
1333 U_LB_SPACE, /*[SP]*/ |
|
1334 U_LB_BREAK_SYMBOLS, /*[SY]*/ |
|
1335 U_LB_ZWSPACE, /*[ZW]*/ |
|
1336 U_LB_NEXT_LINE, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ |
|
1337 U_LB_WORD_JOINER, /*[WJ]*/ |
|
1338 U_LB_COUNT |
|
1339 } ULineBreak; |
|
1340 |
|
1341 /** |
|
1342 * Numeric Type constants, the value enumeration for UCHAR_NUMERIC_TYPE. |
|
1343 * |
|
1344 * @see UCHAR_NUMERIC_TYPE |
|
1345 * @stable ICU 2.2 |
|
1346 */ |
|
1347 typedef enum UNumericType { |
|
1348 U_NT_NONE, /*[None]*/ /*See note !!*/ |
|
1349 U_NT_DECIMAL, /*[de]*/ |
|
1350 U_NT_DIGIT, /*[di]*/ |
|
1351 U_NT_NUMERIC, /*[nu]*/ |
|
1352 U_NT_COUNT |
|
1353 } UNumericType; |
|
1354 |
|
1355 /** |
|
1356 * Hangul Syllable Type constants, the value enumeration for UCHAR_HANGUL_SYLLABLE_TYPE. |
|
1357 * |
|
1358 * @see UCHAR_HANGUL_SYLLABLE_TYPE |
|
1359 * @stable ICU 2.6 |
|
1360 */ |
|
1361 typedef enum UHangulSyllableType { |
|
1362 U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/ |
|
1363 U_HST_LEADING_JAMO, /*[L]*/ |
|
1364 U_HST_VOWEL_JAMO, /*[V]*/ |
|
1365 U_HST_TRAILING_JAMO, /*[T]*/ |
|
1366 U_HST_LV_SYLLABLE, /*[LV]*/ |
|
1367 U_HST_LVT_SYLLABLE, /*[LVT]*/ |
|
1368 U_HST_COUNT |
|
1369 } UHangulSyllableType; |
|
1370 |
|
1371 /** |
|
1372 * Check a binary Unicode property for a code point. |
|
1373 * |
|
1374 * Unicode, especially in version 3.2, defines many more properties than the |
|
1375 * original set in UnicodeData.txt. |
|
1376 * |
|
1377 * The properties APIs are intended to reflect Unicode properties as defined |
|
1378 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). |
|
1379 * For details about the properties see http://www.unicode.org/ucd/ . |
|
1380 * For names of Unicode properties see the UCD file PropertyAliases.txt. |
|
1381 * |
|
1382 * Important: If ICU is built with UCD files from Unicode versions below 3.2, |
|
1383 * then properties marked with "new in Unicode 3.2" are not available, or not fully available. |
|
1384 * |
|
1385 * @param c Code point to test. |
|
1386 * @param which UProperty selector constant, identifies which binary property to check. |
|
1387 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. |
|
1388 * @return TRUE or FALSE according to the binary Unicode property value for c. |
|
1389 * Also FALSE if 'which' is out of bounds or if the Unicode version |
|
1390 * does not have data for the property at all, or not for this code point. |
|
1391 * |
|
1392 * @see UProperty |
|
1393 * @see u_getIntPropertyValue |
|
1394 * @see u_getUnicodeVersion |
|
1395 * @stable ICU 2.1 |
|
1396 */ |
|
1397 U_STABLE UBool U_EXPORT2 |
|
1398 u_hasBinaryProperty(UChar32 c, UProperty which); |
|
1399 |
|
1400 /** |
|
1401 * Check if a code point has the Alphabetic Unicode property. |
|
1402 * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). |
|
1403 * This is different from u_isalpha! |
|
1404 * @param c Code point to test |
|
1405 * @return TRUE if the code point has the Alphabetic Unicode property, FALSE otherwise |
|
1406 * |
|
1407 * @see UCHAR_ALPHABETIC |
|
1408 * @see u_isalpha |
|
1409 * @see u_hasBinaryProperty |
|
1410 * @stable ICU 2.1 |
|
1411 */ |
|
1412 U_STABLE UBool U_EXPORT2 |
|
1413 u_isUAlphabetic(UChar32 c); |
|
1414 |
|
1415 /** |
|
1416 * Check if a code point has the Lowercase Unicode property. |
|
1417 * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). |
|
1418 * This is different from u_islower! |
|
1419 * @param c Code point to test |
|
1420 * @return TRUE if the code point has the Lowercase Unicode property, FALSE otherwise |
|
1421 * |
|
1422 * @see UCHAR_LOWERCASE |
|
1423 * @see u_islower |
|
1424 * @see u_hasBinaryProperty |
|
1425 * @stable ICU 2.1 |
|
1426 */ |
|
1427 U_STABLE UBool U_EXPORT2 |
|
1428 u_isULowercase(UChar32 c); |
|
1429 |
|
1430 /** |
|
1431 * Check if a code point has the Uppercase Unicode property. |
|
1432 * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). |
|
1433 * This is different from u_isupper! |
|
1434 * @param c Code point to test |
|
1435 * @return TRUE if the code point has the Uppercase Unicode property, FALSE otherwise |
|
1436 * |
|
1437 * @see UCHAR_UPPERCASE |
|
1438 * @see u_isupper |
|
1439 * @see u_hasBinaryProperty |
|
1440 * @stable ICU 2.1 |
|
1441 */ |
|
1442 U_STABLE UBool U_EXPORT2 |
|
1443 u_isUUppercase(UChar32 c); |
|
1444 |
|
1445 /** |
|
1446 * Check if a code point has the White_Space Unicode property. |
|
1447 * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). |
|
1448 * This is different from both u_isspace and u_isWhitespace! |
|
1449 * |
|
1450 * Note: There are several ICU whitespace functions; please see the uchar.h |
|
1451 * file documentation for a detailed comparison. |
|
1452 * |
|
1453 * @param c Code point to test |
|
1454 * @return TRUE if the code point has the White_Space Unicode property, FALSE otherwise. |
|
1455 * |
|
1456 * @see UCHAR_WHITE_SPACE |
|
1457 * @see u_isWhitespace |
|
1458 * @see u_isspace |
|
1459 * @see u_isJavaSpaceChar |
|
1460 * @see u_hasBinaryProperty |
|
1461 * @stable ICU 2.1 |
|
1462 */ |
|
1463 U_STABLE UBool U_EXPORT2 |
|
1464 u_isUWhiteSpace(UChar32 c); |
|
1465 |
|
1466 /** |
|
1467 * Get the property value for an enumerated or integer Unicode property for a code point. |
|
1468 * Also returns binary and mask property values. |
|
1469 * |
|
1470 * Unicode, especially in version 3.2, defines many more properties than the |
|
1471 * original set in UnicodeData.txt. |
|
1472 * |
|
1473 * The properties APIs are intended to reflect Unicode properties as defined |
|
1474 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). |
|
1475 * For details about the properties see http://www.unicode.org/ucd/ . |
|
1476 * For names of Unicode properties see the UCD file PropertyAliases.txt. |
|
1477 * |
|
1478 * Sample usage: |
|
1479 * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); |
|
1480 * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); |
|
1481 * |
|
1482 * @param c Code point to test. |
|
1483 * @param which UProperty selector constant, identifies which property to check. |
|
1484 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT |
|
1485 * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT |
|
1486 * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. |
|
1487 * @return Numeric value that is directly the property value or, |
|
1488 * for enumerated properties, corresponds to the numeric value of the enumerated |
|
1489 * constant of the respective property value enumeration type |
|
1490 * (cast to enum type if necessary). |
|
1491 * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. |
|
1492 * Returns a bit-mask for mask properties. |
|
1493 * Returns 0 if 'which' is out of bounds or if the Unicode version |
|
1494 * does not have data for the property at all, or not for this code point. |
|
1495 * |
|
1496 * @see UProperty |
|
1497 * @see u_hasBinaryProperty |
|
1498 * @see u_getIntPropertyMinValue |
|
1499 * @see u_getIntPropertyMaxValue |
|
1500 * @see u_getUnicodeVersion |
|
1501 * @stable ICU 2.2 |
|
1502 */ |
|
1503 U_STABLE int32_t U_EXPORT2 |
|
1504 u_getIntPropertyValue(UChar32 c, UProperty which); |
|
1505 |
|
1506 /** |
|
1507 * Get the minimum value for an enumerated/integer/binary Unicode property. |
|
1508 * Can be used together with u_getIntPropertyMaxValue |
|
1509 * to allocate arrays of UnicodeSet or similar. |
|
1510 * |
|
1511 * @param which UProperty selector constant, identifies which property to query. |
|
1512 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT |
|
1513 * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. |
|
1514 * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. |
|
1515 * 0 if the property selector is out of range. |
|
1516 * |
|
1517 * @see UProperty |
|
1518 * @see u_hasBinaryProperty |
|
1519 * @see u_getUnicodeVersion |
|
1520 * @see u_getIntPropertyMaxValue |
|
1521 * @see u_getIntPropertyValue |
|
1522 * @stable ICU 2.2 |
|
1523 */ |
|
1524 U_STABLE int32_t U_EXPORT2 |
|
1525 u_getIntPropertyMinValue(UProperty which); |
|
1526 |
|
1527 /** |
|
1528 * Get the maximum value for an enumerated/integer/binary Unicode property. |
|
1529 * Can be used together with u_getIntPropertyMinValue |
|
1530 * to allocate arrays of UnicodeSet or similar. |
|
1531 * |
|
1532 * Examples for min/max values (for Unicode 3.2): |
|
1533 * |
|
1534 * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) |
|
1535 * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) |
|
1536 * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) |
|
1537 * |
|
1538 * For undefined UProperty constant values, min/max values will be 0/-1. |
|
1539 * |
|
1540 * @param which UProperty selector constant, identifies which property to query. |
|
1541 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT |
|
1542 * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. |
|
1543 * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. |
|
1544 * <=0 if the property selector is out of range. |
|
1545 * |
|
1546 * @see UProperty |
|
1547 * @see u_hasBinaryProperty |
|
1548 * @see u_getUnicodeVersion |
|
1549 * @see u_getIntPropertyMinValue |
|
1550 * @see u_getIntPropertyValue |
|
1551 * @stable ICU 2.2 |
|
1552 */ |
|
1553 U_STABLE int32_t U_EXPORT2 |
|
1554 u_getIntPropertyMaxValue(UProperty which); |
|
1555 |
|
1556 /** |
|
1557 * Get the numeric value for a Unicode code point as defined in the |
|
1558 * Unicode Character Database. |
|
1559 * |
|
1560 * A "double" return type is necessary because |
|
1561 * some numeric values are fractions, negative, or too large for int32_t. |
|
1562 * |
|
1563 * For characters without any numeric values in the Unicode Character Database, |
|
1564 * this function will return U_NO_NUMERIC_VALUE. |
|
1565 * |
|
1566 * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() |
|
1567 * also supports negative values, large values, and fractions, |
|
1568 * while Java's getNumericValue() returns values 10..35 for ASCII letters. |
|
1569 * |
|
1570 * @param c Code point to get the numeric value for. |
|
1571 * @return Numeric value of c as a double, or U_NO_NUMERIC_VALUE if none is defined for c. |
|
1572 * |
|
1573 * @see U_NO_NUMERIC_VALUE |
|
1574 * @stable ICU 2.2 |
|
1575 */ |
|
1576 U_STABLE double U_EXPORT2 |
|
1577 u_getNumericValue(UChar32 c); |
|
1578 |
|
1579 /** |
|
1580 * Special double value (-123456789.) that is returned by u_getNumericValue when |
|
1581 * no numeric value is defined for a code point. |
|
1582 * |
|
1583 * @see u_getNumericValue |
|
1584 * @stable ICU 2.2 |
|
1585 */ |
|
1586 #define U_NO_NUMERIC_VALUE ((double)-123456789.) |
|
1587 |
|
1588 /** |
|
1589 * Determines whether the specified code point has the general category "Ll" |
|
1590 * (lowercase letter). |
|
1591 * |
|
1592 * Same as java.lang.Character.isLowerCase(). |
|
1593 * |
|
1594 * This misses some characters that are also lowercase but |
|
1595 * have a different general category value. |
|
1596 * In order to include those, use UCHAR_LOWERCASE. |
|
1597 * |
|
1598 * In addition to being equivalent to a Java function, this also serves |
|
1599 * as a C/POSIX migration function. |
|
1600 * See the comments about C/POSIX character classification functions in the |
|
1601 * documentation at the top of this header file. |
|
1602 * |
|
1603 * @param c the code point to be tested |
|
1604 * @return TRUE if the code point is an Ll lowercase letter |
|
1605 * |
|
1606 * @see UCHAR_LOWERCASE |
|
1607 * @see u_isupper |
|
1608 * @see u_istitle |
|
1609 * @see u_tolower |
|
1610 * @stable ICU 2.0 |
|
1611 */ |
|
1612 U_STABLE UBool U_EXPORT2 |
|
1613 u_islower(UChar32 c); |
|
1614 |
|
1615 /** |
|
1616 * Determines whether the specified code point has the general category "Lu" |
|
1617 * (uppercase letter). |
|
1618 * |
|
1619 * Same as java.lang.Character.isUpperCase(). |
|
1620 * |
|
1621 * This misses some characters that are also uppercase but |
|
1622 * have a different general category value. |
|
1623 * In order to include those, use UCHAR_UPPERCASE. |
|
1624 * |
|
1625 * In addition to being equivalent to a Java function, this also serves |
|
1626 * as a C/POSIX migration function. |
|
1627 * See the comments about C/POSIX character classification functions in the |
|
1628 * documentation at the top of this header file. |
|
1629 * |
|
1630 * @param c the code point to be tested |
|
1631 * @return TRUE if the code point is an Lu uppercase letter |
|
1632 * |
|
1633 * @see UCHAR_UPPERCASE |
|
1634 * @see u_islower |
|
1635 * @see u_istitle |
|
1636 * @see u_tolower |
|
1637 * @stable ICU 2.0 |
|
1638 */ |
|
1639 U_STABLE UBool U_EXPORT2 |
|
1640 u_isupper(UChar32 c); |
|
1641 |
|
1642 /** |
|
1643 * Determines whether the specified code point is a titlecase letter. |
|
1644 * True for general category "Lt" (titlecase letter). |
|
1645 * |
|
1646 * Same as java.lang.Character.isTitleCase(). |
|
1647 * |
|
1648 * @param c the code point to be tested |
|
1649 * @return TRUE if the code point is an Lt titlecase letter |
|
1650 * |
|
1651 * @see u_isupper |
|
1652 * @see u_islower |
|
1653 * @see u_totitle |
|
1654 * @stable ICU 2.0 |
|
1655 */ |
|
1656 U_STABLE UBool U_EXPORT2 |
|
1657 u_istitle(UChar32 c); |
|
1658 |
|
1659 /** |
|
1660 * Determines whether the specified code point is a digit character according to Java. |
|
1661 * True for characters with general category "Nd" (decimal digit numbers). |
|
1662 * Beginning with Unicode 4, this is the same as |
|
1663 * testing for the Numeric_Type of Decimal. |
|
1664 * |
|
1665 * Same as java.lang.Character.isDigit(). |
|
1666 * |
|
1667 * In addition to being equivalent to a Java function, this also serves |
|
1668 * as a C/POSIX migration function. |
|
1669 * See the comments about C/POSIX character classification functions in the |
|
1670 * documentation at the top of this header file. |
|
1671 * |
|
1672 * @param c the code point to be tested |
|
1673 * @return TRUE if the code point is a digit character according to Character.isDigit() |
|
1674 * |
|
1675 * @stable ICU 2.0 |
|
1676 */ |
|
1677 U_STABLE UBool U_EXPORT2 |
|
1678 u_isdigit(UChar32 c); |
|
1679 |
|
1680 /** |
|
1681 * Determines whether the specified code point is a letter character. |
|
1682 * True for general categories "L" (letters). |
|
1683 * |
|
1684 * Same as java.lang.Character.isLetter(). |
|
1685 * |
|
1686 * In addition to being equivalent to a Java function, this also serves |
|
1687 * as a C/POSIX migration function. |
|
1688 * See the comments about C/POSIX character classification functions in the |
|
1689 * documentation at the top of this header file. |
|
1690 * |
|
1691 * @param c the code point to be tested |
|
1692 * @return TRUE if the code point is a letter character |
|
1693 * |
|
1694 * @see u_isdigit |
|
1695 * @see u_isalnum |
|
1696 * @stable ICU 2.0 |
|
1697 */ |
|
1698 U_STABLE UBool U_EXPORT2 |
|
1699 u_isalpha(UChar32 c); |
|
1700 |
|
1701 /** |
|
1702 * Determines whether the specified code point is an alphanumeric character |
|
1703 * (letter or digit) according to Java. |
|
1704 * True for characters with general categories |
|
1705 * "L" (letters) and "Nd" (decimal digit numbers). |
|
1706 * |
|
1707 * Same as java.lang.Character.isLetterOrDigit(). |
|
1708 * |
|
1709 * In addition to being equivalent to a Java function, this also serves |
|
1710 * as a C/POSIX migration function. |
|
1711 * See the comments about C/POSIX character classification functions in the |
|
1712 * documentation at the top of this header file. |
|
1713 * |
|
1714 * @param c the code point to be tested |
|
1715 * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() |
|
1716 * |
|
1717 * @stable ICU 2.0 |
|
1718 */ |
|
1719 U_STABLE UBool U_EXPORT2 |
|
1720 u_isalnum(UChar32 c); |
|
1721 |
|
1722 /** |
|
1723 * Determines whether the specified code point is a hexadecimal digit. |
|
1724 * This is equivalent to u_digit(c, 16)>=0. |
|
1725 * True for characters with general category "Nd" (decimal digit numbers) |
|
1726 * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. |
|
1727 * (That is, for letters with code points |
|
1728 * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) |
|
1729 * |
|
1730 * In order to narrow the definition of hexadecimal digits to only ASCII |
|
1731 * characters, use (c<=0x7f && u_isxdigit(c)). |
|
1732 * |
|
1733 * This is a C/POSIX migration function. |
|
1734 * See the comments about C/POSIX character classification functions in the |
|
1735 * documentation at the top of this header file. |
|
1736 * |
|
1737 * @param c the code point to be tested |
|
1738 * @return TRUE if the code point is a hexadecimal digit |
|
1739 * |
|
1740 * @stable ICU 2.6 |
|
1741 */ |
|
1742 U_STABLE UBool U_EXPORT2 |
|
1743 u_isxdigit(UChar32 c); |
|
1744 |
|
1745 /** |
|
1746 * Determines whether the specified code point is a punctuation character. |
|
1747 * True for characters with general categories "P" (punctuation). |
|
1748 * |
|
1749 * This is a C/POSIX migration function. |
|
1750 * See the comments about C/POSIX character classification functions in the |
|
1751 * documentation at the top of this header file. |
|
1752 * |
|
1753 * @param c the code point to be tested |
|
1754 * @return TRUE if the code point is a punctuation character |
|
1755 * |
|
1756 * @stable ICU 2.6 |
|
1757 */ |
|
1758 U_STABLE UBool U_EXPORT2 |
|
1759 u_ispunct(UChar32 c); |
|
1760 |
|
1761 /** |
|
1762 * Determines whether the specified code point is a "graphic" character |
|
1763 * (printable, excluding spaces). |
|
1764 * TRUE for all characters except those with general categories |
|
1765 * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), |
|
1766 * "Cn" (unassigned), and "Z" (separators). |
|
1767 * |
|
1768 * This is a C/POSIX migration function. |
|
1769 * See the comments about C/POSIX character classification functions in the |
|
1770 * documentation at the top of this header file. |
|
1771 * |
|
1772 * @param c the code point to be tested |
|
1773 * @return TRUE if the code point is a "graphic" character |
|
1774 * |
|
1775 * @stable ICU 2.6 |
|
1776 */ |
|
1777 U_STABLE UBool U_EXPORT2 |
|
1778 u_isgraph(UChar32 c); |
|
1779 |
|
1780 /** |
|
1781 * Determines whether the specified code point is a "blank" or "horizontal space", |
|
1782 * a character that visibly separates words on a line. |
|
1783 * The following are equivalent definitions: |
|
1784 * |
|
1785 * TRUE for Unicode White_Space characters except for "vertical space controls" |
|
1786 * where "vertical space controls" are the following characters: |
|
1787 * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) |
|
1788 * |
|
1789 * same as |
|
1790 * |
|
1791 * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) |
|
1792 * except Zero Width Space (ZWSP, U+200B). |
|
1793 * |
|
1794 * Note: There are several ICU whitespace functions; please see the uchar.h |
|
1795 * file documentation for a detailed comparison. |
|
1796 * |
|
1797 * This is a C/POSIX migration function. |
|
1798 * See the comments about C/POSIX character classification functions in the |
|
1799 * documentation at the top of this header file. |
|
1800 * |
|
1801 * @param c the code point to be tested |
|
1802 * @return TRUE if the code point is a "blank" |
|
1803 * |
|
1804 * @stable ICU 2.6 |
|
1805 */ |
|
1806 U_STABLE UBool U_EXPORT2 |
|
1807 u_isblank(UChar32 c); |
|
1808 |
|
1809 /** |
|
1810 * Determines whether the specified code point is "defined", |
|
1811 * which usually means that it is assigned a character. |
|
1812 * True for general categories other than "Cn" (other, not assigned), |
|
1813 * i.e., true for all code points mentioned in UnicodeData.txt. |
|
1814 * |
|
1815 * Note that non-character code points (e.g., U+FDD0) are not "defined" |
|
1816 * (they are Cn), but surrogate code points are "defined" (Cs). |
|
1817 * |
|
1818 * Same as java.lang.Character.isDefined(). |
|
1819 * |
|
1820 * @param c the code point to be tested |
|
1821 * @return TRUE if the code point is assigned a character |
|
1822 * |
|
1823 * @see u_isdigit |
|
1824 * @see u_isalpha |
|
1825 * @see u_isalnum |
|
1826 * @see u_isupper |
|
1827 * @see u_islower |
|
1828 * @see u_istitle |
|
1829 * @stable ICU 2.0 |
|
1830 */ |
|
1831 U_STABLE UBool U_EXPORT2 |
|
1832 u_isdefined(UChar32 c); |
|
1833 |
|
1834 /** |
|
1835 * Determines if the specified character is a space character or not. |
|
1836 * |
|
1837 * Note: There are several ICU whitespace functions; please see the uchar.h |
|
1838 * file documentation for a detailed comparison. |
|
1839 * |
|
1840 * This is a C/POSIX migration function. |
|
1841 * See the comments about C/POSIX character classification functions in the |
|
1842 * documentation at the top of this header file. |
|
1843 * |
|
1844 * @param c the character to be tested |
|
1845 * @return TRUE if the character is a space character; FALSE otherwise. |
|
1846 * |
|
1847 * @see u_isJavaSpaceChar |
|
1848 * @see u_isWhitespace |
|
1849 * @see u_isUWhiteSpace |
|
1850 * @stable ICU 2.0 |
|
1851 */ |
|
1852 U_STABLE UBool U_EXPORT2 |
|
1853 u_isspace(UChar32 c); |
|
1854 |
|
1855 /** |
|
1856 * Determine if the specified code point is a space character according to Java. |
|
1857 * True for characters with general categories "Z" (separators), |
|
1858 * which does not include control codes (e.g., TAB or Line Feed). |
|
1859 * |
|
1860 * Same as java.lang.Character.isSpaceChar(). |
|
1861 * |
|
1862 * Note: There are several ICU whitespace functions; please see the uchar.h |
|
1863 * file documentation for a detailed comparison. |
|
1864 * |
|
1865 * @param c the code point to be tested |
|
1866 * @return TRUE if the code point is a space character according to Character.isSpaceChar() |
|
1867 * |
|
1868 * @see u_isspace |
|
1869 * @see u_isWhitespace |
|
1870 * @see u_isUWhiteSpace |
|
1871 * @stable ICU 2.6 |
|
1872 */ |
|
1873 U_STABLE UBool U_EXPORT2 |
|
1874 u_isJavaSpaceChar(UChar32 c); |
|
1875 |
|
1876 /** |
|
1877 * Determines if the specified code point is a whitespace character according to Java/ICU. |
|
1878 * A character is considered to be a Java whitespace character if and only |
|
1879 * if it satisfies one of the following criteria: |
|
1880 * |
|
1881 * - It is a Unicode separator (categories "Z"), but is not |
|
1882 * a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). |
|
1883 * - It is U+0009 HORIZONTAL TABULATION. |
|
1884 * - It is U+000A LINE FEED. |
|
1885 * - It is U+000B VERTICAL TABULATION. |
|
1886 * - It is U+000C FORM FEED. |
|
1887 * - It is U+000D CARRIAGE RETURN. |
|
1888 * - It is U+001C FILE SEPARATOR. |
|
1889 * - It is U+001D GROUP SEPARATOR. |
|
1890 * - It is U+001E RECORD SEPARATOR. |
|
1891 * - It is U+001F UNIT SEPARATOR. |
|
1892 * - It is U+0085 NEXT LINE. |
|
1893 * |
|
1894 * Same as java.lang.Character.isWhitespace() except that Java omits U+0085. |
|
1895 * |
|
1896 * Note: There are several ICU whitespace functions; please see the uchar.h |
|
1897 * file documentation for a detailed comparison. |
|
1898 * |
|
1899 * @param c the code point to be tested |
|
1900 * @return TRUE if the code point is a whitespace character according to Java/ICU |
|
1901 * |
|
1902 * @see u_isspace |
|
1903 * @see u_isJavaSpaceChar |
|
1904 * @see u_isUWhiteSpace |
|
1905 * @stable ICU 2.0 |
|
1906 */ |
|
1907 U_STABLE UBool U_EXPORT2 |
|
1908 u_isWhitespace(UChar32 c); |
|
1909 |
|
1910 /** |
|
1911 * Determines whether the specified code point is a control character |
|
1912 * (as defined by this function). |
|
1913 * A control character is one of the following: |
|
1914 * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) |
|
1915 * - U_CONTROL_CHAR (Cc) |
|
1916 * - U_FORMAT_CHAR (Cf) |
|
1917 * - U_LINE_SEPARATOR (Zl) |
|
1918 * - U_PARAGRAPH_SEPARATOR (Zp) |
|
1919 * |
|
1920 * This is a C/POSIX migration function. |
|
1921 * See the comments about C/POSIX character classification functions in the |
|
1922 * documentation at the top of this header file. |
|
1923 * |
|
1924 * @param c the code point to be tested |
|
1925 * @return TRUE if the code point is a control character |
|
1926 * |
|
1927 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT |
|
1928 * @see u_isprint |
|
1929 * @stable ICU 2.0 |
|
1930 */ |
|
1931 U_STABLE UBool U_EXPORT2 |
|
1932 u_iscntrl(UChar32 c); |
|
1933 |
|
1934 /** |
|
1935 * Determines whether the specified code point is an ISO control code. |
|
1936 * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). |
|
1937 * |
|
1938 * Same as java.lang.Character.isISOControl(). |
|
1939 * |
|
1940 * @param c the code point to be tested |
|
1941 * @return TRUE if the code point is an ISO control code |
|
1942 * |
|
1943 * @see u_iscntrl |
|
1944 * @stable ICU 2.6 |
|
1945 */ |
|
1946 U_STABLE UBool U_EXPORT2 |
|
1947 u_isISOControl(UChar32 c); |
|
1948 |
|
1949 /** |
|
1950 * Determines whether the specified code point is a printable character. |
|
1951 * True for general categories <em>other</em> than "C" (controls). |
|
1952 * |
|
1953 * This is a C/POSIX migration function. |
|
1954 * See the comments about C/POSIX character classification functions in the |
|
1955 * documentation at the top of this header file. |
|
1956 * |
|
1957 * @param c the code point to be tested |
|
1958 * @return TRUE if the code point is a printable character |
|
1959 * |
|
1960 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT |
|
1961 * @see u_iscntrl |
|
1962 * @stable ICU 2.0 |
|
1963 */ |
|
1964 U_STABLE UBool U_EXPORT2 |
|
1965 u_isprint(UChar32 c); |
|
1966 |
|
1967 /** |
|
1968 * Determines whether the specified code point is a base character. |
|
1969 * True for general categories "L" (letters), "N" (numbers), |
|
1970 * "Mc" (spacing combining marks), and "Me" (enclosing marks). |
|
1971 * |
|
1972 * Note that this is different from the Unicode definition in |
|
1973 * chapter 3.5, conformance clause D13, |
|
1974 * which defines base characters to be all characters (not Cn) |
|
1975 * that do not graphically combine with preceding characters (M) |
|
1976 * and that are neither control (Cc) nor format (Cf) characters. |
|
1977 * |
|
1978 * @param c the code point to be tested |
|
1979 * @return TRUE if the code point is a base character according to this function |
|
1980 * |
|
1981 * @see u_isalpha |
|
1982 * @see u_isdigit |
|
1983 * @stable ICU 2.0 |
|
1984 */ |
|
1985 U_STABLE UBool U_EXPORT2 |
|
1986 u_isbase(UChar32 c); |
|
1987 |
|
1988 /** |
|
1989 * Returns the bidirectional category value for the code point, |
|
1990 * which is used in the Unicode bidirectional algorithm |
|
1991 * (UAX #9 http://www.unicode.org/reports/tr9/). |
|
1992 * Note that some <em>unassigned</em> code points have bidi values |
|
1993 * of R or AL because they are in blocks that are reserved |
|
1994 * for Right-To-Left scripts. |
|
1995 * |
|
1996 * Same as java.lang.Character.getDirectionality() |
|
1997 * |
|
1998 * @param c the code point to be tested |
|
1999 * @return the bidirectional category (UCharDirection) value |
|
2000 * |
|
2001 * @see UCharDirection |
|
2002 * @stable ICU 2.0 |
|
2003 */ |
|
2004 U_STABLE UCharDirection U_EXPORT2 |
|
2005 u_charDirection(UChar32 c); |
|
2006 |
|
2007 /** |
|
2008 * Determines whether the code point has the Bidi_Mirrored property. |
|
2009 * This property is set for characters that are commonly used in |
|
2010 * Right-To-Left contexts and need to be displayed with a "mirrored" |
|
2011 * glyph. |
|
2012 * |
|
2013 * Same as java.lang.Character.isMirrored(). |
|
2014 * Same as UCHAR_BIDI_MIRRORED |
|
2015 * |
|
2016 * @param c the code point to be tested |
|
2017 * @return TRUE if the character has the Bidi_Mirrored property |
|
2018 * |
|
2019 * @see UCHAR_BIDI_MIRRORED |
|
2020 * @stable ICU 2.0 |
|
2021 */ |
|
2022 U_STABLE UBool U_EXPORT2 |
|
2023 u_isMirrored(UChar32 c); |
|
2024 |
|
2025 /** |
|
2026 * Maps the specified character to a "mirror-image" character. |
|
2027 * For characters with the Bidi_Mirrored property, implementations |
|
2028 * sometimes need a "poor man's" mapping to another Unicode |
|
2029 * character (code point) such that the default glyph may serve |
|
2030 * as the mirror-image of the default glyph of the specified |
|
2031 * character. This is useful for text conversion to and from |
|
2032 * codepages with visual order, and for displays without glyph |
|
2033 * selection capabilities. |
|
2034 * |
|
2035 * @param c the code point to be mapped |
|
2036 * @return another Unicode code point that may serve as a mirror-image |
|
2037 * substitute, or c itself if there is no such mapping or c |
|
2038 * does not have the Bidi_Mirrored property |
|
2039 * |
|
2040 * @see UCHAR_BIDI_MIRRORED |
|
2041 * @see u_isMirrored |
|
2042 * @stable ICU 2.0 |
|
2043 */ |
|
2044 U_STABLE UChar32 U_EXPORT2 |
|
2045 u_charMirror(UChar32 c); |
|
2046 |
|
2047 /** |
|
2048 * Returns the general category value for the code point. |
|
2049 * |
|
2050 * Same as java.lang.Character.getType(). |
|
2051 * |
|
2052 * @param c the code point to be tested |
|
2053 * @return the general category (UCharCategory) value |
|
2054 * |
|
2055 * @see UCharCategory |
|
2056 * @stable ICU 2.0 |
|
2057 */ |
|
2058 U_STABLE int8_t U_EXPORT2 |
|
2059 u_charType(UChar32 c); |
|
2060 |
|
2061 /** |
|
2062 * Get a single-bit bit set for the general category of a character. |
|
2063 * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. |
|
2064 * Same as U_MASK(u_charType(c)). |
|
2065 * |
|
2066 * @param c the code point to be tested |
|
2067 * @return a single-bit mask corresponding to the general category (UCharCategory) value |
|
2068 * |
|
2069 * @see u_charType |
|
2070 * @see UCharCategory |
|
2071 * @see U_GC_CN_MASK |
|
2072 * @stable ICU 2.1 |
|
2073 */ |
|
2074 #define U_GET_GC_MASK(c) U_MASK(u_charType(c)) |
|
2075 |
|
2076 /** |
|
2077 * Callback from u_enumCharTypes(), is called for each contiguous range |
|
2078 * of code points c (where start<=c<limit) |
|
2079 * with the same Unicode general category ("character type"). |
|
2080 * |
|
2081 * The callback function can stop the enumeration by returning FALSE. |
|
2082 * |
|
2083 * @param context an opaque pointer, as passed into u_enumCharTypes() |
|
2084 * @param start the first code point in a contiguous range with value |
|
2085 * @param limit one past the last code point in a contiguous range with value |
|
2086 * @param type the general category for all code points in [start..limit[ |
|
2087 * @return FALSE to stop the enumeration |
|
2088 * |
|
2089 * @stable ICU 2.1 |
|
2090 * @see UCharCategory |
|
2091 * @see u_enumCharTypes |
|
2092 */ |
|
2093 typedef UBool U_CALLCONV |
|
2094 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); |
|
2095 |
|
2096 /** |
|
2097 * Enumerate efficiently all code points with their Unicode general categories. |
|
2098 * |
|
2099 * This is useful for building data structures (e.g., UnicodeSet's), |
|
2100 * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. |
|
2101 * |
|
2102 * For each contiguous range of code points with a given general category ("character type"), |
|
2103 * the UCharEnumTypeRange function is called. |
|
2104 * Adjacent ranges have different types. |
|
2105 * The Unicode Standard guarantees that the numeric value of the type is 0..31. |
|
2106 * |
|
2107 * @param enumRange a pointer to a function that is called for each contiguous range |
|
2108 * of code points with the same general category |
|
2109 * @param context an opaque pointer that is passed on to the callback function |
|
2110 * |
|
2111 * @stable ICU 2.1 |
|
2112 * @see UCharCategory |
|
2113 * @see UCharEnumTypeRange |
|
2114 */ |
|
2115 U_STABLE void U_EXPORT2 |
|
2116 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); |
|
2117 |
|
2118 #if !UCONFIG_NO_NORMALIZATION |
|
2119 |
|
2120 /** |
|
2121 * Returns the combining class of the code point as specified in UnicodeData.txt. |
|
2122 * |
|
2123 * @param c the code point of the character |
|
2124 * @return the combining class of the character |
|
2125 * @stable ICU 2.0 |
|
2126 */ |
|
2127 U_STABLE uint8_t U_EXPORT2 |
|
2128 u_getCombiningClass(UChar32 c); |
|
2129 |
|
2130 #endif |
|
2131 |
|
2132 /** |
|
2133 * Returns the decimal digit value of a decimal digit character. |
|
2134 * Such characters have the general category "Nd" (decimal digit numbers) |
|
2135 * and a Numeric_Type of Decimal. |
|
2136 * |
|
2137 * Unlike ICU releases before 2.6, no digit values are returned for any |
|
2138 * Han characters because Han number characters are often used with a special |
|
2139 * Chinese-style number format (with characters for powers of 10 in between) |
|
2140 * instead of in decimal-positional notation. |
|
2141 * Unicode 4 explicitly assigns Han number characters the Numeric_Type |
|
2142 * Numeric instead of Decimal. |
|
2143 * See Jitterbug 1483 for more details. |
|
2144 * |
|
2145 * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() |
|
2146 * for complete numeric Unicode properties. |
|
2147 * |
|
2148 * @param c the code point for which to get the decimal digit value |
|
2149 * @return the decimal digit value of c, |
|
2150 * or -1 if c is not a decimal digit character |
|
2151 * |
|
2152 * @see u_getNumericValue |
|
2153 * @stable ICU 2.0 |
|
2154 */ |
|
2155 U_STABLE int32_t U_EXPORT2 |
|
2156 u_charDigitValue(UChar32 c); |
|
2157 |
|
2158 /** |
|
2159 * Returns the Unicode allocation block that contains the character. |
|
2160 * |
|
2161 * @param c the code point to be tested |
|
2162 * @return the block value (UBlockCode) for c |
|
2163 * |
|
2164 * @see UBlockCode |
|
2165 * @stable ICU 2.0 |
|
2166 */ |
|
2167 U_STABLE UBlockCode U_EXPORT2 |
|
2168 ublock_getCode(UChar32 c); |
|
2169 |
|
2170 /** |
|
2171 * Retrieve the name of a Unicode character. |
|
2172 * Depending on <code>nameChoice</code>, the character name written |
|
2173 * into the buffer is the "modern" name or the name that was defined |
|
2174 * in Unicode version 1.0. |
|
2175 * The name contains only "invariant" characters |
|
2176 * like A-Z, 0-9, space, and '-'. |
|
2177 * Unicode 1.0 names are only retrieved if they are different from the modern |
|
2178 * names and if the data file contains the data for them. gennames may or may |
|
2179 * not be called with a command line option to include 1.0 names in unames.dat. |
|
2180 * |
|
2181 * @param code The character (code point) for which to get the name. |
|
2182 * It must be <code>0<=code<=0x10ffff</code>. |
|
2183 * @param nameChoice Selector for which name to get. |
|
2184 * @param buffer Destination address for copying the name. |
|
2185 * The name will always be zero-terminated. |
|
2186 * If there is no name, then the buffer will be set to the empty string. |
|
2187 * @param bufferLength <code>==sizeof(buffer)</code> |
|
2188 * @param pErrorCode Pointer to a UErrorCode variable; |
|
2189 * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> |
|
2190 * returns. |
|
2191 * @return The length of the name, or 0 if there is no name for this character. |
|
2192 * If the bufferLength is less than or equal to the length, then the buffer |
|
2193 * contains the truncated name and the returned length indicates the full |
|
2194 * length of the name. |
|
2195 * The length does not include the zero-termination. |
|
2196 * |
|
2197 * @see UCharNameChoice |
|
2198 * @see u_charFromName |
|
2199 * @see u_enumCharNames |
|
2200 * @stable ICU 2.0 |
|
2201 */ |
|
2202 U_STABLE int32_t U_EXPORT2 |
|
2203 u_charName(UChar32 code, UCharNameChoice nameChoice, |
|
2204 char *buffer, int32_t bufferLength, |
|
2205 UErrorCode *pErrorCode); |
|
2206 |
|
2207 /** |
|
2208 * Get the ISO 10646 comment for a character. |
|
2209 * The ISO 10646 comment is an informative field in the Unicode Character |
|
2210 * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list. |
|
2211 * |
|
2212 * @param c The character (code point) for which to get the ISO comment. |
|
2213 * It must be <code>0<=c<=0x10ffff</code>. |
|
2214 * @param dest Destination address for copying the comment. |
|
2215 * The comment will be zero-terminated if possible. |
|
2216 * If there is no comment, then the buffer will be set to the empty string. |
|
2217 * @param destCapacity <code>==sizeof(dest)</code> |
|
2218 * @param pErrorCode Pointer to a UErrorCode variable; |
|
2219 * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> |
|
2220 * returns. |
|
2221 * @return The length of the comment, or 0 if there is no comment for this character. |
|
2222 * If the destCapacity is less than or equal to the length, then the buffer |
|
2223 * contains the truncated comment and the returned length indicates the full |
|
2224 * length of the comment. |
|
2225 * The length does not include the zero-termination. |
|
2226 * |
|
2227 * @stable ICU 2.2 |
|
2228 */ |
|
2229 U_STABLE int32_t U_EXPORT2 |
|
2230 u_getISOComment(UChar32 c, |
|
2231 char *dest, int32_t destCapacity, |
|
2232 UErrorCode *pErrorCode); |
|
2233 |
|
2234 /** |
|
2235 * Find a Unicode character by its name and return its code point value. |
|
2236 * The name is matched exactly and completely. |
|
2237 * If the name does not correspond to a code point, <i>pErrorCode</i> |
|
2238 * is set to <code>U_INVALID_CHAR_FOUND</code>. |
|
2239 * A Unicode 1.0 name is matched only if it differs from the modern name. |
|
2240 * Unicode names are all uppercase. Extended names are lowercase followed |
|
2241 * by an uppercase hexadecimal number, and within angle brackets. |
|
2242 * |
|
2243 * @param nameChoice Selector for which name to match. |
|
2244 * @param name The name to match. |
|
2245 * @param pErrorCode Pointer to a UErrorCode variable |
|
2246 * @return The Unicode value of the code point with the given name, |
|
2247 * or an undefined value if there is no such code point. |
|
2248 * |
|
2249 * @see UCharNameChoice |
|
2250 * @see u_charName |
|
2251 * @see u_enumCharNames |
|
2252 * @stable ICU 1.7 |
|
2253 */ |
|
2254 U_STABLE UChar32 U_EXPORT2 |
|
2255 u_charFromName(UCharNameChoice nameChoice, |
|
2256 const char *name, |
|
2257 UErrorCode *pErrorCode); |
|
2258 |
|
2259 /** |
|
2260 * Type of a callback function for u_enumCharNames() that gets called |
|
2261 * for each Unicode character with the code point value and |
|
2262 * the character name. |
|
2263 * If such a function returns FALSE, then the enumeration is stopped. |
|
2264 * |
|
2265 * @param context The context pointer that was passed to u_enumCharNames(). |
|
2266 * @param code The Unicode code point for the character with this name. |
|
2267 * @param nameChoice Selector for which kind of names is enumerated. |
|
2268 * @param name The character's name, zero-terminated. |
|
2269 * @param length The length of the name. |
|
2270 * @return TRUE if the enumeration should continue, FALSE to stop it. |
|
2271 * |
|
2272 * @see UCharNameChoice |
|
2273 * @see u_enumCharNames |
|
2274 * @stable ICU 1.7 |
|
2275 */ |
|
2276 typedef UBool UEnumCharNamesFn(void *context, |
|
2277 UChar32 code, |
|
2278 UCharNameChoice nameChoice, |
|
2279 const char *name, |
|
2280 int32_t length); |
|
2281 |
|
2282 /** |
|
2283 * Enumerate all assigned Unicode characters between the start and limit |
|
2284 * code points (start inclusive, limit exclusive) and call a function |
|
2285 * for each, passing the code point value and the character name. |
|
2286 * For Unicode 1.0 names, only those are enumerated that differ from the |
|
2287 * modern names. |
|
2288 * |
|
2289 * @param start The first code point in the enumeration range. |
|
2290 * @param limit One more than the last code point in the enumeration range |
|
2291 * (the first one after the range). |
|
2292 * @param fn The function that is to be called for each character name. |
|
2293 * @param context An arbitrary pointer that is passed to the function. |
|
2294 * @param nameChoice Selector for which kind of names to enumerate. |
|
2295 * @param pErrorCode Pointer to a UErrorCode variable |
|
2296 * |
|
2297 * @see UCharNameChoice |
|
2298 * @see UEnumCharNamesFn |
|
2299 * @see u_charName |
|
2300 * @see u_charFromName |
|
2301 * @stable ICU 1.7 |
|
2302 */ |
|
2303 U_STABLE void U_EXPORT2 |
|
2304 u_enumCharNames(UChar32 start, UChar32 limit, |
|
2305 UEnumCharNamesFn *fn, |
|
2306 void *context, |
|
2307 UCharNameChoice nameChoice, |
|
2308 UErrorCode *pErrorCode); |
|
2309 |
|
2310 /** |
|
2311 * Return the Unicode name for a given property, as given in the |
|
2312 * Unicode database file PropertyAliases.txt. |
|
2313 * |
|
2314 * In addition, this function maps the property |
|
2315 * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / |
|
2316 * "General_Category_Mask". These names are not in |
|
2317 * PropertyAliases.txt. |
|
2318 * |
|
2319 * @param property UProperty selector other than UCHAR_INVALID_CODE. |
|
2320 * If out of range, NULL is returned. |
|
2321 * |
|
2322 * @param nameChoice selector for which name to get. If out of range, |
|
2323 * NULL is returned. All properties have a long name. Most |
|
2324 * have a short name, but some do not. Unicode allows for |
|
2325 * additional names; if present these will be returned by |
|
2326 * U_LONG_PROPERTY_NAME + i, where i=1, 2,... |
|
2327 * |
|
2328 * @return a pointer to the name, or NULL if either the |
|
2329 * property or the nameChoice is out of range. If a given |
|
2330 * nameChoice returns NULL, then all larger values of |
|
2331 * nameChoice will return NULL, with one exception: if NULL is |
|
2332 * returned for U_SHORT_PROPERTY_NAME, then |
|
2333 * U_LONG_PROPERTY_NAME (and higher) may still return a |
|
2334 * non-NULL value. The returned pointer is valid until |
|
2335 * u_cleanup() is called. |
|
2336 * |
|
2337 * @see UProperty |
|
2338 * @see UPropertyNameChoice |
|
2339 * @stable ICU 2.4 |
|
2340 */ |
|
2341 U_STABLE const char* U_EXPORT2 |
|
2342 u_getPropertyName(UProperty property, |
|
2343 UPropertyNameChoice nameChoice); |
|
2344 |
|
2345 /** |
|
2346 * Return the UProperty enum for a given property name, as specified |
|
2347 * in the Unicode database file PropertyAliases.txt. Short, long, and |
|
2348 * any other variants are recognized. |
|
2349 * |
|
2350 * In addition, this function maps the synthetic names "gcm" / |
|
2351 * "General_Category_Mask" to the property |
|
2352 * UCHAR_GENERAL_CATEGORY_MASK. These names are not in |
|
2353 * PropertyAliases.txt. |
|
2354 * |
|
2355 * @param alias the property name to be matched. The name is compared |
|
2356 * using "loose matching" as described in PropertyAliases.txt. |
|
2357 * |
|
2358 * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name |
|
2359 * does not match any property. |
|
2360 * |
|
2361 * @see UProperty |
|
2362 * @stable ICU 2.4 |
|
2363 */ |
|
2364 U_STABLE UProperty U_EXPORT2 |
|
2365 u_getPropertyEnum(const char* alias); |
|
2366 |
|
2367 /** |
|
2368 * Return the Unicode name for a given property value, as given in the |
|
2369 * Unicode database file PropertyValueAliases.txt. |
|
2370 * |
|
2371 * Note: Some of the names in PropertyValueAliases.txt can only be |
|
2372 * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not |
|
2373 * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / |
|
2374 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" |
|
2375 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". |
|
2376 * |
|
2377 * @param property UProperty selector constant. |
|
2378 * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT |
|
2379 * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT |
|
2380 * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. |
|
2381 * If out of range, NULL is returned. |
|
2382 * |
|
2383 * @param value selector for a value for the given property. If out |
|
2384 * of range, NULL is returned. In general, valid values range |
|
2385 * from 0 up to some maximum. There are a few exceptions: |
|
2386 * (1.) UCHAR_BLOCK values begin at the non-zero value |
|
2387 * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS |
|
2388 * values are not contiguous and range from 0..240. (3.) |
|
2389 * UCHAR_GENERAL_CATEGORY_MASK values are not values of |
|
2390 * UCharCategory, but rather mask values produced by |
|
2391 * U_GET_GC_MASK(). This allows grouped categories such as |
|
2392 * [:L:] to be represented. Mask values range |
|
2393 * non-contiguously from 1..U_GC_P_MASK. |
|
2394 * |
|
2395 * @param nameChoice selector for which name to get. If out of range, |
|
2396 * NULL is returned. All values have a long name. Most have |
|
2397 * a short name, but some do not. Unicode allows for |
|
2398 * additional names; if present these will be returned by |
|
2399 * U_LONG_PROPERTY_NAME + i, where i=1, 2,... |
|
2400 * |
|
2401 * @return a pointer to the name, or NULL if the property, |
|
2402 * the value, or the nameChoice is out of range. If a given |
|
2403 * nameChoice returns NULL, then all larger values of |
|
2404 * nameChoice will return NULL, with one exception: if NULL is |
|
2405 * returned for U_SHORT_PROPERTY_NAME, then |
|
2406 * U_LONG_PROPERTY_NAME (and higher) may still return a |
|
2407 * non-NULL value. The returned pointer is valid until |
|
2408 * u_cleanup() is called. |
|
2409 * |
|
2410 * @see UProperty |
|
2411 * @see UPropertyNameChoice |
|
2412 * @stable ICU 2.4 |
|
2413 */ |
|
2414 U_STABLE const char* U_EXPORT2 |
|
2415 u_getPropertyValueName(UProperty property, |
|
2416 int32_t value, |
|
2417 UPropertyNameChoice nameChoice); |
|
2418 |
|
2419 /** |
|
2420 * Return the property value integer for a given value name, as |
|
2421 * specified in the Unicode database file PropertyValueAliases.txt. |
|
2422 * Short, long, and any other variants are recognized. |
|
2423 * |
|
2424 * Note: Some of the names in PropertyValueAliases.txt will only be |
|
2425 * recognized with UCHAR_GENERAL_CATEGORY_MASK, not |
|
2426 * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / |
|
2427 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" |
|
2428 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". |
|
2429 * |
|
2430 * @param property UProperty selector constant. |
|
2431 * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT |
|
2432 * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT |
|
2433 * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. |
|
2434 * If out of range, UCHAR_INVALID_CODE is returned. |
|
2435 * |
|
2436 * @param alias the value name to be matched. The name is compared |
|
2437 * using "loose matching" as described in |
|
2438 * PropertyValueAliases.txt. |
|
2439 * |
|
2440 * @return a value integer or UCHAR_INVALID_CODE if the given name |
|
2441 * does not match any value of the given property, or if the |
|
2442 * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values |
|
2443 * are not values of UCharCategory, but rather mask values |
|
2444 * produced by U_GET_GC_MASK(). This allows grouped |
|
2445 * categories such as [:L:] to be represented. |
|
2446 * |
|
2447 * @see UProperty |
|
2448 * @stable ICU 2.4 |
|
2449 */ |
|
2450 U_STABLE int32_t U_EXPORT2 |
|
2451 u_getPropertyValueEnum(UProperty property, |
|
2452 const char* alias); |
|
2453 |
|
2454 /** |
|
2455 * Determines if the specified character is permissible as the |
|
2456 * first character in an identifier according to Unicode |
|
2457 * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). |
|
2458 * True for characters with general categories "L" (letters) and "Nl" (letter numbers). |
|
2459 * |
|
2460 * Same as java.lang.Character.isUnicodeIdentifierStart(). |
|
2461 * Same as UCHAR_ID_START |
|
2462 * |
|
2463 * @param c the code point to be tested |
|
2464 * @return TRUE if the code point may start an identifier |
|
2465 * |
|
2466 * @see UCHAR_ID_START |
|
2467 * @see u_isalpha |
|
2468 * @see u_isIDPart |
|
2469 * @stable ICU 2.0 |
|
2470 */ |
|
2471 U_STABLE UBool U_EXPORT2 |
|
2472 u_isIDStart(UChar32 c); |
|
2473 |
|
2474 /** |
|
2475 * Determines if the specified character is permissible |
|
2476 * in an identifier according to Java. |
|
2477 * True for characters with general categories "L" (letters), |
|
2478 * "Nl" (letter numbers), "Nd" (decimal digits), |
|
2479 * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and |
|
2480 * u_isIDIgnorable(c). |
|
2481 * |
|
2482 * Same as java.lang.Character.isUnicodeIdentifierPart(). |
|
2483 * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) |
|
2484 * except that Unicode recommends ignoring only Cf, which covers fewer |
|
2485 * code points than u_isIDIgnorable(c). |
|
2486 * |
|
2487 * @param c the code point to be tested |
|
2488 * @return TRUE if the code point may occur in an identifier according to Java |
|
2489 * |
|
2490 * @see UCHAR_ID_CONTINUE |
|
2491 * @see u_isIDStart |
|
2492 * @see u_isIDIgnorable |
|
2493 * @stable ICU 2.0 |
|
2494 */ |
|
2495 U_STABLE UBool U_EXPORT2 |
|
2496 u_isIDPart(UChar32 c); |
|
2497 |
|
2498 /** |
|
2499 * Determines if the specified character should be regarded |
|
2500 * as an ignorable character in an identifier, |
|
2501 * according to Java. |
|
2502 * True for characters with general category "Cf" (format controls) as well as |
|
2503 * non-whitespace ISO controls |
|
2504 * (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F). |
|
2505 * |
|
2506 * Same as java.lang.Character.isIdentifierIgnorable() |
|
2507 * except that Java also returns TRUE for U+0085 Next Line |
|
2508 * (it omits U+0085 from whitespace ISO controls). |
|
2509 * |
|
2510 * Note that Unicode just recommends to ignore Cf (format controls). |
|
2511 * |
|
2512 * @param c the code point to be tested |
|
2513 * @return TRUE if the code point is ignorable in identifiers according to Java |
|
2514 * |
|
2515 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT |
|
2516 * @see u_isIDStart |
|
2517 * @see u_isIDPart |
|
2518 * @stable ICU 2.0 |
|
2519 */ |
|
2520 U_STABLE UBool U_EXPORT2 |
|
2521 u_isIDIgnorable(UChar32 c); |
|
2522 |
|
2523 /** |
|
2524 * Determines if the specified character is permissible as the |
|
2525 * first character in a Java identifier. |
|
2526 * In addition to u_isIDStart(c), true for characters with |
|
2527 * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). |
|
2528 * |
|
2529 * Same as java.lang.Character.isJavaIdentifierStart(). |
|
2530 * |
|
2531 * @param c the code point to be tested |
|
2532 * @return TRUE if the code point may start a Java identifier |
|
2533 * |
|
2534 * @see u_isJavaIDPart |
|
2535 * @see u_isalpha |
|
2536 * @see u_isIDStart |
|
2537 * @stable ICU 2.0 |
|
2538 */ |
|
2539 U_STABLE UBool U_EXPORT2 |
|
2540 u_isJavaIDStart(UChar32 c); |
|
2541 |
|
2542 /** |
|
2543 * Determines if the specified character is permissible |
|
2544 * in a Java identifier. |
|
2545 * In addition to u_isIDPart(c), true for characters with |
|
2546 * general category "Sc" (currency symbols). |
|
2547 * |
|
2548 * Same as java.lang.Character.isJavaIdentifierPart(). |
|
2549 * |
|
2550 * @param c the code point to be tested |
|
2551 * @return TRUE if the code point may occur in a Java identifier |
|
2552 * |
|
2553 * @see u_isIDIgnorable |
|
2554 * @see u_isJavaIDStart |
|
2555 * @see u_isalpha |
|
2556 * @see u_isdigit |
|
2557 * @see u_isIDPart |
|
2558 * @stable ICU 2.0 |
|
2559 */ |
|
2560 U_STABLE UBool U_EXPORT2 |
|
2561 u_isJavaIDPart(UChar32 c); |
|
2562 |
|
2563 /** |
|
2564 * The given character is mapped to its lowercase equivalent according to |
|
2565 * UnicodeData.txt; if the character has no lowercase equivalent, the character |
|
2566 * itself is returned. |
|
2567 * |
|
2568 * Same as java.lang.Character.toLowerCase(). |
|
2569 * |
|
2570 * This function only returns the simple, single-code point case mapping. |
|
2571 * Full case mappings may result in zero, one or more code points and depend |
|
2572 * on context or language etc. |
|
2573 * Full case mappings are applied by the string case mapping functions, |
|
2574 * see ustring.h and the UnicodeString class. |
|
2575 * |
|
2576 * @param c the code point to be mapped |
|
2577 * @return the Simple_Lowercase_Mapping of the code point, if any; |
|
2578 * otherwise the code point itself. |
|
2579 * @stable ICU 2.0 |
|
2580 */ |
|
2581 U_STABLE UChar32 U_EXPORT2 |
|
2582 u_tolower(UChar32 c); |
|
2583 |
|
2584 /** |
|
2585 * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; |
|
2586 * if the character has no uppercase equivalent, the character itself is |
|
2587 * returned. |
|
2588 * |
|
2589 * Same as java.lang.Character.toUpperCase(). |
|
2590 * |
|
2591 * This function only returns the simple, single-code point case mapping. |
|
2592 * Full case mappings may result in zero, one or more code points and depend |
|
2593 * on context or language etc. |
|
2594 * Full case mappings are applied by the string case mapping functions, |
|
2595 * see ustring.h and the UnicodeString class. |
|
2596 * |
|
2597 * @param c the code point to be mapped |
|
2598 * @return the Simple_Uppercase_Mapping of the code point, if any; |
|
2599 * otherwise the code point itself. |
|
2600 * @stable ICU 2.0 |
|
2601 */ |
|
2602 U_STABLE UChar32 U_EXPORT2 |
|
2603 u_toupper(UChar32 c); |
|
2604 |
|
2605 /** |
|
2606 * The given character is mapped to its titlecase equivalent |
|
2607 * according to UnicodeData.txt; |
|
2608 * if none is defined, the character itself is returned. |
|
2609 * |
|
2610 * Same as java.lang.Character.toTitleCase(). |
|
2611 * |
|
2612 * This function only returns the simple, single-code point case mapping. |
|
2613 * Full case mappings may result in zero, one or more code points and depend |
|
2614 * on context or language etc. |
|
2615 * Full case mappings are applied by the string case mapping functions, |
|
2616 * see ustring.h and the UnicodeString class. |
|
2617 * |
|
2618 * @param c the code point to be mapped |
|
2619 * @return the Simple_Titlecase_Mapping of the code point, if any; |
|
2620 * otherwise the code point itself. |
|
2621 * @stable ICU 2.0 |
|
2622 */ |
|
2623 U_STABLE UChar32 U_EXPORT2 |
|
2624 u_totitle(UChar32 c); |
|
2625 |
|
2626 /** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ |
|
2627 #define U_FOLD_CASE_DEFAULT 0 |
|
2628 |
|
2629 /** |
|
2630 * Option value for case folding: |
|
2631 * |
|
2632 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I |
|
2633 * and dotless i appropriately for Turkic languages (tr, az). |
|
2634 * |
|
2635 * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that |
|
2636 * are to be included for default mappings and |
|
2637 * excluded for the Turkic-specific mappings. |
|
2638 * |
|
2639 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that |
|
2640 * are to be excluded for default mappings and |
|
2641 * included for the Turkic-specific mappings. |
|
2642 * |
|
2643 * @stable ICU 2.0 |
|
2644 */ |
|
2645 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
|
2646 |
|
2647 /** |
|
2648 * The given character is mapped to its case folding equivalent according to |
|
2649 * UnicodeData.txt and CaseFolding.txt; |
|
2650 * if the character has no case folding equivalent, the character |
|
2651 * itself is returned. |
|
2652 * |
|
2653 * This function only returns the simple, single-code point case mapping. |
|
2654 * Full case mappings may result in zero, one or more code points and depend |
|
2655 * on context or language etc. |
|
2656 * Full case mappings are applied by the string case mapping functions, |
|
2657 * see ustring.h and the UnicodeString class. |
|
2658 * |
|
2659 * @param c the code point to be mapped |
|
2660 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
|
2661 * @return the Simple_Case_Folding of the code point, if any; |
|
2662 * otherwise the code point itself. |
|
2663 * @stable ICU 2.0 |
|
2664 */ |
|
2665 U_STABLE UChar32 U_EXPORT2 |
|
2666 u_foldCase(UChar32 c, uint32_t options); |
|
2667 |
|
2668 /** |
|
2669 * Returns the decimal digit value of the code point in the |
|
2670 * specified radix. |
|
2671 * |
|
2672 * If the radix is not in the range <code>2<=radix<=36</code> or if the |
|
2673 * value of <code>ch</code> is not a valid digit in the specified |
|
2674 * radix, <code>-1</code> is returned. A character is a valid digit |
|
2675 * if at least one of the following is true: |
|
2676 * <ul> |
|
2677 * <li>The character has a decimal digit value. |
|
2678 * Such characters have the general category "Nd" (decimal digit numbers) |
|
2679 * and a Numeric_Type of Decimal. |
|
2680 * In this case the value is the character's decimal digit value.</li> |
|
2681 * <li>The character is one of the uppercase Latin letters |
|
2682 * <code>'A'</code> through <code>'Z'</code>. |
|
2683 * In this case the value is <code>ch-'A'+10</code>.</li> |
|
2684 * <li>The character is one of the lowercase Latin letters |
|
2685 * <code>'a'</code> through <code>'z'</code>. |
|
2686 * In this case the value is <code>ch-'a'+10</code>.</li> |
|
2687 * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) |
|
2688 * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) |
|
2689 * are recognized.</li> |
|
2690 * </ul> |
|
2691 * |
|
2692 * Same as java.lang.Character.digit(). |
|
2693 * |
|
2694 * @param ch the code point to be tested. |
|
2695 * @param radix the radix. |
|
2696 * @return the numeric value represented by the character in the |
|
2697 * specified radix, |
|
2698 * or -1 if there is no value or if the value exceeds the radix. |
|
2699 * |
|
2700 * @see UCHAR_NUMERIC_TYPE |
|
2701 * @see u_forDigit |
|
2702 * @see u_charDigitValue |
|
2703 * @see u_isdigit |
|
2704 * @stable ICU 2.0 |
|
2705 */ |
|
2706 U_STABLE int32_t U_EXPORT2 |
|
2707 u_digit(UChar32 ch, int8_t radix); |
|
2708 |
|
2709 /** |
|
2710 * Determines the character representation for a specific digit in |
|
2711 * the specified radix. If the value of <code>radix</code> is not a |
|
2712 * valid radix, or the value of <code>digit</code> is not a valid |
|
2713 * digit in the specified radix, the null character |
|
2714 * (<code>U+0000</code>) is returned. |
|
2715 * <p> |
|
2716 * The <code>radix</code> argument is valid if it is greater than or |
|
2717 * equal to 2 and less than or equal to 36. |
|
2718 * The <code>digit</code> argument is valid if |
|
2719 * <code>0 <= digit < radix</code>. |
|
2720 * <p> |
|
2721 * If the digit is less than 10, then |
|
2722 * <code>'0' + digit</code> is returned. Otherwise, the value |
|
2723 * <code>'a' + digit - 10</code> is returned. |
|
2724 * |
|
2725 * Same as java.lang.Character.forDigit(). |
|
2726 * |
|
2727 * @param digit the number to convert to a character. |
|
2728 * @param radix the radix. |
|
2729 * @return the <code>char</code> representation of the specified digit |
|
2730 * in the specified radix. |
|
2731 * |
|
2732 * @see u_digit |
|
2733 * @see u_charDigitValue |
|
2734 * @see u_isdigit |
|
2735 * @stable ICU 2.0 |
|
2736 */ |
|
2737 U_STABLE UChar32 U_EXPORT2 |
|
2738 u_forDigit(int32_t digit, int8_t radix); |
|
2739 |
|
2740 /** |
|
2741 * Get the "age" of the code point. |
|
2742 * The "age" is the Unicode version when the code point was first |
|
2743 * designated (as a non-character or for Private Use) |
|
2744 * or assigned a character. |
|
2745 * This can be useful to avoid emitting code points to receiving |
|
2746 * processes that do not accept newer characters. |
|
2747 * The data is from the UCD file DerivedAge.txt. |
|
2748 * |
|
2749 * @param c The code point. |
|
2750 * @param versionArray The Unicode version number array, to be filled in. |
|
2751 * |
|
2752 * @stable ICU 2.1 |
|
2753 */ |
|
2754 U_STABLE void U_EXPORT2 |
|
2755 u_charAge(UChar32 c, UVersionInfo versionArray); |
|
2756 |
|
2757 /** |
|
2758 * Gets the Unicode version information. |
|
2759 * The version array is filled in with the version information |
|
2760 * for the Unicode standard that is currently used by ICU. |
|
2761 * For example, Unicode version 3.1.1 is represented as an array with |
|
2762 * the values { 3, 1, 1, 0 }. |
|
2763 * |
|
2764 * @param versionArray an output array that will be filled in with |
|
2765 * the Unicode version number |
|
2766 * @stable ICU 2.0 |
|
2767 */ |
|
2768 U_STABLE void U_EXPORT2 |
|
2769 u_getUnicodeVersion(UVersionInfo versionArray); |
|
2770 |
|
2771 /** |
|
2772 * Get the FC_NFKC_Closure property string for a character. |
|
2773 * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" |
|
2774 * or for "FNC": http://www.unicode.org/reports/tr15/ |
|
2775 * |
|
2776 * @param c The character (code point) for which to get the FC_NFKC_Closure string. |
|
2777 * It must be <code>0<=c<=0x10ffff</code>. |
|
2778 * @param dest Destination address for copying the string. |
|
2779 * The string will be zero-terminated if possible. |
|
2780 * If there is no FC_NFKC_Closure string, |
|
2781 * then the buffer will be set to the empty string. |
|
2782 * @param destCapacity The capacity of the dest buffer, counted in UChars (the same units as the returned length). |
|
2783 * @param pErrorCode Pointer to a UErrorCode variable. |
|
2784 * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. |
|
2785 * If the destCapacity is less than or equal to the length, then the buffer |
|
2786 * contains the truncated string and the returned length indicates the full |
|
2787 * length of the string. |
|
2788 * The length does not include the zero-termination. |
|
2789 * |
|
2790 * @stable ICU 2.2 |
|
2791 */ |
|
2792 U_STABLE int32_t U_EXPORT2 |
|
2793 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); |
|
2794 |
|
2795 U_CDECL_END |
|
2796 |
|
2797 #endif /* UCHAR_H */ |
|
2798 /*eof*/ |