author | Simon Howkins <simonh@symbian.org> |
Mon, 15 Nov 2010 14:53:34 +0000 | |
branch | RCL_3 |
changeset 105 | 871af676edac |
parent 0 | dd21522fd290 |
permissions | -rw-r--r-- |
0
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
1 |
/* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
2 |
******************************************************************************* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
3 |
* Copyright (c) 1996-2004, International Business Machines Corporation |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
4 |
* and others. All Rights Reserved. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
5 |
******************************************************************************* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
6 |
* File unorm.h |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
7 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
8 |
* Created by: Vladimir Weinstein 12052000 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
9 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
10 |
* Modification history : |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
11 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
12 |
* Date Name Description |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
13 |
* 02/01/01 synwee Added normalization quickcheck enum and method. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
14 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
15 |
#ifndef UNORM_H |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
16 |
#define UNORM_H |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
17 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
18 |
#include "unicode/utypes.h" |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
19 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
20 |
#if !UCONFIG_NO_NORMALIZATION |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
21 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
22 |
#include "unicode/uiter.h" |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
23 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
24 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
25 |
* \file |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
26 |
* \brief C API: Unicode Normalization |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
27 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
28 |
* <h2>Unicode normalization API</h2> |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
29 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
30 |
* <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
31 |
* decomposed form, allowing for easier sorting and searching of text. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
32 |
* <code>unorm_normalize</code> supports the standard normalization forms described in |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
33 |
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
34 |
* Unicode Standard Annex #15 — Unicode Normalization Forms</a>. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
35 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
36 |
* Characters with accents or other adornments can be encoded in |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
37 |
* several different ways in Unicode. For example, take the character A-acute. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
38 |
* In Unicode, this can be encoded as a single character (the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
39 |
* "composed" form): |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
40 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
41 |
* \code |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
42 |
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
43 |
* \endcode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
44 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
45 |
* or as two separate characters (the "decomposed" form): |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
46 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
47 |
* \code |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
48 |
* 0041 LATIN CAPITAL LETTER A |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
49 |
* 0301 COMBINING ACUTE ACCENT |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
50 |
* \endcode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
51 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
52 |
* To a user of your program, however, both of these sequences should be |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
53 |
* treated as the same "user-level" character "A with acute accent". When you are searching or |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
54 |
* comparing text, you must ensure that these two sequences are treated |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
55 |
* equivalently. In addition, you must handle characters with more than one |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
56 |
* accent. Sometimes the order of a character's combining accents is |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
57 |
* significant, while in other cases accent sequences in different orders are |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
58 |
* really equivalent. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
59 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
60 |
* Similarly, the string "ffi" can be encoded as three separate letters: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
61 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
62 |
* \code |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
63 |
* 0066 LATIN SMALL LETTER F |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
64 |
* 0066 LATIN SMALL LETTER F |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
65 |
* 0069 LATIN SMALL LETTER I |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
66 |
* \endcode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
67 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
68 |
* or as the single character |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
69 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
70 |
* \code |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
71 |
* FB03 LATIN SMALL LIGATURE FFI |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
72 |
* \endcode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
73 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
74 |
* The ffi ligature is not a distinct semantic character, and strictly speaking |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
75 |
* it shouldn't be in Unicode at all, but it was included for compatibility |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
76 |
* with existing character sets that already provided it. The Unicode standard |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
77 |
* identifies such characters by giving them "compatibility" decompositions |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
78 |
* into the corresponding semantic characters. When sorting and searching, you |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
79 |
* will often want to use these mappings. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
80 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
81 |
* <code>unorm_normalize</code> helps solve these problems by transforming text into the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
82 |
* canonical composed and decomposed forms as shown in the first example above. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
83 |
* In addition, you can have it perform compatibility decompositions so that |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
84 |
* you can treat compatibility characters the same as their equivalents. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
85 |
* Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
86 |
* order, so that you do not have to worry about accent rearrangement on your |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
87 |
* own. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
88 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
89 |
* Form FCD, "Fast C or D", is also designed for collation. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
90 |
* It allows to work on strings that are not necessarily normalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
91 |
* with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
92 |
* characters and their decomposed equivalents the same. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
93 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
94 |
* It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
95 |
* may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
96 |
* themselves. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
97 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
98 |
* The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
99 |
* results in a string that is canonically ordered. This means that precomposed characters are allowed for as long |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
100 |
* as their decompositions do not need canonical reordering. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
101 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
102 |
* Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts - |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
103 |
* already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
104 |
* return UNORM_YES for most strings in practice. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
105 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
106 |
* unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
107 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
108 |
* For more details on FCD see the collation design document: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
109 |
* http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
110 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
111 |
* ICU collation performs either NFD or FCD normalization automatically if normalization |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
112 |
* is turned on for the collator object. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
113 |
* Beyond collation and string search, normalized strings may be useful for string equivalence comparisons, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
114 |
* transliteration/transcription, unique representations, etc. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
115 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
116 |
* The W3C generally recommends to exchange texts in NFC. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
117 |
* Note also that most legacy character encodings use only precomposed forms and often do not |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
118 |
* encode any combining marks by themselves. For conversion to such character encodings the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
119 |
* Unicode text needs to be normalized to NFC. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
120 |
* For more usage examples, see the Unicode Standard Annex. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
121 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
122 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
123 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
124 |
* Constants for normalization modes. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
125 |
* @stable ICU 2.0 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
126 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
127 |
typedef enum { |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
128 |
/** No decomposition/composition. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
129 |
UNORM_NONE = 1, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
130 |
/** Canonical decomposition. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
131 |
UNORM_NFD = 2, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
132 |
/** Compatibility decomposition. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
133 |
UNORM_NFKD = 3, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
134 |
/** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
135 |
UNORM_NFC = 4, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
136 |
/** Default normalization. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
137 |
UNORM_DEFAULT = UNORM_NFC, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
138 |
/** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
139 |
UNORM_NFKC =5, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
140 |
/** "Fast C or D" form. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
141 |
UNORM_FCD = 6, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
142 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
143 |
/** One more than the highest normalization mode constant. @stable ICU 2.0 */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
144 |
UNORM_MODE_COUNT |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
145 |
} UNormalizationMode; |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
146 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
147 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
148 |
* Constants for options flags for normalization. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
149 |
* Use 0 for default options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
150 |
* including normalization according to the Unicode version |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
151 |
* that is currently supported by ICU (see u_getUnicodeVersion). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
152 |
* @stable ICU 2.6 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
153 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
154 |
enum { |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
155 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
156 |
* Options bit set value to select Unicode 3.2 normalization |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
157 |
* (except NormalizationCorrections). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
158 |
* At most one Unicode version can be selected at a time. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
159 |
* @stable ICU 2.6 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
160 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
161 |
UNORM_UNICODE_3_2=0x20 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
162 |
}; |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
163 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
164 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
165 |
* Lowest-order bit number of unorm_compare() options bits corresponding to |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
166 |
* normalization options bits. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
167 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
168 |
* The options parameter for unorm_compare() uses most bits for |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
169 |
* itself and for various comparison and folding flags. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
170 |
* The most significant bits, however, are shifted down and passed on |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
171 |
* to the normalization implementation. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
172 |
* (That is, from unorm_compare(..., options, ...), |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
173 |
* options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
174 |
* internal normalization functions.) |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
175 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
176 |
* @see unorm_compare |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
177 |
* @stable ICU 2.6 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
178 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
179 |
#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
180 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
181 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
182 |
* Normalize a string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
183 |
* The string will be normalized according the specified normalization mode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
184 |
* and options. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
185 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
186 |
* @param source The string to normalize. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
187 |
* @param sourceLength The length of source, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
188 |
* @param mode The normalization mode; one of UNORM_NONE, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
189 |
* UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
190 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
191 |
* @param result A pointer to a buffer to receive the result string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
192 |
* The result string is NUL-terminated if possible. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
193 |
* @param resultLength The maximum size of result. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
194 |
* @param status A pointer to a UErrorCode to receive any errors. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
195 |
* @return The total buffer size needed; if greater than resultLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
196 |
* the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
197 |
* @stable ICU 2.0 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
198 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
199 |
U_STABLE int32_t U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
200 |
unorm_normalize(const UChar *source, int32_t sourceLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
201 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
202 |
UChar *result, int32_t resultLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
203 |
UErrorCode *status); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
204 |
#endif |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
205 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
206 |
* Result values for unorm_quickCheck(). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
207 |
* For details see Unicode Technical Report 15. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
208 |
* @stable ICU 2.0 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
209 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
210 |
typedef enum UNormalizationCheckResult { |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
211 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
212 |
* Indicates that string is not in the normalized format |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
213 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
214 |
UNORM_NO, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
215 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
216 |
* Indicates that string is in the normalized format |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
217 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
218 |
UNORM_YES, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
219 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
220 |
* Indicates that string cannot be determined if it is in the normalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
221 |
* format without further thorough checks. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
222 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
223 |
UNORM_MAYBE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
224 |
} UNormalizationCheckResult; |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
225 |
#if !UCONFIG_NO_NORMALIZATION |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
226 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
227 |
* Performing quick check on a string, to quickly determine if the string is |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
228 |
* in a particular normalization format. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
229 |
* Three types of result can be returned UNORM_YES, UNORM_NO or |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
230 |
* UNORM_MAYBE. Result UNORM_YES indicates that the argument |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
231 |
* string is in the desired normalized format, UNORM_NO determines that |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
232 |
* argument string is not in the desired normalized format. A |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
233 |
* UNORM_MAYBE result indicates that a more thorough check is required, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
234 |
* the user may have to put the string in its normalized form and compare the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
235 |
* results. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
236 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
237 |
* @param source string for determining if it is in a normalized format |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
238 |
* @param sourcelength length of source to test, or -1 if NUL-terminated |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
239 |
* @param mode which normalization form to test for |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
240 |
* @param status a pointer to a UErrorCode to receive any errors |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
241 |
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
242 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
243 |
* @see unorm_isNormalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
244 |
* @stable ICU 2.0 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
245 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
246 |
U_STABLE UNormalizationCheckResult U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
247 |
unorm_quickCheck(const UChar *source, int32_t sourcelength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
248 |
UNormalizationMode mode, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
249 |
UErrorCode *status); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
250 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
251 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
252 |
* Performing quick check on a string; same as unorm_quickCheck but |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
253 |
* takes an extra options parameter like most normalization functions. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
254 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
255 |
* @param src String that is to be tested if it is in a normalization format. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
256 |
* @param srcLength Length of source to test, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
257 |
* @param mode Which normalization form to test for. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
258 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
259 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
260 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
261 |
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
262 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
263 |
* @see unorm_quickCheck |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
264 |
* @see unorm_isNormalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
265 |
* @stable ICU 2.6 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
266 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
267 |
U_STABLE UNormalizationCheckResult U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
268 |
unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
269 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
270 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
271 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
272 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
273 |
* Test if a string is in a given normalization form. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
274 |
* This is semantically equivalent to source.equals(normalize(source, mode)) . |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
275 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
276 |
* Unlike unorm_quickCheck(), this function returns a definitive result, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
277 |
* never a "maybe". |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
278 |
* For NFD, NFKD, and FCD, both functions work exactly the same. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
279 |
* For NFC and NFKC where quickCheck may return "maybe", this function will |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
280 |
* perform further tests to arrive at a TRUE/FALSE result. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
281 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
282 |
* @param src String that is to be tested if it is in a normalization format. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
283 |
* @param srcLength Length of source to test, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
284 |
* @param mode Which normalization form to test for. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
285 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
286 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
287 |
* @return Boolean value indicating whether the source string is in the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
288 |
* "mode" normalization form. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
289 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
290 |
* @see unorm_quickCheck |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
291 |
* @stable ICU 2.2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
292 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
293 |
U_STABLE UBool U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
294 |
unorm_isNormalized(const UChar *src, int32_t srcLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
295 |
UNormalizationMode mode, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
296 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
297 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
298 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
299 |
* Test if a string is in a given normalization form; same as unorm_isNormalized but |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
300 |
* takes an extra options parameter like most normalization functions. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
301 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
302 |
* @param src String that is to be tested if it is in a normalization format. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
303 |
* @param srcLength Length of source to test, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
304 |
* @param mode Which normalization form to test for. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
305 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
306 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
307 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
308 |
* @return Boolean value indicating whether the source string is in the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
309 |
* "mode/options" normalization form. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
310 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
311 |
* @see unorm_quickCheck |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
312 |
* @see unorm_isNormalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
313 |
* @stable ICU 2.6 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
314 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
315 |
U_STABLE UBool U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
316 |
unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
317 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
318 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
319 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
320 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
321 |
* Iterative normalization forward. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
322 |
* This function (together with unorm_previous) is somewhat |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
323 |
* similar to the C++ Normalizer class (see its non-static functions). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
324 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
325 |
* Iterative normalization is useful when only a small portion of a longer |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
326 |
* string/text needs to be processed. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
327 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
328 |
* For example, the likelihood may be high that processing the first 10% of some |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
329 |
* text will be sufficient to find certain data. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
330 |
* Another example: When one wants to concatenate two normalized strings and get a |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
331 |
* normalized result, it is much more efficient to normalize just a small part of |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
332 |
* the result around the concatenation place instead of re-normalizing everything. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
333 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
334 |
* The input text is an instance of the C character iteration API UCharIterator. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
335 |
* It may wrap around a simple string, a CharacterIterator, a Replaceable, or any |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
336 |
* other kind of text object. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
337 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
338 |
* If a buffer overflow occurs, then the caller needs to reset the iterator to the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
339 |
* old index and call the function again with a larger buffer - if the caller cares |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
340 |
* for the actual output. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
341 |
* Regardless of the output buffer, the iterator will always be moved to the next |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
342 |
* normalization boundary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
343 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
344 |
* This function (like unorm_previous) serves two purposes: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
345 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
346 |
* 1) To find the next boundary so that the normalization of the part of the text |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
347 |
* from the current position to that boundary does not affect and is not affected |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
348 |
* by the part of the text beyond that boundary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
349 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
350 |
* 2) To normalize the text up to the boundary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
351 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
352 |
* The second step is optional, per the doNormalize parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
353 |
* It is omitted for operations like string concatenation, where the two adjacent |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
354 |
* string ends need to be normalized together. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
355 |
* In such a case, the output buffer will just contain a copy of the text up to the |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
356 |
* boundary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
357 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
358 |
* pNeededToNormalize is an output-only parameter. Its output value is only defined |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
359 |
* if normalization was requested (doNormalize) and successful (especially, no |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
360 |
* buffer overflow). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
361 |
* It is useful for operations like a normalizing transliterator, where one would |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
362 |
* not want to replace a piece of text if it is not modified. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
363 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
364 |
* If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
365 |
* if the normalization was necessary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
366 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
367 |
* If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
368 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
369 |
* If the buffer overflows, then *pNeededToNormalize will be undefined; |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
370 |
* essentially, whenever U_FAILURE is true (like in buffer overflows), this result |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
371 |
* will be undefined. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
372 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
373 |
* @param src The input text in the form of a C character iterator. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
374 |
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
375 |
* @param destCapacity The number of UChars that fit into dest. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
376 |
* @param mode The normalization mode. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
377 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
378 |
* @param doNormalize Indicates if the source text up to the next boundary |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
379 |
* is to be normalized (TRUE) or just copied (FALSE). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
380 |
* @param pNeededToNormalize Output flag indicating if the normalization resulted in |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
381 |
* different text from the input. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
382 |
* Not defined if an error occurs including buffer overflow. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
383 |
* Always FALSE if !doNormalize. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
384 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
385 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
386 |
* @return Length of output (number of UChars) when successful or buffer overflow. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
387 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
388 |
* @see unorm_previous |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
389 |
* @see unorm_normalize |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
390 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
391 |
* @stable ICU 2.1 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
392 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
393 |
U_STABLE int32_t U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
394 |
unorm_next(UCharIterator *src, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
395 |
UChar *dest, int32_t destCapacity, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
396 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
397 |
UBool doNormalize, UBool *pNeededToNormalize, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
398 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
399 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
400 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
401 |
* Iterative normalization backward. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
402 |
* This function (together with unorm_next) is somewhat |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
403 |
* similar to the C++ Normalizer class (see its non-static functions). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
404 |
* For all details see unorm_next. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
405 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
406 |
* @param src The input text in the form of a C character iterator. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
407 |
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
408 |
* @param destCapacity The number of UChars that fit into dest. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
409 |
* @param mode The normalization mode. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
410 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
411 |
* @param doNormalize Indicates if the source text up to the next boundary |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
412 |
* is to be normalized (TRUE) or just copied (FALSE). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
413 |
* @param pNeededToNormalize Output flag indicating if the normalization resulted in |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
414 |
* different text from the input. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
415 |
* Not defined if an error occurs including buffer overflow. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
416 |
* Always FALSE if !doNormalize. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
417 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
418 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
419 |
* @return Length of output (number of UChars) when successful or buffer overflow. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
420 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
421 |
* @see unorm_next |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
422 |
* @see unorm_normalize |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
423 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
424 |
* @stable ICU 2.1 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
425 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
426 |
U_STABLE int32_t U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
427 |
unorm_previous(UCharIterator *src, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
428 |
UChar *dest, int32_t destCapacity, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
429 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
430 |
UBool doNormalize, UBool *pNeededToNormalize, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
431 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
432 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
433 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
434 |
* Concatenate normalized strings, making sure that the result is normalized as well. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
435 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
436 |
* If both the left and the right strings are in |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
437 |
* the normalization form according to "mode/options", |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
438 |
* then the result will be |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
439 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
440 |
* \code |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
441 |
* dest=normalize(left+right, mode, options) |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
442 |
* \endcode |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
443 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
444 |
* With the input strings already being normalized, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
445 |
* this function will use unorm_next() and unorm_previous() |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
446 |
* to find the adjacent end pieces of the input strings. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
447 |
* Only the concatenation of these end pieces will be normalized and |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
448 |
* then concatenated with the remaining parts of the input strings. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
449 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
450 |
* It is allowed to have dest==left to avoid copying the entire left string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
451 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
452 |
* @param left Left source string, may be same as dest. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
453 |
* @param leftLength Length of left source string, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
454 |
* @param right Right source string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
455 |
* @param rightLength Length of right source string, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
456 |
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
457 |
* @param destCapacity The number of UChars that fit into dest. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
458 |
* @param mode The normalization mode. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
459 |
* @param options The normalization options, ORed together (0 for no options). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
460 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
461 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
462 |
* @return Length of output (number of UChars) when successful or buffer overflow. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
463 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
464 |
* @see unorm_normalize |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
465 |
* @see unorm_next |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
466 |
* @see unorm_previous |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
467 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
468 |
* @stable ICU 2.1 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
469 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
470 |
U_STABLE int32_t U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
471 |
unorm_concatenate(const UChar *left, int32_t leftLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
472 |
const UChar *right, int32_t rightLength, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
473 |
UChar *dest, int32_t destCapacity, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
474 |
UNormalizationMode mode, int32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
475 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
476 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
477 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
478 |
* Option bit for unorm_compare: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
479 |
* Both input strings are assumed to fulfill FCD conditions. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
480 |
* @stable ICU 2.2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
481 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
482 |
#define UNORM_INPUT_IS_FCD 0x20000 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
483 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
484 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
485 |
* Option bit for unorm_compare: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
486 |
* Perform case-insensitive comparison. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
487 |
* @stable ICU 2.2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
488 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
489 |
#define U_COMPARE_IGNORE_CASE 0x10000 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
490 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
491 |
#ifndef U_COMPARE_CODE_POINT_ORDER |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
492 |
/* see also unistr.h and ustring.h */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
493 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
494 |
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
495 |
* Compare strings in code point order instead of code unit order. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
496 |
* @stable ICU 2.2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
497 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
498 |
#define U_COMPARE_CODE_POINT_ORDER 0x8000 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
499 |
#endif |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
500 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
501 |
/** |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
502 |
* Compare two strings for canonical equivalence. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
503 |
* Further options include case-insensitive comparison and |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
504 |
* code point order (as opposed to code unit order). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
505 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
506 |
* Canonical equivalence between two strings is defined as their normalized |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
507 |
* forms (NFD or NFC) being identical. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
508 |
* This function compares strings incrementally instead of normalizing |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
509 |
* (and optionally case-folding) both strings entirely, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
510 |
* improving performance significantly. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
511 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
512 |
* Bulk normalization is only necessary if the strings do not fulfill the FCD |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
513 |
* conditions. Only in this case, and only if the strings are relatively long, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
514 |
* is memory allocated temporarily. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
515 |
* For FCD strings and short non-FCD strings there is no memory allocation. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
516 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
517 |
* Semantically, this is equivalent to |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
518 |
* strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
519 |
* where code point order and foldCase are all optional. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
520 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
521 |
* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
522 |
* the case folding must be performed first, then the normalization. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
523 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
524 |
* @param s1 First source string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
525 |
* @param length1 Length of first source string, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
526 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
527 |
* @param s2 Second source string. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
528 |
* @param length2 Length of second source string, or -1 if NUL-terminated. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
529 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
530 |
* @param options A bit set of options: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
531 |
* - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
532 |
* Case-sensitive comparison in code unit order, and the input strings |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
533 |
* are quick-checked for FCD. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
534 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
535 |
* - UNORM_INPUT_IS_FCD |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
536 |
* Set if the caller knows that both s1 and s2 fulfill the FCD conditions. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
537 |
* If not set, the function will quickCheck for FCD |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
538 |
* and normalize if necessary. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
539 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
540 |
* - U_COMPARE_CODE_POINT_ORDER |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
541 |
* Set to choose code point order instead of code unit order |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
542 |
* (see u_strCompare for details). |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
543 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
544 |
* - U_COMPARE_IGNORE_CASE |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
545 |
* Set to compare strings case-insensitively using case folding, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
546 |
* instead of case-sensitively. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
547 |
* If set, then the following case folding options are used. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
548 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
549 |
* - Options as used with case-insensitive comparisons, currently: |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
550 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
551 |
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
552 |
* (see u_strCaseCompare for details) |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
553 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
554 |
* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
555 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
556 |
* @param pErrorCode ICU error code in/out parameter. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
557 |
* Must fulfill U_SUCCESS before the function call. |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
558 |
* @return <0 or 0 or >0 as usual for string comparisons |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
559 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
560 |
* @see unorm_normalize |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
561 |
* @see UNORM_FCD |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
562 |
* @see u_strCompare |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
563 |
* @see u_strCaseCompare |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
564 |
* |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
565 |
* @stable ICU 2.2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
566 |
*/ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
567 |
U_STABLE int32_t U_EXPORT2 |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
568 |
unorm_compare(const UChar *s1, int32_t length1, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
569 |
const UChar *s2, int32_t length2, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
570 |
uint32_t options, |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
571 |
UErrorCode *pErrorCode); |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
572 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
573 |
#endif /* #if !UCONFIG_NO_NORMALIZATION */ |
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
574 |
|
dd21522fd290
Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff
changeset
|
575 |
#endif |