|
/*************************************************
*       Unicode Property Table handler           *
*************************************************/
|
4 |
|
5 #ifndef _UCP_H |
|
6 #define _UCP_H |
|
7 |
|
/* This file contains definitions of the property values that are returned by
the function _pcre_ucp_findprop(). New values that are added for new releases
of Unicode should always be at the end of each enum, for backwards
compatibility. */
|
12 |
|
/* These are the general character categories. The enumerators are numbered
implicitly, starting at zero, so their order defines their values: add any new
category at the end only, to keep the existing values unchanged. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
|
24 |
|
/* These are the particular character types. Each name is a two-letter code
whose first letter is one of the general category letters (C, L, M, N, P, S,
Z). The enumerators are numbered implicitly, starting at zero, so their order
defines their values: add any new type at the end only, to keep the existing
values unchanged. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
|
59 |
|
/* These are the script identifications. The enumerators are numbered
implicitly, starting at zero, so their order defines their values. The initial
group is in alphabetical order; scripts introduced later are appended in
per-release groups (marked below) instead of being merged into that order, so
that the existing values remain unchanged. Add any new script at the end. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai
};
|
143 |
|
144 #endif |
|
145 |
|
/* End of ucp.h */