|
1 #ifndef Py_UNICODEOBJECT_H |
|
2 #define Py_UNICODEOBJECT_H |
|
3 |
|
4 #include <stdarg.h> |
|
5 |
|
6 /* |
|
7 |
|
8 Unicode implementation based on original code by Fredrik Lundh, |
|
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the |
|
10 Unicode Integration Proposal (see file Misc/unicode.txt). |
|
11 |
|
12 Copyright (c) Corporation for National Research Initiatives. |
|
13 |
|
14 |
|
15 Original header: |
|
16 -------------------------------------------------------------------- |
|
17 |
|
18 * Yet another Unicode string type for Python. This type supports the |
|
19 * 16-bit Basic Multilingual Plane (BMP) only. |
|
20 * |
|
21 * Written by Fredrik Lundh, January 1999. |
|
22 * |
|
23 * Copyright (c) 1999 by Secret Labs AB. |
|
24 * Copyright (c) 1999 by Fredrik Lundh. |
|
25 * |
|
26 * fredrik@pythonware.com |
|
27 * http://www.pythonware.com |
|
28 * |
|
29 * -------------------------------------------------------------------- |
|
30 * This Unicode String Type is |
|
31 * |
|
32 * Copyright (c) 1999 by Secret Labs AB |
|
33 * Copyright (c) 1999 by Fredrik Lundh |
|
34 * |
|
35 * By obtaining, using, and/or copying this software and/or its |
|
36 * associated documentation, you agree that you have read, understood, |
|
37 * and will comply with the following terms and conditions: |
|
38 * |
|
39 * Permission to use, copy, modify, and distribute this software and its |
|
40 * associated documentation for any purpose and without fee is hereby |
|
41 * granted, provided that the above copyright notice appears in all |
|
42 * copies, and that both that copyright notice and this permission notice |
|
43 * appear in supporting documentation, and that the name of Secret Labs |
|
44 * AB or the author not be used in advertising or publicity pertaining to |
|
45 * distribution of the software without specific, written prior |
|
46 * permission. |
|
47 * |
|
48 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO |
|
49 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
50 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR |
|
51 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
52 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
|
53 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
|
54 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
55 * -------------------------------------------------------------------- */ |
|
56 |
|
57 #include <ctype.h> |
|
58 |
|
59 /* === Internal API ======================================================= */ |
|
60 |
|
61 /* --- Internal Unicode Format -------------------------------------------- */ |
|
62 |
|
63 #ifndef Py_USING_UNICODE |
|
64 |
|
/* Py_USING_UNICODE is not defined: both type checks are constant
   false.  The argument is not evaluated. */
#define PyUnicode_Check(op)      0
#define PyUnicode_CheckExact(op) 0
|
67 |
|
68 #else |
|
69 |
|
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it.  I'll
   sort this out some other day -- fredrik@pythonware.com */
|
73 |
|
74 #ifndef Py_UNICODE_SIZE |
|
75 #error Must define Py_UNICODE_SIZE |
|
76 #endif |
|
77 |
|
78 /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode |
|
79 strings are stored as UCS-2 (with limited support for UTF-16) */ |
|
80 |
|
81 #if Py_UNICODE_SIZE >= 4 |
|
82 #define Py_UNICODE_WIDE |
|
83 #endif |
|
84 |
|
85 /* Set these flags if the platform has "wchar.h", "wctype.h" and the |
|
86 wchar_t type is a 16-bit unsigned type */ |
|
87 /* #define HAVE_WCHAR_H */ |
|
88 /* #define HAVE_USABLE_WCHAR_T */ |
|
89 |
|
/* Platform defaults for PY_UNICODE_TYPE, applied only when no type has
   been chosen already. */
#if !defined(PY_UNICODE_TYPE)

/* Windows has a usable wchar_t type (unless we're using UCS-4). */
#  if Py_UNICODE_SIZE == 2 && defined(MS_WIN32)
#    define HAVE_USABLE_WCHAR_T
#    define PY_UNICODE_TYPE wchar_t
#  endif

/* Wide builds store their data as Py_UCS4. */
#  ifdef Py_UNICODE_WIDE
#    define PY_UNICODE_TYPE Py_UCS4
#  endif

#endif /* !PY_UNICODE_TYPE */
|
104 |
|
/* If the compiler provides a wchar_t type we try to support it through
   the interface functions PyUnicode_FromWideChar() and
   PyUnicode_AsWideChar(). */

/* A usable wchar_t forces HAVE_WCHAR_H on. */
#if defined(HAVE_USABLE_WCHAR_T) && !defined(HAVE_WCHAR_H)
#  define HAVE_WCHAR_H
#endif

#if defined(HAVE_WCHAR_H)
/* BSDI 4.x: <time.h> goes in ahead of <wchar.h> to work around a
   cosmetic bug in that platform's wchar.h (thanks to Thomas Wouters). */
#  if defined(_HAVE_BSDI)
#    include <time.h>
#  endif
#  include <wchar.h>
#endif
|
122 |
|
123 /* |
|
124 * Use this typedef when you need to represent a UTF-16 surrogate pair |
|
125 * as single unsigned integer. |
|
126 */ |
|
127 #if SIZEOF_INT >= 4 |
|
128 typedef unsigned int Py_UCS4; |
|
129 #elif SIZEOF_LONG >= 4 |
|
130 typedef unsigned long Py_UCS4; |
|
131 #endif |
|
132 |
|
133 typedef PY_UNICODE_TYPE Py_UNICODE; |
|
134 |
|
135 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */ |
|
136 |
|
/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
   produce different external names, and thus cause import errors when
   a Python interpreter is combined with extensions that were compiled
   with a different Unicode width assumption. */
|
141 |
|
142 #ifndef Py_UNICODE_WIDE |
|
143 |
|
144 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString |
|
145 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString |
|
146 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject |
|
147 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString |
|
148 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String |
|
149 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString |
|
150 # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String |
|
151 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String |
|
152 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String |
|
153 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode |
|
154 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString |
|
155 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar |
|
156 # define PyUnicode_Compare PyUnicodeUCS2_Compare |
|
157 # define PyUnicode_Concat PyUnicodeUCS2_Concat |
|
158 # define PyUnicode_Contains PyUnicodeUCS2_Contains |
|
159 # define PyUnicode_Count PyUnicodeUCS2_Count |
|
160 # define PyUnicode_Decode PyUnicodeUCS2_Decode |
|
161 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII |
|
162 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap |
|
163 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 |
|
164 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape |
|
165 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 |
|
166 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful |
|
167 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 |
|
168 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful |
|
169 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 |
|
170 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful |
|
171 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape |
|
172 # define PyUnicode_Encode PyUnicodeUCS2_Encode |
|
173 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII |
|
174 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap |
|
175 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal |
|
176 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1 |
|
177 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape |
|
178 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32 |
|
179 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16 |
|
180 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8 |
|
181 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape |
|
182 # define PyUnicode_Find PyUnicodeUCS2_Find |
|
183 # define PyUnicode_Format PyUnicodeUCS2_Format |
|
184 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject |
|
185 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject |
|
186 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal |
|
187 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode |
|
188 # define PyUnicode_FromString PyUnicodeUCS2_FromString |
|
189 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize |
|
190 # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV |
|
191 # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat |
|
192 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar |
|
193 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding |
|
194 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax |
|
195 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize |
|
196 # define PyUnicode_Join PyUnicodeUCS2_Join |
|
197 # define PyUnicode_Partition PyUnicodeUCS2_Partition |
|
198 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition |
|
199 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit |
|
200 # define PyUnicode_Replace PyUnicodeUCS2_Replace |
|
201 # define PyUnicode_Resize PyUnicodeUCS2_Resize |
|
202 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare |
|
203 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding |
|
204 # define PyUnicode_Split PyUnicodeUCS2_Split |
|
205 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines |
|
206 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch |
|
207 # define PyUnicode_Translate PyUnicodeUCS2_Translate |
|
208 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap |
|
209 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString |
|
210 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini |
|
211 # define _PyUnicode_Init _PyUnicodeUCS2_Init |
|
212 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist |
|
213 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha |
|
214 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit |
|
215 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit |
|
216 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak |
|
217 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase |
|
218 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric |
|
219 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase |
|
220 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase |
|
221 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace |
|
222 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit |
|
223 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit |
|
224 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase |
|
225 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric |
|
226 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase |
|
227 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase |
|
228 |
|
229 #else |
|
230 |
|
231 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString |
|
232 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString |
|
233 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject |
|
234 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString |
|
235 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String |
|
236 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString |
|
237 # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String |
|
238 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String |
|
239 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String |
|
240 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode |
|
241 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString |
|
242 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar |
|
243 # define PyUnicode_Compare PyUnicodeUCS4_Compare |
|
244 # define PyUnicode_Concat PyUnicodeUCS4_Concat |
|
245 # define PyUnicode_Contains PyUnicodeUCS4_Contains |
|
246 # define PyUnicode_Count PyUnicodeUCS4_Count |
|
247 # define PyUnicode_Decode PyUnicodeUCS4_Decode |
|
248 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII |
|
249 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap |
|
250 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 |
|
251 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape |
|
252 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 |
|
253 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful |
|
254 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 |
|
255 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful |
|
256 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 |
|
257 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful |
|
258 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape |
|
259 # define PyUnicode_Encode PyUnicodeUCS4_Encode |
|
260 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII |
|
261 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap |
|
262 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal |
|
263 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1 |
|
264 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape |
|
265 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32 |
|
266 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16 |
|
267 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8 |
|
268 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape |
|
269 # define PyUnicode_Find PyUnicodeUCS4_Find |
|
270 # define PyUnicode_Format PyUnicodeUCS4_Format |
|
271 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject |
|
272 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject |
|
273 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal |
|
274 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode |
|
275 # define PyUnicode_FromString PyUnicodeUCS4_FromString |
|
276 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize |
|
277 # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV |
|
278 # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat |
|
279 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar |
|
280 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding |
|
281 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax |
|
282 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize |
|
283 # define PyUnicode_Join PyUnicodeUCS4_Join |
|
284 # define PyUnicode_Partition PyUnicodeUCS4_Partition |
|
285 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition |
|
286 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit |
|
287 # define PyUnicode_Replace PyUnicodeUCS4_Replace |
|
288 # define PyUnicode_Resize PyUnicodeUCS4_Resize |
|
289 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare |
|
290 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding |
|
291 # define PyUnicode_Split PyUnicodeUCS4_Split |
|
292 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines |
|
293 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch |
|
294 # define PyUnicode_Translate PyUnicodeUCS4_Translate |
|
295 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap |
|
296 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString |
|
297 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini |
|
298 # define _PyUnicode_Init _PyUnicodeUCS4_Init |
|
/* Wide (UCS-4) branch: alias the UCS4-mangled symbol, like every other
   entry in this branch.  Pointing at the UCS2 name here would exempt
   this one function from the UCS-2/UCS-4 link-time separation that the
   name mangling exists to enforce. */
# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
|
300 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha |
|
301 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit |
|
302 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit |
|
303 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak |
|
304 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase |
|
305 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric |
|
306 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase |
|
307 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase |
|
308 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace |
|
309 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit |
|
310 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit |
|
311 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase |
|
312 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric |
|
313 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase |
|
314 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase |
|
315 |
|
316 |
|
317 #endif |
|
318 |
|
319 /* --- Internal Unicode Operations ---------------------------------------- */ |
|
320 |
|
321 /* If you want Python to use the compiler's wctype.h functions instead |
|
322 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or |
|
323 configure Python using --with-wctype-functions. This reduces the |
|
324 interpreter's code size. */ |
|
325 |
|
326 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) |
|
327 |
|
328 #include <wctype.h> |
|
329 |
|
330 #define Py_UNICODE_ISSPACE(ch) iswspace(ch) |
|
331 |
|
332 #define Py_UNICODE_ISLOWER(ch) iswlower(ch) |
|
333 #define Py_UNICODE_ISUPPER(ch) iswupper(ch) |
|
334 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) |
|
335 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) |
|
336 |
|
337 #define Py_UNICODE_TOLOWER(ch) towlower(ch) |
|
338 #define Py_UNICODE_TOUPPER(ch) towupper(ch) |
|
339 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) |
|
340 |
|
341 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) |
|
342 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) |
|
343 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) |
|
344 |
|
345 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) |
|
346 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) |
|
347 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) |
|
348 |
|
349 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch) |
|
350 |
|
351 #else |
|
352 |
|
353 /* Since splitting on whitespace is an important use case, and whitespace |
|
354 in most situations is solely ASCII whitespace, we optimize for the common |
|
355 case by using a quick look-up table with an inlined check. |
|
356 */ |
|
357 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; |
|
358 |
|
359 #define Py_UNICODE_ISSPACE(ch) \ |
|
360 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) |
|
361 |
|
362 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) |
|
363 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) |
|
364 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) |
|
365 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) |
|
366 |
|
367 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) |
|
368 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) |
|
369 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) |
|
370 |
|
371 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) |
|
372 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) |
|
373 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) |
|
374 |
|
375 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) |
|
376 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) |
|
377 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) |
|
378 |
|
379 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) |
|
380 |
|
381 #endif |
|
382 |
|
/* Non-zero when ch passes any of the ISALPHA, ISDECIMAL, ISDIGIT or
   ISNUMERIC tests (checked in that order, stopping at the first hit).
   ch may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch)        \
    (Py_UNICODE_ISALPHA(ch)   ||      \
     Py_UNICODE_ISDECIMAL(ch) ||      \
     Py_UNICODE_ISDIGIT(ch)   ||      \
     Py_UNICODE_ISNUMERIC(ch))
|
388 |
|
/* Copy `length` Py_UNICODE elements from source to target via
   Py_MEMCPY; the element count is scaled to a byte count here. */
#define Py_UNICODE_COPY(target, source, length)                     \
    Py_MEMCPY((target), (source), (length) * sizeof(Py_UNICODE))
|
391 |
|
/* Store `value` into the first `length` Py_UNICODE elements of
   `target`.

   Every argument is evaluated exactly once (the element count is
   captured in n_ before the loop), so an argument with side effects is
   safe to pass.  The expansion is a single statement wrapped in
   do { ... } while (0); terminate it with a semicolon. */
#define Py_UNICODE_FILL(target, value, length)                      \
    do {                                                            \
        Py_UNICODE *t_ = (target);                                  \
        Py_UNICODE v_ = (value);                                    \
        Py_ssize_t n_ = (length);                                   \
        Py_ssize_t i_;                                              \
        for (i_ = 0; i_ < n_; i_++)                                 \
            t_[i_] = v_;                                            \
    } while (0)
|
396 |
|
/* Check whether substring occurs in string at the given offset.  The
   offset must be valid and the substring must not be empty.

   The first and the last element are compared before falling back to
   memcmp() over the whole substring.  The arguments are evaluated
   several times. */
#define Py_UNICODE_MATCH(string, offset, substring)                         \
    (((string)->str + (offset))[0] == (substring)->str[0] &&                \
     ((string)->str + (offset))[(substring)->length - 1] ==                 \
         (substring)->str[(substring)->length - 1] &&                       \
     memcmp((string)->str + (offset), (substring)->str,                     \
            (substring)->length * sizeof(Py_UNICODE)) == 0)
|
403 |
|
404 #ifdef __cplusplus |
|
405 extern "C" { |
|
406 #endif |
|
407 |
|
408 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); |
|
409 |
|
410 /* --- Unicode Type ------------------------------------------------------- */ |
|
411 |
|
412 typedef struct { |
|
413 PyObject_HEAD |
|
414 Py_ssize_t length; /* Length of raw Unicode data in buffer */ |
|
415 Py_UNICODE *str; /* Raw Unicode buffer */ |
|
416 long hash; /* Hash value; -1 if not set */ |
|
417 PyObject *defenc; /* (Default) Encoded version as Python |
|
418 string, or NULL; this is used for |
|
419 implementing the buffer protocol */ |
|
420 } PyUnicodeObject; |
|
421 |
|
422 PyAPI_DATA(PyTypeObject) PyUnicode_Type; |
|
423 |
|
424 #define PyUnicode_Check(op) \ |
|
425 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) |
|
426 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) |
|
427 |
|
/* Fast access macros.  op is cast to PyUnicodeObject * without any
   type check. */

/* Length in Py_UNICODE elements. */
#define PyUnicode_GET_SIZE(op) (((PyUnicodeObject *)(op))->length)

/* Length in bytes. */
#define PyUnicode_GET_DATA_SIZE(op)                                     \
    (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))

/* The raw buffer as Py_UNICODE *. */
#define PyUnicode_AS_UNICODE(op) (((PyUnicodeObject *)(op))->str)

/* The raw buffer viewed as const char *. */
#define PyUnicode_AS_DATA(op)                                           \
    ((const char *)((PyUnicodeObject *)(op))->str)
|
437 |
|
438 /* --- Constants ---------------------------------------------------------- */ |
|
439 |
|
/* Replacement character used during decoding when the errors argument
   is set to "replace".  Note: U+FFFD is the official REPLACEMENT
   CHARACTER in Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE)0xFFFD)
|
446 |
|
447 /* === Public API ========================================================= */ |
|
448 |
|
449 /* --- Plain Py_UNICODE --------------------------------------------------- */ |
|
450 |
|
451 /* Create a Unicode Object from the Py_UNICODE buffer u of the given |
|
452 size. |
|
453 |
|
454 u may be NULL which causes the contents to be undefined. It is the |
|
455 user's responsibility to fill in the needed data afterwards. Note |
|
456 that modifying the Unicode object contents after construction is |
|
457 only allowed if u was set to NULL. |
|
458 |
|
459 The buffer is copied into the new object. */ |
|
460 |
|
461 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( |
|
462 const Py_UNICODE *u, /* Unicode buffer */ |
|
463 Py_ssize_t size /* size of buffer */ |
|
464 ); |
|
465 |
|
466 /* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ |
|
467 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( |
|
468 const char *u, /* char buffer */ |
|
469 Py_ssize_t size /* size of buffer */ |
|
470 ); |
|
471 |
|
472 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated |
|
473 Latin-1 encoded bytes */ |
|
474 PyAPI_FUNC(PyObject*) PyUnicode_FromString( |
|
475 const char *u /* string */ |
|
476 ); |
|
477 |
|
478 /* Return a read-only pointer to the Unicode object's internal |
|
479 Py_UNICODE buffer. */ |
|
480 |
|
481 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( |
|
482 PyObject *unicode /* Unicode object */ |
|
483 ); |
|
484 |
|
485 /* Get the length of the Unicode object. */ |
|
486 |
|
487 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( |
|
488 PyObject *unicode /* Unicode object */ |
|
489 ); |
|
490 |
|
491 /* Get the maximum ordinal for a Unicode character. */ |
|
492 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); |
|
493 |
|
494 /* Resize an already allocated Unicode object to the new size length. |
|
495 |
|
496 *unicode is modified to point to the new (resized) object and 0 |
|
497 returned on success. |
|
498 |
|
499 This API may only be called by the function which also called the |
|
500 Unicode constructor. The refcount on the object must be 1. Otherwise, |
|
501 an error is returned. |
|
502 |
|
503 Error handling is implemented as follows: an exception is set, -1 |
|
504 is returned and *unicode left untouched. |
|
505 |
|
506 */ |
|
507 |
|
508 PyAPI_FUNC(int) PyUnicode_Resize( |
|
509 PyObject **unicode, /* Pointer to the Unicode object */ |
|
510 Py_ssize_t length /* New length */ |
|
511 ); |
|
512 |
|
513 /* Coerce obj to an Unicode object and return a reference with |
|
514 *incremented* refcount. |
|
515 |
|
516 Coercion is done in the following way: |
|
517 |
|
518 1. String and other char buffer compatible objects are decoded |
|
519 under the assumptions that they contain data using the current |
|
520 default encoding. Decoding is done in "strict" mode. |
|
521 |
|
522 2. All other objects (including Unicode objects) raise an |
|
523 exception. |
|
524 |
|
525 The API returns NULL in case of an error. The caller is responsible |
|
526 for decref'ing the returned objects. |
|
527 |
|
528 */ |
|
529 |
|
530 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( |
|
531 register PyObject *obj, /* Object */ |
|
532 const char *encoding, /* encoding */ |
|
533 const char *errors /* error handling */ |
|
534 ); |
|
535 |
|
536 /* Coerce obj to an Unicode object and return a reference with |
|
537 *incremented* refcount. |
|
538 |
|
539 Unicode objects are passed back as-is (subclasses are converted to |
|
540 true Unicode objects), all other objects are delegated to |
|
541 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in |
|
542 using the default encoding as basis for decoding the object. |
|
543 |
|
544 The API returns NULL in case of an error. The caller is responsible |
|
545 for decref'ing the returned objects. |
|
546 |
|
547 */ |
|
548 |
|
549 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( |
|
550 register PyObject *obj /* Object */ |
|
551 ); |
|
552 |
|
553 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); |
|
554 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); |
|
555 |
|
556 /* Format the object based on the format_spec, as defined in PEP 3101 |
|
557 (Advanced String Formatting). */ |
|
558 PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, |
|
559 Py_UNICODE *format_spec, |
|
560 Py_ssize_t format_spec_len); |
|
561 |
|
562 /* --- wchar_t support for platforms which support it --------------------- */ |
|
563 |
|
564 #ifdef HAVE_WCHAR_H |
|
565 |
|
566 /* Create a Unicode Object from the whcar_t buffer w of the given |
|
567 size. |
|
568 |
|
569 The buffer is copied into the new object. */ |
|
570 |
|
571 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( |
|
572 register const wchar_t *w, /* wchar_t buffer */ |
|
573 Py_ssize_t size /* size of buffer */ |
|
574 ); |
|
575 |
|
576 /* Copies the Unicode Object contents into the wchar_t buffer w. At |
|
577 most size wchar_t characters are copied. |
|
578 |
|
579 Note that the resulting wchar_t string may or may not be |
|
580 0-terminated. It is the responsibility of the caller to make sure |
|
581 that the wchar_t string is 0-terminated in case this is required by |
|
582 the application. |
|
583 |
|
584 Returns the number of wchar_t characters copied (excluding a |
|
585 possibly trailing 0-termination character) or -1 in case of an |
|
586 error. */ |
|
587 |
|
588 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( |
|
589 PyUnicodeObject *unicode, /* Unicode object */ |
|
590 register wchar_t *w, /* wchar_t buffer */ |
|
591 Py_ssize_t size /* size of buffer */ |
|
592 ); |
|
593 |
|
594 #endif |
|
595 |
|
596 /* --- Unicode ordinals --------------------------------------------------- */ |
|
597 |
|
598 /* Create a Unicode Object from the given Unicode code point ordinal. |
|
599 |
|
600 The ordinal must be in range(0x10000) on narrow Python builds |
|
601 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is |
|
602 raised in case it is not. |
|
603 |
|
604 */ |
|
605 |
|
606 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); |
|
607 |
|
/* === Builtin Codecs =====================================================

   Many of these APIs take two arguments, encoding and errors.  These
   parameters have the same semantics as the ones of the builtin
   unicode() API.

   Setting encoding to NULL causes the default encoding to be used.

   Error handling is set by errors, which may also be set to NULL,
   meaning to use the default handling defined for the codec.  Default
   error handling for all builtin codecs is "strict" (ValueErrors are
   raised).

   The codecs all use a similar interface.  Only deviations from the
   generic ones are documented.

*/
|
625 |
|
626 /* --- Manage the default encoding ---------------------------------------- */ |
|
627 |
|
628 /* Return a Python string holding the default encoded value of the |
|
629 Unicode object. |
|
630 |
|
631 The resulting string is cached in the Unicode object for subsequent |
|
632 usage by this function. The cached version is needed to implement |
|
633 the character buffer interface and will live (at least) as long as |
|
634 the Unicode object itself. |
|
635 |
|
636 The refcount of the string is *not* incremented. |
|
637 |
|
638 *** Exported for internal use by the interpreter only !!! *** |
|
639 |
|
640 */ |
|
641 |
|
642 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( |
|
643 PyObject *, const char *); |
|
644 |
|
645 /* Returns the currently active default encoding. |
|
646 |
|
647 The default encoding is currently implemented as run-time settable |
|
648 process global. This may change in future versions of the |
|
649 interpreter to become a parameter which is managed on a per-thread |
|
650 basis. |
|
651 |
|
652 */ |
|
653 |
|
654 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); |
|
655 |
|
656 /* Sets the currently active default encoding. |
|
657 |
|
658 Returns 0 on success, -1 in case of an error. |
|
659 |
|
660 */ |
|
661 |
|
662 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding( |
|
663 const char *encoding /* Encoding name in standard form */ |
|
664 ); |
|
665 |
|
666 /* --- Generic Codecs ----------------------------------------------------- */ |
|
667 |
|
668 /* Create a Unicode object by decoding the encoded string s of the |
|
669 given size. */ |
|
670 |
|
671 PyAPI_FUNC(PyObject*) PyUnicode_Decode( |
|
672 const char *s, /* encoded string */ |
|
673 Py_ssize_t size, /* size of buffer */ |
|
674 const char *encoding, /* encoding */ |
|
675 const char *errors /* error handling */ |
|
676 ); |
|
677 |
|
678 /* Encodes a Py_UNICODE buffer of the given size and returns a |
|
679 Python string object. */ |
|
680 |
|
681 PyAPI_FUNC(PyObject*) PyUnicode_Encode( |
|
682 const Py_UNICODE *s, /* Unicode char buffer */ |
|
683 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ |
|
684 const char *encoding, /* encoding */ |
|
685 const char *errors /* error handling */ |
|
686 ); |
|
687 |
|
688 /* Encodes a Unicode object and returns the result as Python |
|
689 object. */ |
|
690 |
|
691 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( |
|
692 PyObject *unicode, /* Unicode object */ |
|
693 const char *encoding, /* encoding */ |
|
694 const char *errors /* error handling */ |
|
695 ); |
|
696 |
|
697 /* Encodes a Unicode object and returns the result as Python string |
|
698 object. */ |
|
699 |
|
700 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( /* result is returned as a Python string object */ |
|
701 PyObject *unicode, /* Unicode object to encode */ |
|
702 const char *encoding, /* encoding */ |
|
703 const char *errors /* error handling */ |
|
704 ); |
|
705 |
|
706 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( /* NOTE(review): undocumented in this header; presumably builds an encoding map from the given 256 character map -- confirm against the implementation */ |
|
707 PyObject* string /* 256 character map */ |
|
708 ); |
|
709 |
|
710 |
|
711 /* --- UTF-7 Codecs ------------------------------------------------------- */ |
|
712 |
|
713 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( |
|
714 const char *string, /* UTF-7 encoded string to decode */ |
|
715 Py_ssize_t length, /* size of string */ |
|
716 const char *errors /* error handling */ |
|
717 ); |
|
718 |
|
719 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( /* same parameters as PyUnicode_DecodeUTF7 plus consumed */ |
|
720 const char *string, /* UTF-7 encoded string to decode */ |
|
721 Py_ssize_t length, /* size of string */ |
|
722 const char *errors, /* error handling */ |
|
723 Py_ssize_t *consumed /* bytes consumed; presumably written by the decoder -- TODO confirm, including whether NULL is allowed */ |
|
724 ); |
|
725 |
|
726 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( |
|
727 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
728 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ |
|
729 int encodeSetO, /* force the encoder to encode characters in |
|
730 Set O, as described in RFC2152 */ |
|
731 int encodeWhiteSpace, /* force the encoder to encode space, tab, |
|
732 carriage return and linefeed characters */ |
|
733 const char *errors /* error handling */ |
|
734 ); |
|
735 |
|
736 /* --- UTF-8 Codecs ------------------------------------------------------- */ |
|
737 |
|
738 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( |
|
739 const char *string, /* UTF-8 encoded string to decode */ |
|
740 Py_ssize_t length, /* size of string */ |
|
741 const char *errors /* error handling */ |
|
742 ); |
|
743 |
|
744 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( /* same parameters as PyUnicode_DecodeUTF8 plus consumed */ |
|
745 const char *string, /* UTF-8 encoded string to decode */ |
|
746 Py_ssize_t length, /* size of string */ |
|
747 const char *errors, /* error handling */ |
|
748 Py_ssize_t *consumed /* bytes consumed; presumably written by the decoder -- TODO confirm, including whether NULL is allowed */ |
|
749 ); |
|
750 |
|
751 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( |
|
752 PyObject *unicode /* Unicode object to encode */ |
|
753 ); |
|
754 |
|
755 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( |
|
756 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
757 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ |
|
758 const char *errors /* error handling */ |
|
759 ); |
|
760 |
|
761 /* --- UTF-32 Codecs ------------------------------------------------------ */ |
|
762 |
|
763 /* Decodes length bytes from a UTF-32 encoded buffer string and returns |
|
764 the corresponding Unicode object. |
|
765 |
|
766 errors (if non-NULL) defines the error handling. It defaults |
|
767 to "strict". |
|
768 |
|
769 If byteorder is non-NULL, the decoder starts decoding using the |
|
770 given byte order: |
|
771 |
|
772 *byteorder == -1: little endian |
|
773 *byteorder == 0: native order |
|
774 *byteorder == 1: big endian |
|
775 |
|
776 In native mode, the first four bytes of the stream are checked for a |
|
777 BOM mark. If found, the BOM mark is analysed, the byte order |
|
778 adjusted and the BOM skipped. In the other modes, no BOM mark |
|
779 interpretation is done. After completion, *byteorder is set to the |
|
780 current byte order at the end of input data. |
|
781 |
|
782 If byteorder is NULL, the codec starts in native order mode. |
|
783 |
|
784 */ |
|
785 |
|
786 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( /* returns the corresponding Unicode object */ |
|
787 const char *string, /* UTF-32 encoded string */ |
|
788 Py_ssize_t length, /* number of bytes of string to decode */ |
|
789 const char *errors, /* error handling; NULL defaults to "strict" */ |
|
790 int *byteorder /* pointer to byteorder to use |
|
791 0=native;-1=LE,1=BE; updated on |
|
792 exit; NULL starts in native order mode */ |
|
793 ); |
|
794 |
|
795 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( /* same parameters as PyUnicode_DecodeUTF32 plus consumed */ |
|
796 const char *string, /* UTF-32 encoded string */ |
|
797 Py_ssize_t length, /* size of string */ |
|
798 const char *errors, /* error handling */ |
|
799 int *byteorder, /* pointer to byteorder to use |
|
800 0=native;-1=LE,1=BE; updated on |
|
801 exit */ |
|
802 Py_ssize_t *consumed /* bytes consumed; presumably written by the decoder -- TODO confirm */ |
|
803 ); |
|
804 |
|
805 /* Returns a Python string using the UTF-32 encoding in native byte |
|
806 order. The string always starts with a BOM mark. */ |
|
807 |
|
808 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( /* native byte order; the returned string always starts with a BOM */ |
|
809 PyObject *unicode /* Unicode object to encode */ |
|
810 ); |
|
811 |
|
812 /* Returns a Python string object holding the UTF-32 encoded value of |
|
813 the Unicode data. |
|
814 |
|
815 If byteorder is not 0, output is written according to the following |
|
816 byte order: |
|
817 |
|
818 byteorder == -1: little endian |
|
819 byteorder == 0: native byte order (writes a BOM mark) |
|
820 byteorder == 1: big endian |
|
821 |
|
822 If byteorder is 0, the output string will always start with the |
|
823 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is |
|
824 prepended. |
|
825 |
|
826 */ |
|
827 |
|
828 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( /* returns a Python string object holding the UTF-32 encoded data */ |
|
829 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
830 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ |
|
831 const char *errors, /* error handling */ |
|
832 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE (no BOM is prepended for -1 and 1) */ |
|
833 ); |
|
834 |
|
835 /* --- UTF-16 Codecs ------------------------------------------------------ */ |
|
836 |
|
837 /* Decodes length bytes from a UTF-16 encoded buffer string and returns |
|
838 the corresponding Unicode object. |
|
839 |
|
840 errors (if non-NULL) defines the error handling. It defaults |
|
841 to "strict". |
|
842 |
|
843 If byteorder is non-NULL, the decoder starts decoding using the |
|
844 given byte order: |
|
845 |
|
846 *byteorder == -1: little endian |
|
847 *byteorder == 0: native order |
|
848 *byteorder == 1: big endian |
|
849 |
|
850 In native mode, the first two bytes of the stream are checked for a |
|
851 BOM mark. If found, the BOM mark is analysed, the byte order |
|
852 adjusted and the BOM skipped. In the other modes, no BOM mark |
|
853 interpretation is done. After completion, *byteorder is set to the |
|
854 current byte order at the end of input data. |
|
855 |
|
856 If byteorder is NULL, the codec starts in native order mode. |
|
857 |
|
858 */ |
|
859 |
|
860 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( /* returns the corresponding Unicode object */ |
|
861 const char *string, /* UTF-16 encoded string */ |
|
862 Py_ssize_t length, /* number of bytes of string to decode */ |
|
863 const char *errors, /* error handling; NULL defaults to "strict" */ |
|
864 int *byteorder /* pointer to byteorder to use |
|
865 0=native;-1=LE,1=BE; updated on |
|
866 exit; NULL starts in native order mode */ |
|
867 ); |
|
868 |
|
869 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( /* same parameters as PyUnicode_DecodeUTF16 plus consumed */ |
|
870 const char *string, /* UTF-16 encoded string */ |
|
871 Py_ssize_t length, /* size of string */ |
|
872 const char *errors, /* error handling */ |
|
873 int *byteorder, /* pointer to byteorder to use |
|
874 0=native;-1=LE,1=BE; updated on |
|
875 exit */ |
|
876 Py_ssize_t *consumed /* bytes consumed; presumably written by the decoder -- TODO confirm */ |
|
877 ); |
|
878 |
|
879 /* Returns a Python string using the UTF-16 encoding in native byte |
|
880 order. The string always starts with a BOM mark. */ |
|
881 |
|
882 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( /* native byte order; the returned string always starts with a BOM */ |
|
883 PyObject *unicode /* Unicode object to encode */ |
|
884 ); |
|
885 |
|
886 /* Returns a Python string object holding the UTF-16 encoded value of |
|
887 the Unicode data. |
|
888 |
|
889 If byteorder is not 0, output is written according to the following |
|
890 byte order: |
|
891 |
|
892 byteorder == -1: little endian |
|
893 byteorder == 0: native byte order (writes a BOM mark) |
|
894 byteorder == 1: big endian |
|
895 |
|
896 If byteorder is 0, the output string will always start with the |
|
897 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is |
|
898 prepended. |
|
899 |
|
900 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to |
|
901 UCS-2. This trick makes it possible to add full UTF-16 capabilities |
|
902 at a later point without compromising the APIs. |
|
903 |
|
904 */ |
|
905 |
|
906 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( /* returns a Python string object holding the UTF-16 encoded data */ |
|
907 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
908 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ |
|
909 const char *errors, /* error handling */ |
|
910 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE (no BOM is prepended for -1 and 1) */ |
|
911 ); |
|
912 |
|
913 /* --- Unicode-Escape Codecs ---------------------------------------------- */ |
|
914 |
|
915 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( |
|
916 const char *string, /* Unicode-Escape encoded string to decode */ |
|
917 Py_ssize_t length, /* size of string */ |
|
918 const char *errors /* error handling */ |
|
919 ); |
|
920 |
|
921 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( |
|
922 PyObject *unicode /* Unicode object to encode */ |
|
923 ); |
|
924 |
|
925 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( /* note: takes no errors argument, unlike most Encode prototypes in this header */ |
|
926 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
927 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ |
|
928 ); |
|
929 |
|
930 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ |
|
931 |
|
932 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( |
|
933 const char *string, /* Raw-Unicode-Escape encoded string to decode */ |
|
934 Py_ssize_t length, /* size of string */ |
|
935 const char *errors /* error handling */ |
|
936 ); |
|
937 |
|
938 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( |
|
939 PyObject *unicode /* Unicode object to encode */ |
|
940 ); |
|
941 |
|
942 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( /* note: takes no errors argument, unlike most Encode prototypes in this header */ |
|
943 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
944 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ |
|
945 ); |
|
946 |
|
947 /* --- Unicode Internal Codec --------------------------------------------- |
|
948 |
|
949 Only for internal use in _codecsmodule.c */ |
|
950 |
|
951 PyObject *_PyUnicode_DecodeUnicodeInternal( /* NOTE(review): declared without PyAPI_FUNC, unlike the other prototypes here; possibly deliberate since it is for internal use only -- confirm */ |
|
952 const char *string, /* presumably the encoded input buffer -- confirm */ |
|
953 Py_ssize_t length, /* presumably the size of string -- confirm */ |
|
954 const char *errors /* presumably the error handling, as in the other codecs -- confirm */ |
|
955 ); |
|
956 |
|
957 /* --- Latin-1 Codecs ----------------------------------------------------- |
|
958 |
|
959 Note: Latin-1 corresponds to the first 256 Unicode ordinals. |
|
960 |
|
961 */ |
|
962 |
|
963 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( |
|
964 const char *string, /* Latin-1 encoded string to decode */ |
|
965 Py_ssize_t length, /* size of string */ |
|
966 const char *errors /* error handling */ |
|
967 ); |
|
968 |
|
969 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( |
|
970 PyObject *unicode /* Unicode object to encode */ |
|
971 ); |
|
972 |
|
973 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( |
|
974 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
975 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ |
|
976 const char *errors /* error handling */ |
|
977 ); |
|
978 |
|
979 /* --- ASCII Codecs ------------------------------------------------------- |
|
980 |
|
981 Only 7-bit ASCII data is accepted. All other codes generate errors. |
|
982 |
|
983 */ |
|
984 |
|
985 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( |
|
986 const char *string, /* ASCII encoded string to decode (7-bit data only) */ |
|
987 Py_ssize_t length, /* size of string */ |
|
988 const char *errors /* error handling */ |
|
989 ); |
|
990 |
|
991 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( |
|
992 PyObject *unicode /* Unicode object to encode */ |
|
993 ); |
|
994 |
|
995 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( |
|
996 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
997 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ |
|
998 const char *errors /* error handling */ |
|
999 ); |
|
1000 |
|
1001 /* --- Character Map Codecs ----------------------------------------------- |
|
1002 |
|
1003 This codec uses mappings to encode and decode characters. |
|
1004 |
|
1005 Decoding mappings must map single string characters to single |
|
1006 Unicode characters, integers (which are then interpreted as Unicode |
|
1007 ordinals) or None (meaning "undefined mapping" and causing an |
|
1008 error). |
|
1009 |
|
1010 Encoding mappings must map single Unicode characters to single |
|
1011 string characters, integers (which are then interpreted as Latin-1 |
|
1012 ordinals) or None (meaning "undefined mapping" and causing an |
|
1013 error). |
|
1014 |
|
1015 If a character lookup fails with a LookupError, the character is |
|
1016 copied as-is meaning that its ordinal value will be interpreted as |
|
1017 Unicode or Latin-1 ordinal resp. Because of this, mappings only need |
|
1018 to contain those mappings which map characters to different code |
|
1019 points. |
|
1020 |
|
1021 */ |
|
1022 |
|
1023 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( |
|
1024 const char *string, /* Encoded string to decode through mapping */ |
|
1025 Py_ssize_t length, /* size of string */ |
|
1026 PyObject *mapping, /* character mapping |
|
1027 (char ordinal -> unicode ordinal) */ |
|
1028 const char *errors /* error handling */ |
|
1029 ); |
|
1030 |
|
1031 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( |
|
1032 PyObject *unicode, /* Unicode object to encode */ |
|
1033 PyObject *mapping /* character mapping |
|
1034 (unicode ordinal -> char ordinal) */ |
|
1035 ); |
|
1036 |
|
1037 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( |
|
1038 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
1039 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ |
|
1040 PyObject *mapping, /* character mapping |
|
1041 (unicode ordinal -> char ordinal) */ |
|
1042 const char *errors /* error handling */ |
|
1043 ); |
|
1044 |
|
1045 /* Translate a Py_UNICODE buffer of the given length by applying a |
|
1046 character mapping table to it and return the resulting Unicode |
|
1047 object. |
|
1048 |
|
1049 The mapping table must map Unicode ordinal integers to Unicode |
|
1050 ordinal integers or None (causing deletion of the character). |
|
1051 |
|
1052 Mapping tables may be dictionaries or sequences. Unmapped character |
|
1053 ordinals (ones which cause a LookupError) are left untouched and |
|
1054 are copied as-is. |
|
1055 |
|
1056 */ |
|
1057 |
|
1058 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( /* returns the resulting Unicode object */ |
|
1059 const Py_UNICODE *data, /* Unicode char buffer to translate */ |
|
1060 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */ |
|
1061 PyObject *table, /* Translate table: dictionary or sequence mapping Unicode ordinals to Unicode ordinals or None */ |
|
1062 const char *errors /* error handling */ |
|
1063 ); |
|
1064 |
|
1065 #ifdef MS_WIN32 |
|
1066 |
|
1067 /* --- MBCS codecs for Windows (declared only when MS_WIN32 is defined) --- */ |
|
1068 |
|
1069 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( |
|
1070 const char *string, /* MBCS encoded string to decode */ |
|
1071 Py_ssize_t length, /* size of string */ |
|
1072 const char *errors /* error handling */ |
|
1073 ); |
|
1074 |
|
1075 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( /* same parameters as PyUnicode_DecodeMBCS plus consumed */ |
|
1076 const char *string, /* MBCS encoded string to decode */ |
|
1077 Py_ssize_t length, /* size of string */ |
|
1078 const char *errors, /* error handling */ |
|
1079 Py_ssize_t *consumed /* bytes consumed; presumably written by the decoder -- TODO confirm */ |
|
1080 ); |
|
1081 |
|
1082 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( |
|
1083 PyObject *unicode /* Unicode object to encode */ |
|
1084 ); |
|
1085 |
|
1086 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( |
|
1087 const Py_UNICODE *data, /* Unicode char buffer to encode */ |
|
1088 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ |
|
1089 const char *errors /* error handling */ |
|
1090 ); |
|
1091 |
|
1092 #endif /* MS_WIN32 */ |
|
1093 |
|
1094 /* --- Decimal Encoder ---------------------------------------------------- */ |
|
1095 |
|
1096 /* Takes a Unicode string holding a decimal value and writes it into |
|
1097 an output buffer using standard ASCII digit codes. |
|
1098 |
|
1099 The output buffer has to provide at least length+1 bytes of storage |
|
1100 area. The output string is 0-terminated. |
|
1101 |
|
1102 The encoder converts whitespace to ' ', decimal characters to their |
|
1103 corresponding ASCII digit and all other Latin-1 characters except |
|
1104 \0 as-is. Characters outside this range (Unicode ordinals 1-255) |
|
1105 are treated as errors. This includes embedded NUL characters. |
|
1106 |
|
1107 Error handling is defined by the errors argument: |
|
1108 |
|
1109 NULL or "strict": raise a ValueError |
|
1110 "ignore": ignore the wrong characters (these are not copied to the |
|
1111 output buffer) |
|
1112 "replace": replaces illegal characters with '?' |
|
1113 |
|
1114 Returns 0 on success, -1 on failure. |
|
1115 |
|
1116 */ |
|
1117 |
|
1118 PyAPI_FUNC(int) PyUnicode_EncodeDecimal( /* returns 0 on success, -1 on failure */ |
|
1119 Py_UNICODE *s, /* Unicode buffer holding the decimal value */ |
|
1120 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ |
|
1121 char *output, /* Output buffer; must provide at least length+1 bytes (the output is 0-terminated) */ |
|
1122 const char *errors /* error handling: NULL or "strict", "ignore" or "replace" */ |
|
1123 ); |
|
1124 |
|
1125 /* --- Methods & Slots ---------------------------------------------------- |
|
1126 |
|
1127 These are capable of handling Unicode objects and strings on input |
|
1128 (we refer to them as strings in the descriptions) and return |
|
1129 Unicode objects or integers as appropriate. */ |
|
1130 |
|
1131 /* Concat two strings giving a new Unicode string. */ |
|
1132 |
|
1133 PyAPI_FUNC(PyObject*) PyUnicode_Concat( /* returns a new Unicode string */ |
|
1134 PyObject *left, /* Left string (Unicode object or string) */ |
|
1135 PyObject *right /* Right string (Unicode object or string) */ |
|
1136 ); |
|
1137 |
|
1138 /* Split a string giving a list of Unicode strings. |
|
1139 |
|
1140 If sep is NULL, splitting will be done at all whitespace |
|
1141 substrings. Otherwise, splits occur at the given separator. |
|
1142 |
|
1143 At most maxsplit splits will be done. If negative, no limit is set. |
|
1144 |
|
1145 Separators are not included in the resulting list. |
|
1146 |
|
1147 */ |
|
1148 |
|
1149 PyAPI_FUNC(PyObject*) PyUnicode_Split( /* returns a list of Unicode strings; separators are not included */ |
|
1150 PyObject *s, /* String to split */ |
|
1151 PyObject *sep, /* String separator; NULL splits at all whitespace substrings */ |
|
1152 Py_ssize_t maxsplit /* Maximum number of splits; negative means no limit */ |
|
1153 ); |
|
1154 |
|
1155 /* Split a string at line breaks giving a list of Unicode strings. |
|
1156 |
|
1157 CRLF is considered to be one line break. Line breaks are not |
|
1158 included in the resulting list unless keepends is true. */ |
|
1159 |
|
1160 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( /* splits at line breaks; CRLF is considered to be one line break */ |
|
1161 PyObject *s, /* String to split */ |
|
1162 int keepends /* If true (nonzero), line end markers are included in the result */ |
|
1163 ); |
|
1164 |
|
1165 /* Partition a string using a given separator. */ |
|
1166 |
|
1167 PyAPI_FUNC(PyObject*) PyUnicode_Partition( /* NOTE(review): the shape of the returned object is not described in this header */ |
|
1168 PyObject *s, /* String to partition */ |
|
1169 PyObject *sep /* Separator string at which s is partitioned */ |
|
1170 ); |
|
1171 |
|
1172 /* Partition a string using a given separator, searching from the end of the |
|
1173 string. */ |
|
1174 |
|
1175 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( /* like PyUnicode_Partition, but searches from the end of the string */ |
|
1176 PyObject *s, /* String to partition */ |
|
1177 PyObject *sep /* Separator string at which s is partitioned */ |
|
1178 ); |
|
1179 |
|
1180 /* Split a string giving a list of Unicode strings. |
|
1181 |
|
1182 If sep is NULL, splitting will be done at all whitespace |
|
1183 substrings. Otherwise, splits occur at the given separator. |
|
1184 |
|
1185 At most maxsplit splits will be done. If maxsplit is negative, no |
|
1186 limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from |
|
1187 the end of the string. |
|
1188 |
|
1189 Separators are not included in the resulting list. |
|
1190 |
|
1191 */ |
|
1192 |
|
1193 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( /* returns a list of Unicode strings; splits from the end of the string */ |
|
1194 PyObject *s, /* String to split */ |
|
1195 PyObject *sep, /* String separator; NULL splits at all whitespace substrings */ |
|
1196 Py_ssize_t maxsplit /* Maximum number of splits; negative means no limit */ |
|
1197 ); |
|
1198 |
|
1199 /* Translate a string by applying a character mapping table to it and |
|
1200 return the resulting Unicode object. |
|
1201 |
|
1202 The mapping table must map Unicode ordinal integers to Unicode |
|
1203 ordinal integers or None (causing deletion of the character). |
|
1204 |
|
1205 Mapping tables may be dictionaries or sequences. Unmapped character |
|
1206 ordinals (ones which cause a LookupError) are left untouched and |
|
1207 are copied as-is. |
|
1208 |
|
1209 */ |
|
1210 |
|
1211 PyAPI_FUNC(PyObject *) PyUnicode_Translate( /* returns the resulting Unicode object */ |
|
1212 PyObject *str, /* String to translate */ |
|
1213 PyObject *table, /* Translate table: dictionary or sequence mapping Unicode ordinals to Unicode ordinals or None */ |
|
1214 const char *errors /* error handling */ |
|
1215 ); |
|
1216 |
|
1217 /* Join a sequence of strings using the given separator and return |
|
1218 the resulting Unicode string. */ |
|
1219 |
|
1220 PyAPI_FUNC(PyObject*) PyUnicode_Join( /* returns the resulting Unicode string */ |
|
1221 PyObject *separator, /* Separator string placed between the joined strings */ |
|
1222 PyObject *seq /* Sequence object containing the strings to join */ |
|
1223 ); |
|
1224 |
|
1225 /* Return 1 if substr matches str[start:end] at the given tail end, 0 |
|
1226 otherwise. */ |
|
1227 |
|
1228 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( /* returns 1 on a match, 0 otherwise; NOTE(review): an error return value is not documented here -- confirm */ |
|
1229 PyObject *str, /* String */ |
|
1230 PyObject *substr, /* Prefix or Suffix string */ |
|
1231 Py_ssize_t start, /* Start index of the slice str[start:end] */ |
|
1232 Py_ssize_t end, /* Stop index of the slice str[start:end] */ |
|
1233 int direction /* Tail end: -1 prefix, +1 suffix */ |
|
1234 ); |
|
1235 |
|
1236 /* Return the first position of substr in str[start:end] using the |
|
1237 given search direction or -1 if not found. -2 is returned in case |
|
1238 an error occurred and an exception is set. */ |
|
1239 |
|
1240 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( /* returns the first position found, -1 if not found, -2 on error (exception set) */ |
|
1241 PyObject *str, /* String to search in */ |
|
1242 PyObject *substr, /* Substring to find */ |
|
1243 Py_ssize_t start, /* Start index of the slice str[start:end] */ |
|
1244 Py_ssize_t end, /* Stop index of the slice str[start:end] */ |
|
1245 int direction /* Find direction: +1 forward, -1 backward */ |
|
1246 ); |
|
1247 |
|
1248 /* Count the number of occurrences of substr in str[start:end]. */ |
|
1249 |
|
1250 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( /* returns the number of occurrences; NOTE(review): an error return value is not documented here -- confirm */ |
|
1251 PyObject *str, /* String to search in */ |
|
1252 PyObject *substr, /* Substring to count */ |
|
1253 Py_ssize_t start, /* Start index of the slice str[start:end] */ |
|
1254 Py_ssize_t end /* Stop index of the slice str[start:end] */ |
|
1255 ); |
|
1256 |
|
1257 /* Replace at most maxcount occurrences of substr in str with replstr |
|
1258 and return the resulting Unicode object. */ |
|
1259 |
|
1260 PyAPI_FUNC(PyObject *) PyUnicode_Replace( /* returns the resulting Unicode object */ |
|
1261 PyObject *str, /* String in which occurrences are replaced */ |
|
1262 PyObject *substr, /* Substring to find */ |
|
1263 PyObject *replstr, /* Replacement string substituted for each occurrence of substr */ |
|
1264 Py_ssize_t maxcount /* Max. number of replacements to apply; |
|
1265 -1 = all */ |
|
1266 ); |
|
1267 |
|
1268 /* Compare two strings and return -1, 0, 1 for less than, equal, |
|
1269 greater than resp. */ |
|
1270 |
|
1271 PyAPI_FUNC(int) PyUnicode_Compare( /* returns -1, 0 or 1 for less than, equal, greater than; NOTE(review): error signalling is not documented here -- confirm */ |
|
1272 PyObject *left, /* Left string */ |
|
1273 PyObject *right /* Right string */ |
|
1274 ); |
|
1275 |
|
1276 /* Rich compare two strings and return one of the following: |
|
1277 |
|
1278 - NULL in case an exception was raised |
|
1279 - Py_True or Py_False for successful comparisons |
|
1280 - Py_NotImplemented in case the type combination is unknown |
|
1281 |
|
1282 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in |
|
1283 case the conversion of the arguments to Unicode fails with a |
|
1284 UnicodeDecodeError. |
|
1285 |
|
1286 Possible values for op: |
|
1287 |
|
1288 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE |
|
1289 |
|
1290 */ |
|
1291 |
|
1292 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( /* returns NULL (exception raised), Py_True, Py_False or Py_NotImplemented */ |
|
1293 PyObject *left, /* Left string */ |
|
1294 PyObject *right, /* Right string */ |
|
1295 int op /* Operation: one of Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE */ |
|
1296 ); |
|
1297 |
|
1298 /* Apply an argument tuple or dictionary to a format string and return |
|
1299 the resulting Unicode string. */ |
|
1300 |
|
1301 PyAPI_FUNC(PyObject *) PyUnicode_Format( /* returns the resulting Unicode string */ |
|
1302 PyObject *format, /* Format string the arguments are applied to */ |
|
1303 PyObject *args /* Argument tuple or dictionary */ |
|
1304 ); |
|
1305 |
|
1306 /* Checks whether element is contained in container and returns 1/0 |
|
1307 accordingly. |
|
1308 |
|
1309 element has to coerce to a one element Unicode string. -1 is |
|
1310 returned in case of an error. */ |
|
1311 |
|
1312 PyAPI_FUNC(int) PyUnicode_Contains( /* returns 1 if contained, 0 if not, -1 in case of an error */ |
|
1313 PyObject *container, /* Container string */ |
|
1314 PyObject *element /* Element string; has to coerce to a one element Unicode string */ |
|
1315 ); |
|
1316 |
|
1317 /* Externally visible for str.strip(unicode). NOTE(review): the parameters are not documented in this header; the notes below are assumptions to confirm. */ |
|
1318 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( |
|
1319 PyUnicodeObject *self, /* presumably the Unicode object to strip -- confirm */ |
|
1320 int striptype, /* strip mode selector; the valid values are not defined in this section -- confirm */ |
|
1321 PyObject *sepobj /* presumably the characters to strip -- confirm */ |
|
1322 ); |
|
1323 |
|
1324 /* === Characters Type APIs =============================================== */ |
|
1325 |
|
1326 /* These should not be used directly. Use the Py_UNICODE_IS* and |
|
1327 Py_UNICODE_TO* macros instead. |
|
1328 |
|
1329 These APIs are implemented in Objects/unicodectype.c. |
|
1330 |
|
1331 */ |
|
1332 |
|
1333 PyAPI_FUNC(int) _PyUnicode_IsLowercase( /* not for direct use: callers should use the Py_UNICODE_IS* macros */ |
|
1334 Py_UNICODE ch /* Unicode character to test */ |
|
1335 ); |
|
1336 |
|
1337 PyAPI_FUNC(int) _PyUnicode_IsUppercase( |
|
1338 Py_UNICODE ch /* Unicode character to test */ |
|
1339 ); |
|
1340 |
|
1341 PyAPI_FUNC(int) _PyUnicode_IsTitlecase( |
|
1342 Py_UNICODE ch /* Unicode character to test */ |
|
1343 ); |
|
1344 |
|
1345 PyAPI_FUNC(int) _PyUnicode_IsWhitespace( |
|
1346 const Py_UNICODE ch /* Unicode character to test (const-qualified here, unlike most sibling prototypes) */ |
|
1347 ); |
|
1348 |
|
1349 PyAPI_FUNC(int) _PyUnicode_IsLinebreak( |
|
1350 const Py_UNICODE ch /* Unicode character to test (const-qualified here, unlike most sibling prototypes) */ |
|
1351 ); |
|
1352 |
|
1353 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( /* not for direct use: callers should use the Py_UNICODE_TO* macros */ |
|
1354 Py_UNICODE ch /* Unicode character to convert */ |
|
1355 ); |
|
1356 |
|
1357 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( |
|
1358 Py_UNICODE ch /* Unicode character to convert */ |
|
1359 ); |
|
1360 |
|
1361 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( |
|
1362 Py_UNICODE ch /* Unicode character to convert */ |
|
1363 ); |
|
1364 |
|
1365 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( /* NOTE(review): the result for characters that are not decimal digits is not documented here -- confirm */ |
|
1366 Py_UNICODE ch /* Unicode character to convert */ |
|
1367 ); |
|
1368 |
|
1369 PyAPI_FUNC(int) _PyUnicode_ToDigit( /* NOTE(review): the result for characters that are not digits is not documented here -- confirm */ |
|
1370 Py_UNICODE ch /* Unicode character to convert */ |
|
1371 ); |
|
1372 |
|
1373 PyAPI_FUNC(double) _PyUnicode_ToNumeric( /* result is a double, unlike the int results of ToDecimalDigit and ToDigit */ |
|
1374 Py_UNICODE ch /* Unicode character to convert */ |
|
1375 ); |
|
1376 |
|
1377 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( |
|
1378 Py_UNICODE ch /* Unicode character to test */ |
|
1379 ); |
|
1380 |
|
1381 PyAPI_FUNC(int) _PyUnicode_IsDigit( |
|
1382 Py_UNICODE ch /* Unicode character to test */ |
|
1383 ); |
|
1384 |
|
1385 PyAPI_FUNC(int) _PyUnicode_IsNumeric( |
|
1386 Py_UNICODE ch /* Unicode character to test */ |
|
1387 ); |
|
1388 |
|
1389 PyAPI_FUNC(int) _PyUnicode_IsAlpha( |
|
1390 Py_UNICODE ch /* Unicode character to test */ |
|
1391 ); |
|
1392 |
|
1393 #ifdef __cplusplus |
|
1394 } |
|
1395 #endif |
|
1396 #endif /* Py_USING_UNICODE */ |
|
1397 #endif /* !Py_UNICODEOBJECT_H */ |