|
1 /* String (str/bytes) object implementation */ |
|
2 |
|
3 #define PY_SSIZE_T_CLEAN |
|
4 |
|
5 #include "Python.h" |
|
6 #include <ctype.h> |
|
7 |
|
/* Statistics kept only in COUNT_ALLOCS builds: how many times the cached
   empty string / a cached one-character string was handed out instead of
   allocating a new object. */
8 #ifdef COUNT_ALLOCS |
|
9 int null_strings, one_strings; |
|
10 #endif |
|
11 |
|
/* Cache of shared one-character strings, indexed by the byte value
   (`*str & UCHAR_MAX'); an entry stays NULL until that character is first
   created. */
12 static PyStringObject *characters[UCHAR_MAX + 1]; |
|
/* The shared empty string, or NULL until it is first created. */
13 static PyStringObject *nullstring; |
|
14 |
|
15 /* This dictionary holds all interned strings. Note that references to |
|
16 strings in this dictionary are *not* counted in the string's ob_refcnt. |
|
17 When the interned string reaches a refcnt of 0 the string deallocation |
|
18 function will delete the reference from this dictionary. |
|
19 |
|
20 Another way to look at this is to say that the actual reference |
|
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) |
|
22 */ |
|
23 static PyObject *interned; |
|
24 |
|
25 /* |
|
26 For both PyString_FromString() and PyString_FromStringAndSize(), the |
|
27 parameter `size' denotes number of characters to allocate, not counting any |
|
28 null terminating character. |
|
29 |
|
30 For PyString_FromString(), the parameter `str' points to a null-terminated |
|
31 string containing exactly `size' bytes. |
|
32 |
|
33 For PyString_FromStringAndSize(), the parameter `str' is |
|
34 either NULL or else points to a string containing at least `size' bytes. |
|
35 For PyString_FromStringAndSize(), the string in the `str' parameter does |
|
36 not have to be null-terminated. (Therefore it is safe to construct a |
|
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.) |
|
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1' |
|
39 bytes (setting the last byte to the null terminating character) and you can |
|
40 fill in the data yourself. If `str' is non-NULL then the resulting |
|
41 PyString object must be treated as immutable and you must not fill in nor |
|
42 alter the data yourself, since the strings may be shared. |
|
43 |
|
44 The PyObject member `op->ob_size', which denotes the number of "extra |
|
45 items" in a variable-size object, will contain the number of bytes |
|
46 allocated for string data, not counting the null terminating character. It |
|
47 is therefore equal to the `size' parameter (for |
|
48 PyString_FromStringAndSize()) or the length of the string in the `str' |
|
49 parameter (for PyString_FromString()). |
|
50 */ |
|
51 PyObject * |
|
52 PyString_FromStringAndSize(const char *str, Py_ssize_t size) |
|
53 { |
|
54 register PyStringObject *op; |
|
55 if (size < 0) { |
|
56 PyErr_SetString(PyExc_SystemError, |
|
57 "Negative size passed to PyString_FromStringAndSize"); |
|
58 return NULL; |
|
59 } |
|
60 if (size == 0 && (op = nullstring) != NULL) { |
|
61 #ifdef COUNT_ALLOCS |
|
62 null_strings++; |
|
63 #endif |
|
64 Py_INCREF(op); |
|
65 return (PyObject *)op; |
|
66 } |
|
67 if (size == 1 && str != NULL && |
|
68 (op = characters[*str & UCHAR_MAX]) != NULL) |
|
69 { |
|
70 #ifdef COUNT_ALLOCS |
|
71 one_strings++; |
|
72 #endif |
|
73 Py_INCREF(op); |
|
74 return (PyObject *)op; |
|
75 } |
|
76 |
|
77 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { |
|
78 PyErr_SetString(PyExc_OverflowError, "string is too large"); |
|
79 return NULL; |
|
80 } |
|
81 |
|
82 /* Inline PyObject_NewVar */ |
|
83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); |
|
84 if (op == NULL) |
|
85 return PyErr_NoMemory(); |
|
86 PyObject_INIT_VAR(op, &PyString_Type, size); |
|
87 op->ob_shash = -1; |
|
88 op->ob_sstate = SSTATE_NOT_INTERNED; |
|
89 if (str != NULL) |
|
90 Py_MEMCPY(op->ob_sval, str, size); |
|
91 op->ob_sval[size] = '\0'; |
|
92 /* share short strings */ |
|
93 if (size == 0) { |
|
94 PyObject *t = (PyObject *)op; |
|
95 PyString_InternInPlace(&t); |
|
96 op = (PyStringObject *)t; |
|
97 nullstring = op; |
|
98 Py_INCREF(op); |
|
99 } else if (size == 1 && str != NULL) { |
|
100 PyObject *t = (PyObject *)op; |
|
101 PyString_InternInPlace(&t); |
|
102 op = (PyStringObject *)t; |
|
103 characters[*str & UCHAR_MAX] = op; |
|
104 Py_INCREF(op); |
|
105 } |
|
106 return (PyObject *) op; |
|
107 } |
|
108 |
|
109 PyObject * |
|
110 PyString_FromString(const char *str) |
|
111 { |
|
112 register size_t size; |
|
113 register PyStringObject *op; |
|
114 |
|
115 assert(str != NULL); |
|
116 size = strlen(str); |
|
117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { |
|
118 PyErr_SetString(PyExc_OverflowError, |
|
119 "string is too long for a Python string"); |
|
120 return NULL; |
|
121 } |
|
122 if (size == 0 && (op = nullstring) != NULL) { |
|
123 #ifdef COUNT_ALLOCS |
|
124 null_strings++; |
|
125 #endif |
|
126 Py_INCREF(op); |
|
127 return (PyObject *)op; |
|
128 } |
|
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { |
|
130 #ifdef COUNT_ALLOCS |
|
131 one_strings++; |
|
132 #endif |
|
133 Py_INCREF(op); |
|
134 return (PyObject *)op; |
|
135 } |
|
136 |
|
137 /* Inline PyObject_NewVar */ |
|
138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); |
|
139 if (op == NULL) |
|
140 return PyErr_NoMemory(); |
|
141 PyObject_INIT_VAR(op, &PyString_Type, size); |
|
142 op->ob_shash = -1; |
|
143 op->ob_sstate = SSTATE_NOT_INTERNED; |
|
144 Py_MEMCPY(op->ob_sval, str, size+1); |
|
145 /* share short strings */ |
|
146 if (size == 0) { |
|
147 PyObject *t = (PyObject *)op; |
|
148 PyString_InternInPlace(&t); |
|
149 op = (PyStringObject *)t; |
|
150 nullstring = op; |
|
151 Py_INCREF(op); |
|
152 } else if (size == 1) { |
|
153 PyObject *t = (PyObject *)op; |
|
154 PyString_InternInPlace(&t); |
|
155 op = (PyStringObject *)t; |
|
156 characters[*str & UCHAR_MAX] = op; |
|
157 Py_INCREF(op); |
|
158 } |
|
159 return (PyObject *) op; |
|
160 } |
|
161 |
|
162 PyObject * |
|
163 PyString_FromFormatV(const char *format, va_list vargs) |
|
164 { |
|
165 va_list count; |
|
166 Py_ssize_t n = 0; |
|
167 const char* f; |
|
168 char *s; |
|
169 PyObject* string; |
|
170 |
|
171 #ifdef VA_LIST_IS_ARRAY |
|
172 Py_MEMCPY(count, vargs, sizeof(va_list)); |
|
173 #else |
|
174 #ifdef __va_copy |
|
175 __va_copy(count, vargs); |
|
176 #else |
|
177 count = vargs; |
|
178 #endif |
|
179 #endif |
|
180 /* step 1: figure out how large a buffer we need */ |
|
181 for (f = format; *f; f++) { |
|
182 if (*f == '%') { |
|
183 const char* p = f; |
|
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) |
|
185 ; |
|
186 |
|
187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since |
|
188 * they don't affect the amount of space we reserve. |
|
189 */ |
|
190 if ((*f == 'l' || *f == 'z') && |
|
191 (f[1] == 'd' || f[1] == 'u')) |
|
192 ++f; |
|
193 |
|
194 switch (*f) { |
|
195 case 'c': |
|
196 (void)va_arg(count, int); |
|
197 /* fall through... */ |
|
198 case '%': |
|
199 n++; |
|
200 break; |
|
201 case 'd': case 'u': case 'i': case 'x': |
|
202 (void) va_arg(count, int); |
|
203 /* 20 bytes is enough to hold a 64-bit |
|
204 integer. Decimal takes the most space. |
|
205 This isn't enough for octal. */ |
|
206 n += 20; |
|
207 break; |
|
208 case 's': |
|
209 s = va_arg(count, char*); |
|
210 n += strlen(s); |
|
211 break; |
|
212 case 'p': |
|
213 (void) va_arg(count, int); |
|
214 /* maximum 64-bit pointer representation: |
|
215 * 0xffffffffffffffff |
|
216 * so 19 characters is enough. |
|
217 * XXX I count 18 -- what's the extra for? |
|
218 */ |
|
219 n += 19; |
|
220 break; |
|
221 default: |
|
222 /* if we stumble upon an unknown |
|
223 formatting code, copy the rest of |
|
224 the format string to the output |
|
225 string. (we cannot just skip the |
|
226 code, since there's no way to know |
|
227 what's in the argument list) */ |
|
228 n += strlen(p); |
|
229 goto expand; |
|
230 } |
|
231 } else |
|
232 n++; |
|
233 } |
|
234 expand: |
|
235 /* step 2: fill the buffer */ |
|
236 /* Since we've analyzed how much space we need for the worst case, |
|
237 use sprintf directly instead of the slower PyOS_snprintf. */ |
|
238 string = PyString_FromStringAndSize(NULL, n); |
|
239 if (!string) |
|
240 return NULL; |
|
241 |
|
242 s = PyString_AsString(string); |
|
243 |
|
244 for (f = format; *f; f++) { |
|
245 if (*f == '%') { |
|
246 const char* p = f++; |
|
247 Py_ssize_t i; |
|
248 int longflag = 0; |
|
249 int size_tflag = 0; |
|
250 /* parse the width.precision part (we're only |
|
251 interested in the precision value, if any) */ |
|
252 n = 0; |
|
253 while (isdigit(Py_CHARMASK(*f))) |
|
254 n = (n*10) + *f++ - '0'; |
|
255 if (*f == '.') { |
|
256 f++; |
|
257 n = 0; |
|
258 while (isdigit(Py_CHARMASK(*f))) |
|
259 n = (n*10) + *f++ - '0'; |
|
260 } |
|
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) |
|
262 f++; |
|
263 /* handle the long flag, but only for %ld and %lu. |
|
264 others can be added when necessary. */ |
|
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { |
|
266 longflag = 1; |
|
267 ++f; |
|
268 } |
|
269 /* handle the size_t flag. */ |
|
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { |
|
271 size_tflag = 1; |
|
272 ++f; |
|
273 } |
|
274 |
|
275 switch (*f) { |
|
276 case 'c': |
|
277 *s++ = va_arg(vargs, int); |
|
278 break; |
|
279 case 'd': |
|
280 if (longflag) |
|
281 sprintf(s, "%ld", va_arg(vargs, long)); |
|
282 else if (size_tflag) |
|
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d", |
|
284 va_arg(vargs, Py_ssize_t)); |
|
285 else |
|
286 sprintf(s, "%d", va_arg(vargs, int)); |
|
287 s += strlen(s); |
|
288 break; |
|
289 case 'u': |
|
290 if (longflag) |
|
291 sprintf(s, "%lu", |
|
292 va_arg(vargs, unsigned long)); |
|
293 else if (size_tflag) |
|
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u", |
|
295 va_arg(vargs, size_t)); |
|
296 else |
|
297 sprintf(s, "%u", |
|
298 va_arg(vargs, unsigned int)); |
|
299 s += strlen(s); |
|
300 break; |
|
301 case 'i': |
|
302 sprintf(s, "%i", va_arg(vargs, int)); |
|
303 s += strlen(s); |
|
304 break; |
|
305 case 'x': |
|
306 sprintf(s, "%x", va_arg(vargs, int)); |
|
307 s += strlen(s); |
|
308 break; |
|
309 case 's': |
|
310 p = va_arg(vargs, char*); |
|
311 i = strlen(p); |
|
312 if (n > 0 && i > n) |
|
313 i = n; |
|
314 Py_MEMCPY(s, p, i); |
|
315 s += i; |
|
316 break; |
|
317 case 'p': |
|
318 sprintf(s, "%p", va_arg(vargs, void*)); |
|
319 /* %p is ill-defined: ensure leading 0x. */ |
|
320 if (s[1] == 'X') |
|
321 s[1] = 'x'; |
|
322 else if (s[1] != 'x') { |
|
323 memmove(s+2, s, strlen(s)+1); |
|
324 s[0] = '0'; |
|
325 s[1] = 'x'; |
|
326 } |
|
327 s += strlen(s); |
|
328 break; |
|
329 case '%': |
|
330 *s++ = '%'; |
|
331 break; |
|
332 default: |
|
333 strcpy(s, p); |
|
334 s += strlen(s); |
|
335 goto end; |
|
336 } |
|
337 } else |
|
338 *s++ = *f; |
|
339 } |
|
340 |
|
341 end: |
|
342 _PyString_Resize(&string, s - PyString_AS_STRING(string)); |
|
343 return string; |
|
344 } |
|
345 |
|
346 PyObject * |
|
347 PyString_FromFormat(const char *format, ...) |
|
348 { |
|
349 PyObject* ret; |
|
350 va_list vargs; |
|
351 |
|
352 #ifdef HAVE_STDARG_PROTOTYPES |
|
353 va_start(vargs, format); |
|
354 #else |
|
355 va_start(vargs); |
|
356 #endif |
|
357 ret = PyString_FromFormatV(format, vargs); |
|
358 va_end(vargs); |
|
359 return ret; |
|
360 } |
|
361 |
|
362 |
|
363 PyObject *PyString_Decode(const char *s, |
|
364 Py_ssize_t size, |
|
365 const char *encoding, |
|
366 const char *errors) |
|
367 { |
|
368 PyObject *v, *str; |
|
369 |
|
370 str = PyString_FromStringAndSize(s, size); |
|
371 if (str == NULL) |
|
372 return NULL; |
|
373 v = PyString_AsDecodedString(str, encoding, errors); |
|
374 Py_DECREF(str); |
|
375 return v; |
|
376 } |
|
377 |
|
378 PyObject *PyString_AsDecodedObject(PyObject *str, |
|
379 const char *encoding, |
|
380 const char *errors) |
|
381 { |
|
382 PyObject *v; |
|
383 |
|
384 if (!PyString_Check(str)) { |
|
385 PyErr_BadArgument(); |
|
386 goto onError; |
|
387 } |
|
388 |
|
389 if (encoding == NULL) { |
|
390 #ifdef Py_USING_UNICODE |
|
391 encoding = PyUnicode_GetDefaultEncoding(); |
|
392 #else |
|
393 PyErr_SetString(PyExc_ValueError, "no encoding specified"); |
|
394 goto onError; |
|
395 #endif |
|
396 } |
|
397 |
|
398 /* Decode via the codec registry */ |
|
399 v = PyCodec_Decode(str, encoding, errors); |
|
400 if (v == NULL) |
|
401 goto onError; |
|
402 |
|
403 return v; |
|
404 |
|
405 onError: |
|
406 return NULL; |
|
407 } |
|
408 |
|
409 PyObject *PyString_AsDecodedString(PyObject *str, |
|
410 const char *encoding, |
|
411 const char *errors) |
|
412 { |
|
413 PyObject *v; |
|
414 |
|
415 v = PyString_AsDecodedObject(str, encoding, errors); |
|
416 if (v == NULL) |
|
417 goto onError; |
|
418 |
|
419 #ifdef Py_USING_UNICODE |
|
420 /* Convert Unicode to a string using the default encoding */ |
|
421 if (PyUnicode_Check(v)) { |
|
422 PyObject *temp = v; |
|
423 v = PyUnicode_AsEncodedString(v, NULL, NULL); |
|
424 Py_DECREF(temp); |
|
425 if (v == NULL) |
|
426 goto onError; |
|
427 } |
|
428 #endif |
|
429 if (!PyString_Check(v)) { |
|
430 PyErr_Format(PyExc_TypeError, |
|
431 "decoder did not return a string object (type=%.400s)", |
|
432 Py_TYPE(v)->tp_name); |
|
433 Py_DECREF(v); |
|
434 goto onError; |
|
435 } |
|
436 |
|
437 return v; |
|
438 |
|
439 onError: |
|
440 return NULL; |
|
441 } |
|
442 |
|
443 PyObject *PyString_Encode(const char *s, |
|
444 Py_ssize_t size, |
|
445 const char *encoding, |
|
446 const char *errors) |
|
447 { |
|
448 PyObject *v, *str; |
|
449 |
|
450 str = PyString_FromStringAndSize(s, size); |
|
451 if (str == NULL) |
|
452 return NULL; |
|
453 v = PyString_AsEncodedString(str, encoding, errors); |
|
454 Py_DECREF(str); |
|
455 return v; |
|
456 } |
|
457 |
|
458 PyObject *PyString_AsEncodedObject(PyObject *str, |
|
459 const char *encoding, |
|
460 const char *errors) |
|
461 { |
|
462 PyObject *v; |
|
463 |
|
464 if (!PyString_Check(str)) { |
|
465 PyErr_BadArgument(); |
|
466 goto onError; |
|
467 } |
|
468 |
|
469 if (encoding == NULL) { |
|
470 #ifdef Py_USING_UNICODE |
|
471 encoding = PyUnicode_GetDefaultEncoding(); |
|
472 #else |
|
473 PyErr_SetString(PyExc_ValueError, "no encoding specified"); |
|
474 goto onError; |
|
475 #endif |
|
476 } |
|
477 |
|
478 /* Encode via the codec registry */ |
|
479 v = PyCodec_Encode(str, encoding, errors); |
|
480 if (v == NULL) |
|
481 goto onError; |
|
482 |
|
483 return v; |
|
484 |
|
485 onError: |
|
486 return NULL; |
|
487 } |
|
488 |
|
489 PyObject *PyString_AsEncodedString(PyObject *str, |
|
490 const char *encoding, |
|
491 const char *errors) |
|
492 { |
|
493 PyObject *v; |
|
494 |
|
495 v = PyString_AsEncodedObject(str, encoding, errors); |
|
496 if (v == NULL) |
|
497 goto onError; |
|
498 |
|
499 #ifdef Py_USING_UNICODE |
|
500 /* Convert Unicode to a string using the default encoding */ |
|
501 if (PyUnicode_Check(v)) { |
|
502 PyObject *temp = v; |
|
503 v = PyUnicode_AsEncodedString(v, NULL, NULL); |
|
504 Py_DECREF(temp); |
|
505 if (v == NULL) |
|
506 goto onError; |
|
507 } |
|
508 #endif |
|
509 if (!PyString_Check(v)) { |
|
510 PyErr_Format(PyExc_TypeError, |
|
511 "encoder did not return a string object (type=%.400s)", |
|
512 Py_TYPE(v)->tp_name); |
|
513 Py_DECREF(v); |
|
514 goto onError; |
|
515 } |
|
516 |
|
517 return v; |
|
518 |
|
519 onError: |
|
520 return NULL; |
|
521 } |
|
522 |
|
523 static void |
|
524 string_dealloc(PyObject *op) |
|
525 { |
|
526 switch (PyString_CHECK_INTERNED(op)) { |
|
527 case SSTATE_NOT_INTERNED: |
|
528 break; |
|
529 |
|
530 case SSTATE_INTERNED_MORTAL: |
|
531 /* revive dead object temporarily for DelItem */ |
|
532 Py_REFCNT(op) = 3; |
|
533 if (PyDict_DelItem(interned, op) != 0) |
|
534 Py_FatalError( |
|
535 "deletion of interned string failed"); |
|
536 break; |
|
537 |
|
538 case SSTATE_INTERNED_IMMORTAL: |
|
539 Py_FatalError("Immortal interned string died."); |
|
540 |
|
541 default: |
|
542 Py_FatalError("Inconsistent interned string state."); |
|
543 } |
|
544 Py_TYPE(op)->tp_free(op); |
|
545 } |
|
546 |
|
547 /* Unescape a backslash-escaped string. If unicode is non-zero, |
|
548 the string is a u-literal. If recode_encoding is non-zero, |
|
549 the string is UTF-8 encoded and should be re-encoded in the |
|
550 specified encoding. */ |
|
551 |
|
552 PyObject *PyString_DecodeEscape(const char *s, |
|
553 Py_ssize_t len, |
|
554 const char *errors, |
|
555 Py_ssize_t unicode, |
|
556 const char *recode_encoding) |
|
557 { |
|
558 int c; |
|
559 char *p, *buf; |
|
560 const char *end; |
|
561 PyObject *v; |
|
562 Py_ssize_t newlen = recode_encoding ? 4*len:len; |
|
563 v = PyString_FromStringAndSize((char *)NULL, newlen); |
|
564 if (v == NULL) |
|
565 return NULL; |
|
566 p = buf = PyString_AsString(v); |
|
567 end = s + len; |
|
568 while (s < end) { |
|
569 if (*s != '\\') { |
|
570 non_esc: |
|
571 #ifdef Py_USING_UNICODE |
|
572 if (recode_encoding && (*s & 0x80)) { |
|
573 PyObject *u, *w; |
|
574 char *r; |
|
575 const char* t; |
|
576 Py_ssize_t rn; |
|
577 t = s; |
|
578 /* Decode non-ASCII bytes as UTF-8. */ |
|
579 while (t < end && (*t & 0x80)) t++; |
|
580 u = PyUnicode_DecodeUTF8(s, t - s, errors); |
|
581 if(!u) goto failed; |
|
582 |
|
583 /* Recode them in target encoding. */ |
|
584 w = PyUnicode_AsEncodedString( |
|
585 u, recode_encoding, errors); |
|
586 Py_DECREF(u); |
|
587 if (!w) goto failed; |
|
588 |
|
589 /* Append bytes to output buffer. */ |
|
590 assert(PyString_Check(w)); |
|
591 r = PyString_AS_STRING(w); |
|
592 rn = PyString_GET_SIZE(w); |
|
593 Py_MEMCPY(p, r, rn); |
|
594 p += rn; |
|
595 Py_DECREF(w); |
|
596 s = t; |
|
597 } else { |
|
598 *p++ = *s++; |
|
599 } |
|
600 #else |
|
601 *p++ = *s++; |
|
602 #endif |
|
603 continue; |
|
604 } |
|
605 s++; |
|
606 if (s==end) { |
|
607 PyErr_SetString(PyExc_ValueError, |
|
608 "Trailing \\ in string"); |
|
609 goto failed; |
|
610 } |
|
611 switch (*s++) { |
|
612 /* XXX This assumes ASCII! */ |
|
613 case '\n': break; |
|
614 case '\\': *p++ = '\\'; break; |
|
615 case '\'': *p++ = '\''; break; |
|
616 case '\"': *p++ = '\"'; break; |
|
617 case 'b': *p++ = '\b'; break; |
|
618 case 'f': *p++ = '\014'; break; /* FF */ |
|
619 case 't': *p++ = '\t'; break; |
|
620 case 'n': *p++ = '\n'; break; |
|
621 case 'r': *p++ = '\r'; break; |
|
622 case 'v': *p++ = '\013'; break; /* VT */ |
|
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */ |
|
624 case '0': case '1': case '2': case '3': |
|
625 case '4': case '5': case '6': case '7': |
|
626 c = s[-1] - '0'; |
|
627 if (s < end && '0' <= *s && *s <= '7') { |
|
628 c = (c<<3) + *s++ - '0'; |
|
629 if (s < end && '0' <= *s && *s <= '7') |
|
630 c = (c<<3) + *s++ - '0'; |
|
631 } |
|
632 *p++ = c; |
|
633 break; |
|
634 case 'x': |
|
635 if (s+1 < end && |
|
636 isxdigit(Py_CHARMASK(s[0])) && |
|
637 isxdigit(Py_CHARMASK(s[1]))) |
|
638 { |
|
639 unsigned int x = 0; |
|
640 c = Py_CHARMASK(*s); |
|
641 s++; |
|
642 if (isdigit(c)) |
|
643 x = c - '0'; |
|
644 else if (islower(c)) |
|
645 x = 10 + c - 'a'; |
|
646 else |
|
647 x = 10 + c - 'A'; |
|
648 x = x << 4; |
|
649 c = Py_CHARMASK(*s); |
|
650 s++; |
|
651 if (isdigit(c)) |
|
652 x += c - '0'; |
|
653 else if (islower(c)) |
|
654 x += 10 + c - 'a'; |
|
655 else |
|
656 x += 10 + c - 'A'; |
|
657 *p++ = x; |
|
658 break; |
|
659 } |
|
660 if (!errors || strcmp(errors, "strict") == 0) { |
|
661 PyErr_SetString(PyExc_ValueError, |
|
662 "invalid \\x escape"); |
|
663 goto failed; |
|
664 } |
|
665 if (strcmp(errors, "replace") == 0) { |
|
666 *p++ = '?'; |
|
667 } else if (strcmp(errors, "ignore") == 0) |
|
668 /* do nothing */; |
|
669 else { |
|
670 PyErr_Format(PyExc_ValueError, |
|
671 "decoding error; " |
|
672 "unknown error handling code: %.400s", |
|
673 errors); |
|
674 goto failed; |
|
675 } |
|
676 #ifndef Py_USING_UNICODE |
|
677 case 'u': |
|
678 case 'U': |
|
679 case 'N': |
|
680 if (unicode) { |
|
681 PyErr_SetString(PyExc_ValueError, |
|
682 "Unicode escapes not legal " |
|
683 "when Unicode disabled"); |
|
684 goto failed; |
|
685 } |
|
686 #endif |
|
687 default: |
|
688 *p++ = '\\'; |
|
689 s--; |
|
690 goto non_esc; /* an arbitry number of unescaped |
|
691 UTF-8 bytes may follow. */ |
|
692 } |
|
693 } |
|
694 if (p-buf < newlen) |
|
695 _PyString_Resize(&v, p - buf); |
|
696 return v; |
|
697 failed: |
|
698 Py_DECREF(v); |
|
699 return NULL; |
|
700 } |
|
701 |
|
702 /* -------------------------------------------------------------------- */ |
|
703 /* object api */ |
|
704 |
|
705 static Py_ssize_t |
|
706 string_getsize(register PyObject *op) |
|
707 { |
|
708 char *s; |
|
709 Py_ssize_t len; |
|
710 if (PyString_AsStringAndSize(op, &s, &len)) |
|
711 return -1; |
|
712 return len; |
|
713 } |
|
714 |
|
715 static /*const*/ char * |
|
716 string_getbuffer(register PyObject *op) |
|
717 { |
|
718 char *s; |
|
719 Py_ssize_t len; |
|
720 if (PyString_AsStringAndSize(op, &s, &len)) |
|
721 return NULL; |
|
722 return s; |
|
723 } |
|
724 |
|
725 Py_ssize_t |
|
726 PyString_Size(register PyObject *op) |
|
727 { |
|
728 if (!PyString_Check(op)) |
|
729 return string_getsize(op); |
|
730 return Py_SIZE(op); |
|
731 } |
|
732 |
|
733 /*const*/ char * |
|
734 PyString_AsString(register PyObject *op) |
|
735 { |
|
736 if (!PyString_Check(op)) |
|
737 return string_getbuffer(op); |
|
738 return ((PyStringObject *)op) -> ob_sval; |
|
739 } |
|
740 |
|
741 int |
|
742 PyString_AsStringAndSize(register PyObject *obj, |
|
743 register char **s, |
|
744 register Py_ssize_t *len) |
|
745 { |
|
746 if (s == NULL) { |
|
747 PyErr_BadInternalCall(); |
|
748 return -1; |
|
749 } |
|
750 |
|
751 if (!PyString_Check(obj)) { |
|
752 #ifdef Py_USING_UNICODE |
|
753 if (PyUnicode_Check(obj)) { |
|
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); |
|
755 if (obj == NULL) |
|
756 return -1; |
|
757 } |
|
758 else |
|
759 #endif |
|
760 { |
|
761 PyErr_Format(PyExc_TypeError, |
|
762 "expected string or Unicode object, " |
|
763 "%.200s found", Py_TYPE(obj)->tp_name); |
|
764 return -1; |
|
765 } |
|
766 } |
|
767 |
|
768 *s = PyString_AS_STRING(obj); |
|
769 if (len != NULL) |
|
770 *len = PyString_GET_SIZE(obj); |
|
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) { |
|
772 PyErr_SetString(PyExc_TypeError, |
|
773 "expected string without null bytes"); |
|
774 return -1; |
|
775 } |
|
776 return 0; |
|
777 } |
|
778 |
|
779 /* -------------------------------------------------------------------- */ |
|
780 /* Methods */ |
|
781 |
|
782 #include "stringlib/stringdefs.h" |
|
783 #include "stringlib/fastsearch.h" |
|
784 |
|
785 #include "stringlib/count.h" |
|
786 #include "stringlib/find.h" |
|
787 #include "stringlib/partition.h" |
|
788 |
|
789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping |
|
790 #include "stringlib/localeutil.h" |
|
791 |
|
792 |
|
793 |
|
794 static int |
|
795 string_print(PyStringObject *op, FILE *fp, int flags) |
|
796 { |
|
797 Py_ssize_t i, str_len; |
|
798 char c; |
|
799 int quote; |
|
800 |
|
801 /* XXX Ought to check for interrupts when writing long strings */ |
|
802 if (! PyString_CheckExact(op)) { |
|
803 int ret; |
|
804 /* A str subclass may have its own __str__ method. */ |
|
805 op = (PyStringObject *) PyObject_Str((PyObject *)op); |
|
806 if (op == NULL) |
|
807 return -1; |
|
808 ret = string_print(op, fp, flags); |
|
809 Py_DECREF(op); |
|
810 return ret; |
|
811 } |
|
812 if (flags & Py_PRINT_RAW) { |
|
813 char *data = op->ob_sval; |
|
814 Py_ssize_t size = Py_SIZE(op); |
|
815 Py_BEGIN_ALLOW_THREADS |
|
816 while (size > INT_MAX) { |
|
817 /* Very long strings cannot be written atomically. |
|
818 * But don't write exactly INT_MAX bytes at a time |
|
819 * to avoid memory aligment issues. |
|
820 */ |
|
821 const int chunk_size = INT_MAX & ~0x3FFF; |
|
822 fwrite(data, 1, chunk_size, fp); |
|
823 data += chunk_size; |
|
824 size -= chunk_size; |
|
825 } |
|
826 #ifdef __VMS |
|
827 if (size) fwrite(data, (int)size, 1, fp); |
|
828 #else |
|
829 fwrite(data, 1, (int)size, fp); |
|
830 #endif |
|
831 Py_END_ALLOW_THREADS |
|
832 return 0; |
|
833 } |
|
834 |
|
835 /* figure out which quote to use; single is preferred */ |
|
836 quote = '\''; |
|
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) && |
|
838 !memchr(op->ob_sval, '"', Py_SIZE(op))) |
|
839 quote = '"'; |
|
840 |
|
841 str_len = Py_SIZE(op); |
|
842 Py_BEGIN_ALLOW_THREADS |
|
843 fputc(quote, fp); |
|
844 for (i = 0; i < str_len; i++) { |
|
845 /* Since strings are immutable and the caller should have a |
|
846 reference, accessing the interal buffer should not be an issue |
|
847 with the GIL released. */ |
|
848 c = op->ob_sval[i]; |
|
849 if (c == quote || c == '\\') |
|
850 fprintf(fp, "\\%c", c); |
|
851 else if (c == '\t') |
|
852 fprintf(fp, "\\t"); |
|
853 else if (c == '\n') |
|
854 fprintf(fp, "\\n"); |
|
855 else if (c == '\r') |
|
856 fprintf(fp, "\\r"); |
|
857 else if (c < ' ' || c >= 0x7f) |
|
858 fprintf(fp, "\\x%02x", c & 0xff); |
|
859 else |
|
860 fputc(c, fp); |
|
861 } |
|
862 fputc(quote, fp); |
|
863 Py_END_ALLOW_THREADS |
|
864 return 0; |
|
865 } |
|
866 |
|
867 PyObject * |
|
868 PyString_Repr(PyObject *obj, int smartquotes) |
|
869 { |
|
870 register PyStringObject* op = (PyStringObject*) obj; |
|
871 size_t newsize = 2 + 4 * Py_SIZE(op); |
|
872 PyObject *v; |
|
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) { |
|
874 PyErr_SetString(PyExc_OverflowError, |
|
875 "string is too large to make repr"); |
|
876 return NULL; |
|
877 } |
|
878 v = PyString_FromStringAndSize((char *)NULL, newsize); |
|
879 if (v == NULL) { |
|
880 return NULL; |
|
881 } |
|
882 else { |
|
883 register Py_ssize_t i; |
|
884 register char c; |
|
885 register char *p; |
|
886 int quote; |
|
887 |
|
888 /* figure out which quote to use; single is preferred */ |
|
889 quote = '\''; |
|
890 if (smartquotes && |
|
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) && |
|
892 !memchr(op->ob_sval, '"', Py_SIZE(op))) |
|
893 quote = '"'; |
|
894 |
|
895 p = PyString_AS_STRING(v); |
|
896 *p++ = quote; |
|
897 for (i = 0; i < Py_SIZE(op); i++) { |
|
898 /* There's at least enough room for a hex escape |
|
899 and a closing quote. */ |
|
900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5); |
|
901 c = op->ob_sval[i]; |
|
902 if (c == quote || c == '\\') |
|
903 *p++ = '\\', *p++ = c; |
|
904 else if (c == '\t') |
|
905 *p++ = '\\', *p++ = 't'; |
|
906 else if (c == '\n') |
|
907 *p++ = '\\', *p++ = 'n'; |
|
908 else if (c == '\r') |
|
909 *p++ = '\\', *p++ = 'r'; |
|
910 else if (c < ' ' || c >= 0x7f) { |
|
911 /* For performance, we don't want to call |
|
912 PyOS_snprintf here (extra layers of |
|
913 function call). */ |
|
914 sprintf(p, "\\x%02x", c & 0xff); |
|
915 p += 4; |
|
916 } |
|
917 else |
|
918 *p++ = c; |
|
919 } |
|
920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1); |
|
921 *p++ = quote; |
|
922 *p = '\0'; |
|
923 _PyString_Resize( |
|
924 &v, (p - PyString_AS_STRING(v))); |
|
925 return v; |
|
926 } |
|
927 } |
|
928 |
|
929 static PyObject * |
|
930 string_repr(PyObject *op) |
|
931 { |
|
932 return PyString_Repr(op, 1); |
|
933 } |
|
934 |
|
935 static PyObject * |
|
936 string_str(PyObject *s) |
|
937 { |
|
938 assert(PyString_Check(s)); |
|
939 if (PyString_CheckExact(s)) { |
|
940 Py_INCREF(s); |
|
941 return s; |
|
942 } |
|
943 else { |
|
944 /* Subtype -- return genuine string with the same value. */ |
|
945 PyStringObject *t = (PyStringObject *) s; |
|
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t)); |
|
947 } |
|
948 } |
|
949 |
|
950 static Py_ssize_t |
|
951 string_length(PyStringObject *a) |
|
952 { |
|
953 return Py_SIZE(a); |
|
954 } |
|
955 |
|
956 static PyObject * |
|
957 string_concat(register PyStringObject *a, register PyObject *bb) |
|
958 { |
|
959 register Py_ssize_t size; |
|
960 register PyStringObject *op; |
|
961 if (!PyString_Check(bb)) { |
|
962 #ifdef Py_USING_UNICODE |
|
963 if (PyUnicode_Check(bb)) |
|
964 return PyUnicode_Concat((PyObject *)a, bb); |
|
965 #endif |
|
966 if (PyByteArray_Check(bb)) |
|
967 return PyByteArray_Concat((PyObject *)a, bb); |
|
968 PyErr_Format(PyExc_TypeError, |
|
969 "cannot concatenate 'str' and '%.200s' objects", |
|
970 Py_TYPE(bb)->tp_name); |
|
971 return NULL; |
|
972 } |
|
973 #define b ((PyStringObject *)bb) |
|
974 /* Optimize cases with empty left or right operand */ |
|
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) && |
|
976 PyString_CheckExact(a) && PyString_CheckExact(b)) { |
|
977 if (Py_SIZE(a) == 0) { |
|
978 Py_INCREF(bb); |
|
979 return bb; |
|
980 } |
|
981 Py_INCREF(a); |
|
982 return (PyObject *)a; |
|
983 } |
|
984 size = Py_SIZE(a) + Py_SIZE(b); |
|
985 /* Check that string sizes are not negative, to prevent an |
|
986 overflow in cases where we are passed incorrectly-created |
|
987 strings with negative lengths (due to a bug in other code). |
|
988 */ |
|
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || |
|
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { |
|
991 PyErr_SetString(PyExc_OverflowError, |
|
992 "strings are too large to concat"); |
|
993 return NULL; |
|
994 } |
|
995 |
|
996 /* Inline PyObject_NewVar */ |
|
997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { |
|
998 PyErr_SetString(PyExc_OverflowError, |
|
999 "strings are too large to concat"); |
|
1000 return NULL; |
|
1001 } |
|
1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); |
|
1003 if (op == NULL) |
|
1004 return PyErr_NoMemory(); |
|
1005 PyObject_INIT_VAR(op, &PyString_Type, size); |
|
1006 op->ob_shash = -1; |
|
1007 op->ob_sstate = SSTATE_NOT_INTERNED; |
|
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); |
|
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b)); |
|
1010 op->ob_sval[size] = '\0'; |
|
1011 return (PyObject *) op; |
|
1012 #undef b |
|
1013 } |
|
1014 |
|
1015 static PyObject * |
|
1016 string_repeat(register PyStringObject *a, register Py_ssize_t n) |
|
1017 { |
|
1018 register Py_ssize_t i; |
|
1019 register Py_ssize_t j; |
|
1020 register Py_ssize_t size; |
|
1021 register PyStringObject *op; |
|
1022 size_t nbytes; |
|
1023 if (n < 0) |
|
1024 n = 0; |
|
1025 /* watch out for overflows: the size can overflow int, |
|
1026 * and the # of bytes needed can overflow size_t |
|
1027 */ |
|
1028 size = Py_SIZE(a) * n; |
|
1029 if (n && size / n != Py_SIZE(a)) { |
|
1030 PyErr_SetString(PyExc_OverflowError, |
|
1031 "repeated string is too long"); |
|
1032 return NULL; |
|
1033 } |
|
1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) { |
|
1035 Py_INCREF(a); |
|
1036 return (PyObject *)a; |
|
1037 } |
|
1038 nbytes = (size_t)size; |
|
1039 if (nbytes + sizeof(PyStringObject) <= nbytes) { |
|
1040 PyErr_SetString(PyExc_OverflowError, |
|
1041 "repeated string is too long"); |
|
1042 return NULL; |
|
1043 } |
|
1044 op = (PyStringObject *) |
|
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes); |
|
1046 if (op == NULL) |
|
1047 return PyErr_NoMemory(); |
|
1048 PyObject_INIT_VAR(op, &PyString_Type, size); |
|
1049 op->ob_shash = -1; |
|
1050 op->ob_sstate = SSTATE_NOT_INTERNED; |
|
1051 op->ob_sval[size] = '\0'; |
|
1052 if (Py_SIZE(a) == 1 && n > 0) { |
|
1053 memset(op->ob_sval, a->ob_sval[0] , n); |
|
1054 return (PyObject *) op; |
|
1055 } |
|
1056 i = 0; |
|
1057 if (i < size) { |
|
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); |
|
1059 i = Py_SIZE(a); |
|
1060 } |
|
1061 while (i < size) { |
|
1062 j = (i <= size-i) ? i : size-i; |
|
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); |
|
1064 i += j; |
|
1065 } |
|
1066 return (PyObject *) op; |
|
1067 } |
|
1068 |
|
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */ |
|
1070 |
|
1071 static PyObject * |
|
1072 string_slice(register PyStringObject *a, register Py_ssize_t i, |
|
1073 register Py_ssize_t j) |
|
1074 /* j -- may be negative! */ |
|
1075 { |
|
1076 if (i < 0) |
|
1077 i = 0; |
|
1078 if (j < 0) |
|
1079 j = 0; /* Avoid signed/unsigned bug in next line */ |
|
1080 if (j > Py_SIZE(a)) |
|
1081 j = Py_SIZE(a); |
|
1082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) { |
|
1083 /* It's the same as a */ |
|
1084 Py_INCREF(a); |
|
1085 return (PyObject *)a; |
|
1086 } |
|
1087 if (j < i) |
|
1088 j = i; |
|
1089 return PyString_FromStringAndSize(a->ob_sval + i, j-i); |
|
1090 } |
|
1091 |
|
1092 static int |
|
1093 string_contains(PyObject *str_obj, PyObject *sub_obj) |
|
1094 { |
|
1095 if (!PyString_CheckExact(sub_obj)) { |
|
1096 #ifdef Py_USING_UNICODE |
|
1097 if (PyUnicode_Check(sub_obj)) |
|
1098 return PyUnicode_Contains(str_obj, sub_obj); |
|
1099 #endif |
|
1100 if (!PyString_Check(sub_obj)) { |
|
1101 PyErr_Format(PyExc_TypeError, |
|
1102 "'in <string>' requires string as left operand, " |
|
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name); |
|
1104 return -1; |
|
1105 } |
|
1106 } |
|
1107 |
|
1108 return stringlib_contains_obj(str_obj, sub_obj); |
|
1109 } |
|
1110 |
|
1111 static PyObject * |
|
1112 string_item(PyStringObject *a, register Py_ssize_t i) |
|
1113 { |
|
1114 char pchar; |
|
1115 PyObject *v; |
|
1116 if (i < 0 || i >= Py_SIZE(a)) { |
|
1117 PyErr_SetString(PyExc_IndexError, "string index out of range"); |
|
1118 return NULL; |
|
1119 } |
|
1120 pchar = a->ob_sval[i]; |
|
1121 v = (PyObject *)characters[pchar & UCHAR_MAX]; |
|
1122 if (v == NULL) |
|
1123 v = PyString_FromStringAndSize(&pchar, 1); |
|
1124 else { |
|
1125 #ifdef COUNT_ALLOCS |
|
1126 one_strings++; |
|
1127 #endif |
|
1128 Py_INCREF(v); |
|
1129 } |
|
1130 return v; |
|
1131 } |
|
1132 |
|
1133 static PyObject* |
|
1134 string_richcompare(PyStringObject *a, PyStringObject *b, int op) |
|
1135 { |
|
1136 int c; |
|
1137 Py_ssize_t len_a, len_b; |
|
1138 Py_ssize_t min_len; |
|
1139 PyObject *result; |
|
1140 |
|
1141 /* Make sure both arguments are strings. */ |
|
1142 if (!(PyString_Check(a) && PyString_Check(b))) { |
|
1143 result = Py_NotImplemented; |
|
1144 goto out; |
|
1145 } |
|
1146 if (a == b) { |
|
1147 switch (op) { |
|
1148 case Py_EQ:case Py_LE:case Py_GE: |
|
1149 result = Py_True; |
|
1150 goto out; |
|
1151 case Py_NE:case Py_LT:case Py_GT: |
|
1152 result = Py_False; |
|
1153 goto out; |
|
1154 } |
|
1155 } |
|
1156 if (op == Py_EQ) { |
|
1157 /* Supporting Py_NE here as well does not save |
|
1158 much time, since Py_NE is rarely used. */ |
|
1159 if (Py_SIZE(a) == Py_SIZE(b) |
|
1160 && (a->ob_sval[0] == b->ob_sval[0] |
|
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { |
|
1162 result = Py_True; |
|
1163 } else { |
|
1164 result = Py_False; |
|
1165 } |
|
1166 goto out; |
|
1167 } |
|
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b); |
|
1169 min_len = (len_a < len_b) ? len_a : len_b; |
|
1170 if (min_len > 0) { |
|
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); |
|
1172 if (c==0) |
|
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len); |
|
1174 } else |
|
1175 c = 0; |
|
1176 if (c == 0) |
|
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; |
|
1178 switch (op) { |
|
1179 case Py_LT: c = c < 0; break; |
|
1180 case Py_LE: c = c <= 0; break; |
|
1181 case Py_EQ: assert(0); break; /* unreachable */ |
|
1182 case Py_NE: c = c != 0; break; |
|
1183 case Py_GT: c = c > 0; break; |
|
1184 case Py_GE: c = c >= 0; break; |
|
1185 default: |
|
1186 result = Py_NotImplemented; |
|
1187 goto out; |
|
1188 } |
|
1189 result = c ? Py_True : Py_False; |
|
1190 out: |
|
1191 Py_INCREF(result); |
|
1192 return result; |
|
1193 } |
|
1194 |
|
1195 int |
|
1196 _PyString_Eq(PyObject *o1, PyObject *o2) |
|
1197 { |
|
1198 PyStringObject *a = (PyStringObject*) o1; |
|
1199 PyStringObject *b = (PyStringObject*) o2; |
|
1200 return Py_SIZE(a) == Py_SIZE(b) |
|
1201 && *a->ob_sval == *b->ob_sval |
|
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; |
|
1203 } |
|
1204 |
|
1205 static long |
|
1206 string_hash(PyStringObject *a) |
|
1207 { |
|
1208 register Py_ssize_t len; |
|
1209 register unsigned char *p; |
|
1210 register long x; |
|
1211 |
|
1212 if (a->ob_shash != -1) |
|
1213 return a->ob_shash; |
|
1214 len = Py_SIZE(a); |
|
1215 p = (unsigned char *) a->ob_sval; |
|
1216 x = *p << 7; |
|
1217 while (--len >= 0) |
|
1218 x = (1000003*x) ^ *p++; |
|
1219 x ^= Py_SIZE(a); |
|
1220 if (x == -1) |
|
1221 x = -2; |
|
1222 a->ob_shash = x; |
|
1223 return x; |
|
1224 } |
|
1225 |
|
1226 static PyObject* |
|
1227 string_subscript(PyStringObject* self, PyObject* item) |
|
1228 { |
|
1229 if (PyIndex_Check(item)) { |
|
1230 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); |
|
1231 if (i == -1 && PyErr_Occurred()) |
|
1232 return NULL; |
|
1233 if (i < 0) |
|
1234 i += PyString_GET_SIZE(self); |
|
1235 return string_item(self, i); |
|
1236 } |
|
1237 else if (PySlice_Check(item)) { |
|
1238 Py_ssize_t start, stop, step, slicelength, cur, i; |
|
1239 char* source_buf; |
|
1240 char* result_buf; |
|
1241 PyObject* result; |
|
1242 |
|
1243 if (PySlice_GetIndicesEx((PySliceObject*)item, |
|
1244 PyString_GET_SIZE(self), |
|
1245 &start, &stop, &step, &slicelength) < 0) { |
|
1246 return NULL; |
|
1247 } |
|
1248 |
|
1249 if (slicelength <= 0) { |
|
1250 return PyString_FromStringAndSize("", 0); |
|
1251 } |
|
1252 else if (start == 0 && step == 1 && |
|
1253 slicelength == PyString_GET_SIZE(self) && |
|
1254 PyString_CheckExact(self)) { |
|
1255 Py_INCREF(self); |
|
1256 return (PyObject *)self; |
|
1257 } |
|
1258 else if (step == 1) { |
|
1259 return PyString_FromStringAndSize( |
|
1260 PyString_AS_STRING(self) + start, |
|
1261 slicelength); |
|
1262 } |
|
1263 else { |
|
1264 source_buf = PyString_AsString((PyObject*)self); |
|
1265 result_buf = (char *)PyMem_Malloc(slicelength); |
|
1266 if (result_buf == NULL) |
|
1267 return PyErr_NoMemory(); |
|
1268 |
|
1269 for (cur = start, i = 0; i < slicelength; |
|
1270 cur += step, i++) { |
|
1271 result_buf[i] = source_buf[cur]; |
|
1272 } |
|
1273 |
|
1274 result = PyString_FromStringAndSize(result_buf, |
|
1275 slicelength); |
|
1276 PyMem_Free(result_buf); |
|
1277 return result; |
|
1278 } |
|
1279 } |
|
1280 else { |
|
1281 PyErr_Format(PyExc_TypeError, |
|
1282 "string indices must be integers, not %.200s", |
|
1283 Py_TYPE(item)->tp_name); |
|
1284 return NULL; |
|
1285 } |
|
1286 } |
|
1287 |
|
1288 static Py_ssize_t |
|
1289 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr) |
|
1290 { |
|
1291 if ( index != 0 ) { |
|
1292 PyErr_SetString(PyExc_SystemError, |
|
1293 "accessing non-existent string segment"); |
|
1294 return -1; |
|
1295 } |
|
1296 *ptr = (void *)self->ob_sval; |
|
1297 return Py_SIZE(self); |
|
1298 } |
|
1299 |
|
1300 static Py_ssize_t |
|
1301 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr) |
|
1302 { |
|
1303 PyErr_SetString(PyExc_TypeError, |
|
1304 "Cannot use string as modifiable buffer"); |
|
1305 return -1; |
|
1306 } |
|
1307 |
|
1308 static Py_ssize_t |
|
1309 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) |
|
1310 { |
|
1311 if ( lenp ) |
|
1312 *lenp = Py_SIZE(self); |
|
1313 return 1; |
|
1314 } |
|
1315 |
|
1316 static Py_ssize_t |
|
1317 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) |
|
1318 { |
|
1319 if ( index != 0 ) { |
|
1320 PyErr_SetString(PyExc_SystemError, |
|
1321 "accessing non-existent string segment"); |
|
1322 return -1; |
|
1323 } |
|
1324 *ptr = self->ob_sval; |
|
1325 return Py_SIZE(self); |
|
1326 } |
|
1327 |
|
1328 static int |
|
1329 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags) |
|
1330 { |
|
1331 return PyBuffer_FillInfo(view, (PyObject*)self, |
|
1332 (void *)self->ob_sval, Py_SIZE(self), |
|
1333 1, flags); |
|
1334 } |
|
1335 |
|
/* Sequence-protocol slot table for the string type.
   The two assignment slots are 0: no item or slice assignment handler
   is installed for strings. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
|
1346 |
|
/* Mapping-protocol slot table for the string type: length and
   subscripting (indices and slices, see string_subscript).  The third
   slot is 0 -- presumably the subscript-assignment slot; confirm against
   the PyMappingMethods declaration. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
|
1352 |
|
/* Buffer-protocol slot table for the string type.  The write-buffer
   handler is installed only to raise TypeError (see
   string_buffer_getwritebuf). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    /* NOTE(review): trailing slot left empty; presumably the
       release-buffer slot -- confirm against PyBufferProcs. */
    0, /* XXX */
};
|
1361 |
|
1362 |
|
1363 |
|
/* Strip modes.  The values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* PyArg_ParseTuple format strings for lstrip/rstrip/strip. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for mode i: the format string minus its leading "|O:". */
#define STRIPNAME(i) (stripformat[i]+3)


/* Don't call if length < 2 */
/* True when the `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared before memcmp() is
   asked to compare the interior. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    (target[offset] == pattern[0] && \
     target[offset+length-1] == pattern[length-1] && \
     !memcmp(target+offset+1, pattern+1, length-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC. */
#define PREALLOC_SIZE(maxsplit) \
    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)

/* Build the substring data[left:right] and append it to `list`.
   Relies on the caller's locals `str` and `list` and on an `onError`
   label.  Expands to several statements (not wrapped in a block), so it
   must not be used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);

/* Build the substring data[left:right] and store it as element `count`
   of `list`, then increment `count`.  While count < MAX_PREALLOC the
   item is written straight into the preallocated slot (no DECREF on that
   path -- PyList_SET_ITEM is documented to take over the reference);
   afterwards it is appended.  Relies on the caller's locals `str`,
   `list`, `count` and on an `onError` label. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }

/* Always force the list to the expected size. */
/* Overwrites the list's size field with the caller's `count`. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R-variants, retreat) index `i` over a run of
   whitespace / non-whitespace bytes in `s`.  The forward forms stop at
   `len`; the reverse forms stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
|
1430 |
|
1431 Py_LOCAL_INLINE(PyObject *) |
|
1432 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit) |
|
1433 { |
|
1434 const char *s = PyString_AS_STRING(self); |
|
1435 Py_ssize_t i, j, count=0; |
|
1436 PyObject *str; |
|
1437 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
1438 |
|
1439 if (list == NULL) |
|
1440 return NULL; |
|
1441 |
|
1442 i = j = 0; |
|
1443 |
|
1444 while (maxsplit-- > 0) { |
|
1445 SKIP_SPACE(s, i, len); |
|
1446 if (i==len) break; |
|
1447 j = i; i++; |
|
1448 SKIP_NONSPACE(s, i, len); |
|
1449 if (j == 0 && i == len && PyString_CheckExact(self)) { |
|
1450 /* No whitespace in self, so just use it as list[0] */ |
|
1451 Py_INCREF(self); |
|
1452 PyList_SET_ITEM(list, 0, (PyObject *)self); |
|
1453 count++; |
|
1454 break; |
|
1455 } |
|
1456 SPLIT_ADD(s, j, i); |
|
1457 } |
|
1458 |
|
1459 if (i < len) { |
|
1460 /* Only occurs when maxsplit was reached */ |
|
1461 /* Skip any remaining whitespace and copy to end of string */ |
|
1462 SKIP_SPACE(s, i, len); |
|
1463 if (i != len) |
|
1464 SPLIT_ADD(s, i, len); |
|
1465 } |
|
1466 FIX_PREALLOC_SIZE(list); |
|
1467 return list; |
|
1468 onError: |
|
1469 Py_DECREF(list); |
|
1470 return NULL; |
|
1471 } |
|
1472 |
|
1473 Py_LOCAL_INLINE(PyObject *) |
|
1474 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) |
|
1475 { |
|
1476 const char *s = PyString_AS_STRING(self); |
|
1477 register Py_ssize_t i, j, count=0; |
|
1478 PyObject *str; |
|
1479 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
1480 |
|
1481 if (list == NULL) |
|
1482 return NULL; |
|
1483 |
|
1484 i = j = 0; |
|
1485 while ((j < len) && (maxcount-- > 0)) { |
|
1486 for(; j<len; j++) { |
|
1487 /* I found that using memchr makes no difference */ |
|
1488 if (s[j] == ch) { |
|
1489 SPLIT_ADD(s, i, j); |
|
1490 i = j = j + 1; |
|
1491 break; |
|
1492 } |
|
1493 } |
|
1494 } |
|
1495 if (i == 0 && count == 0 && PyString_CheckExact(self)) { |
|
1496 /* ch not in self, so just use self as list[0] */ |
|
1497 Py_INCREF(self); |
|
1498 PyList_SET_ITEM(list, 0, (PyObject *)self); |
|
1499 count++; |
|
1500 } |
|
1501 else if (i <= len) { |
|
1502 SPLIT_ADD(s, i, len); |
|
1503 } |
|
1504 FIX_PREALLOC_SIZE(list); |
|
1505 return list; |
|
1506 |
|
1507 onError: |
|
1508 Py_DECREF(list); |
|
1509 return NULL; |
|
1510 } |
|
1511 |
|
1512 PyDoc_STRVAR(split__doc__, |
|
1513 "S.split([sep [,maxsplit]]) -> list of strings\n\ |
|
1514 \n\ |
|
1515 Return a list of the words in the string S, using sep as the\n\ |
|
1516 delimiter string. If maxsplit is given, at most maxsplit\n\ |
|
1517 splits are done. If sep is not specified or is None, any\n\ |
|
1518 whitespace string is a separator and empty strings are removed\n\ |
|
1519 from the result."); |
|
1520 |
|
1521 static PyObject * |
|
1522 string_split(PyStringObject *self, PyObject *args) |
|
1523 { |
|
1524 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; |
|
1525 Py_ssize_t maxsplit = -1, count=0; |
|
1526 const char *s = PyString_AS_STRING(self), *sub; |
|
1527 PyObject *list, *str, *subobj = Py_None; |
|
1528 #ifdef USE_FAST |
|
1529 Py_ssize_t pos; |
|
1530 #endif |
|
1531 |
|
1532 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) |
|
1533 return NULL; |
|
1534 if (maxsplit < 0) |
|
1535 maxsplit = PY_SSIZE_T_MAX; |
|
1536 if (subobj == Py_None) |
|
1537 return split_whitespace(self, len, maxsplit); |
|
1538 if (PyString_Check(subobj)) { |
|
1539 sub = PyString_AS_STRING(subobj); |
|
1540 n = PyString_GET_SIZE(subobj); |
|
1541 } |
|
1542 #ifdef Py_USING_UNICODE |
|
1543 else if (PyUnicode_Check(subobj)) |
|
1544 return PyUnicode_Split((PyObject *)self, subobj, maxsplit); |
|
1545 #endif |
|
1546 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) |
|
1547 return NULL; |
|
1548 |
|
1549 if (n == 0) { |
|
1550 PyErr_SetString(PyExc_ValueError, "empty separator"); |
|
1551 return NULL; |
|
1552 } |
|
1553 else if (n == 1) |
|
1554 return split_char(self, len, sub[0], maxsplit); |
|
1555 |
|
1556 list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
1557 if (list == NULL) |
|
1558 return NULL; |
|
1559 |
|
1560 #ifdef USE_FAST |
|
1561 i = j = 0; |
|
1562 while (maxsplit-- > 0) { |
|
1563 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); |
|
1564 if (pos < 0) |
|
1565 break; |
|
1566 j = i+pos; |
|
1567 SPLIT_ADD(s, i, j); |
|
1568 i = j + n; |
|
1569 } |
|
1570 #else |
|
1571 i = j = 0; |
|
1572 while ((j+n <= len) && (maxsplit-- > 0)) { |
|
1573 for (; j+n <= len; j++) { |
|
1574 if (Py_STRING_MATCH(s, j, sub, n)) { |
|
1575 SPLIT_ADD(s, i, j); |
|
1576 i = j = j + n; |
|
1577 break; |
|
1578 } |
|
1579 } |
|
1580 } |
|
1581 #endif |
|
1582 SPLIT_ADD(s, i, len); |
|
1583 FIX_PREALLOC_SIZE(list); |
|
1584 return list; |
|
1585 |
|
1586 onError: |
|
1587 Py_DECREF(list); |
|
1588 return NULL; |
|
1589 } |
|
1590 |
|
1591 PyDoc_STRVAR(partition__doc__, |
|
1592 "S.partition(sep) -> (head, sep, tail)\n\ |
|
1593 \n\ |
|
1594 Search for the separator sep in S, and return the part before it,\n\ |
|
1595 the separator itself, and the part after it. If the separator is not\n\ |
|
1596 found, return S and two empty strings."); |
|
1597 |
|
1598 static PyObject * |
|
1599 string_partition(PyStringObject *self, PyObject *sep_obj) |
|
1600 { |
|
1601 const char *sep; |
|
1602 Py_ssize_t sep_len; |
|
1603 |
|
1604 if (PyString_Check(sep_obj)) { |
|
1605 sep = PyString_AS_STRING(sep_obj); |
|
1606 sep_len = PyString_GET_SIZE(sep_obj); |
|
1607 } |
|
1608 #ifdef Py_USING_UNICODE |
|
1609 else if (PyUnicode_Check(sep_obj)) |
|
1610 return PyUnicode_Partition((PyObject *) self, sep_obj); |
|
1611 #endif |
|
1612 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) |
|
1613 return NULL; |
|
1614 |
|
1615 return stringlib_partition( |
|
1616 (PyObject*) self, |
|
1617 PyString_AS_STRING(self), PyString_GET_SIZE(self), |
|
1618 sep_obj, sep, sep_len |
|
1619 ); |
|
1620 } |
|
1621 |
|
1622 PyDoc_STRVAR(rpartition__doc__, |
|
1623 "S.rpartition(sep) -> (tail, sep, head)\n\ |
|
1624 \n\ |
|
1625 Search for the separator sep in S, starting at the end of S, and return\n\ |
|
1626 the part before it, the separator itself, and the part after it. If the\n\ |
|
1627 separator is not found, return two empty strings and S."); |
|
1628 |
|
1629 static PyObject * |
|
1630 string_rpartition(PyStringObject *self, PyObject *sep_obj) |
|
1631 { |
|
1632 const char *sep; |
|
1633 Py_ssize_t sep_len; |
|
1634 |
|
1635 if (PyString_Check(sep_obj)) { |
|
1636 sep = PyString_AS_STRING(sep_obj); |
|
1637 sep_len = PyString_GET_SIZE(sep_obj); |
|
1638 } |
|
1639 #ifdef Py_USING_UNICODE |
|
1640 else if (PyUnicode_Check(sep_obj)) |
|
1641 return PyUnicode_RPartition((PyObject *) self, sep_obj); |
|
1642 #endif |
|
1643 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) |
|
1644 return NULL; |
|
1645 |
|
1646 return stringlib_rpartition( |
|
1647 (PyObject*) self, |
|
1648 PyString_AS_STRING(self), PyString_GET_SIZE(self), |
|
1649 sep_obj, sep, sep_len |
|
1650 ); |
|
1651 } |
|
1652 |
|
1653 Py_LOCAL_INLINE(PyObject *) |
|
1654 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit) |
|
1655 { |
|
1656 const char *s = PyString_AS_STRING(self); |
|
1657 Py_ssize_t i, j, count=0; |
|
1658 PyObject *str; |
|
1659 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
1660 |
|
1661 if (list == NULL) |
|
1662 return NULL; |
|
1663 |
|
1664 i = j = len-1; |
|
1665 |
|
1666 while (maxsplit-- > 0) { |
|
1667 RSKIP_SPACE(s, i); |
|
1668 if (i<0) break; |
|
1669 j = i; i--; |
|
1670 RSKIP_NONSPACE(s, i); |
|
1671 if (j == len-1 && i < 0 && PyString_CheckExact(self)) { |
|
1672 /* No whitespace in self, so just use it as list[0] */ |
|
1673 Py_INCREF(self); |
|
1674 PyList_SET_ITEM(list, 0, (PyObject *)self); |
|
1675 count++; |
|
1676 break; |
|
1677 } |
|
1678 SPLIT_ADD(s, i + 1, j + 1); |
|
1679 } |
|
1680 if (i >= 0) { |
|
1681 /* Only occurs when maxsplit was reached */ |
|
1682 /* Skip any remaining whitespace and copy to beginning of string */ |
|
1683 RSKIP_SPACE(s, i); |
|
1684 if (i >= 0) |
|
1685 SPLIT_ADD(s, 0, i + 1); |
|
1686 |
|
1687 } |
|
1688 FIX_PREALLOC_SIZE(list); |
|
1689 if (PyList_Reverse(list) < 0) |
|
1690 goto onError; |
|
1691 return list; |
|
1692 onError: |
|
1693 Py_DECREF(list); |
|
1694 return NULL; |
|
1695 } |
|
1696 |
|
1697 Py_LOCAL_INLINE(PyObject *) |
|
1698 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) |
|
1699 { |
|
1700 const char *s = PyString_AS_STRING(self); |
|
1701 register Py_ssize_t i, j, count=0; |
|
1702 PyObject *str; |
|
1703 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
1704 |
|
1705 if (list == NULL) |
|
1706 return NULL; |
|
1707 |
|
1708 i = j = len - 1; |
|
1709 while ((i >= 0) && (maxcount-- > 0)) { |
|
1710 for (; i >= 0; i--) { |
|
1711 if (s[i] == ch) { |
|
1712 SPLIT_ADD(s, i + 1, j + 1); |
|
1713 j = i = i - 1; |
|
1714 break; |
|
1715 } |
|
1716 } |
|
1717 } |
|
1718 if (i < 0 && count == 0 && PyString_CheckExact(self)) { |
|
1719 /* ch not in self, so just use self as list[0] */ |
|
1720 Py_INCREF(self); |
|
1721 PyList_SET_ITEM(list, 0, (PyObject *)self); |
|
1722 count++; |
|
1723 } |
|
1724 else if (j >= -1) { |
|
1725 SPLIT_ADD(s, 0, j + 1); |
|
1726 } |
|
1727 FIX_PREALLOC_SIZE(list); |
|
1728 if (PyList_Reverse(list) < 0) |
|
1729 goto onError; |
|
1730 return list; |
|
1731 |
|
1732 onError: |
|
1733 Py_DECREF(list); |
|
1734 return NULL; |
|
1735 } |
|
1736 |
|
1737 PyDoc_STRVAR(rsplit__doc__, |
|
1738 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ |
|
1739 \n\ |
|
1740 Return a list of the words in the string S, using sep as the\n\ |
|
1741 delimiter string, starting at the end of the string and working\n\ |
|
1742 to the front. If maxsplit is given, at most maxsplit splits are\n\ |
|
1743 done. If sep is not specified or is None, any whitespace string\n\ |
|
1744 is a separator."); |
|
1745 |
|
1746 static PyObject * |
|
1747 string_rsplit(PyStringObject *self, PyObject *args) |
|
1748 { |
|
1749 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; |
|
1750 Py_ssize_t maxsplit = -1, count=0; |
|
1751 const char *s, *sub; |
|
1752 PyObject *list, *str, *subobj = Py_None; |
|
1753 |
|
1754 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) |
|
1755 return NULL; |
|
1756 if (maxsplit < 0) |
|
1757 maxsplit = PY_SSIZE_T_MAX; |
|
1758 if (subobj == Py_None) |
|
1759 return rsplit_whitespace(self, len, maxsplit); |
|
1760 if (PyString_Check(subobj)) { |
|
1761 sub = PyString_AS_STRING(subobj); |
|
1762 n = PyString_GET_SIZE(subobj); |
|
1763 } |
|
1764 #ifdef Py_USING_UNICODE |
|
1765 else if (PyUnicode_Check(subobj)) |
|
1766 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); |
|
1767 #endif |
|
1768 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) |
|
1769 return NULL; |
|
1770 |
|
1771 if (n == 0) { |
|
1772 PyErr_SetString(PyExc_ValueError, "empty separator"); |
|
1773 return NULL; |
|
1774 } |
|
1775 else if (n == 1) |
|
1776 return rsplit_char(self, len, sub[0], maxsplit); |
|
1777 |
|
1778 list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
1779 if (list == NULL) |
|
1780 return NULL; |
|
1781 |
|
1782 j = len; |
|
1783 i = j - n; |
|
1784 |
|
1785 s = PyString_AS_STRING(self); |
|
1786 while ( (i >= 0) && (maxsplit-- > 0) ) { |
|
1787 for (; i>=0; i--) { |
|
1788 if (Py_STRING_MATCH(s, i, sub, n)) { |
|
1789 SPLIT_ADD(s, i + n, j); |
|
1790 j = i; |
|
1791 i -= n; |
|
1792 break; |
|
1793 } |
|
1794 } |
|
1795 } |
|
1796 SPLIT_ADD(s, 0, j); |
|
1797 FIX_PREALLOC_SIZE(list); |
|
1798 if (PyList_Reverse(list) < 0) |
|
1799 goto onError; |
|
1800 return list; |
|
1801 |
|
1802 onError: |
|
1803 Py_DECREF(list); |
|
1804 return NULL; |
|
1805 } |
|
1806 |
|
1807 |
|
1808 PyDoc_STRVAR(join__doc__, |
|
1809 "S.join(sequence) -> string\n\ |
|
1810 \n\ |
|
1811 Return a string which is the concatenation of the strings in the\n\ |
|
1812 sequence. The separator between elements is S."); |
|
1813 |
|
1814 static PyObject * |
|
1815 string_join(PyStringObject *self, PyObject *orig) |
|
1816 { |
|
1817 char *sep = PyString_AS_STRING(self); |
|
1818 const Py_ssize_t seplen = PyString_GET_SIZE(self); |
|
1819 PyObject *res = NULL; |
|
1820 char *p; |
|
1821 Py_ssize_t seqlen = 0; |
|
1822 size_t sz = 0; |
|
1823 Py_ssize_t i; |
|
1824 PyObject *seq, *item; |
|
1825 |
|
1826 seq = PySequence_Fast(orig, ""); |
|
1827 if (seq == NULL) { |
|
1828 return NULL; |
|
1829 } |
|
1830 |
|
1831 seqlen = PySequence_Size(seq); |
|
1832 if (seqlen == 0) { |
|
1833 Py_DECREF(seq); |
|
1834 return PyString_FromString(""); |
|
1835 } |
|
1836 if (seqlen == 1) { |
|
1837 item = PySequence_Fast_GET_ITEM(seq, 0); |
|
1838 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { |
|
1839 Py_INCREF(item); |
|
1840 Py_DECREF(seq); |
|
1841 return item; |
|
1842 } |
|
1843 } |
|
1844 |
|
1845 /* There are at least two things to join, or else we have a subclass |
|
1846 * of the builtin types in the sequence. |
|
1847 * Do a pre-pass to figure out the total amount of space we'll |
|
1848 * need (sz), see whether any argument is absurd, and defer to |
|
1849 * the Unicode join if appropriate. |
|
1850 */ |
|
1851 for (i = 0; i < seqlen; i++) { |
|
1852 const size_t old_sz = sz; |
|
1853 item = PySequence_Fast_GET_ITEM(seq, i); |
|
1854 if (!PyString_Check(item)){ |
|
1855 #ifdef Py_USING_UNICODE |
|
1856 if (PyUnicode_Check(item)) { |
|
1857 /* Defer to Unicode join. |
|
1858 * CAUTION: There's no gurantee that the |
|
1859 * original sequence can be iterated over |
|
1860 * again, so we must pass seq here. |
|
1861 */ |
|
1862 PyObject *result; |
|
1863 result = PyUnicode_Join((PyObject *)self, seq); |
|
1864 Py_DECREF(seq); |
|
1865 return result; |
|
1866 } |
|
1867 #endif |
|
1868 PyErr_Format(PyExc_TypeError, |
|
1869 "sequence item %zd: expected string," |
|
1870 " %.80s found", |
|
1871 i, Py_TYPE(item)->tp_name); |
|
1872 Py_DECREF(seq); |
|
1873 return NULL; |
|
1874 } |
|
1875 sz += PyString_GET_SIZE(item); |
|
1876 if (i != 0) |
|
1877 sz += seplen; |
|
1878 if (sz < old_sz || sz > PY_SSIZE_T_MAX) { |
|
1879 PyErr_SetString(PyExc_OverflowError, |
|
1880 "join() result is too long for a Python string"); |
|
1881 Py_DECREF(seq); |
|
1882 return NULL; |
|
1883 } |
|
1884 } |
|
1885 |
|
1886 /* Allocate result space. */ |
|
1887 res = PyString_FromStringAndSize((char*)NULL, sz); |
|
1888 if (res == NULL) { |
|
1889 Py_DECREF(seq); |
|
1890 return NULL; |
|
1891 } |
|
1892 |
|
1893 /* Catenate everything. */ |
|
1894 p = PyString_AS_STRING(res); |
|
1895 for (i = 0; i < seqlen; ++i) { |
|
1896 size_t n; |
|
1897 item = PySequence_Fast_GET_ITEM(seq, i); |
|
1898 n = PyString_GET_SIZE(item); |
|
1899 Py_MEMCPY(p, PyString_AS_STRING(item), n); |
|
1900 p += n; |
|
1901 if (i < seqlen - 1) { |
|
1902 Py_MEMCPY(p, sep, seplen); |
|
1903 p += seplen; |
|
1904 } |
|
1905 } |
|
1906 |
|
1907 Py_DECREF(seq); |
|
1908 return res; |
|
1909 } |
|
1910 |
|
1911 PyObject * |
|
1912 _PyString_Join(PyObject *sep, PyObject *x) |
|
1913 { |
|
1914 assert(sep != NULL && PyString_Check(sep)); |
|
1915 assert(x != NULL); |
|
1916 return string_join((PyStringObject *)sep, x); |
|
1917 } |
|
1918 |
|
1919 Py_LOCAL_INLINE(void) |
|
1920 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) |
|
1921 { |
|
1922 if (*end > len) |
|
1923 *end = len; |
|
1924 else if (*end < 0) |
|
1925 *end += len; |
|
1926 if (*end < 0) |
|
1927 *end = 0; |
|
1928 if (*start < 0) |
|
1929 *start += len; |
|
1930 if (*start < 0) |
|
1931 *start = 0; |
|
1932 } |
|
1933 |
|
1934 Py_LOCAL_INLINE(Py_ssize_t) |
|
1935 string_find_internal(PyStringObject *self, PyObject *args, int dir) |
|
1936 { |
|
1937 PyObject *subobj; |
|
1938 const char *sub; |
|
1939 Py_ssize_t sub_len; |
|
1940 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; |
|
1941 PyObject *obj_start=Py_None, *obj_end=Py_None; |
|
1942 |
|
1943 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj, |
|
1944 &obj_start, &obj_end)) |
|
1945 return -2; |
|
1946 /* To support None in "start" and "end" arguments, meaning |
|
1947 the same as if they were not passed. |
|
1948 */ |
|
1949 if (obj_start != Py_None) |
|
1950 if (!_PyEval_SliceIndex(obj_start, &start)) |
|
1951 return -2; |
|
1952 if (obj_end != Py_None) |
|
1953 if (!_PyEval_SliceIndex(obj_end, &end)) |
|
1954 return -2; |
|
1955 |
|
1956 if (PyString_Check(subobj)) { |
|
1957 sub = PyString_AS_STRING(subobj); |
|
1958 sub_len = PyString_GET_SIZE(subobj); |
|
1959 } |
|
1960 #ifdef Py_USING_UNICODE |
|
1961 else if (PyUnicode_Check(subobj)) |
|
1962 return PyUnicode_Find( |
|
1963 (PyObject *)self, subobj, start, end, dir); |
|
1964 #endif |
|
1965 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) |
|
1966 /* XXX - the "expected a character buffer object" is pretty |
|
1967 confusing for a non-expert. remap to something else ? */ |
|
1968 return -2; |
|
1969 |
|
1970 if (dir > 0) |
|
1971 return stringlib_find_slice( |
|
1972 PyString_AS_STRING(self), PyString_GET_SIZE(self), |
|
1973 sub, sub_len, start, end); |
|
1974 else |
|
1975 return stringlib_rfind_slice( |
|
1976 PyString_AS_STRING(self), PyString_GET_SIZE(self), |
|
1977 sub, sub_len, start, end); |
|
1978 } |
|
1979 |
|
1980 |
|
1981 PyDoc_STRVAR(find__doc__, |
|
1982 "S.find(sub [,start [,end]]) -> int\n\ |
|
1983 \n\ |
|
1984 Return the lowest index in S where substring sub is found,\n\ |
|
1985 such that sub is contained within s[start:end]. Optional\n\ |
|
1986 arguments start and end are interpreted as in slice notation.\n\ |
|
1987 \n\ |
|
1988 Return -1 on failure."); |
|
1989 |
|
1990 static PyObject * |
|
1991 string_find(PyStringObject *self, PyObject *args) |
|
1992 { |
|
1993 Py_ssize_t result = string_find_internal(self, args, +1); |
|
1994 if (result == -2) |
|
1995 return NULL; |
|
1996 return PyInt_FromSsize_t(result); |
|
1997 } |
|
1998 |
|
1999 |
|
2000 PyDoc_STRVAR(index__doc__, |
|
2001 "S.index(sub [,start [,end]]) -> int\n\ |
|
2002 \n\ |
|
2003 Like S.find() but raise ValueError when the substring is not found."); |
|
2004 |
|
2005 static PyObject * |
|
2006 string_index(PyStringObject *self, PyObject *args) |
|
2007 { |
|
2008 Py_ssize_t result = string_find_internal(self, args, +1); |
|
2009 if (result == -2) |
|
2010 return NULL; |
|
2011 if (result == -1) { |
|
2012 PyErr_SetString(PyExc_ValueError, |
|
2013 "substring not found"); |
|
2014 return NULL; |
|
2015 } |
|
2016 return PyInt_FromSsize_t(result); |
|
2017 } |
|
2018 |
|
2019 |
|
2020 PyDoc_STRVAR(rfind__doc__, |
|
2021 "S.rfind(sub [,start [,end]]) -> int\n\ |
|
2022 \n\ |
|
2023 Return the highest index in S where substring sub is found,\n\ |
|
2024 such that sub is contained within s[start:end]. Optional\n\ |
|
2025 arguments start and end are interpreted as in slice notation.\n\ |
|
2026 \n\ |
|
2027 Return -1 on failure."); |
|
2028 |
|
2029 static PyObject * |
|
2030 string_rfind(PyStringObject *self, PyObject *args) |
|
2031 { |
|
2032 Py_ssize_t result = string_find_internal(self, args, -1); |
|
2033 if (result == -2) |
|
2034 return NULL; |
|
2035 return PyInt_FromSsize_t(result); |
|
2036 } |
|
2037 |
|
2038 |
|
2039 PyDoc_STRVAR(rindex__doc__, |
|
2040 "S.rindex(sub [,start [,end]]) -> int\n\ |
|
2041 \n\ |
|
2042 Like S.rfind() but raise ValueError when the substring is not found."); |
|
2043 |
|
2044 static PyObject * |
|
2045 string_rindex(PyStringObject *self, PyObject *args) |
|
2046 { |
|
2047 Py_ssize_t result = string_find_internal(self, args, -1); |
|
2048 if (result == -2) |
|
2049 return NULL; |
|
2050 if (result == -1) { |
|
2051 PyErr_SetString(PyExc_ValueError, |
|
2052 "substring not found"); |
|
2053 return NULL; |
|
2054 } |
|
2055 return PyInt_FromSsize_t(result); |
|
2056 } |
|
2057 |
|
2058 |
|
2059 Py_LOCAL_INLINE(PyObject *) |
|
2060 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) |
|
2061 { |
|
2062 char *s = PyString_AS_STRING(self); |
|
2063 Py_ssize_t len = PyString_GET_SIZE(self); |
|
2064 char *sep = PyString_AS_STRING(sepobj); |
|
2065 Py_ssize_t seplen = PyString_GET_SIZE(sepobj); |
|
2066 Py_ssize_t i, j; |
|
2067 |
|
2068 i = 0; |
|
2069 if (striptype != RIGHTSTRIP) { |
|
2070 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { |
|
2071 i++; |
|
2072 } |
|
2073 } |
|
2074 |
|
2075 j = len; |
|
2076 if (striptype != LEFTSTRIP) { |
|
2077 do { |
|
2078 j--; |
|
2079 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); |
|
2080 j++; |
|
2081 } |
|
2082 |
|
2083 if (i == 0 && j == len && PyString_CheckExact(self)) { |
|
2084 Py_INCREF(self); |
|
2085 return (PyObject*)self; |
|
2086 } |
|
2087 else |
|
2088 return PyString_FromStringAndSize(s+i, j-i); |
|
2089 } |
|
2090 |
|
2091 |
|
2092 Py_LOCAL_INLINE(PyObject *) |
|
2093 do_strip(PyStringObject *self, int striptype) |
|
2094 { |
|
2095 char *s = PyString_AS_STRING(self); |
|
2096 Py_ssize_t len = PyString_GET_SIZE(self), i, j; |
|
2097 |
|
2098 i = 0; |
|
2099 if (striptype != RIGHTSTRIP) { |
|
2100 while (i < len && isspace(Py_CHARMASK(s[i]))) { |
|
2101 i++; |
|
2102 } |
|
2103 } |
|
2104 |
|
2105 j = len; |
|
2106 if (striptype != LEFTSTRIP) { |
|
2107 do { |
|
2108 j--; |
|
2109 } while (j >= i && isspace(Py_CHARMASK(s[j]))); |
|
2110 j++; |
|
2111 } |
|
2112 |
|
2113 if (i == 0 && j == len && PyString_CheckExact(self)) { |
|
2114 Py_INCREF(self); |
|
2115 return (PyObject*)self; |
|
2116 } |
|
2117 else |
|
2118 return PyString_FromStringAndSize(s+i, j-i); |
|
2119 } |
|
2120 |
|
2121 |
|
2122 Py_LOCAL_INLINE(PyObject *) |
|
2123 do_argstrip(PyStringObject *self, int striptype, PyObject *args) |
|
2124 { |
|
2125 PyObject *sep = NULL; |
|
2126 |
|
2127 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) |
|
2128 return NULL; |
|
2129 |
|
2130 if (sep != NULL && sep != Py_None) { |
|
2131 if (PyString_Check(sep)) |
|
2132 return do_xstrip(self, striptype, sep); |
|
2133 #ifdef Py_USING_UNICODE |
|
2134 else if (PyUnicode_Check(sep)) { |
|
2135 PyObject *uniself = PyUnicode_FromObject((PyObject *)self); |
|
2136 PyObject *res; |
|
2137 if (uniself==NULL) |
|
2138 return NULL; |
|
2139 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, |
|
2140 striptype, sep); |
|
2141 Py_DECREF(uniself); |
|
2142 return res; |
|
2143 } |
|
2144 #endif |
|
2145 PyErr_Format(PyExc_TypeError, |
|
2146 #ifdef Py_USING_UNICODE |
|
2147 "%s arg must be None, str or unicode", |
|
2148 #else |
|
2149 "%s arg must be None or str", |
|
2150 #endif |
|
2151 STRIPNAME(striptype)); |
|
2152 return NULL; |
|
2153 } |
|
2154 |
|
2155 return do_strip(self, striptype); |
|
2156 } |
|
2157 |
|
2158 |
|
2159 PyDoc_STRVAR(strip__doc__, |
|
2160 "S.strip([chars]) -> string or unicode\n\ |
|
2161 \n\ |
|
2162 Return a copy of the string S with leading and trailing\n\ |
|
2163 whitespace removed.\n\ |
|
2164 If chars is given and not None, remove characters in chars instead.\n\ |
|
2165 If chars is unicode, S will be converted to unicode before stripping"); |
|
2166 |
|
2167 static PyObject * |
|
2168 string_strip(PyStringObject *self, PyObject *args) |
|
2169 { |
|
2170 if (PyTuple_GET_SIZE(args) == 0) |
|
2171 return do_strip(self, BOTHSTRIP); /* Common case */ |
|
2172 else |
|
2173 return do_argstrip(self, BOTHSTRIP, args); |
|
2174 } |
|
2175 |
|
2176 |
|
2177 PyDoc_STRVAR(lstrip__doc__, |
|
2178 "S.lstrip([chars]) -> string or unicode\n\ |
|
2179 \n\ |
|
2180 Return a copy of the string S with leading whitespace removed.\n\ |
|
2181 If chars is given and not None, remove characters in chars instead.\n\ |
|
2182 If chars is unicode, S will be converted to unicode before stripping"); |
|
2183 |
|
2184 static PyObject * |
|
2185 string_lstrip(PyStringObject *self, PyObject *args) |
|
2186 { |
|
2187 if (PyTuple_GET_SIZE(args) == 0) |
|
2188 return do_strip(self, LEFTSTRIP); /* Common case */ |
|
2189 else |
|
2190 return do_argstrip(self, LEFTSTRIP, args); |
|
2191 } |
|
2192 |
|
2193 |
|
2194 PyDoc_STRVAR(rstrip__doc__, |
|
2195 "S.rstrip([chars]) -> string or unicode\n\ |
|
2196 \n\ |
|
2197 Return a copy of the string S with trailing whitespace removed.\n\ |
|
2198 If chars is given and not None, remove characters in chars instead.\n\ |
|
2199 If chars is unicode, S will be converted to unicode before stripping"); |
|
2200 |
|
2201 static PyObject * |
|
2202 string_rstrip(PyStringObject *self, PyObject *args) |
|
2203 { |
|
2204 if (PyTuple_GET_SIZE(args) == 0) |
|
2205 return do_strip(self, RIGHTSTRIP); /* Common case */ |
|
2206 else |
|
2207 return do_argstrip(self, RIGHTSTRIP, args); |
|
2208 } |
|
2209 |
|
2210 |
|
2211 PyDoc_STRVAR(lower__doc__, |
|
2212 "S.lower() -> string\n\ |
|
2213 \n\ |
|
2214 Return a copy of the string S converted to lowercase."); |
|
2215 |
|
2216 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */ |
|
2217 #ifndef _tolower |
|
2218 #define _tolower tolower |
|
2219 #endif |
|
2220 |
|
2221 static PyObject * |
|
2222 string_lower(PyStringObject *self) |
|
2223 { |
|
2224 char *s; |
|
2225 Py_ssize_t i, n = PyString_GET_SIZE(self); |
|
2226 PyObject *newobj; |
|
2227 |
|
2228 newobj = PyString_FromStringAndSize(NULL, n); |
|
2229 if (!newobj) |
|
2230 return NULL; |
|
2231 |
|
2232 s = PyString_AS_STRING(newobj); |
|
2233 |
|
2234 Py_MEMCPY(s, PyString_AS_STRING(self), n); |
|
2235 |
|
2236 for (i = 0; i < n; i++) { |
|
2237 int c = Py_CHARMASK(s[i]); |
|
2238 if (isupper(c)) |
|
2239 s[i] = _tolower(c); |
|
2240 } |
|
2241 |
|
2242 return newobj; |
|
2243 } |
|
2244 |
|
2245 PyDoc_STRVAR(upper__doc__, |
|
2246 "S.upper() -> string\n\ |
|
2247 \n\ |
|
2248 Return a copy of the string S converted to uppercase."); |
|
2249 |
|
2250 #ifndef _toupper |
|
2251 #define _toupper toupper |
|
2252 #endif |
|
2253 |
|
2254 static PyObject * |
|
2255 string_upper(PyStringObject *self) |
|
2256 { |
|
2257 char *s; |
|
2258 Py_ssize_t i, n = PyString_GET_SIZE(self); |
|
2259 PyObject *newobj; |
|
2260 |
|
2261 newobj = PyString_FromStringAndSize(NULL, n); |
|
2262 if (!newobj) |
|
2263 return NULL; |
|
2264 |
|
2265 s = PyString_AS_STRING(newobj); |
|
2266 |
|
2267 Py_MEMCPY(s, PyString_AS_STRING(self), n); |
|
2268 |
|
2269 for (i = 0; i < n; i++) { |
|
2270 int c = Py_CHARMASK(s[i]); |
|
2271 if (islower(c)) |
|
2272 s[i] = _toupper(c); |
|
2273 } |
|
2274 |
|
2275 return newobj; |
|
2276 } |
|
2277 |
|
2278 PyDoc_STRVAR(title__doc__, |
|
2279 "S.title() -> string\n\ |
|
2280 \n\ |
|
2281 Return a titlecased version of S, i.e. words start with uppercase\n\ |
|
2282 characters, all remaining cased characters have lowercase."); |
|
2283 |
|
2284 static PyObject* |
|
2285 string_title(PyStringObject *self) |
|
2286 { |
|
2287 char *s = PyString_AS_STRING(self), *s_new; |
|
2288 Py_ssize_t i, n = PyString_GET_SIZE(self); |
|
2289 int previous_is_cased = 0; |
|
2290 PyObject *newobj; |
|
2291 |
|
2292 newobj = PyString_FromStringAndSize(NULL, n); |
|
2293 if (newobj == NULL) |
|
2294 return NULL; |
|
2295 s_new = PyString_AsString(newobj); |
|
2296 for (i = 0; i < n; i++) { |
|
2297 int c = Py_CHARMASK(*s++); |
|
2298 if (islower(c)) { |
|
2299 if (!previous_is_cased) |
|
2300 c = toupper(c); |
|
2301 previous_is_cased = 1; |
|
2302 } else if (isupper(c)) { |
|
2303 if (previous_is_cased) |
|
2304 c = tolower(c); |
|
2305 previous_is_cased = 1; |
|
2306 } else |
|
2307 previous_is_cased = 0; |
|
2308 *s_new++ = c; |
|
2309 } |
|
2310 return newobj; |
|
2311 } |
|
2312 |
|
2313 PyDoc_STRVAR(capitalize__doc__, |
|
2314 "S.capitalize() -> string\n\ |
|
2315 \n\ |
|
2316 Return a copy of the string S with only its first character\n\ |
|
2317 capitalized."); |
|
2318 |
|
2319 static PyObject * |
|
2320 string_capitalize(PyStringObject *self) |
|
2321 { |
|
2322 char *s = PyString_AS_STRING(self), *s_new; |
|
2323 Py_ssize_t i, n = PyString_GET_SIZE(self); |
|
2324 PyObject *newobj; |
|
2325 |
|
2326 newobj = PyString_FromStringAndSize(NULL, n); |
|
2327 if (newobj == NULL) |
|
2328 return NULL; |
|
2329 s_new = PyString_AsString(newobj); |
|
2330 if (0 < n) { |
|
2331 int c = Py_CHARMASK(*s++); |
|
2332 if (islower(c)) |
|
2333 *s_new = toupper(c); |
|
2334 else |
|
2335 *s_new = c; |
|
2336 s_new++; |
|
2337 } |
|
2338 for (i = 1; i < n; i++) { |
|
2339 int c = Py_CHARMASK(*s++); |
|
2340 if (isupper(c)) |
|
2341 *s_new = tolower(c); |
|
2342 else |
|
2343 *s_new = c; |
|
2344 s_new++; |
|
2345 } |
|
2346 return newobj; |
|
2347 } |
|
2348 |
|
2349 |
|
2350 PyDoc_STRVAR(count__doc__, |
|
2351 "S.count(sub[, start[, end]]) -> int\n\ |
|
2352 \n\ |
|
2353 Return the number of non-overlapping occurrences of substring sub in\n\ |
|
2354 string S[start:end]. Optional arguments start and end are interpreted\n\ |
|
2355 as in slice notation."); |
|
2356 |
|
2357 static PyObject * |
|
2358 string_count(PyStringObject *self, PyObject *args) |
|
2359 { |
|
2360 PyObject *sub_obj; |
|
2361 const char *str = PyString_AS_STRING(self), *sub; |
|
2362 Py_ssize_t sub_len; |
|
2363 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
|
2364 |
|
2365 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, |
|
2366 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
2367 return NULL; |
|
2368 |
|
2369 if (PyString_Check(sub_obj)) { |
|
2370 sub = PyString_AS_STRING(sub_obj); |
|
2371 sub_len = PyString_GET_SIZE(sub_obj); |
|
2372 } |
|
2373 #ifdef Py_USING_UNICODE |
|
2374 else if (PyUnicode_Check(sub_obj)) { |
|
2375 Py_ssize_t count; |
|
2376 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); |
|
2377 if (count == -1) |
|
2378 return NULL; |
|
2379 else |
|
2380 return PyInt_FromSsize_t(count); |
|
2381 } |
|
2382 #endif |
|
2383 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) |
|
2384 return NULL; |
|
2385 |
|
2386 string_adjust_indices(&start, &end, PyString_GET_SIZE(self)); |
|
2387 |
|
2388 return PyInt_FromSsize_t( |
|
2389 stringlib_count(str + start, end - start, sub, sub_len) |
|
2390 ); |
|
2391 } |
|
2392 |
|
2393 PyDoc_STRVAR(swapcase__doc__, |
|
2394 "S.swapcase() -> string\n\ |
|
2395 \n\ |
|
2396 Return a copy of the string S with uppercase characters\n\ |
|
2397 converted to lowercase and vice versa."); |
|
2398 |
|
2399 static PyObject * |
|
2400 string_swapcase(PyStringObject *self) |
|
2401 { |
|
2402 char *s = PyString_AS_STRING(self), *s_new; |
|
2403 Py_ssize_t i, n = PyString_GET_SIZE(self); |
|
2404 PyObject *newobj; |
|
2405 |
|
2406 newobj = PyString_FromStringAndSize(NULL, n); |
|
2407 if (newobj == NULL) |
|
2408 return NULL; |
|
2409 s_new = PyString_AsString(newobj); |
|
2410 for (i = 0; i < n; i++) { |
|
2411 int c = Py_CHARMASK(*s++); |
|
2412 if (islower(c)) { |
|
2413 *s_new = toupper(c); |
|
2414 } |
|
2415 else if (isupper(c)) { |
|
2416 *s_new = tolower(c); |
|
2417 } |
|
2418 else |
|
2419 *s_new = c; |
|
2420 s_new++; |
|
2421 } |
|
2422 return newobj; |
|
2423 } |
|
2424 |
|
2425 |
|
2426 PyDoc_STRVAR(translate__doc__, |
|
2427 "S.translate(table [,deletechars]) -> string\n\ |
|
2428 \n\ |
|
2429 Return a copy of the string S, where all characters occurring\n\ |
|
2430 in the optional argument deletechars are removed, and the\n\ |
|
2431 remaining characters have been mapped through the given\n\ |
|
2432 translation table, which must be a string of length 256."); |
|
2433 |
|
2434 static PyObject * |
|
2435 string_translate(PyStringObject *self, PyObject *args) |
|
2436 { |
|
2437 register char *input, *output; |
|
2438 const char *table; |
|
2439 register Py_ssize_t i, c, changed = 0; |
|
2440 PyObject *input_obj = (PyObject*)self; |
|
2441 const char *output_start, *del_table=NULL; |
|
2442 Py_ssize_t inlen, tablen, dellen = 0; |
|
2443 PyObject *result; |
|
2444 int trans_table[256]; |
|
2445 PyObject *tableobj, *delobj = NULL; |
|
2446 |
|
2447 if (!PyArg_UnpackTuple(args, "translate", 1, 2, |
|
2448 &tableobj, &delobj)) |
|
2449 return NULL; |
|
2450 |
|
2451 if (PyString_Check(tableobj)) { |
|
2452 table = PyString_AS_STRING(tableobj); |
|
2453 tablen = PyString_GET_SIZE(tableobj); |
|
2454 } |
|
2455 else if (tableobj == Py_None) { |
|
2456 table = NULL; |
|
2457 tablen = 256; |
|
2458 } |
|
2459 #ifdef Py_USING_UNICODE |
|
2460 else if (PyUnicode_Check(tableobj)) { |
|
2461 /* Unicode .translate() does not support the deletechars |
|
2462 parameter; instead a mapping to None will cause characters |
|
2463 to be deleted. */ |
|
2464 if (delobj != NULL) { |
|
2465 PyErr_SetString(PyExc_TypeError, |
|
2466 "deletions are implemented differently for unicode"); |
|
2467 return NULL; |
|
2468 } |
|
2469 return PyUnicode_Translate((PyObject *)self, tableobj, NULL); |
|
2470 } |
|
2471 #endif |
|
2472 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen)) |
|
2473 return NULL; |
|
2474 |
|
2475 if (tablen != 256) { |
|
2476 PyErr_SetString(PyExc_ValueError, |
|
2477 "translation table must be 256 characters long"); |
|
2478 return NULL; |
|
2479 } |
|
2480 |
|
2481 if (delobj != NULL) { |
|
2482 if (PyString_Check(delobj)) { |
|
2483 del_table = PyString_AS_STRING(delobj); |
|
2484 dellen = PyString_GET_SIZE(delobj); |
|
2485 } |
|
2486 #ifdef Py_USING_UNICODE |
|
2487 else if (PyUnicode_Check(delobj)) { |
|
2488 PyErr_SetString(PyExc_TypeError, |
|
2489 "deletions are implemented differently for unicode"); |
|
2490 return NULL; |
|
2491 } |
|
2492 #endif |
|
2493 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) |
|
2494 return NULL; |
|
2495 } |
|
2496 else { |
|
2497 del_table = NULL; |
|
2498 dellen = 0; |
|
2499 } |
|
2500 |
|
2501 inlen = PyString_GET_SIZE(input_obj); |
|
2502 result = PyString_FromStringAndSize((char *)NULL, inlen); |
|
2503 if (result == NULL) |
|
2504 return NULL; |
|
2505 output_start = output = PyString_AsString(result); |
|
2506 input = PyString_AS_STRING(input_obj); |
|
2507 |
|
2508 if (dellen == 0 && table != NULL) { |
|
2509 /* If no deletions are required, use faster code */ |
|
2510 for (i = inlen; --i >= 0; ) { |
|
2511 c = Py_CHARMASK(*input++); |
|
2512 if (Py_CHARMASK((*output++ = table[c])) != c) |
|
2513 changed = 1; |
|
2514 } |
|
2515 if (changed || !PyString_CheckExact(input_obj)) |
|
2516 return result; |
|
2517 Py_DECREF(result); |
|
2518 Py_INCREF(input_obj); |
|
2519 return input_obj; |
|
2520 } |
|
2521 |
|
2522 if (table == NULL) { |
|
2523 for (i = 0; i < 256; i++) |
|
2524 trans_table[i] = Py_CHARMASK(i); |
|
2525 } else { |
|
2526 for (i = 0; i < 256; i++) |
|
2527 trans_table[i] = Py_CHARMASK(table[i]); |
|
2528 } |
|
2529 |
|
2530 for (i = 0; i < dellen; i++) |
|
2531 trans_table[(int) Py_CHARMASK(del_table[i])] = -1; |
|
2532 |
|
2533 for (i = inlen; --i >= 0; ) { |
|
2534 c = Py_CHARMASK(*input++); |
|
2535 if (trans_table[c] != -1) |
|
2536 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) |
|
2537 continue; |
|
2538 changed = 1; |
|
2539 } |
|
2540 if (!changed && PyString_CheckExact(input_obj)) { |
|
2541 Py_DECREF(result); |
|
2542 Py_INCREF(input_obj); |
|
2543 return input_obj; |
|
2544 } |
|
2545 /* Fix the size of the resulting string */ |
|
2546 if (inlen > 0) |
|
2547 _PyString_Resize(&result, output - output_start); |
|
2548 return result; |
|
2549 } |
|
2550 |
|
2551 |
|
/* Direction values passed to the substring search helpers. */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Evaluates to a char * pointing at the first byte equal to c within the
   first target_len bytes of target, or NULL when memchr() finds none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
|
2559 |
|
2560 /* String ops must return a string. */ |
|
2561 /* If the object is subclass of string, create a copy */ |
|
2562 Py_LOCAL(PyStringObject *) |
|
2563 return_self(PyStringObject *self) |
|
2564 { |
|
2565 if (PyString_CheckExact(self)) { |
|
2566 Py_INCREF(self); |
|
2567 return self; |
|
2568 } |
|
2569 return (PyStringObject *)PyString_FromStringAndSize( |
|
2570 PyString_AS_STRING(self), |
|
2571 PyString_GET_SIZE(self)); |
|
2572 } |
|
2573 |
|
2574 Py_LOCAL_INLINE(Py_ssize_t) |
|
2575 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount) |
|
2576 { |
|
2577 Py_ssize_t count=0; |
|
2578 const char *start=target; |
|
2579 const char *end=target+target_len; |
|
2580 |
|
2581 while ( (start=findchar(start, end-start, c)) != NULL ) { |
|
2582 count++; |
|
2583 if (count >= maxcount) |
|
2584 break; |
|
2585 start += 1; |
|
2586 } |
|
2587 return count; |
|
2588 } |
|
2589 |
|
2590 Py_LOCAL(Py_ssize_t) |
|
2591 findstring(const char *target, Py_ssize_t target_len, |
|
2592 const char *pattern, Py_ssize_t pattern_len, |
|
2593 Py_ssize_t start, |
|
2594 Py_ssize_t end, |
|
2595 int direction) |
|
2596 { |
|
2597 if (start < 0) { |
|
2598 start += target_len; |
|
2599 if (start < 0) |
|
2600 start = 0; |
|
2601 } |
|
2602 if (end > target_len) { |
|
2603 end = target_len; |
|
2604 } else if (end < 0) { |
|
2605 end += target_len; |
|
2606 if (end < 0) |
|
2607 end = 0; |
|
2608 } |
|
2609 |
|
2610 /* zero-length substrings always match at the first attempt */ |
|
2611 if (pattern_len == 0) |
|
2612 return (direction > 0) ? start : end; |
|
2613 |
|
2614 end -= pattern_len; |
|
2615 |
|
2616 if (direction < 0) { |
|
2617 for (; end >= start; end--) |
|
2618 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) |
|
2619 return end; |
|
2620 } else { |
|
2621 for (; start <= end; start++) |
|
2622 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) |
|
2623 return start; |
|
2624 } |
|
2625 return -1; |
|
2626 } |
|
2627 |
|
2628 Py_LOCAL_INLINE(Py_ssize_t) |
|
2629 countstring(const char *target, Py_ssize_t target_len, |
|
2630 const char *pattern, Py_ssize_t pattern_len, |
|
2631 Py_ssize_t start, |
|
2632 Py_ssize_t end, |
|
2633 int direction, Py_ssize_t maxcount) |
|
2634 { |
|
2635 Py_ssize_t count=0; |
|
2636 |
|
2637 if (start < 0) { |
|
2638 start += target_len; |
|
2639 if (start < 0) |
|
2640 start = 0; |
|
2641 } |
|
2642 if (end > target_len) { |
|
2643 end = target_len; |
|
2644 } else if (end < 0) { |
|
2645 end += target_len; |
|
2646 if (end < 0) |
|
2647 end = 0; |
|
2648 } |
|
2649 |
|
2650 /* zero-length substrings match everywhere */ |
|
2651 if (pattern_len == 0 || maxcount == 0) { |
|
2652 if (target_len+1 < maxcount) |
|
2653 return target_len+1; |
|
2654 return maxcount; |
|
2655 } |
|
2656 |
|
2657 end -= pattern_len; |
|
2658 if (direction < 0) { |
|
2659 for (; (end >= start); end--) |
|
2660 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { |
|
2661 count++; |
|
2662 if (--maxcount <= 0) break; |
|
2663 end -= pattern_len-1; |
|
2664 } |
|
2665 } else { |
|
2666 for (; (start <= end); start++) |
|
2667 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { |
|
2668 count++; |
|
2669 if (--maxcount <= 0) |
|
2670 break; |
|
2671 start += pattern_len-1; |
|
2672 } |
|
2673 } |
|
2674 return count; |
|
2675 } |
|
2676 |
|
2677 |
|
2678 /* Algorithms for different cases of string replacement */ |
|
2679 |
|
2680 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ |
|
2681 Py_LOCAL(PyStringObject *) |
|
2682 replace_interleave(PyStringObject *self, |
|
2683 const char *to_s, Py_ssize_t to_len, |
|
2684 Py_ssize_t maxcount) |
|
2685 { |
|
2686 char *self_s, *result_s; |
|
2687 Py_ssize_t self_len, result_len; |
|
2688 Py_ssize_t count, i, product; |
|
2689 PyStringObject *result; |
|
2690 |
|
2691 self_len = PyString_GET_SIZE(self); |
|
2692 |
|
2693 /* 1 at the end plus 1 after every character */ |
|
2694 count = self_len+1; |
|
2695 if (maxcount < count) |
|
2696 count = maxcount; |
|
2697 |
|
2698 /* Check for overflow */ |
|
2699 /* result_len = count * to_len + self_len; */ |
|
2700 product = count * to_len; |
|
2701 if (product / to_len != count) { |
|
2702 PyErr_SetString(PyExc_OverflowError, |
|
2703 "replace string is too long"); |
|
2704 return NULL; |
|
2705 } |
|
2706 result_len = product + self_len; |
|
2707 if (result_len < 0) { |
|
2708 PyErr_SetString(PyExc_OverflowError, |
|
2709 "replace string is too long"); |
|
2710 return NULL; |
|
2711 } |
|
2712 |
|
2713 if (! (result = (PyStringObject *) |
|
2714 PyString_FromStringAndSize(NULL, result_len)) ) |
|
2715 return NULL; |
|
2716 |
|
2717 self_s = PyString_AS_STRING(self); |
|
2718 result_s = PyString_AS_STRING(result); |
|
2719 |
|
2720 /* TODO: special case single character, which doesn't need memcpy */ |
|
2721 |
|
2722 /* Lay the first one down (guaranteed this will occur) */ |
|
2723 Py_MEMCPY(result_s, to_s, to_len); |
|
2724 result_s += to_len; |
|
2725 count -= 1; |
|
2726 |
|
2727 for (i=0; i<count; i++) { |
|
2728 *result_s++ = *self_s++; |
|
2729 Py_MEMCPY(result_s, to_s, to_len); |
|
2730 result_s += to_len; |
|
2731 } |
|
2732 |
|
2733 /* Copy the rest of the original string */ |
|
2734 Py_MEMCPY(result_s, self_s, self_len-i); |
|
2735 |
|
2736 return result; |
|
2737 } |
|
2738 |
|
2739 /* Special case for deleting a single character */ |
|
2740 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ |
|
2741 Py_LOCAL(PyStringObject *) |
|
2742 replace_delete_single_character(PyStringObject *self, |
|
2743 char from_c, Py_ssize_t maxcount) |
|
2744 { |
|
2745 char *self_s, *result_s; |
|
2746 char *start, *next, *end; |
|
2747 Py_ssize_t self_len, result_len; |
|
2748 Py_ssize_t count; |
|
2749 PyStringObject *result; |
|
2750 |
|
2751 self_len = PyString_GET_SIZE(self); |
|
2752 self_s = PyString_AS_STRING(self); |
|
2753 |
|
2754 count = countchar(self_s, self_len, from_c, maxcount); |
|
2755 if (count == 0) { |
|
2756 return return_self(self); |
|
2757 } |
|
2758 |
|
2759 result_len = self_len - count; /* from_len == 1 */ |
|
2760 assert(result_len>=0); |
|
2761 |
|
2762 if ( (result = (PyStringObject *) |
|
2763 PyString_FromStringAndSize(NULL, result_len)) == NULL) |
|
2764 return NULL; |
|
2765 result_s = PyString_AS_STRING(result); |
|
2766 |
|
2767 start = self_s; |
|
2768 end = self_s + self_len; |
|
2769 while (count-- > 0) { |
|
2770 next = findchar(start, end-start, from_c); |
|
2771 if (next == NULL) |
|
2772 break; |
|
2773 Py_MEMCPY(result_s, start, next-start); |
|
2774 result_s += (next-start); |
|
2775 start = next+1; |
|
2776 } |
|
2777 Py_MEMCPY(result_s, start, end-start); |
|
2778 |
|
2779 return result; |
|
2780 } |
|
2781 |
|
2782 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ |
|
2783 |
|
2784 Py_LOCAL(PyStringObject *) |
|
2785 replace_delete_substring(PyStringObject *self, |
|
2786 const char *from_s, Py_ssize_t from_len, |
|
2787 Py_ssize_t maxcount) { |
|
2788 char *self_s, *result_s; |
|
2789 char *start, *next, *end; |
|
2790 Py_ssize_t self_len, result_len; |
|
2791 Py_ssize_t count, offset; |
|
2792 PyStringObject *result; |
|
2793 |
|
2794 self_len = PyString_GET_SIZE(self); |
|
2795 self_s = PyString_AS_STRING(self); |
|
2796 |
|
2797 count = countstring(self_s, self_len, |
|
2798 from_s, from_len, |
|
2799 0, self_len, 1, |
|
2800 maxcount); |
|
2801 |
|
2802 if (count == 0) { |
|
2803 /* no matches */ |
|
2804 return return_self(self); |
|
2805 } |
|
2806 |
|
2807 result_len = self_len - (count * from_len); |
|
2808 assert (result_len>=0); |
|
2809 |
|
2810 if ( (result = (PyStringObject *) |
|
2811 PyString_FromStringAndSize(NULL, result_len)) == NULL ) |
|
2812 return NULL; |
|
2813 |
|
2814 result_s = PyString_AS_STRING(result); |
|
2815 |
|
2816 start = self_s; |
|
2817 end = self_s + self_len; |
|
2818 while (count-- > 0) { |
|
2819 offset = findstring(start, end-start, |
|
2820 from_s, from_len, |
|
2821 0, end-start, FORWARD); |
|
2822 if (offset == -1) |
|
2823 break; |
|
2824 next = start + offset; |
|
2825 |
|
2826 Py_MEMCPY(result_s, start, next-start); |
|
2827 |
|
2828 result_s += (next-start); |
|
2829 start = next+from_len; |
|
2830 } |
|
2831 Py_MEMCPY(result_s, start, end-start); |
|
2832 return result; |
|
2833 } |
|
2834 |
|
2835 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ |
|
2836 Py_LOCAL(PyStringObject *) |
|
2837 replace_single_character_in_place(PyStringObject *self, |
|
2838 char from_c, char to_c, |
|
2839 Py_ssize_t maxcount) |
|
2840 { |
|
2841 char *self_s, *result_s, *start, *end, *next; |
|
2842 Py_ssize_t self_len; |
|
2843 PyStringObject *result; |
|
2844 |
|
2845 /* The result string will be the same size */ |
|
2846 self_s = PyString_AS_STRING(self); |
|
2847 self_len = PyString_GET_SIZE(self); |
|
2848 |
|
2849 next = findchar(self_s, self_len, from_c); |
|
2850 |
|
2851 if (next == NULL) { |
|
2852 /* No matches; return the original string */ |
|
2853 return return_self(self); |
|
2854 } |
|
2855 |
|
2856 /* Need to make a new string */ |
|
2857 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); |
|
2858 if (result == NULL) |
|
2859 return NULL; |
|
2860 result_s = PyString_AS_STRING(result); |
|
2861 Py_MEMCPY(result_s, self_s, self_len); |
|
2862 |
|
2863 /* change everything in-place, starting with this one */ |
|
2864 start = result_s + (next-self_s); |
|
2865 *start = to_c; |
|
2866 start++; |
|
2867 end = result_s + self_len; |
|
2868 |
|
2869 while (--maxcount > 0) { |
|
2870 next = findchar(start, end-start, from_c); |
|
2871 if (next == NULL) |
|
2872 break; |
|
2873 *next = to_c; |
|
2874 start = next+1; |
|
2875 } |
|
2876 |
|
2877 return result; |
|
2878 } |
|
2879 |
|
2880 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ |
|
2881 Py_LOCAL(PyStringObject *) |
|
2882 replace_substring_in_place(PyStringObject *self, |
|
2883 const char *from_s, Py_ssize_t from_len, |
|
2884 const char *to_s, Py_ssize_t to_len, |
|
2885 Py_ssize_t maxcount) |
|
2886 { |
|
2887 char *result_s, *start, *end; |
|
2888 char *self_s; |
|
2889 Py_ssize_t self_len, offset; |
|
2890 PyStringObject *result; |
|
2891 |
|
2892 /* The result string will be the same size */ |
|
2893 |
|
2894 self_s = PyString_AS_STRING(self); |
|
2895 self_len = PyString_GET_SIZE(self); |
|
2896 |
|
2897 offset = findstring(self_s, self_len, |
|
2898 from_s, from_len, |
|
2899 0, self_len, FORWARD); |
|
2900 if (offset == -1) { |
|
2901 /* No matches; return the original string */ |
|
2902 return return_self(self); |
|
2903 } |
|
2904 |
|
2905 /* Need to make a new string */ |
|
2906 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); |
|
2907 if (result == NULL) |
|
2908 return NULL; |
|
2909 result_s = PyString_AS_STRING(result); |
|
2910 Py_MEMCPY(result_s, self_s, self_len); |
|
2911 |
|
2912 /* change everything in-place, starting with this one */ |
|
2913 start = result_s + offset; |
|
2914 Py_MEMCPY(start, to_s, from_len); |
|
2915 start += from_len; |
|
2916 end = result_s + self_len; |
|
2917 |
|
2918 while ( --maxcount > 0) { |
|
2919 offset = findstring(start, end-start, |
|
2920 from_s, from_len, |
|
2921 0, end-start, FORWARD); |
|
2922 if (offset==-1) |
|
2923 break; |
|
2924 Py_MEMCPY(start+offset, to_s, from_len); |
|
2925 start += offset+from_len; |
|
2926 } |
|
2927 |
|
2928 return result; |
|
2929 } |
|
2930 |
|
2931 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ |
|
2932 Py_LOCAL(PyStringObject *) |
|
2933 replace_single_character(PyStringObject *self, |
|
2934 char from_c, |
|
2935 const char *to_s, Py_ssize_t to_len, |
|
2936 Py_ssize_t maxcount) |
|
2937 { |
|
2938 char *self_s, *result_s; |
|
2939 char *start, *next, *end; |
|
2940 Py_ssize_t self_len, result_len; |
|
2941 Py_ssize_t count, product; |
|
2942 PyStringObject *result; |
|
2943 |
|
2944 self_s = PyString_AS_STRING(self); |
|
2945 self_len = PyString_GET_SIZE(self); |
|
2946 |
|
2947 count = countchar(self_s, self_len, from_c, maxcount); |
|
2948 if (count == 0) { |
|
2949 /* no matches, return unchanged */ |
|
2950 return return_self(self); |
|
2951 } |
|
2952 |
|
2953 /* use the difference between current and new, hence the "-1" */ |
|
2954 /* result_len = self_len + count * (to_len-1) */ |
|
2955 product = count * (to_len-1); |
|
2956 if (product / (to_len-1) != count) { |
|
2957 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); |
|
2958 return NULL; |
|
2959 } |
|
2960 result_len = self_len + product; |
|
2961 if (result_len < 0) { |
|
2962 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); |
|
2963 return NULL; |
|
2964 } |
|
2965 |
|
2966 if ( (result = (PyStringObject *) |
|
2967 PyString_FromStringAndSize(NULL, result_len)) == NULL) |
|
2968 return NULL; |
|
2969 result_s = PyString_AS_STRING(result); |
|
2970 |
|
2971 start = self_s; |
|
2972 end = self_s + self_len; |
|
2973 while (count-- > 0) { |
|
2974 next = findchar(start, end-start, from_c); |
|
2975 if (next == NULL) |
|
2976 break; |
|
2977 |
|
2978 if (next == start) { |
|
2979 /* replace with the 'to' */ |
|
2980 Py_MEMCPY(result_s, to_s, to_len); |
|
2981 result_s += to_len; |
|
2982 start += 1; |
|
2983 } else { |
|
2984 /* copy the unchanged old then the 'to' */ |
|
2985 Py_MEMCPY(result_s, start, next-start); |
|
2986 result_s += (next-start); |
|
2987 Py_MEMCPY(result_s, to_s, to_len); |
|
2988 result_s += to_len; |
|
2989 start = next+1; |
|
2990 } |
|
2991 } |
|
2992 /* Copy the remainder of the remaining string */ |
|
2993 Py_MEMCPY(result_s, start, end-start); |
|
2994 |
|
2995 return result; |
|
2996 } |
|
2997 |
|
2998 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ |
|
2999 Py_LOCAL(PyStringObject *) |
|
3000 replace_substring(PyStringObject *self, |
|
3001 const char *from_s, Py_ssize_t from_len, |
|
3002 const char *to_s, Py_ssize_t to_len, |
|
3003 Py_ssize_t maxcount) { |
|
3004 char *self_s, *result_s; |
|
3005 char *start, *next, *end; |
|
3006 Py_ssize_t self_len, result_len; |
|
3007 Py_ssize_t count, offset, product; |
|
3008 PyStringObject *result; |
|
3009 |
|
3010 self_s = PyString_AS_STRING(self); |
|
3011 self_len = PyString_GET_SIZE(self); |
|
3012 |
|
3013 count = countstring(self_s, self_len, |
|
3014 from_s, from_len, |
|
3015 0, self_len, FORWARD, maxcount); |
|
3016 if (count == 0) { |
|
3017 /* no matches, return unchanged */ |
|
3018 return return_self(self); |
|
3019 } |
|
3020 |
|
3021 /* Check for overflow */ |
|
3022 /* result_len = self_len + count * (to_len-from_len) */ |
|
3023 product = count * (to_len-from_len); |
|
3024 if (product / (to_len-from_len) != count) { |
|
3025 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); |
|
3026 return NULL; |
|
3027 } |
|
3028 result_len = self_len + product; |
|
3029 if (result_len < 0) { |
|
3030 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); |
|
3031 return NULL; |
|
3032 } |
|
3033 |
|
3034 if ( (result = (PyStringObject *) |
|
3035 PyString_FromStringAndSize(NULL, result_len)) == NULL) |
|
3036 return NULL; |
|
3037 result_s = PyString_AS_STRING(result); |
|
3038 |
|
3039 start = self_s; |
|
3040 end = self_s + self_len; |
|
3041 while (count-- > 0) { |
|
3042 offset = findstring(start, end-start, |
|
3043 from_s, from_len, |
|
3044 0, end-start, FORWARD); |
|
3045 if (offset == -1) |
|
3046 break; |
|
3047 next = start+offset; |
|
3048 if (next == start) { |
|
3049 /* replace with the 'to' */ |
|
3050 Py_MEMCPY(result_s, to_s, to_len); |
|
3051 result_s += to_len; |
|
3052 start += from_len; |
|
3053 } else { |
|
3054 /* copy the unchanged old then the 'to' */ |
|
3055 Py_MEMCPY(result_s, start, next-start); |
|
3056 result_s += (next-start); |
|
3057 Py_MEMCPY(result_s, to_s, to_len); |
|
3058 result_s += to_len; |
|
3059 start = next+from_len; |
|
3060 } |
|
3061 } |
|
3062 /* Copy the remainder of the remaining string */ |
|
3063 Py_MEMCPY(result_s, start, end-start); |
|
3064 |
|
3065 return result; |
|
3066 } |
|
3067 |
|
3068 |
|
3069 Py_LOCAL(PyStringObject *) |
|
3070 replace(PyStringObject *self, |
|
3071 const char *from_s, Py_ssize_t from_len, |
|
3072 const char *to_s, Py_ssize_t to_len, |
|
3073 Py_ssize_t maxcount) |
|
3074 { |
|
3075 if (maxcount < 0) { |
|
3076 maxcount = PY_SSIZE_T_MAX; |
|
3077 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { |
|
3078 /* nothing to do; return the original string */ |
|
3079 return return_self(self); |
|
3080 } |
|
3081 |
|
3082 if (maxcount == 0 || |
|
3083 (from_len == 0 && to_len == 0)) { |
|
3084 /* nothing to do; return the original string */ |
|
3085 return return_self(self); |
|
3086 } |
|
3087 |
|
3088 /* Handle zero-length special cases */ |
|
3089 |
|
3090 if (from_len == 0) { |
|
3091 /* insert the 'to' string everywhere. */ |
|
3092 /* >>> "Python".replace("", ".") */ |
|
3093 /* '.P.y.t.h.o.n.' */ |
|
3094 return replace_interleave(self, to_s, to_len, maxcount); |
|
3095 } |
|
3096 |
|
3097 /* Except for "".replace("", "A") == "A" there is no way beyond this */ |
|
3098 /* point for an empty self string to generate a non-empty string */ |
|
3099 /* Special case so the remaining code always gets a non-empty string */ |
|
3100 if (PyString_GET_SIZE(self) == 0) { |
|
3101 return return_self(self); |
|
3102 } |
|
3103 |
|
3104 if (to_len == 0) { |
|
3105 /* delete all occurances of 'from' string */ |
|
3106 if (from_len == 1) { |
|
3107 return replace_delete_single_character( |
|
3108 self, from_s[0], maxcount); |
|
3109 } else { |
|
3110 return replace_delete_substring(self, from_s, from_len, maxcount); |
|
3111 } |
|
3112 } |
|
3113 |
|
3114 /* Handle special case where both strings have the same length */ |
|
3115 |
|
3116 if (from_len == to_len) { |
|
3117 if (from_len == 1) { |
|
3118 return replace_single_character_in_place( |
|
3119 self, |
|
3120 from_s[0], |
|
3121 to_s[0], |
|
3122 maxcount); |
|
3123 } else { |
|
3124 return replace_substring_in_place( |
|
3125 self, from_s, from_len, to_s, to_len, maxcount); |
|
3126 } |
|
3127 } |
|
3128 |
|
3129 /* Otherwise use the more generic algorithms */ |
|
3130 if (from_len == 1) { |
|
3131 return replace_single_character(self, from_s[0], |
|
3132 to_s, to_len, maxcount); |
|
3133 } else { |
|
3134 /* len('from')>=2, len('to')>=1 */ |
|
3135 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); |
|
3136 } |
|
3137 } |
|
3138 |
|
3139 PyDoc_STRVAR(replace__doc__, |
|
3140 "S.replace (old, new[, count]) -> string\n\ |
|
3141 \n\ |
|
3142 Return a copy of string S with all occurrences of substring\n\ |
|
3143 old replaced by new. If the optional argument count is\n\ |
|
3144 given, only the first count occurrences are replaced."); |
|
3145 |
|
3146 static PyObject * |
|
3147 string_replace(PyStringObject *self, PyObject *args) |
|
3148 { |
|
3149 Py_ssize_t count = -1; |
|
3150 PyObject *from, *to; |
|
3151 const char *from_s, *to_s; |
|
3152 Py_ssize_t from_len, to_len; |
|
3153 |
|
3154 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) |
|
3155 return NULL; |
|
3156 |
|
3157 if (PyString_Check(from)) { |
|
3158 from_s = PyString_AS_STRING(from); |
|
3159 from_len = PyString_GET_SIZE(from); |
|
3160 } |
|
3161 #ifdef Py_USING_UNICODE |
|
3162 if (PyUnicode_Check(from)) |
|
3163 return PyUnicode_Replace((PyObject *)self, |
|
3164 from, to, count); |
|
3165 #endif |
|
3166 else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) |
|
3167 return NULL; |
|
3168 |
|
3169 if (PyString_Check(to)) { |
|
3170 to_s = PyString_AS_STRING(to); |
|
3171 to_len = PyString_GET_SIZE(to); |
|
3172 } |
|
3173 #ifdef Py_USING_UNICODE |
|
3174 else if (PyUnicode_Check(to)) |
|
3175 return PyUnicode_Replace((PyObject *)self, |
|
3176 from, to, count); |
|
3177 #endif |
|
3178 else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) |
|
3179 return NULL; |
|
3180 |
|
3181 return (PyObject *)replace((PyStringObject *) self, |
|
3182 from_s, from_len, |
|
3183 to_s, to_len, count); |
|
3184 } |
|
3185 |
|
3186 /** End DALKE **/ |
|
3187 |
|
3188 /* Matches the end (direction >= 0) or start (direction < 0) of self |
|
3189 * against substr, using the start and end arguments. Returns |
|
3190 * -1 on error, 0 if not found and 1 if found. |
|
3191 */ |
|
3192 Py_LOCAL(int) |
|
3193 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, |
|
3194 Py_ssize_t end, int direction) |
|
3195 { |
|
3196 Py_ssize_t len = PyString_GET_SIZE(self); |
|
3197 Py_ssize_t slen; |
|
3198 const char* sub; |
|
3199 const char* str; |
|
3200 |
|
3201 if (PyString_Check(substr)) { |
|
3202 sub = PyString_AS_STRING(substr); |
|
3203 slen = PyString_GET_SIZE(substr); |
|
3204 } |
|
3205 #ifdef Py_USING_UNICODE |
|
3206 else if (PyUnicode_Check(substr)) |
|
3207 return PyUnicode_Tailmatch((PyObject *)self, |
|
3208 substr, start, end, direction); |
|
3209 #endif |
|
3210 else if (PyObject_AsCharBuffer(substr, &sub, &slen)) |
|
3211 return -1; |
|
3212 str = PyString_AS_STRING(self); |
|
3213 |
|
3214 string_adjust_indices(&start, &end, len); |
|
3215 |
|
3216 if (direction < 0) { |
|
3217 /* startswith */ |
|
3218 if (start+slen > len) |
|
3219 return 0; |
|
3220 } else { |
|
3221 /* endswith */ |
|
3222 if (end-start < slen || start > len) |
|
3223 return 0; |
|
3224 |
|
3225 if (end-slen > start) |
|
3226 start = end - slen; |
|
3227 } |
|
3228 if (end-start >= slen) |
|
3229 return ! memcmp(str+start, sub, slen); |
|
3230 return 0; |
|
3231 } |
|
3232 |
|
3233 |
|
3234 PyDoc_STRVAR(startswith__doc__, |
|
3235 "S.startswith(prefix[, start[, end]]) -> bool\n\ |
|
3236 \n\ |
|
3237 Return True if S starts with the specified prefix, False otherwise.\n\ |
|
3238 With optional start, test S beginning at that position.\n\ |
|
3239 With optional end, stop comparing S at that position.\n\ |
|
3240 prefix can also be a tuple of strings to try."); |
|
3241 |
|
3242 static PyObject * |
|
3243 string_startswith(PyStringObject *self, PyObject *args) |
|
3244 { |
|
3245 Py_ssize_t start = 0; |
|
3246 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
3247 PyObject *subobj; |
|
3248 int result; |
|
3249 |
|
3250 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, |
|
3251 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
3252 return NULL; |
|
3253 if (PyTuple_Check(subobj)) { |
|
3254 Py_ssize_t i; |
|
3255 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
|
3256 result = _string_tailmatch(self, |
|
3257 PyTuple_GET_ITEM(subobj, i), |
|
3258 start, end, -1); |
|
3259 if (result == -1) |
|
3260 return NULL; |
|
3261 else if (result) { |
|
3262 Py_RETURN_TRUE; |
|
3263 } |
|
3264 } |
|
3265 Py_RETURN_FALSE; |
|
3266 } |
|
3267 result = _string_tailmatch(self, subobj, start, end, -1); |
|
3268 if (result == -1) |
|
3269 return NULL; |
|
3270 else |
|
3271 return PyBool_FromLong(result); |
|
3272 } |
|
3273 |
|
3274 |
|
3275 PyDoc_STRVAR(endswith__doc__, |
|
3276 "S.endswith(suffix[, start[, end]]) -> bool\n\ |
|
3277 \n\ |
|
3278 Return True if S ends with the specified suffix, False otherwise.\n\ |
|
3279 With optional start, test S beginning at that position.\n\ |
|
3280 With optional end, stop comparing S at that position.\n\ |
|
3281 suffix can also be a tuple of strings to try."); |
|
3282 |
|
3283 static PyObject * |
|
3284 string_endswith(PyStringObject *self, PyObject *args) |
|
3285 { |
|
3286 Py_ssize_t start = 0; |
|
3287 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
3288 PyObject *subobj; |
|
3289 int result; |
|
3290 |
|
3291 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, |
|
3292 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
3293 return NULL; |
|
3294 if (PyTuple_Check(subobj)) { |
|
3295 Py_ssize_t i; |
|
3296 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
|
3297 result = _string_tailmatch(self, |
|
3298 PyTuple_GET_ITEM(subobj, i), |
|
3299 start, end, +1); |
|
3300 if (result == -1) |
|
3301 return NULL; |
|
3302 else if (result) { |
|
3303 Py_RETURN_TRUE; |
|
3304 } |
|
3305 } |
|
3306 Py_RETURN_FALSE; |
|
3307 } |
|
3308 result = _string_tailmatch(self, subobj, start, end, +1); |
|
3309 if (result == -1) |
|
3310 return NULL; |
|
3311 else |
|
3312 return PyBool_FromLong(result); |
|
3313 } |
|
3314 |
|
3315 |
|
3316 PyDoc_STRVAR(encode__doc__, |
|
3317 "S.encode([encoding[,errors]]) -> object\n\ |
|
3318 \n\ |
|
3319 Encodes S using the codec registered for encoding. encoding defaults\n\ |
|
3320 to the default encoding. errors may be given to set a different error\n\ |
|
3321 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ |
|
3322 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ |
|
3323 'xmlcharrefreplace' as well as any other name registered with\n\ |
|
3324 codecs.register_error that is able to handle UnicodeEncodeErrors."); |
|
3325 |
|
3326 static PyObject * |
|
3327 string_encode(PyStringObject *self, PyObject *args) |
|
3328 { |
|
3329 char *encoding = NULL; |
|
3330 char *errors = NULL; |
|
3331 PyObject *v; |
|
3332 |
|
3333 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) |
|
3334 return NULL; |
|
3335 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); |
|
3336 if (v == NULL) |
|
3337 goto onError; |
|
3338 if (!PyString_Check(v) && !PyUnicode_Check(v)) { |
|
3339 PyErr_Format(PyExc_TypeError, |
|
3340 "encoder did not return a string/unicode object " |
|
3341 "(type=%.400s)", |
|
3342 Py_TYPE(v)->tp_name); |
|
3343 Py_DECREF(v); |
|
3344 return NULL; |
|
3345 } |
|
3346 return v; |
|
3347 |
|
3348 onError: |
|
3349 return NULL; |
|
3350 } |
|
3351 |
|
3352 |
|
3353 PyDoc_STRVAR(decode__doc__, |
|
3354 "S.decode([encoding[,errors]]) -> object\n\ |
|
3355 \n\ |
|
3356 Decodes S using the codec registered for encoding. encoding defaults\n\ |
|
3357 to the default encoding. errors may be given to set a different error\n\ |
|
3358 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ |
|
3359 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ |
|
3360 as well as any other name registered with codecs.register_error that is\n\ |
|
3361 able to handle UnicodeDecodeErrors."); |
|
3362 |
|
3363 static PyObject * |
|
3364 string_decode(PyStringObject *self, PyObject *args) |
|
3365 { |
|
3366 char *encoding = NULL; |
|
3367 char *errors = NULL; |
|
3368 PyObject *v; |
|
3369 |
|
3370 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) |
|
3371 return NULL; |
|
3372 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); |
|
3373 if (v == NULL) |
|
3374 goto onError; |
|
3375 if (!PyString_Check(v) && !PyUnicode_Check(v)) { |
|
3376 PyErr_Format(PyExc_TypeError, |
|
3377 "decoder did not return a string/unicode object " |
|
3378 "(type=%.400s)", |
|
3379 Py_TYPE(v)->tp_name); |
|
3380 Py_DECREF(v); |
|
3381 return NULL; |
|
3382 } |
|
3383 return v; |
|
3384 |
|
3385 onError: |
|
3386 return NULL; |
|
3387 } |
|
3388 |
|
3389 |
|
3390 PyDoc_STRVAR(expandtabs__doc__, |
|
3391 "S.expandtabs([tabsize]) -> string\n\ |
|
3392 \n\ |
|
3393 Return a copy of S where all tab characters are expanded using spaces.\n\ |
|
3394 If tabsize is not given, a tab size of 8 characters is assumed."); |
|
3395 |
|
3396 static PyObject* |
|
3397 string_expandtabs(PyStringObject *self, PyObject *args) |
|
3398 { |
|
3399 const char *e, *p, *qe; |
|
3400 char *q; |
|
3401 Py_ssize_t i, j, incr; |
|
3402 PyObject *u; |
|
3403 int tabsize = 8; |
|
3404 |
|
3405 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) |
|
3406 return NULL; |
|
3407 |
|
3408 /* First pass: determine size of output string */ |
|
3409 i = 0; /* chars up to and including most recent \n or \r */ |
|
3410 j = 0; /* chars since most recent \n or \r (use in tab calculations) */ |
|
3411 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ |
|
3412 for (p = PyString_AS_STRING(self); p < e; p++) |
|
3413 if (*p == '\t') { |
|
3414 if (tabsize > 0) { |
|
3415 incr = tabsize - (j % tabsize); |
|
3416 if (j > PY_SSIZE_T_MAX - incr) |
|
3417 goto overflow1; |
|
3418 j += incr; |
|
3419 } |
|
3420 } |
|
3421 else { |
|
3422 if (j > PY_SSIZE_T_MAX - 1) |
|
3423 goto overflow1; |
|
3424 j++; |
|
3425 if (*p == '\n' || *p == '\r') { |
|
3426 if (i > PY_SSIZE_T_MAX - j) |
|
3427 goto overflow1; |
|
3428 i += j; |
|
3429 j = 0; |
|
3430 } |
|
3431 } |
|
3432 |
|
3433 if (i > PY_SSIZE_T_MAX - j) |
|
3434 goto overflow1; |
|
3435 |
|
3436 /* Second pass: create output string and fill it */ |
|
3437 u = PyString_FromStringAndSize(NULL, i + j); |
|
3438 if (!u) |
|
3439 return NULL; |
|
3440 |
|
3441 j = 0; /* same as in first pass */ |
|
3442 q = PyString_AS_STRING(u); /* next output char */ |
|
3443 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ |
|
3444 |
|
3445 for (p = PyString_AS_STRING(self); p < e; p++) |
|
3446 if (*p == '\t') { |
|
3447 if (tabsize > 0) { |
|
3448 i = tabsize - (j % tabsize); |
|
3449 j += i; |
|
3450 while (i--) { |
|
3451 if (q >= qe) |
|
3452 goto overflow2; |
|
3453 *q++ = ' '; |
|
3454 } |
|
3455 } |
|
3456 } |
|
3457 else { |
|
3458 if (q >= qe) |
|
3459 goto overflow2; |
|
3460 *q++ = *p; |
|
3461 j++; |
|
3462 if (*p == '\n' || *p == '\r') |
|
3463 j = 0; |
|
3464 } |
|
3465 |
|
3466 return u; |
|
3467 |
|
3468 overflow2: |
|
3469 Py_DECREF(u); |
|
3470 overflow1: |
|
3471 PyErr_SetString(PyExc_OverflowError, "new string is too long"); |
|
3472 return NULL; |
|
3473 } |
|
3474 |
|
3475 Py_LOCAL_INLINE(PyObject *) |
|
3476 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
|
3477 { |
|
3478 PyObject *u; |
|
3479 |
|
3480 if (left < 0) |
|
3481 left = 0; |
|
3482 if (right < 0) |
|
3483 right = 0; |
|
3484 |
|
3485 if (left == 0 && right == 0 && PyString_CheckExact(self)) { |
|
3486 Py_INCREF(self); |
|
3487 return (PyObject *)self; |
|
3488 } |
|
3489 |
|
3490 u = PyString_FromStringAndSize(NULL, |
|
3491 left + PyString_GET_SIZE(self) + right); |
|
3492 if (u) { |
|
3493 if (left) |
|
3494 memset(PyString_AS_STRING(u), fill, left); |
|
3495 Py_MEMCPY(PyString_AS_STRING(u) + left, |
|
3496 PyString_AS_STRING(self), |
|
3497 PyString_GET_SIZE(self)); |
|
3498 if (right) |
|
3499 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), |
|
3500 fill, right); |
|
3501 } |
|
3502 |
|
3503 return u; |
|
3504 } |
|
3505 |
|
3506 PyDoc_STRVAR(ljust__doc__, |
|
3507 "S.ljust(width[, fillchar]) -> string\n" |
|
3508 "\n" |
|
3509 "Return S left-justified in a string of length width. Padding is\n" |
|
3510 "done using the specified fill character (default is a space)."); |
|
3511 |
|
3512 static PyObject * |
|
3513 string_ljust(PyStringObject *self, PyObject *args) |
|
3514 { |
|
3515 Py_ssize_t width; |
|
3516 char fillchar = ' '; |
|
3517 |
|
3518 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) |
|
3519 return NULL; |
|
3520 |
|
3521 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { |
|
3522 Py_INCREF(self); |
|
3523 return (PyObject*) self; |
|
3524 } |
|
3525 |
|
3526 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar); |
|
3527 } |
|
3528 |
|
3529 |
|
3530 PyDoc_STRVAR(rjust__doc__, |
|
3531 "S.rjust(width[, fillchar]) -> string\n" |
|
3532 "\n" |
|
3533 "Return S right-justified in a string of length width. Padding is\n" |
|
3534 "done using the specified fill character (default is a space)"); |
|
3535 |
|
3536 static PyObject * |
|
3537 string_rjust(PyStringObject *self, PyObject *args) |
|
3538 { |
|
3539 Py_ssize_t width; |
|
3540 char fillchar = ' '; |
|
3541 |
|
3542 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) |
|
3543 return NULL; |
|
3544 |
|
3545 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { |
|
3546 Py_INCREF(self); |
|
3547 return (PyObject*) self; |
|
3548 } |
|
3549 |
|
3550 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar); |
|
3551 } |
|
3552 |
|
3553 |
|
3554 PyDoc_STRVAR(center__doc__, |
|
3555 "S.center(width[, fillchar]) -> string\n" |
|
3556 "\n" |
|
3557 "Return S centered in a string of length width. Padding is\n" |
|
3558 "done using the specified fill character (default is a space)"); |
|
3559 |
|
3560 static PyObject * |
|
3561 string_center(PyStringObject *self, PyObject *args) |
|
3562 { |
|
3563 Py_ssize_t marg, left; |
|
3564 Py_ssize_t width; |
|
3565 char fillchar = ' '; |
|
3566 |
|
3567 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) |
|
3568 return NULL; |
|
3569 |
|
3570 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { |
|
3571 Py_INCREF(self); |
|
3572 return (PyObject*) self; |
|
3573 } |
|
3574 |
|
3575 marg = width - PyString_GET_SIZE(self); |
|
3576 left = marg / 2 + (marg & width & 1); |
|
3577 |
|
3578 return pad(self, left, marg - left, fillchar); |
|
3579 } |
|
3580 |
|
3581 PyDoc_STRVAR(zfill__doc__, |
|
3582 "S.zfill(width) -> string\n" |
|
3583 "\n" |
|
3584 "Pad a numeric string S with zeros on the left, to fill a field\n" |
|
3585 "of the specified width. The string S is never truncated."); |
|
3586 |
|
3587 static PyObject * |
|
3588 string_zfill(PyStringObject *self, PyObject *args) |
|
3589 { |
|
3590 Py_ssize_t fill; |
|
3591 PyObject *s; |
|
3592 char *p; |
|
3593 Py_ssize_t width; |
|
3594 |
|
3595 if (!PyArg_ParseTuple(args, "n:zfill", &width)) |
|
3596 return NULL; |
|
3597 |
|
3598 if (PyString_GET_SIZE(self) >= width) { |
|
3599 if (PyString_CheckExact(self)) { |
|
3600 Py_INCREF(self); |
|
3601 return (PyObject*) self; |
|
3602 } |
|
3603 else |
|
3604 return PyString_FromStringAndSize( |
|
3605 PyString_AS_STRING(self), |
|
3606 PyString_GET_SIZE(self) |
|
3607 ); |
|
3608 } |
|
3609 |
|
3610 fill = width - PyString_GET_SIZE(self); |
|
3611 |
|
3612 s = pad(self, fill, 0, '0'); |
|
3613 |
|
3614 if (s == NULL) |
|
3615 return NULL; |
|
3616 |
|
3617 p = PyString_AS_STRING(s); |
|
3618 if (p[fill] == '+' || p[fill] == '-') { |
|
3619 /* move sign to beginning of string */ |
|
3620 p[0] = p[fill]; |
|
3621 p[fill] = '0'; |
|
3622 } |
|
3623 |
|
3624 return (PyObject*) s; |
|
3625 } |
|
3626 |
|
3627 PyDoc_STRVAR(isspace__doc__, |
|
3628 "S.isspace() -> bool\n\ |
|
3629 \n\ |
|
3630 Return True if all characters in S are whitespace\n\ |
|
3631 and there is at least one character in S, False otherwise."); |
|
3632 |
|
3633 static PyObject* |
|
3634 string_isspace(PyStringObject *self) |
|
3635 { |
|
3636 register const unsigned char *p |
|
3637 = (unsigned char *) PyString_AS_STRING(self); |
|
3638 register const unsigned char *e; |
|
3639 |
|
3640 /* Shortcut for single character strings */ |
|
3641 if (PyString_GET_SIZE(self) == 1 && |
|
3642 isspace(*p)) |
|
3643 return PyBool_FromLong(1); |
|
3644 |
|
3645 /* Special case for empty strings */ |
|
3646 if (PyString_GET_SIZE(self) == 0) |
|
3647 return PyBool_FromLong(0); |
|
3648 |
|
3649 e = p + PyString_GET_SIZE(self); |
|
3650 for (; p < e; p++) { |
|
3651 if (!isspace(*p)) |
|
3652 return PyBool_FromLong(0); |
|
3653 } |
|
3654 return PyBool_FromLong(1); |
|
3655 } |
|
3656 |
|
3657 |
|
3658 PyDoc_STRVAR(isalpha__doc__, |
|
3659 "S.isalpha() -> bool\n\ |
|
3660 \n\ |
|
3661 Return True if all characters in S are alphabetic\n\ |
|
3662 and there is at least one character in S, False otherwise."); |
|
3663 |
|
3664 static PyObject* |
|
3665 string_isalpha(PyStringObject *self) |
|
3666 { |
|
3667 register const unsigned char *p |
|
3668 = (unsigned char *) PyString_AS_STRING(self); |
|
3669 register const unsigned char *e; |
|
3670 |
|
3671 /* Shortcut for single character strings */ |
|
3672 if (PyString_GET_SIZE(self) == 1 && |
|
3673 isalpha(*p)) |
|
3674 return PyBool_FromLong(1); |
|
3675 |
|
3676 /* Special case for empty strings */ |
|
3677 if (PyString_GET_SIZE(self) == 0) |
|
3678 return PyBool_FromLong(0); |
|
3679 |
|
3680 e = p + PyString_GET_SIZE(self); |
|
3681 for (; p < e; p++) { |
|
3682 if (!isalpha(*p)) |
|
3683 return PyBool_FromLong(0); |
|
3684 } |
|
3685 return PyBool_FromLong(1); |
|
3686 } |
|
3687 |
|
3688 |
|
3689 PyDoc_STRVAR(isalnum__doc__, |
|
3690 "S.isalnum() -> bool\n\ |
|
3691 \n\ |
|
3692 Return True if all characters in S are alphanumeric\n\ |
|
3693 and there is at least one character in S, False otherwise."); |
|
3694 |
|
3695 static PyObject* |
|
3696 string_isalnum(PyStringObject *self) |
|
3697 { |
|
3698 register const unsigned char *p |
|
3699 = (unsigned char *) PyString_AS_STRING(self); |
|
3700 register const unsigned char *e; |
|
3701 |
|
3702 /* Shortcut for single character strings */ |
|
3703 if (PyString_GET_SIZE(self) == 1 && |
|
3704 isalnum(*p)) |
|
3705 return PyBool_FromLong(1); |
|
3706 |
|
3707 /* Special case for empty strings */ |
|
3708 if (PyString_GET_SIZE(self) == 0) |
|
3709 return PyBool_FromLong(0); |
|
3710 |
|
3711 e = p + PyString_GET_SIZE(self); |
|
3712 for (; p < e; p++) { |
|
3713 if (!isalnum(*p)) |
|
3714 return PyBool_FromLong(0); |
|
3715 } |
|
3716 return PyBool_FromLong(1); |
|
3717 } |
|
3718 |
|
3719 |
|
3720 PyDoc_STRVAR(isdigit__doc__, |
|
3721 "S.isdigit() -> bool\n\ |
|
3722 \n\ |
|
3723 Return True if all characters in S are digits\n\ |
|
3724 and there is at least one character in S, False otherwise."); |
|
3725 |
|
3726 static PyObject* |
|
3727 string_isdigit(PyStringObject *self) |
|
3728 { |
|
3729 register const unsigned char *p |
|
3730 = (unsigned char *) PyString_AS_STRING(self); |
|
3731 register const unsigned char *e; |
|
3732 |
|
3733 /* Shortcut for single character strings */ |
|
3734 if (PyString_GET_SIZE(self) == 1 && |
|
3735 isdigit(*p)) |
|
3736 return PyBool_FromLong(1); |
|
3737 |
|
3738 /* Special case for empty strings */ |
|
3739 if (PyString_GET_SIZE(self) == 0) |
|
3740 return PyBool_FromLong(0); |
|
3741 |
|
3742 e = p + PyString_GET_SIZE(self); |
|
3743 for (; p < e; p++) { |
|
3744 if (!isdigit(*p)) |
|
3745 return PyBool_FromLong(0); |
|
3746 } |
|
3747 return PyBool_FromLong(1); |
|
3748 } |
|
3749 |
|
3750 |
|
3751 PyDoc_STRVAR(islower__doc__, |
|
3752 "S.islower() -> bool\n\ |
|
3753 \n\ |
|
3754 Return True if all cased characters in S are lowercase and there is\n\ |
|
3755 at least one cased character in S, False otherwise."); |
|
3756 |
|
3757 static PyObject* |
|
3758 string_islower(PyStringObject *self) |
|
3759 { |
|
3760 register const unsigned char *p |
|
3761 = (unsigned char *) PyString_AS_STRING(self); |
|
3762 register const unsigned char *e; |
|
3763 int cased; |
|
3764 |
|
3765 /* Shortcut for single character strings */ |
|
3766 if (PyString_GET_SIZE(self) == 1) |
|
3767 return PyBool_FromLong(islower(*p) != 0); |
|
3768 |
|
3769 /* Special case for empty strings */ |
|
3770 if (PyString_GET_SIZE(self) == 0) |
|
3771 return PyBool_FromLong(0); |
|
3772 |
|
3773 e = p + PyString_GET_SIZE(self); |
|
3774 cased = 0; |
|
3775 for (; p < e; p++) { |
|
3776 if (isupper(*p)) |
|
3777 return PyBool_FromLong(0); |
|
3778 else if (!cased && islower(*p)) |
|
3779 cased = 1; |
|
3780 } |
|
3781 return PyBool_FromLong(cased); |
|
3782 } |
|
3783 |
|
3784 |
|
3785 PyDoc_STRVAR(isupper__doc__, |
|
3786 "S.isupper() -> bool\n\ |
|
3787 \n\ |
|
3788 Return True if all cased characters in S are uppercase and there is\n\ |
|
3789 at least one cased character in S, False otherwise."); |
|
3790 |
|
3791 static PyObject* |
|
3792 string_isupper(PyStringObject *self) |
|
3793 { |
|
3794 register const unsigned char *p |
|
3795 = (unsigned char *) PyString_AS_STRING(self); |
|
3796 register const unsigned char *e; |
|
3797 int cased; |
|
3798 |
|
3799 /* Shortcut for single character strings */ |
|
3800 if (PyString_GET_SIZE(self) == 1) |
|
3801 return PyBool_FromLong(isupper(*p) != 0); |
|
3802 |
|
3803 /* Special case for empty strings */ |
|
3804 if (PyString_GET_SIZE(self) == 0) |
|
3805 return PyBool_FromLong(0); |
|
3806 |
|
3807 e = p + PyString_GET_SIZE(self); |
|
3808 cased = 0; |
|
3809 for (; p < e; p++) { |
|
3810 if (islower(*p)) |
|
3811 return PyBool_FromLong(0); |
|
3812 else if (!cased && isupper(*p)) |
|
3813 cased = 1; |
|
3814 } |
|
3815 return PyBool_FromLong(cased); |
|
3816 } |
|
3817 |
|
3818 |
|
3819 PyDoc_STRVAR(istitle__doc__, |
|
3820 "S.istitle() -> bool\n\ |
|
3821 \n\ |
|
3822 Return True if S is a titlecased string and there is at least one\n\ |
|
3823 character in S, i.e. uppercase characters may only follow uncased\n\ |
|
3824 characters and lowercase characters only cased ones. Return False\n\ |
|
3825 otherwise."); |
|
3826 |
|
3827 static PyObject* |
|
3828 string_istitle(PyStringObject *self, PyObject *uncased) |
|
3829 { |
|
3830 register const unsigned char *p |
|
3831 = (unsigned char *) PyString_AS_STRING(self); |
|
3832 register const unsigned char *e; |
|
3833 int cased, previous_is_cased; |
|
3834 |
|
3835 /* Shortcut for single character strings */ |
|
3836 if (PyString_GET_SIZE(self) == 1) |
|
3837 return PyBool_FromLong(isupper(*p) != 0); |
|
3838 |
|
3839 /* Special case for empty strings */ |
|
3840 if (PyString_GET_SIZE(self) == 0) |
|
3841 return PyBool_FromLong(0); |
|
3842 |
|
3843 e = p + PyString_GET_SIZE(self); |
|
3844 cased = 0; |
|
3845 previous_is_cased = 0; |
|
3846 for (; p < e; p++) { |
|
3847 register const unsigned char ch = *p; |
|
3848 |
|
3849 if (isupper(ch)) { |
|
3850 if (previous_is_cased) |
|
3851 return PyBool_FromLong(0); |
|
3852 previous_is_cased = 1; |
|
3853 cased = 1; |
|
3854 } |
|
3855 else if (islower(ch)) { |
|
3856 if (!previous_is_cased) |
|
3857 return PyBool_FromLong(0); |
|
3858 previous_is_cased = 1; |
|
3859 cased = 1; |
|
3860 } |
|
3861 else |
|
3862 previous_is_cased = 0; |
|
3863 } |
|
3864 return PyBool_FromLong(cased); |
|
3865 } |
|
3866 |
|
3867 |
|
3868 PyDoc_STRVAR(splitlines__doc__, |
|
3869 "S.splitlines([keepends]) -> list of strings\n\ |
|
3870 \n\ |
|
3871 Return a list of the lines in S, breaking at line boundaries.\n\ |
|
3872 Line breaks are not included in the resulting list unless keepends\n\ |
|
3873 is given and true."); |
|
3874 |
|
3875 static PyObject* |
|
3876 string_splitlines(PyStringObject *self, PyObject *args) |
|
3877 { |
|
3878 register Py_ssize_t i; |
|
3879 register Py_ssize_t j; |
|
3880 Py_ssize_t len; |
|
3881 int keepends = 0; |
|
3882 PyObject *list; |
|
3883 PyObject *str; |
|
3884 char *data; |
|
3885 |
|
3886 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) |
|
3887 return NULL; |
|
3888 |
|
3889 data = PyString_AS_STRING(self); |
|
3890 len = PyString_GET_SIZE(self); |
|
3891 |
|
3892 /* This does not use the preallocated list because splitlines is |
|
3893 usually run with hundreds of newlines. The overhead of |
|
3894 switching between PyList_SET_ITEM and append causes about a |
|
3895 2-3% slowdown for that common case. A smarter implementation |
|
3896 could move the if check out, so the SET_ITEMs are done first |
|
3897 and the appends only done when the prealloc buffer is full. |
|
3898 That's too much work for little gain.*/ |
|
3899 |
|
3900 list = PyList_New(0); |
|
3901 if (!list) |
|
3902 goto onError; |
|
3903 |
|
3904 for (i = j = 0; i < len; ) { |
|
3905 Py_ssize_t eol; |
|
3906 |
|
3907 /* Find a line and append it */ |
|
3908 while (i < len && data[i] != '\n' && data[i] != '\r') |
|
3909 i++; |
|
3910 |
|
3911 /* Skip the line break reading CRLF as one line break */ |
|
3912 eol = i; |
|
3913 if (i < len) { |
|
3914 if (data[i] == '\r' && i + 1 < len && |
|
3915 data[i+1] == '\n') |
|
3916 i += 2; |
|
3917 else |
|
3918 i++; |
|
3919 if (keepends) |
|
3920 eol = i; |
|
3921 } |
|
3922 SPLIT_APPEND(data, j, eol); |
|
3923 j = i; |
|
3924 } |
|
3925 if (j < len) { |
|
3926 SPLIT_APPEND(data, j, len); |
|
3927 } |
|
3928 |
|
3929 return list; |
|
3930 |
|
3931 onError: |
|
3932 Py_XDECREF(list); |
|
3933 return NULL; |
|
3934 } |
|
3935 |
|
3936 PyDoc_STRVAR(sizeof__doc__, |
|
3937 "S.__sizeof__() -> size of S in memory, in bytes"); |
|
3938 |
|
3939 static PyObject * |
|
3940 string_sizeof(PyStringObject *v) |
|
3941 { |
|
3942 Py_ssize_t res; |
|
3943 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize; |
|
3944 return PyInt_FromSsize_t(res); |
|
3945 } |
|
3946 |
|
3947 #undef SPLIT_APPEND |
|
3948 #undef SPLIT_ADD |
|
3949 #undef MAX_PREALLOC |
|
3950 #undef PREALLOC_SIZE |
|
3951 |
|
3952 static PyObject * |
|
3953 string_getnewargs(PyStringObject *v) |
|
3954 { |
|
3955 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); |
|
3956 } |
|
3957 |
|
3958 |
|
3959 #include "stringlib/string_format.h" |
|
3960 |
|
3961 PyDoc_STRVAR(format__doc__, |
|
3962 "S.format(*args, **kwargs) -> unicode\n\ |
|
3963 \n\ |
|
3964 "); |
|
3965 |
|
3966 static PyObject * |
|
3967 string__format__(PyObject* self, PyObject* args) |
|
3968 { |
|
3969 PyObject *format_spec; |
|
3970 PyObject *result = NULL; |
|
3971 PyObject *tmp = NULL; |
|
3972 |
|
3973 /* If 2.x, convert format_spec to the same type as value */ |
|
3974 /* This is to allow things like u''.format('') */ |
|
3975 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) |
|
3976 goto done; |
|
3977 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { |
|
3978 PyErr_Format(PyExc_TypeError, "__format__ arg must be str " |
|
3979 "or unicode, not %s", Py_TYPE(format_spec)->tp_name); |
|
3980 goto done; |
|
3981 } |
|
3982 tmp = PyObject_Str(format_spec); |
|
3983 if (tmp == NULL) |
|
3984 goto done; |
|
3985 format_spec = tmp; |
|
3986 |
|
3987 result = _PyBytes_FormatAdvanced(self, |
|
3988 PyString_AS_STRING(format_spec), |
|
3989 PyString_GET_SIZE(format_spec)); |
|
3990 done: |
|
3991 Py_XDECREF(tmp); |
|
3992 return result; |
|
3993 } |
|
3994 |
|
3995 PyDoc_STRVAR(p_format__doc__, |
|
3996 "S.__format__(format_spec) -> unicode\n\ |
|
3997 \n\ |
|
3998 "); |
|
3999 |
|
4000 |
|
4001 static PyMethodDef |
|
4002 string_methods[] = { |
|
4003 /* Counterparts of the obsolete stropmodule functions; except |
|
4004 string.maketrans(). */ |
|
4005 {"join", (PyCFunction)string_join, METH_O, join__doc__}, |
|
4006 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, |
|
4007 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, |
|
4008 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, |
|
4009 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, |
|
4010 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, |
|
4011 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, |
|
4012 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, |
|
4013 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, |
|
4014 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, |
|
4015 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, |
|
4016 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, |
|
4017 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, |
|
4018 capitalize__doc__}, |
|
4019 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, |
|
4020 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, |
|
4021 endswith__doc__}, |
|
4022 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, |
|
4023 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, |
|
4024 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, |
|
4025 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, |
|
4026 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, |
|
4027 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, |
|
4028 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, |
|
4029 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, |
|
4030 {"rpartition", (PyCFunction)string_rpartition, METH_O, |
|
4031 rpartition__doc__}, |
|
4032 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, |
|
4033 startswith__doc__}, |
|
4034 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, |
|
4035 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, |
|
4036 swapcase__doc__}, |
|
4037 {"translate", (PyCFunction)string_translate, METH_VARARGS, |
|
4038 translate__doc__}, |
|
4039 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, |
|
4040 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, |
|
4041 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, |
|
4042 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, |
|
4043 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, |
|
4044 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, |
|
4045 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, |
|
4046 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, |
|
4047 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, |
|
4048 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__}, |
|
4049 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__}, |
|
4050 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, |
|
4051 expandtabs__doc__}, |
|
4052 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, |
|
4053 splitlines__doc__}, |
|
4054 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS, |
|
4055 sizeof__doc__}, |
|
4056 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, |
|
4057 {NULL, NULL} /* sentinel */ |
|
4058 }; |
|
4059 |
|
4060 static PyObject * |
|
4061 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); |
|
4062 |
|
4063 static PyObject * |
|
4064 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
4065 { |
|
4066 PyObject *x = NULL; |
|
4067 static char *kwlist[] = {"object", 0}; |
|
4068 |
|
4069 if (type != &PyString_Type) |
|
4070 return str_subtype_new(type, args, kwds); |
|
4071 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) |
|
4072 return NULL; |
|
4073 if (x == NULL) |
|
4074 return PyString_FromString(""); |
|
4075 return PyObject_Str(x); |
|
4076 } |
|
4077 |
|
4078 static PyObject * |
|
4079 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
4080 { |
|
4081 PyObject *tmp, *pnew; |
|
4082 Py_ssize_t n; |
|
4083 |
|
4084 assert(PyType_IsSubtype(type, &PyString_Type)); |
|
4085 tmp = string_new(&PyString_Type, args, kwds); |
|
4086 if (tmp == NULL) |
|
4087 return NULL; |
|
4088 assert(PyString_CheckExact(tmp)); |
|
4089 n = PyString_GET_SIZE(tmp); |
|
4090 pnew = type->tp_alloc(type, n); |
|
4091 if (pnew != NULL) { |
|
4092 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); |
|
4093 ((PyStringObject *)pnew)->ob_shash = |
|
4094 ((PyStringObject *)tmp)->ob_shash; |
|
4095 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; |
|
4096 } |
|
4097 Py_DECREF(tmp); |
|
4098 return pnew; |
|
4099 } |
|
4100 |
|
4101 static PyObject * |
|
4102 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
|
4103 { |
|
4104 PyErr_SetString(PyExc_TypeError, |
|
4105 "The basestring type cannot be instantiated"); |
|
4106 return NULL; |
|
4107 } |
|
4108 |
|
4109 static PyObject * |
|
4110 string_mod(PyObject *v, PyObject *w) |
|
4111 { |
|
4112 if (!PyString_Check(v)) { |
|
4113 Py_INCREF(Py_NotImplemented); |
|
4114 return Py_NotImplemented; |
|
4115 } |
|
4116 return PyString_Format(v, w); |
|
4117 } |
|
4118 |
|
4119 PyDoc_STRVAR(basestring_doc, |
|
4120 "Type basestring cannot be instantiated; it is the base for str and unicode."); |
|
4121 |
|
4122 static PyNumberMethods string_as_number = { |
|
4123 0, /*nb_add*/ |
|
4124 0, /*nb_subtract*/ |
|
4125 0, /*nb_multiply*/ |
|
4126 0, /*nb_divide*/ |
|
4127 string_mod, /*nb_remainder*/ |
|
4128 }; |
|
4129 |
|
4130 |
|
4131 PyTypeObject PyBaseString_Type = { |
|
4132 PyVarObject_HEAD_INIT(&PyType_Type, 0) |
|
4133 "basestring", |
|
4134 0, |
|
4135 0, |
|
4136 0, /* tp_dealloc */ |
|
4137 0, /* tp_print */ |
|
4138 0, /* tp_getattr */ |
|
4139 0, /* tp_setattr */ |
|
4140 0, /* tp_compare */ |
|
4141 0, /* tp_repr */ |
|
4142 0, /* tp_as_number */ |
|
4143 0, /* tp_as_sequence */ |
|
4144 0, /* tp_as_mapping */ |
|
4145 0, /* tp_hash */ |
|
4146 0, /* tp_call */ |
|
4147 0, /* tp_str */ |
|
4148 0, /* tp_getattro */ |
|
4149 0, /* tp_setattro */ |
|
4150 0, /* tp_as_buffer */ |
|
4151 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
4152 basestring_doc, /* tp_doc */ |
|
4153 0, /* tp_traverse */ |
|
4154 0, /* tp_clear */ |
|
4155 0, /* tp_richcompare */ |
|
4156 0, /* tp_weaklistoffset */ |
|
4157 0, /* tp_iter */ |
|
4158 0, /* tp_iternext */ |
|
4159 0, /* tp_methods */ |
|
4160 0, /* tp_members */ |
|
4161 0, /* tp_getset */ |
|
4162 &PyBaseObject_Type, /* tp_base */ |
|
4163 0, /* tp_dict */ |
|
4164 0, /* tp_descr_get */ |
|
4165 0, /* tp_descr_set */ |
|
4166 0, /* tp_dictoffset */ |
|
4167 0, /* tp_init */ |
|
4168 0, /* tp_alloc */ |
|
4169 basestring_new, /* tp_new */ |
|
4170 0, /* tp_free */ |
|
4171 }; |
|
4172 |
|
4173 PyDoc_STRVAR(string_doc, |
|
4174 "str(object) -> string\n\ |
|
4175 \n\ |
|
4176 Return a nice string representation of the object.\n\ |
|
4177 If the argument is a string, the return value is the same object."); |
|
4178 |
|
4179 PyTypeObject PyString_Type = { |
|
4180 PyVarObject_HEAD_INIT(&PyType_Type, 0) |
|
4181 "str", |
|
4182 sizeof(PyStringObject), |
|
4183 sizeof(char), |
|
4184 string_dealloc, /* tp_dealloc */ |
|
4185 (printfunc)string_print, /* tp_print */ |
|
4186 0, /* tp_getattr */ |
|
4187 0, /* tp_setattr */ |
|
4188 0, /* tp_compare */ |
|
4189 string_repr, /* tp_repr */ |
|
4190 &string_as_number, /* tp_as_number */ |
|
4191 &string_as_sequence, /* tp_as_sequence */ |
|
4192 &string_as_mapping, /* tp_as_mapping */ |
|
4193 (hashfunc)string_hash, /* tp_hash */ |
|
4194 0, /* tp_call */ |
|
4195 string_str, /* tp_str */ |
|
4196 PyObject_GenericGetAttr, /* tp_getattro */ |
|
4197 0, /* tp_setattro */ |
|
4198 &string_as_buffer, /* tp_as_buffer */ |
|
4199 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | |
|
4200 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS | |
|
4201 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */ |
|
4202 string_doc, /* tp_doc */ |
|
4203 0, /* tp_traverse */ |
|
4204 0, /* tp_clear */ |
|
4205 (richcmpfunc)string_richcompare, /* tp_richcompare */ |
|
4206 0, /* tp_weaklistoffset */ |
|
4207 0, /* tp_iter */ |
|
4208 0, /* tp_iternext */ |
|
4209 string_methods, /* tp_methods */ |
|
4210 0, /* tp_members */ |
|
4211 0, /* tp_getset */ |
|
4212 &PyBaseString_Type, /* tp_base */ |
|
4213 0, /* tp_dict */ |
|
4214 0, /* tp_descr_get */ |
|
4215 0, /* tp_descr_set */ |
|
4216 0, /* tp_dictoffset */ |
|
4217 0, /* tp_init */ |
|
4218 0, /* tp_alloc */ |
|
4219 string_new, /* tp_new */ |
|
4220 PyObject_Del, /* tp_free */ |
|
4221 }; |
|
4222 |
|
4223 void |
|
4224 PyString_Concat(register PyObject **pv, register PyObject *w) |
|
4225 { |
|
4226 register PyObject *v; |
|
4227 if (*pv == NULL) |
|
4228 return; |
|
4229 if (w == NULL || !PyString_Check(*pv)) { |
|
4230 Py_DECREF(*pv); |
|
4231 *pv = NULL; |
|
4232 return; |
|
4233 } |
|
4234 v = string_concat((PyStringObject *) *pv, w); |
|
4235 Py_DECREF(*pv); |
|
4236 *pv = v; |
|
4237 } |
|
4238 |
|
4239 void |
|
4240 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) |
|
4241 { |
|
4242 PyString_Concat(pv, w); |
|
4243 Py_XDECREF(w); |
|
4244 } |
|
4245 |
|
4246 |
|
4247 /* The following function breaks the notion that strings are immutable: |
|
4248 it changes the size of a string. We get away with this only if there |
|
4249 is only one module referencing the object. You can also think of it |
|
4250 as creating a new string object and destroying the old one, only |
|
4251 more efficiently. In any case, don't use this if the string may |
|
4252 already be known to some other part of the code... |
|
4253 Note that if there's not enough memory to resize the string, the original |
|
4254 string object at *pv is deallocated, *pv is set to NULL, an "out of |
|
4255 memory" exception is set, and -1 is returned. Else (on success) 0 is |
|
4256 returned, and the value in *pv may or may not be the same as on input. |
|
4257 As always, an extra byte is allocated for a trailing \0 byte (newsize |
|
4258 does *not* include that), and a trailing \0 byte is stored. |
|
4259 */ |
|
4260 |
|
4261 int |
|
4262 _PyString_Resize(PyObject **pv, Py_ssize_t newsize) |
|
4263 { |
|
4264 register PyObject *v; |
|
4265 register PyStringObject *sv; |
|
4266 v = *pv; |
|
4267 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || |
|
4268 PyString_CHECK_INTERNED(v)) { |
|
4269 *pv = 0; |
|
4270 Py_DECREF(v); |
|
4271 PyErr_BadInternalCall(); |
|
4272 return -1; |
|
4273 } |
|
4274 /* XXX UNREF/NEWREF interface should be more symmetrical */ |
|
4275 _Py_DEC_REFTOTAL; |
|
4276 _Py_ForgetReference(v); |
|
4277 *pv = (PyObject *) |
|
4278 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize); |
|
4279 if (*pv == NULL) { |
|
4280 PyObject_Del(v); |
|
4281 PyErr_NoMemory(); |
|
4282 return -1; |
|
4283 } |
|
4284 _Py_NewReference(*pv); |
|
4285 sv = (PyStringObject *) *pv; |
|
4286 Py_SIZE(sv) = newsize; |
|
4287 sv->ob_sval[newsize] = '\0'; |
|
4288 sv->ob_shash = -1; /* invalidate cached hash value */ |
|
4289 return 0; |
|
4290 } |
|
4291 |
|
4292 /* Helpers for formatstring */ |
|
4293 |
|
4294 Py_LOCAL_INLINE(PyObject *) |
|
4295 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) |
|
4296 { |
|
4297 Py_ssize_t argidx = *p_argidx; |
|
4298 if (argidx < arglen) { |
|
4299 (*p_argidx)++; |
|
4300 if (arglen < 0) |
|
4301 return args; |
|
4302 else |
|
4303 return PyTuple_GetItem(args, argidx); |
|
4304 } |
|
4305 PyErr_SetString(PyExc_TypeError, |
|
4306 "not enough arguments for format string"); |
|
4307 return NULL; |
|
4308 } |
|
4309 |
|
4310 /* Format codes |
|
4311 * F_LJUST '-' |
|
4312 * F_SIGN '+' |
|
4313 * F_BLANK ' ' |
|
4314 * F_ALT '#' |
|
4315 * F_ZERO '0' |
|
4316 */ |
|
4317 #define F_LJUST (1<<0) |
|
4318 #define F_SIGN (1<<1) |
|
4319 #define F_BLANK (1<<2) |
|
4320 #define F_ALT (1<<3) |
|
4321 #define F_ZERO (1<<4) |
|
4322 |
|
4323 Py_LOCAL_INLINE(int) |
|
4324 formatfloat(char *buf, size_t buflen, int flags, |
|
4325 int prec, int type, PyObject *v) |
|
4326 { |
|
4327 /* fmt = '%#.' + `prec` + `type` |
|
4328 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ |
|
4329 char fmt[20]; |
|
4330 double x; |
|
4331 x = PyFloat_AsDouble(v); |
|
4332 if (x == -1.0 && PyErr_Occurred()) { |
|
4333 PyErr_Format(PyExc_TypeError, "float argument required, " |
|
4334 "not %.200s", Py_TYPE(v)->tp_name); |
|
4335 return -1; |
|
4336 } |
|
4337 if (prec < 0) |
|
4338 prec = 6; |
|
4339 if (type == 'f' && fabs(x)/1e25 >= 1e25) |
|
4340 type = 'g'; |
|
4341 /* Worst case length calc to ensure no buffer overrun: |
|
4342 |
|
4343 'g' formats: |
|
4344 fmt = %#.<prec>g |
|
4345 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp |
|
4346 for any double rep.) |
|
4347 len = 1 + prec + 1 + 2 + 5 = 9 + prec |
|
4348 |
|
4349 'f' formats: |
|
4350 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) |
|
4351 len = 1 + 50 + 1 + prec = 52 + prec |
|
4352 |
|
4353 If prec=0 the effective precision is 1 (the leading digit is |
|
4354 always given), therefore increase the length by one. |
|
4355 |
|
4356 */ |
|
4357 if (((type == 'g' || type == 'G') && |
|
4358 buflen <= (size_t)10 + (size_t)prec) || |
|
4359 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) { |
|
4360 PyErr_SetString(PyExc_OverflowError, |
|
4361 "formatted float is too long (precision too large?)"); |
|
4362 return -1; |
|
4363 } |
|
4364 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", |
|
4365 (flags&F_ALT) ? "#" : "", |
|
4366 prec, type); |
|
4367 PyOS_ascii_formatd(buf, buflen, fmt, x); |
|
4368 return (int)strlen(buf); |
|
4369 } |
|
4370 |
|
4371 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and |
|
4372 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for |
|
4373 * Python's regular ints. |
|
4374 * Return value: a new PyString*, or NULL if error. |
|
4375 * . *pbuf is set to point into it, |
|
4376 * *plen set to the # of chars following that. |
|
4377 * Caller must decref it when done using pbuf. |
|
4378 * The string starting at *pbuf is of the form |
|
4379 * "-"? ("0x" | "0X")? digit+ |
|
4380 * "0x"/"0X" are present only for x and X conversions, with F_ALT |
|
4381 * set in flags. The case of hex digits will be correct, |
|
4382 * There will be at least prec digits, zero-filled on the left if |
|
4383 * necessary to get that many. |
|
4384 * val object to be converted |
|
4385 * flags bitmask of format flags; only F_ALT is looked at |
|
4386 * prec minimum number of digits; 0-fill on left if needed |
|
4387 * type a character in [duoxX]; u acts the same as d |
|
4388 * |
|
4389 * CAUTION: o, x and X conversions on regular ints can never |
|
4390 * produce a '-' sign, but can for Python's unbounded ints. |
|
4391 */ |
|
4392 PyObject* |
|
4393 _PyString_FormatLong(PyObject *val, int flags, int prec, int type, |
|
4394 char **pbuf, int *plen) |
|
4395 { |
|
4396 PyObject *result = NULL; |
|
4397 char *buf; |
|
4398 Py_ssize_t i; |
|
4399 int sign; /* 1 if '-', else 0 */ |
|
4400 int len; /* number of characters */ |
|
4401 Py_ssize_t llen; |
|
4402 int numdigits; /* len == numnondigits + numdigits */ |
|
4403 int numnondigits = 0; |
|
4404 |
|
4405 switch (type) { |
|
4406 case 'd': |
|
4407 case 'u': |
|
4408 result = Py_TYPE(val)->tp_str(val); |
|
4409 break; |
|
4410 case 'o': |
|
4411 result = Py_TYPE(val)->tp_as_number->nb_oct(val); |
|
4412 break; |
|
4413 case 'x': |
|
4414 case 'X': |
|
4415 numnondigits = 2; |
|
4416 result = Py_TYPE(val)->tp_as_number->nb_hex(val); |
|
4417 break; |
|
4418 default: |
|
4419 assert(!"'type' not in [duoxX]"); |
|
4420 } |
|
4421 if (!result) |
|
4422 return NULL; |
|
4423 |
|
4424 buf = PyString_AsString(result); |
|
4425 if (!buf) { |
|
4426 Py_DECREF(result); |
|
4427 return NULL; |
|
4428 } |
|
4429 |
|
4430 /* To modify the string in-place, there can only be one reference. */ |
|
4431 if (Py_REFCNT(result) != 1) { |
|
4432 PyErr_BadInternalCall(); |
|
4433 return NULL; |
|
4434 } |
|
4435 llen = PyString_Size(result); |
|
4436 if (llen > INT_MAX) { |
|
4437 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); |
|
4438 return NULL; |
|
4439 } |
|
4440 len = (int)llen; |
|
4441 if (buf[len-1] == 'L') { |
|
4442 --len; |
|
4443 buf[len] = '\0'; |
|
4444 } |
|
4445 sign = buf[0] == '-'; |
|
4446 numnondigits += sign; |
|
4447 numdigits = len - numnondigits; |
|
4448 assert(numdigits > 0); |
|
4449 |
|
4450 /* Get rid of base marker unless F_ALT */ |
|
4451 if ((flags & F_ALT) == 0) { |
|
4452 /* Need to skip 0x, 0X or 0. */ |
|
4453 int skipped = 0; |
|
4454 switch (type) { |
|
4455 case 'o': |
|
4456 assert(buf[sign] == '0'); |
|
4457 /* If 0 is only digit, leave it alone. */ |
|
4458 if (numdigits > 1) { |
|
4459 skipped = 1; |
|
4460 --numdigits; |
|
4461 } |
|
4462 break; |
|
4463 case 'x': |
|
4464 case 'X': |
|
4465 assert(buf[sign] == '0'); |
|
4466 assert(buf[sign + 1] == 'x'); |
|
4467 skipped = 2; |
|
4468 numnondigits -= 2; |
|
4469 break; |
|
4470 } |
|
4471 if (skipped) { |
|
4472 buf += skipped; |
|
4473 len -= skipped; |
|
4474 if (sign) |
|
4475 buf[0] = '-'; |
|
4476 } |
|
4477 assert(len == numnondigits + numdigits); |
|
4478 assert(numdigits > 0); |
|
4479 } |
|
4480 |
|
4481 /* Fill with leading zeroes to meet minimum width. */ |
|
4482 if (prec > numdigits) { |
|
4483 PyObject *r1 = PyString_FromStringAndSize(NULL, |
|
4484 numnondigits + prec); |
|
4485 char *b1; |
|
4486 if (!r1) { |
|
4487 Py_DECREF(result); |
|
4488 return NULL; |
|
4489 } |
|
4490 b1 = PyString_AS_STRING(r1); |
|
4491 for (i = 0; i < numnondigits; ++i) |
|
4492 *b1++ = *buf++; |
|
4493 for (i = 0; i < prec - numdigits; i++) |
|
4494 *b1++ = '0'; |
|
4495 for (i = 0; i < numdigits; i++) |
|
4496 *b1++ = *buf++; |
|
4497 *b1 = '\0'; |
|
4498 Py_DECREF(result); |
|
4499 result = r1; |
|
4500 buf = PyString_AS_STRING(result); |
|
4501 len = numnondigits + prec; |
|
4502 } |
|
4503 |
|
4504 /* Fix up case for hex conversions. */ |
|
4505 if (type == 'X') { |
|
4506 /* Need to convert all lower case letters to upper case. |
|
4507 and need to convert 0x to 0X (and -0x to -0X). */ |
|
4508 for (i = 0; i < len; i++) |
|
4509 if (buf[i] >= 'a' && buf[i] <= 'x') |
|
4510 buf[i] -= 'a'-'A'; |
|
4511 } |
|
4512 *pbuf = buf; |
|
4513 *plen = len; |
|
4514 return result; |
|
4515 } |
|
4516 |
|
4517 Py_LOCAL_INLINE(int) |
|
4518 formatint(char *buf, size_t buflen, int flags, |
|
4519 int prec, int type, PyObject *v) |
|
4520 { |
|
4521 /* fmt = '%#.' + `prec` + 'l' + `type` |
|
4522 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) |
|
4523 + 1 + 1 = 24 */ |
|
4524 char fmt[64]; /* plenty big enough! */ |
|
4525 char *sign; |
|
4526 long x; |
|
4527 |
|
4528 x = PyInt_AsLong(v); |
|
4529 if (x == -1 && PyErr_Occurred()) { |
|
4530 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", |
|
4531 Py_TYPE(v)->tp_name); |
|
4532 return -1; |
|
4533 } |
|
4534 if (x < 0 && type == 'u') { |
|
4535 type = 'd'; |
|
4536 } |
|
4537 if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) |
|
4538 sign = "-"; |
|
4539 else |
|
4540 sign = ""; |
|
4541 if (prec < 0) |
|
4542 prec = 1; |
|
4543 |
|
4544 if ((flags & F_ALT) && |
|
4545 (type == 'x' || type == 'X')) { |
|
4546 /* When converting under %#x or %#X, there are a number |
|
4547 * of issues that cause pain: |
|
4548 * - when 0 is being converted, the C standard leaves off |
|
4549 * the '0x' or '0X', which is inconsistent with other |
|
4550 * %#x/%#X conversions and inconsistent with Python's |
|
4551 * hex() function |
|
4552 * - there are platforms that violate the standard and |
|
4553 * convert 0 with the '0x' or '0X' |
|
4554 * (Metrowerks, Compaq Tru64) |
|
4555 * - there are platforms that give '0x' when converting |
|
4556 * under %#X, but convert 0 in accordance with the |
|
4557 * standard (OS/2 EMX) |
|
4558 * |
|
4559 * We can achieve the desired consistency by inserting our |
|
4560 * own '0x' or '0X' prefix, and substituting %x/%X in place |
|
4561 * of %#x/%#X. |
|
4562 * |
|
4563 * Note that this is the same approach as used in |
|
4564 * formatint() in unicodeobject.c |
|
4565 */ |
|
4566 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", |
|
4567 sign, type, prec, type); |
|
4568 } |
|
4569 else { |
|
4570 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", |
|
4571 sign, (flags&F_ALT) ? "#" : "", |
|
4572 prec, type); |
|
4573 } |
|
4574 |
|
4575 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) |
|
4576 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 |
|
4577 */ |
|
4578 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { |
|
4579 PyErr_SetString(PyExc_OverflowError, |
|
4580 "formatted integer is too long (precision too large?)"); |
|
4581 return -1; |
|
4582 } |
|
4583 if (sign[0]) |
|
4584 PyOS_snprintf(buf, buflen, fmt, -x); |
|
4585 else |
|
4586 PyOS_snprintf(buf, buflen, fmt, x); |
|
4587 return (int)strlen(buf); |
|
4588 } |
|
4589 |
|
4590 Py_LOCAL_INLINE(int) |
|
4591 formatchar(char *buf, size_t buflen, PyObject *v) |
|
4592 { |
|
4593 /* presume that the buffer is at least 2 characters long */ |
|
4594 if (PyString_Check(v)) { |
|
4595 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) |
|
4596 return -1; |
|
4597 } |
|
4598 else { |
|
4599 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) |
|
4600 return -1; |
|
4601 } |
|
4602 buf[1] = '\0'; |
|
4603 return 1; |
|
4604 } |
|
4605 |
|
4606 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) |
|
4607 |
|
4608 FORMATBUFLEN is the length of the buffer in which the floats, ints, & |
|
4609 chars are formatted. XXX This is a magic number. Each formatting |
|
4610 routine does bounds checking to ensure no overflow, but a better |
|
4611 solution may be to malloc a buffer of appropriate size for each |
|
4612 format. For now, the current solution is sufficient. |
|
4613 */ |
|
4614 #define FORMATBUFLEN (size_t)120 |
|
4615 |
|
4616 PyObject * |
|
4617 PyString_Format(PyObject *format, PyObject *args) |
|
4618 { |
|
4619 char *fmt, *res; |
|
4620 Py_ssize_t arglen, argidx; |
|
4621 Py_ssize_t reslen, rescnt, fmtcnt; |
|
4622 int args_owned = 0; |
|
4623 PyObject *result, *orig_args; |
|
4624 #ifdef Py_USING_UNICODE |
|
4625 PyObject *v, *w; |
|
4626 #endif |
|
4627 PyObject *dict = NULL; |
|
4628 if (format == NULL || !PyString_Check(format) || args == NULL) { |
|
4629 PyErr_BadInternalCall(); |
|
4630 return NULL; |
|
4631 } |
|
4632 orig_args = args; |
|
4633 fmt = PyString_AS_STRING(format); |
|
4634 fmtcnt = PyString_GET_SIZE(format); |
|
4635 reslen = rescnt = fmtcnt + 100; |
|
4636 result = PyString_FromStringAndSize((char *)NULL, reslen); |
|
4637 if (result == NULL) |
|
4638 return NULL; |
|
4639 res = PyString_AsString(result); |
|
4640 if (PyTuple_Check(args)) { |
|
4641 arglen = PyTuple_GET_SIZE(args); |
|
4642 argidx = 0; |
|
4643 } |
|
4644 else { |
|
4645 arglen = -1; |
|
4646 argidx = -2; |
|
4647 } |
|
4648 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) && |
|
4649 !PyObject_TypeCheck(args, &PyBaseString_Type)) |
|
4650 dict = args; |
|
4651 while (--fmtcnt >= 0) { |
|
4652 if (*fmt != '%') { |
|
4653 if (--rescnt < 0) { |
|
4654 rescnt = fmtcnt + 100; |
|
4655 reslen += rescnt; |
|
4656 if (_PyString_Resize(&result, reslen) < 0) |
|
4657 return NULL; |
|
4658 res = PyString_AS_STRING(result) |
|
4659 + reslen - rescnt; |
|
4660 --rescnt; |
|
4661 } |
|
4662 *res++ = *fmt++; |
|
4663 } |
|
4664 else { |
|
4665 /* Got a format specifier */ |
|
4666 int flags = 0; |
|
4667 Py_ssize_t width = -1; |
|
4668 int prec = -1; |
|
4669 int c = '\0'; |
|
4670 int fill; |
|
4671 int isnumok; |
|
4672 PyObject *v = NULL; |
|
4673 PyObject *temp = NULL; |
|
4674 char *pbuf; |
|
4675 int sign; |
|
4676 Py_ssize_t len; |
|
4677 char formatbuf[FORMATBUFLEN]; |
|
4678 /* For format{float,int,char}() */ |
|
4679 #ifdef Py_USING_UNICODE |
|
4680 char *fmt_start = fmt; |
|
4681 Py_ssize_t argidx_start = argidx; |
|
4682 #endif |
|
4683 |
|
4684 fmt++; |
|
4685 if (*fmt == '(') { |
|
4686 char *keystart; |
|
4687 Py_ssize_t keylen; |
|
4688 PyObject *key; |
|
4689 int pcount = 1; |
|
4690 |
|
4691 if (dict == NULL) { |
|
4692 PyErr_SetString(PyExc_TypeError, |
|
4693 "format requires a mapping"); |
|
4694 goto error; |
|
4695 } |
|
4696 ++fmt; |
|
4697 --fmtcnt; |
|
4698 keystart = fmt; |
|
4699 /* Skip over balanced parentheses */ |
|
4700 while (pcount > 0 && --fmtcnt >= 0) { |
|
4701 if (*fmt == ')') |
|
4702 --pcount; |
|
4703 else if (*fmt == '(') |
|
4704 ++pcount; |
|
4705 fmt++; |
|
4706 } |
|
4707 keylen = fmt - keystart - 1; |
|
4708 if (fmtcnt < 0 || pcount > 0) { |
|
4709 PyErr_SetString(PyExc_ValueError, |
|
4710 "incomplete format key"); |
|
4711 goto error; |
|
4712 } |
|
4713 key = PyString_FromStringAndSize(keystart, |
|
4714 keylen); |
|
4715 if (key == NULL) |
|
4716 goto error; |
|
4717 if (args_owned) { |
|
4718 Py_DECREF(args); |
|
4719 args_owned = 0; |
|
4720 } |
|
4721 args = PyObject_GetItem(dict, key); |
|
4722 Py_DECREF(key); |
|
4723 if (args == NULL) { |
|
4724 goto error; |
|
4725 } |
|
4726 args_owned = 1; |
|
4727 arglen = -1; |
|
4728 argidx = -2; |
|
4729 } |
|
4730 while (--fmtcnt >= 0) { |
|
4731 switch (c = *fmt++) { |
|
4732 case '-': flags |= F_LJUST; continue; |
|
4733 case '+': flags |= F_SIGN; continue; |
|
4734 case ' ': flags |= F_BLANK; continue; |
|
4735 case '#': flags |= F_ALT; continue; |
|
4736 case '0': flags |= F_ZERO; continue; |
|
4737 } |
|
4738 break; |
|
4739 } |
|
4740 if (c == '*') { |
|
4741 v = getnextarg(args, arglen, &argidx); |
|
4742 if (v == NULL) |
|
4743 goto error; |
|
4744 if (!PyInt_Check(v)) { |
|
4745 PyErr_SetString(PyExc_TypeError, |
|
4746 "* wants int"); |
|
4747 goto error; |
|
4748 } |
|
4749 width = PyInt_AsLong(v); |
|
4750 if (width < 0) { |
|
4751 flags |= F_LJUST; |
|
4752 width = -width; |
|
4753 } |
|
4754 if (--fmtcnt >= 0) |
|
4755 c = *fmt++; |
|
4756 } |
|
4757 else if (c >= 0 && isdigit(c)) { |
|
4758 width = c - '0'; |
|
4759 while (--fmtcnt >= 0) { |
|
4760 c = Py_CHARMASK(*fmt++); |
|
4761 if (!isdigit(c)) |
|
4762 break; |
|
4763 if ((width*10) / 10 != width) { |
|
4764 PyErr_SetString( |
|
4765 PyExc_ValueError, |
|
4766 "width too big"); |
|
4767 goto error; |
|
4768 } |
|
4769 width = width*10 + (c - '0'); |
|
4770 } |
|
4771 } |
|
4772 if (c == '.') { |
|
4773 prec = 0; |
|
4774 if (--fmtcnt >= 0) |
|
4775 c = *fmt++; |
|
4776 if (c == '*') { |
|
4777 v = getnextarg(args, arglen, &argidx); |
|
4778 if (v == NULL) |
|
4779 goto error; |
|
4780 if (!PyInt_Check(v)) { |
|
4781 PyErr_SetString( |
|
4782 PyExc_TypeError, |
|
4783 "* wants int"); |
|
4784 goto error; |
|
4785 } |
|
4786 prec = PyInt_AsLong(v); |
|
4787 if (prec < 0) |
|
4788 prec = 0; |
|
4789 if (--fmtcnt >= 0) |
|
4790 c = *fmt++; |
|
4791 } |
|
4792 else if (c >= 0 && isdigit(c)) { |
|
4793 prec = c - '0'; |
|
4794 while (--fmtcnt >= 0) { |
|
4795 c = Py_CHARMASK(*fmt++); |
|
4796 if (!isdigit(c)) |
|
4797 break; |
|
4798 if ((prec*10) / 10 != prec) { |
|
4799 PyErr_SetString( |
|
4800 PyExc_ValueError, |
|
4801 "prec too big"); |
|
4802 goto error; |
|
4803 } |
|
4804 prec = prec*10 + (c - '0'); |
|
4805 } |
|
4806 } |
|
4807 } /* prec */ |
|
4808 if (fmtcnt >= 0) { |
|
4809 if (c == 'h' || c == 'l' || c == 'L') { |
|
4810 if (--fmtcnt >= 0) |
|
4811 c = *fmt++; |
|
4812 } |
|
4813 } |
|
4814 if (fmtcnt < 0) { |
|
4815 PyErr_SetString(PyExc_ValueError, |
|
4816 "incomplete format"); |
|
4817 goto error; |
|
4818 } |
|
4819 if (c != '%') { |
|
4820 v = getnextarg(args, arglen, &argidx); |
|
4821 if (v == NULL) |
|
4822 goto error; |
|
4823 } |
|
4824 sign = 0; |
|
4825 fill = ' '; |
|
4826 switch (c) { |
|
4827 case '%': |
|
4828 pbuf = "%"; |
|
4829 len = 1; |
|
4830 break; |
|
4831 case 's': |
|
4832 #ifdef Py_USING_UNICODE |
|
4833 if (PyUnicode_Check(v)) { |
|
4834 fmt = fmt_start; |
|
4835 argidx = argidx_start; |
|
4836 goto unicode; |
|
4837 } |
|
4838 #endif |
|
4839 temp = _PyObject_Str(v); |
|
4840 #ifdef Py_USING_UNICODE |
|
4841 if (temp != NULL && PyUnicode_Check(temp)) { |
|
4842 Py_DECREF(temp); |
|
4843 fmt = fmt_start; |
|
4844 argidx = argidx_start; |
|
4845 goto unicode; |
|
4846 } |
|
4847 #endif |
|
4848 /* Fall through */ |
|
4849 case 'r': |
|
4850 if (c == 'r') |
|
4851 temp = PyObject_Repr(v); |
|
4852 if (temp == NULL) |
|
4853 goto error; |
|
4854 if (!PyString_Check(temp)) { |
|
4855 PyErr_SetString(PyExc_TypeError, |
|
4856 "%s argument has non-string str()"); |
|
4857 Py_DECREF(temp); |
|
4858 goto error; |
|
4859 } |
|
4860 pbuf = PyString_AS_STRING(temp); |
|
4861 len = PyString_GET_SIZE(temp); |
|
4862 if (prec >= 0 && len > prec) |
|
4863 len = prec; |
|
4864 break; |
|
4865 case 'i': |
|
4866 case 'd': |
|
4867 case 'u': |
|
4868 case 'o': |
|
4869 case 'x': |
|
4870 case 'X': |
|
4871 if (c == 'i') |
|
4872 c = 'd'; |
|
4873 isnumok = 0; |
|
4874 if (PyNumber_Check(v)) { |
|
4875 PyObject *iobj=NULL; |
|
4876 |
|
4877 if (PyInt_Check(v) || (PyLong_Check(v))) { |
|
4878 iobj = v; |
|
4879 Py_INCREF(iobj); |
|
4880 } |
|
4881 else { |
|
4882 iobj = PyNumber_Int(v); |
|
4883 if (iobj==NULL) iobj = PyNumber_Long(v); |
|
4884 } |
|
4885 if (iobj!=NULL) { |
|
4886 if (PyInt_Check(iobj)) { |
|
4887 isnumok = 1; |
|
4888 pbuf = formatbuf; |
|
4889 len = formatint(pbuf, |
|
4890 sizeof(formatbuf), |
|
4891 flags, prec, c, iobj); |
|
4892 Py_DECREF(iobj); |
|
4893 if (len < 0) |
|
4894 goto error; |
|
4895 sign = 1; |
|
4896 } |
|
4897 else if (PyLong_Check(iobj)) { |
|
4898 int ilen; |
|
4899 |
|
4900 isnumok = 1; |
|
4901 temp = _PyString_FormatLong(iobj, flags, |
|
4902 prec, c, &pbuf, &ilen); |
|
4903 Py_DECREF(iobj); |
|
4904 len = ilen; |
|
4905 if (!temp) |
|
4906 goto error; |
|
4907 sign = 1; |
|
4908 } |
|
4909 else { |
|
4910 Py_DECREF(iobj); |
|
4911 } |
|
4912 } |
|
4913 } |
|
4914 if (!isnumok) { |
|
4915 PyErr_Format(PyExc_TypeError, |
|
4916 "%%%c format: a number is required, " |
|
4917 "not %.200s", c, Py_TYPE(v)->tp_name); |
|
4918 goto error; |
|
4919 } |
|
4920 if (flags & F_ZERO) |
|
4921 fill = '0'; |
|
4922 break; |
|
4923 case 'e': |
|
4924 case 'E': |
|
4925 case 'f': |
|
4926 case 'F': |
|
4927 case 'g': |
|
4928 case 'G': |
|
4929 if (c == 'F') |
|
4930 c = 'f'; |
|
4931 pbuf = formatbuf; |
|
4932 len = formatfloat(pbuf, sizeof(formatbuf), |
|
4933 flags, prec, c, v); |
|
4934 if (len < 0) |
|
4935 goto error; |
|
4936 sign = 1; |
|
4937 if (flags & F_ZERO) |
|
4938 fill = '0'; |
|
4939 break; |
|
4940 case 'c': |
|
4941 #ifdef Py_USING_UNICODE |
|
4942 if (PyUnicode_Check(v)) { |
|
4943 fmt = fmt_start; |
|
4944 argidx = argidx_start; |
|
4945 goto unicode; |
|
4946 } |
|
4947 #endif |
|
4948 pbuf = formatbuf; |
|
4949 len = formatchar(pbuf, sizeof(formatbuf), v); |
|
4950 if (len < 0) |
|
4951 goto error; |
|
4952 break; |
|
4953 default: |
|
4954 PyErr_Format(PyExc_ValueError, |
|
4955 "unsupported format character '%c' (0x%x) " |
|
4956 "at index %zd", |
|
4957 c, c, |
|
4958 (Py_ssize_t)(fmt - 1 - |
|
4959 PyString_AsString(format))); |
|
4960 goto error; |
|
4961 } |
|
4962 if (sign) { |
|
4963 if (*pbuf == '-' || *pbuf == '+') { |
|
4964 sign = *pbuf++; |
|
4965 len--; |
|
4966 } |
|
4967 else if (flags & F_SIGN) |
|
4968 sign = '+'; |
|
4969 else if (flags & F_BLANK) |
|
4970 sign = ' '; |
|
4971 else |
|
4972 sign = 0; |
|
4973 } |
|
4974 if (width < len) |
|
4975 width = len; |
|
4976 if (rescnt - (sign != 0) < width) { |
|
4977 reslen -= rescnt; |
|
4978 rescnt = width + fmtcnt + 100; |
|
4979 reslen += rescnt; |
|
4980 if (reslen < 0) { |
|
4981 Py_DECREF(result); |
|
4982 Py_XDECREF(temp); |
|
4983 return PyErr_NoMemory(); |
|
4984 } |
|
4985 if (_PyString_Resize(&result, reslen) < 0) { |
|
4986 Py_XDECREF(temp); |
|
4987 return NULL; |
|
4988 } |
|
4989 res = PyString_AS_STRING(result) |
|
4990 + reslen - rescnt; |
|
4991 } |
|
4992 if (sign) { |
|
4993 if (fill != ' ') |
|
4994 *res++ = sign; |
|
4995 rescnt--; |
|
4996 if (width > len) |
|
4997 width--; |
|
4998 } |
|
4999 if ((flags & F_ALT) && (c == 'x' || c == 'X')) { |
|
5000 assert(pbuf[0] == '0'); |
|
5001 assert(pbuf[1] == c); |
|
5002 if (fill != ' ') { |
|
5003 *res++ = *pbuf++; |
|
5004 *res++ = *pbuf++; |
|
5005 } |
|
5006 rescnt -= 2; |
|
5007 width -= 2; |
|
5008 if (width < 0) |
|
5009 width = 0; |
|
5010 len -= 2; |
|
5011 } |
|
5012 if (width > len && !(flags & F_LJUST)) { |
|
5013 do { |
|
5014 --rescnt; |
|
5015 *res++ = fill; |
|
5016 } while (--width > len); |
|
5017 } |
|
5018 if (fill == ' ') { |
|
5019 if (sign) |
|
5020 *res++ = sign; |
|
5021 if ((flags & F_ALT) && |
|
5022 (c == 'x' || c == 'X')) { |
|
5023 assert(pbuf[0] == '0'); |
|
5024 assert(pbuf[1] == c); |
|
5025 *res++ = *pbuf++; |
|
5026 *res++ = *pbuf++; |
|
5027 } |
|
5028 } |
|
5029 Py_MEMCPY(res, pbuf, len); |
|
5030 res += len; |
|
5031 rescnt -= len; |
|
5032 while (--width >= len) { |
|
5033 --rescnt; |
|
5034 *res++ = ' '; |
|
5035 } |
|
5036 if (dict && (argidx < arglen) && c != '%') { |
|
5037 PyErr_SetString(PyExc_TypeError, |
|
5038 "not all arguments converted during string formatting"); |
|
5039 Py_XDECREF(temp); |
|
5040 goto error; |
|
5041 } |
|
5042 Py_XDECREF(temp); |
|
5043 } /* '%' */ |
|
5044 } /* until end */ |
|
5045 if (argidx < arglen && !dict) { |
|
5046 PyErr_SetString(PyExc_TypeError, |
|
5047 "not all arguments converted during string formatting"); |
|
5048 goto error; |
|
5049 } |
|
5050 if (args_owned) { |
|
5051 Py_DECREF(args); |
|
5052 } |
|
5053 _PyString_Resize(&result, reslen - rescnt); |
|
5054 return result; |
|
5055 |
|
5056 #ifdef Py_USING_UNICODE |
|
5057 unicode: |
|
5058 if (args_owned) { |
|
5059 Py_DECREF(args); |
|
5060 args_owned = 0; |
|
5061 } |
|
5062 /* Fiddle args right (remove the first argidx arguments) */ |
|
5063 if (PyTuple_Check(orig_args) && argidx > 0) { |
|
5064 PyObject *v; |
|
5065 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; |
|
5066 v = PyTuple_New(n); |
|
5067 if (v == NULL) |
|
5068 goto error; |
|
5069 while (--n >= 0) { |
|
5070 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); |
|
5071 Py_INCREF(w); |
|
5072 PyTuple_SET_ITEM(v, n, w); |
|
5073 } |
|
5074 args = v; |
|
5075 } else { |
|
5076 Py_INCREF(orig_args); |
|
5077 args = orig_args; |
|
5078 } |
|
5079 args_owned = 1; |
|
5080 /* Take what we have of the result and let the Unicode formatting |
|
5081 function format the rest of the input. */ |
|
5082 rescnt = res - PyString_AS_STRING(result); |
|
5083 if (_PyString_Resize(&result, rescnt)) |
|
5084 goto error; |
|
5085 fmtcnt = PyString_GET_SIZE(format) - \ |
|
5086 (fmt - PyString_AS_STRING(format)); |
|
5087 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); |
|
5088 if (format == NULL) |
|
5089 goto error; |
|
5090 v = PyUnicode_Format(format, args); |
|
5091 Py_DECREF(format); |
|
5092 if (v == NULL) |
|
5093 goto error; |
|
5094 /* Paste what we have (result) to what the Unicode formatting |
|
5095 function returned (v) and return the result (or error) */ |
|
5096 w = PyUnicode_Concat(result, v); |
|
5097 Py_DECREF(result); |
|
5098 Py_DECREF(v); |
|
5099 Py_DECREF(args); |
|
5100 return w; |
|
5101 #endif /* Py_USING_UNICODE */ |
|
5102 |
|
5103 error: |
|
5104 Py_DECREF(result); |
|
5105 if (args_owned) { |
|
5106 Py_DECREF(args); |
|
5107 } |
|
5108 return NULL; |
|
5109 } |
|
5110 |
|
5111 void |
|
5112 PyString_InternInPlace(PyObject **p) |
|
5113 { |
|
5114 register PyStringObject *s = (PyStringObject *)(*p); |
|
5115 PyObject *t; |
|
5116 if (s == NULL || !PyString_Check(s)) |
|
5117 Py_FatalError("PyString_InternInPlace: strings only please!"); |
|
5118 /* If it's a string subclass, we don't really know what putting |
|
5119 it in the interned dict might do. */ |
|
5120 if (!PyString_CheckExact(s)) |
|
5121 return; |
|
5122 if (PyString_CHECK_INTERNED(s)) |
|
5123 return; |
|
5124 if (interned == NULL) { |
|
5125 interned = PyDict_New(); |
|
5126 if (interned == NULL) { |
|
5127 PyErr_Clear(); /* Don't leave an exception */ |
|
5128 return; |
|
5129 } |
|
5130 } |
|
5131 t = PyDict_GetItem(interned, (PyObject *)s); |
|
5132 if (t) { |
|
5133 Py_INCREF(t); |
|
5134 Py_DECREF(*p); |
|
5135 *p = t; |
|
5136 return; |
|
5137 } |
|
5138 |
|
5139 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { |
|
5140 PyErr_Clear(); |
|
5141 return; |
|
5142 } |
|
5143 /* The two references in interned are not counted by refcnt. |
|
5144 The string deallocator will take care of this */ |
|
5145 Py_REFCNT(s) -= 2; |
|
5146 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; |
|
5147 } |
|
5148 |
|
5149 void |
|
5150 PyString_InternImmortal(PyObject **p) |
|
5151 { |
|
5152 PyString_InternInPlace(p); |
|
5153 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { |
|
5154 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; |
|
5155 Py_INCREF(*p); |
|
5156 } |
|
5157 } |
|
5158 |
|
5159 |
|
5160 PyObject * |
|
5161 PyString_InternFromString(const char *cp) |
|
5162 { |
|
5163 PyObject *s = PyString_FromString(cp); |
|
5164 if (s == NULL) |
|
5165 return NULL; |
|
5166 PyString_InternInPlace(&s); |
|
5167 return s; |
|
5168 } |
|
5169 |
|
5170 void |
|
5171 PyString_Fini(void) |
|
5172 { |
|
5173 int i; |
|
5174 for (i = 0; i < UCHAR_MAX + 1; i++) { |
|
5175 Py_XDECREF(characters[i]); |
|
5176 characters[i] = NULL; |
|
5177 } |
|
5178 Py_XDECREF(nullstring); |
|
5179 nullstring = NULL; |
|
5180 } |
|
5181 |
|
5182 void _Py_ReleaseInternedStrings(void) |
|
5183 { |
|
5184 PyObject *keys; |
|
5185 PyStringObject *s; |
|
5186 Py_ssize_t i, n; |
|
5187 Py_ssize_t immortal_size = 0, mortal_size = 0; |
|
5188 |
|
5189 if (interned == NULL || !PyDict_Check(interned)) |
|
5190 return; |
|
5191 keys = PyDict_Keys(interned); |
|
5192 if (keys == NULL || !PyList_Check(keys)) { |
|
5193 PyErr_Clear(); |
|
5194 return; |
|
5195 } |
|
5196 |
|
5197 /* Since _Py_ReleaseInternedStrings() is intended to help a leak |
|
5198 detector, interned strings are not forcibly deallocated; rather, we |
|
5199 give them their stolen references back, and then clear and DECREF |
|
5200 the interned dict. */ |
|
5201 |
|
5202 n = PyList_GET_SIZE(keys); |
|
5203 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", |
|
5204 n); |
|
5205 for (i = 0; i < n; i++) { |
|
5206 s = (PyStringObject *) PyList_GET_ITEM(keys, i); |
|
5207 switch (s->ob_sstate) { |
|
5208 case SSTATE_NOT_INTERNED: |
|
5209 /* XXX Shouldn't happen */ |
|
5210 break; |
|
5211 case SSTATE_INTERNED_IMMORTAL: |
|
5212 Py_REFCNT(s) += 1; |
|
5213 immortal_size += Py_SIZE(s); |
|
5214 break; |
|
5215 case SSTATE_INTERNED_MORTAL: |
|
5216 Py_REFCNT(s) += 2; |
|
5217 mortal_size += Py_SIZE(s); |
|
5218 break; |
|
5219 default: |
|
5220 Py_FatalError("Inconsistent interned string state."); |
|
5221 } |
|
5222 s->ob_sstate = SSTATE_NOT_INTERNED; |
|
5223 } |
|
5224 fprintf(stderr, "total size of all interned strings: " |
|
5225 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " |
|
5226 "mortal/immortal\n", mortal_size, immortal_size); |
|
5227 Py_DECREF(keys); |
|
5228 PyDict_Clear(interned); |
|
5229 Py_DECREF(interned); |
|
5230 interned = NULL; |
|
5231 } |