|
1 /* PyBytes (bytearray) implementation */ |
|
2 |
|
3 #define PY_SSIZE_T_CLEAN |
|
4 #include "Python.h" |
|
5 #include "structmember.h" |
|
6 #include "bytes_methods.h" |
|
7 |
|
8 static PyByteArrayObject *nullbytes = NULL; |
|
9 |
|
10 void |
|
11 PyByteArray_Fini(void) |
|
12 { |
|
13 Py_CLEAR(nullbytes); |
|
14 } |
|
15 |
|
16 int |
|
17 PyByteArray_Init(void) |
|
18 { |
|
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type); |
|
20 if (nullbytes == NULL) |
|
21 return 0; |
|
22 nullbytes->ob_bytes = NULL; |
|
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0; |
|
24 nullbytes->ob_exports = 0; |
|
25 return 1; |
|
26 } |
|
27 |
|
28 /* end nullbytes support */ |
|
29 |
|
30 /* Helpers */ |
|
31 |
|
32 static int |
|
33 _getbytevalue(PyObject* arg, int *value) |
|
34 { |
|
35 long face_value; |
|
36 |
|
37 if (PyBytes_CheckExact(arg)) { |
|
38 if (Py_SIZE(arg) != 1) { |
|
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1"); |
|
40 return 0; |
|
41 } |
|
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]); |
|
43 return 1; |
|
44 } |
|
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) { |
|
46 face_value = PyLong_AsLong(arg); |
|
47 } |
|
48 else { |
|
49 PyObject *index = PyNumber_Index(arg); |
|
50 if (index == NULL) { |
|
51 PyErr_Format(PyExc_TypeError, |
|
52 "an integer or string of size 1 is required"); |
|
53 return 0; |
|
54 } |
|
55 face_value = PyLong_AsLong(index); |
|
56 Py_DECREF(index); |
|
57 } |
|
58 |
|
59 if (face_value < 0 || face_value >= 256) { |
|
60 /* this includes the OverflowError in case the long is too large */ |
|
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
|
62 return 0; |
|
63 } |
|
64 |
|
65 *value = face_value; |
|
66 return 1; |
|
67 } |
|
68 |
|
69 static Py_ssize_t |
|
70 bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr) |
|
71 { |
|
72 if ( index != 0 ) { |
|
73 PyErr_SetString(PyExc_SystemError, |
|
74 "accessing non-existent bytes segment"); |
|
75 return -1; |
|
76 } |
|
77 *ptr = (void *)self->ob_bytes; |
|
78 return Py_SIZE(self); |
|
79 } |
|
80 |
|
81 static Py_ssize_t |
|
82 bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr) |
|
83 { |
|
84 if ( index != 0 ) { |
|
85 PyErr_SetString(PyExc_SystemError, |
|
86 "accessing non-existent bytes segment"); |
|
87 return -1; |
|
88 } |
|
89 *ptr = (void *)self->ob_bytes; |
|
90 return Py_SIZE(self); |
|
91 } |
|
92 |
|
93 static Py_ssize_t |
|
94 bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp) |
|
95 { |
|
96 if ( lenp ) |
|
97 *lenp = Py_SIZE(self); |
|
98 return 1; |
|
99 } |
|
100 |
|
101 static Py_ssize_t |
|
102 bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr) |
|
103 { |
|
104 if ( index != 0 ) { |
|
105 PyErr_SetString(PyExc_SystemError, |
|
106 "accessing non-existent bytes segment"); |
|
107 return -1; |
|
108 } |
|
109 *ptr = self->ob_bytes; |
|
110 return Py_SIZE(self); |
|
111 } |
|
112 |
|
113 static int |
|
114 bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags) |
|
115 { |
|
116 int ret; |
|
117 void *ptr; |
|
118 if (view == NULL) { |
|
119 obj->ob_exports++; |
|
120 return 0; |
|
121 } |
|
122 if (obj->ob_bytes == NULL) |
|
123 ptr = ""; |
|
124 else |
|
125 ptr = obj->ob_bytes; |
|
126 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags); |
|
127 if (ret >= 0) { |
|
128 obj->ob_exports++; |
|
129 } |
|
130 return ret; |
|
131 } |
|
132 |
|
133 static void |
|
134 bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view) |
|
135 { |
|
136 obj->ob_exports--; |
|
137 } |
|
138 |
|
139 static Py_ssize_t |
|
140 _getbuffer(PyObject *obj, Py_buffer *view) |
|
141 { |
|
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer; |
|
143 |
|
144 if (buffer == NULL || buffer->bf_getbuffer == NULL) |
|
145 { |
|
146 PyErr_Format(PyExc_TypeError, |
|
147 "Type %.100s doesn't support the buffer API", |
|
148 Py_TYPE(obj)->tp_name); |
|
149 return -1; |
|
150 } |
|
151 |
|
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) |
|
153 return -1; |
|
154 return view->len; |
|
155 } |
|
156 |
|
157 /* Direct API functions */ |
|
158 |
|
159 PyObject * |
|
160 PyByteArray_FromObject(PyObject *input) |
|
161 { |
|
162 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type, |
|
163 input, NULL); |
|
164 } |
|
165 |
|
166 PyObject * |
|
167 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) |
|
168 { |
|
169 PyByteArrayObject *new; |
|
170 Py_ssize_t alloc; |
|
171 |
|
172 if (size < 0) { |
|
173 PyErr_SetString(PyExc_SystemError, |
|
174 "Negative size passed to PyByteArray_FromStringAndSize"); |
|
175 return NULL; |
|
176 } |
|
177 |
|
178 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type); |
|
179 if (new == NULL) |
|
180 return NULL; |
|
181 |
|
182 if (size == 0) { |
|
183 new->ob_bytes = NULL; |
|
184 alloc = 0; |
|
185 } |
|
186 else { |
|
187 alloc = size + 1; |
|
188 new->ob_bytes = PyMem_Malloc(alloc); |
|
189 if (new->ob_bytes == NULL) { |
|
190 Py_DECREF(new); |
|
191 return PyErr_NoMemory(); |
|
192 } |
|
193 if (bytes != NULL) |
|
194 memcpy(new->ob_bytes, bytes, size); |
|
195 new->ob_bytes[size] = '\0'; /* Trailing null byte */ |
|
196 } |
|
197 Py_SIZE(new) = size; |
|
198 new->ob_alloc = alloc; |
|
199 new->ob_exports = 0; |
|
200 |
|
201 return (PyObject *)new; |
|
202 } |
|
203 |
|
204 Py_ssize_t |
|
205 PyByteArray_Size(PyObject *self) |
|
206 { |
|
207 assert(self != NULL); |
|
208 assert(PyByteArray_Check(self)); |
|
209 |
|
210 return PyByteArray_GET_SIZE(self); |
|
211 } |
|
212 |
|
213 char * |
|
214 PyByteArray_AsString(PyObject *self) |
|
215 { |
|
216 assert(self != NULL); |
|
217 assert(PyByteArray_Check(self)); |
|
218 |
|
219 return PyByteArray_AS_STRING(self); |
|
220 } |
|
221 |
|
222 int |
|
223 PyByteArray_Resize(PyObject *self, Py_ssize_t size) |
|
224 { |
|
225 void *sval; |
|
226 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc; |
|
227 |
|
228 assert(self != NULL); |
|
229 assert(PyByteArray_Check(self)); |
|
230 assert(size >= 0); |
|
231 |
|
232 if (size < alloc / 2) { |
|
233 /* Major downsize; resize down to exact size */ |
|
234 alloc = size + 1; |
|
235 } |
|
236 else if (size < alloc) { |
|
237 /* Within allocated size; quick exit */ |
|
238 Py_SIZE(self) = size; |
|
239 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */ |
|
240 return 0; |
|
241 } |
|
242 else if (size <= alloc * 1.125) { |
|
243 /* Moderate upsize; overallocate similar to list_resize() */ |
|
244 alloc = size + (size >> 3) + (size < 9 ? 3 : 6); |
|
245 } |
|
246 else { |
|
247 /* Major upsize; resize up to exact size */ |
|
248 alloc = size + 1; |
|
249 } |
|
250 |
|
251 if (((PyByteArrayObject *)self)->ob_exports > 0) { |
|
252 /* |
|
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports, |
|
254 ((PyByteArrayObject *)self)->ob_bytes); |
|
255 */ |
|
256 PyErr_SetString(PyExc_BufferError, |
|
257 "Existing exports of data: object cannot be re-sized"); |
|
258 return -1; |
|
259 } |
|
260 |
|
261 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc); |
|
262 if (sval == NULL) { |
|
263 PyErr_NoMemory(); |
|
264 return -1; |
|
265 } |
|
266 |
|
267 ((PyByteArrayObject *)self)->ob_bytes = sval; |
|
268 Py_SIZE(self) = size; |
|
269 ((PyByteArrayObject *)self)->ob_alloc = alloc; |
|
270 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */ |
|
271 |
|
272 return 0; |
|
273 } |
|
274 |
|
275 PyObject * |
|
276 PyByteArray_Concat(PyObject *a, PyObject *b) |
|
277 { |
|
278 Py_ssize_t size; |
|
279 Py_buffer va, vb; |
|
280 PyByteArrayObject *result = NULL; |
|
281 |
|
282 va.len = -1; |
|
283 vb.len = -1; |
|
284 if (_getbuffer(a, &va) < 0 || |
|
285 _getbuffer(b, &vb) < 0) { |
|
286 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", |
|
287 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name); |
|
288 goto done; |
|
289 } |
|
290 |
|
291 size = va.len + vb.len; |
|
292 if (size < 0) { |
|
293 return PyErr_NoMemory(); |
|
294 goto done; |
|
295 } |
|
296 |
|
297 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size); |
|
298 if (result != NULL) { |
|
299 memcpy(result->ob_bytes, va.buf, va.len); |
|
300 memcpy(result->ob_bytes + va.len, vb.buf, vb.len); |
|
301 } |
|
302 |
|
303 done: |
|
304 if (va.len != -1) |
|
305 PyBuffer_Release(&va); |
|
306 if (vb.len != -1) |
|
307 PyBuffer_Release(&vb); |
|
308 return (PyObject *)result; |
|
309 } |
|
310 |
|
311 /* Functions stuffed into the type object */ |
|
312 |
|
313 static Py_ssize_t |
|
314 bytes_length(PyByteArrayObject *self) |
|
315 { |
|
316 return Py_SIZE(self); |
|
317 } |
|
318 |
|
319 static PyObject * |
|
320 bytes_iconcat(PyByteArrayObject *self, PyObject *other) |
|
321 { |
|
322 Py_ssize_t mysize; |
|
323 Py_ssize_t size; |
|
324 Py_buffer vo; |
|
325 |
|
326 if (_getbuffer(other, &vo) < 0) { |
|
327 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", |
|
328 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name); |
|
329 return NULL; |
|
330 } |
|
331 |
|
332 mysize = Py_SIZE(self); |
|
333 size = mysize + vo.len; |
|
334 if (size < 0) { |
|
335 PyBuffer_Release(&vo); |
|
336 return PyErr_NoMemory(); |
|
337 } |
|
338 if (size < self->ob_alloc) { |
|
339 Py_SIZE(self) = size; |
|
340 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */ |
|
341 } |
|
342 else if (PyByteArray_Resize((PyObject *)self, size) < 0) { |
|
343 PyBuffer_Release(&vo); |
|
344 return NULL; |
|
345 } |
|
346 memcpy(self->ob_bytes + mysize, vo.buf, vo.len); |
|
347 PyBuffer_Release(&vo); |
|
348 Py_INCREF(self); |
|
349 return (PyObject *)self; |
|
350 } |
|
351 |
|
352 static PyObject * |
|
353 bytes_repeat(PyByteArrayObject *self, Py_ssize_t count) |
|
354 { |
|
355 PyByteArrayObject *result; |
|
356 Py_ssize_t mysize; |
|
357 Py_ssize_t size; |
|
358 |
|
359 if (count < 0) |
|
360 count = 0; |
|
361 mysize = Py_SIZE(self); |
|
362 size = mysize * count; |
|
363 if (count != 0 && size / count != mysize) |
|
364 return PyErr_NoMemory(); |
|
365 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size); |
|
366 if (result != NULL && size != 0) { |
|
367 if (mysize == 1) |
|
368 memset(result->ob_bytes, self->ob_bytes[0], size); |
|
369 else { |
|
370 Py_ssize_t i; |
|
371 for (i = 0; i < count; i++) |
|
372 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize); |
|
373 } |
|
374 } |
|
375 return (PyObject *)result; |
|
376 } |
|
377 |
|
378 static PyObject * |
|
379 bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count) |
|
380 { |
|
381 Py_ssize_t mysize; |
|
382 Py_ssize_t size; |
|
383 |
|
384 if (count < 0) |
|
385 count = 0; |
|
386 mysize = Py_SIZE(self); |
|
387 size = mysize * count; |
|
388 if (count != 0 && size / count != mysize) |
|
389 return PyErr_NoMemory(); |
|
390 if (size < self->ob_alloc) { |
|
391 Py_SIZE(self) = size; |
|
392 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */ |
|
393 } |
|
394 else if (PyByteArray_Resize((PyObject *)self, size) < 0) |
|
395 return NULL; |
|
396 |
|
397 if (mysize == 1) |
|
398 memset(self->ob_bytes, self->ob_bytes[0], size); |
|
399 else { |
|
400 Py_ssize_t i; |
|
401 for (i = 1; i < count; i++) |
|
402 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize); |
|
403 } |
|
404 |
|
405 Py_INCREF(self); |
|
406 return (PyObject *)self; |
|
407 } |
|
408 |
|
409 static PyObject * |
|
410 bytes_getitem(PyByteArrayObject *self, Py_ssize_t i) |
|
411 { |
|
412 if (i < 0) |
|
413 i += Py_SIZE(self); |
|
414 if (i < 0 || i >= Py_SIZE(self)) { |
|
415 PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); |
|
416 return NULL; |
|
417 } |
|
418 return PyInt_FromLong((unsigned char)(self->ob_bytes[i])); |
|
419 } |
|
420 |
|
421 static PyObject * |
|
422 bytes_subscript(PyByteArrayObject *self, PyObject *index) |
|
423 { |
|
424 if (PyIndex_Check(index)) { |
|
425 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError); |
|
426 |
|
427 if (i == -1 && PyErr_Occurred()) |
|
428 return NULL; |
|
429 |
|
430 if (i < 0) |
|
431 i += PyByteArray_GET_SIZE(self); |
|
432 |
|
433 if (i < 0 || i >= Py_SIZE(self)) { |
|
434 PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); |
|
435 return NULL; |
|
436 } |
|
437 return PyInt_FromLong((unsigned char)(self->ob_bytes[i])); |
|
438 } |
|
439 else if (PySlice_Check(index)) { |
|
440 Py_ssize_t start, stop, step, slicelength, cur, i; |
|
441 if (PySlice_GetIndicesEx((PySliceObject *)index, |
|
442 PyByteArray_GET_SIZE(self), |
|
443 &start, &stop, &step, &slicelength) < 0) { |
|
444 return NULL; |
|
445 } |
|
446 |
|
447 if (slicelength <= 0) |
|
448 return PyByteArray_FromStringAndSize("", 0); |
|
449 else if (step == 1) { |
|
450 return PyByteArray_FromStringAndSize(self->ob_bytes + start, |
|
451 slicelength); |
|
452 } |
|
453 else { |
|
454 char *source_buf = PyByteArray_AS_STRING(self); |
|
455 char *result_buf = (char *)PyMem_Malloc(slicelength); |
|
456 PyObject *result; |
|
457 |
|
458 if (result_buf == NULL) |
|
459 return PyErr_NoMemory(); |
|
460 |
|
461 for (cur = start, i = 0; i < slicelength; |
|
462 cur += step, i++) { |
|
463 result_buf[i] = source_buf[cur]; |
|
464 } |
|
465 result = PyByteArray_FromStringAndSize(result_buf, slicelength); |
|
466 PyMem_Free(result_buf); |
|
467 return result; |
|
468 } |
|
469 } |
|
470 else { |
|
471 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers"); |
|
472 return NULL; |
|
473 } |
|
474 } |
|
475 |
|
476 static int |
|
477 bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi, |
|
478 PyObject *values) |
|
479 { |
|
480 Py_ssize_t avail, needed; |
|
481 void *bytes; |
|
482 Py_buffer vbytes; |
|
483 int res = 0; |
|
484 |
|
485 vbytes.len = -1; |
|
486 if (values == (PyObject *)self) { |
|
487 /* Make a copy and call this function recursively */ |
|
488 int err; |
|
489 values = PyByteArray_FromObject(values); |
|
490 if (values == NULL) |
|
491 return -1; |
|
492 err = bytes_setslice(self, lo, hi, values); |
|
493 Py_DECREF(values); |
|
494 return err; |
|
495 } |
|
496 if (values == NULL) { |
|
497 /* del b[lo:hi] */ |
|
498 bytes = NULL; |
|
499 needed = 0; |
|
500 } |
|
501 else { |
|
502 if (_getbuffer(values, &vbytes) < 0) { |
|
503 PyErr_Format(PyExc_TypeError, |
|
504 "can't set bytearray slice from %.100s", |
|
505 Py_TYPE(values)->tp_name); |
|
506 return -1; |
|
507 } |
|
508 needed = vbytes.len; |
|
509 bytes = vbytes.buf; |
|
510 } |
|
511 |
|
512 if (lo < 0) |
|
513 lo = 0; |
|
514 if (hi < lo) |
|
515 hi = lo; |
|
516 if (hi > Py_SIZE(self)) |
|
517 hi = Py_SIZE(self); |
|
518 |
|
519 avail = hi - lo; |
|
520 if (avail < 0) |
|
521 lo = hi = avail = 0; |
|
522 |
|
523 if (avail != needed) { |
|
524 if (avail > needed) { |
|
525 /* |
|
526 0 lo hi old_size |
|
527 | |<----avail----->|<-----tomove------>| |
|
528 | |<-needed->|<-----tomove------>| |
|
529 0 lo new_hi new_size |
|
530 */ |
|
531 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi, |
|
532 Py_SIZE(self) - hi); |
|
533 } |
|
534 /* XXX(nnorwitz): need to verify this can't overflow! */ |
|
535 if (PyByteArray_Resize((PyObject *)self, |
|
536 Py_SIZE(self) + needed - avail) < 0) { |
|
537 res = -1; |
|
538 goto finish; |
|
539 } |
|
540 if (avail < needed) { |
|
541 /* |
|
542 0 lo hi old_size |
|
543 | |<-avail->|<-----tomove------>| |
|
544 | |<----needed---->|<-----tomove------>| |
|
545 0 lo new_hi new_size |
|
546 */ |
|
547 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi, |
|
548 Py_SIZE(self) - lo - needed); |
|
549 } |
|
550 } |
|
551 |
|
552 if (needed > 0) |
|
553 memcpy(self->ob_bytes + lo, bytes, needed); |
|
554 |
|
555 |
|
556 finish: |
|
557 if (vbytes.len != -1) |
|
558 PyBuffer_Release(&vbytes); |
|
559 return res; |
|
560 } |
|
561 |
|
562 static int |
|
563 bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value) |
|
564 { |
|
565 int ival; |
|
566 |
|
567 if (i < 0) |
|
568 i += Py_SIZE(self); |
|
569 |
|
570 if (i < 0 || i >= Py_SIZE(self)) { |
|
571 PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); |
|
572 return -1; |
|
573 } |
|
574 |
|
575 if (value == NULL) |
|
576 return bytes_setslice(self, i, i+1, NULL); |
|
577 |
|
578 if (!_getbytevalue(value, &ival)) |
|
579 return -1; |
|
580 |
|
581 self->ob_bytes[i] = ival; |
|
582 return 0; |
|
583 } |
|
584 |
|
585 static int |
|
586 bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values) |
|
587 { |
|
588 Py_ssize_t start, stop, step, slicelen, needed; |
|
589 char *bytes; |
|
590 |
|
591 if (PyIndex_Check(index)) { |
|
592 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError); |
|
593 |
|
594 if (i == -1 && PyErr_Occurred()) |
|
595 return -1; |
|
596 |
|
597 if (i < 0) |
|
598 i += PyByteArray_GET_SIZE(self); |
|
599 |
|
600 if (i < 0 || i >= Py_SIZE(self)) { |
|
601 PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); |
|
602 return -1; |
|
603 } |
|
604 |
|
605 if (values == NULL) { |
|
606 /* Fall through to slice assignment */ |
|
607 start = i; |
|
608 stop = i + 1; |
|
609 step = 1; |
|
610 slicelen = 1; |
|
611 } |
|
612 else { |
|
613 int ival; |
|
614 if (!_getbytevalue(values, &ival)) |
|
615 return -1; |
|
616 self->ob_bytes[i] = (char)ival; |
|
617 return 0; |
|
618 } |
|
619 } |
|
620 else if (PySlice_Check(index)) { |
|
621 if (PySlice_GetIndicesEx((PySliceObject *)index, |
|
622 PyByteArray_GET_SIZE(self), |
|
623 &start, &stop, &step, &slicelen) < 0) { |
|
624 return -1; |
|
625 } |
|
626 } |
|
627 else { |
|
628 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer"); |
|
629 return -1; |
|
630 } |
|
631 |
|
632 if (values == NULL) { |
|
633 bytes = NULL; |
|
634 needed = 0; |
|
635 } |
|
636 else if (values == (PyObject *)self || !PyByteArray_Check(values)) { |
|
637 /* Make a copy an call this function recursively */ |
|
638 int err; |
|
639 values = PyByteArray_FromObject(values); |
|
640 if (values == NULL) |
|
641 return -1; |
|
642 err = bytes_ass_subscript(self, index, values); |
|
643 Py_DECREF(values); |
|
644 return err; |
|
645 } |
|
646 else { |
|
647 assert(PyByteArray_Check(values)); |
|
648 bytes = ((PyByteArrayObject *)values)->ob_bytes; |
|
649 needed = Py_SIZE(values); |
|
650 } |
|
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */ |
|
652 if ((step < 0 && start < stop) || |
|
653 (step > 0 && start > stop)) |
|
654 stop = start; |
|
655 if (step == 1) { |
|
656 if (slicelen != needed) { |
|
657 if (slicelen > needed) { |
|
658 /* |
|
659 0 start stop old_size |
|
660 | |<---slicelen--->|<-----tomove------>| |
|
661 | |<-needed->|<-----tomove------>| |
|
662 0 lo new_hi new_size |
|
663 */ |
|
664 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop, |
|
665 Py_SIZE(self) - stop); |
|
666 } |
|
667 if (PyByteArray_Resize((PyObject *)self, |
|
668 Py_SIZE(self) + needed - slicelen) < 0) |
|
669 return -1; |
|
670 if (slicelen < needed) { |
|
671 /* |
|
672 0 lo hi old_size |
|
673 | |<-avail->|<-----tomove------>| |
|
674 | |<----needed---->|<-----tomove------>| |
|
675 0 lo new_hi new_size |
|
676 */ |
|
677 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop, |
|
678 Py_SIZE(self) - start - needed); |
|
679 } |
|
680 } |
|
681 |
|
682 if (needed > 0) |
|
683 memcpy(self->ob_bytes + start, bytes, needed); |
|
684 |
|
685 return 0; |
|
686 } |
|
687 else { |
|
688 if (needed == 0) { |
|
689 /* Delete slice */ |
|
690 Py_ssize_t cur, i; |
|
691 |
|
692 if (step < 0) { |
|
693 stop = start + 1; |
|
694 start = stop + step * (slicelen - 1) - 1; |
|
695 step = -step; |
|
696 } |
|
697 for (cur = start, i = 0; |
|
698 i < slicelen; cur += step, i++) { |
|
699 Py_ssize_t lim = step - 1; |
|
700 |
|
701 if (cur + step >= PyByteArray_GET_SIZE(self)) |
|
702 lim = PyByteArray_GET_SIZE(self) - cur - 1; |
|
703 |
|
704 memmove(self->ob_bytes + cur - i, |
|
705 self->ob_bytes + cur + 1, lim); |
|
706 } |
|
707 /* Move the tail of the bytes, in one chunk */ |
|
708 cur = start + slicelen*step; |
|
709 if (cur < PyByteArray_GET_SIZE(self)) { |
|
710 memmove(self->ob_bytes + cur - slicelen, |
|
711 self->ob_bytes + cur, |
|
712 PyByteArray_GET_SIZE(self) - cur); |
|
713 } |
|
714 if (PyByteArray_Resize((PyObject *)self, |
|
715 PyByteArray_GET_SIZE(self) - slicelen) < 0) |
|
716 return -1; |
|
717 |
|
718 return 0; |
|
719 } |
|
720 else { |
|
721 /* Assign slice */ |
|
722 Py_ssize_t cur, i; |
|
723 |
|
724 if (needed != slicelen) { |
|
725 PyErr_Format(PyExc_ValueError, |
|
726 "attempt to assign bytes of size %zd " |
|
727 "to extended slice of size %zd", |
|
728 needed, slicelen); |
|
729 return -1; |
|
730 } |
|
731 for (cur = start, i = 0; i < slicelen; cur += step, i++) |
|
732 self->ob_bytes[cur] = bytes[i]; |
|
733 return 0; |
|
734 } |
|
735 } |
|
736 } |
|
737 |
|
738 static int |
|
739 bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) |
|
740 { |
|
741 static char *kwlist[] = {"source", "encoding", "errors", 0}; |
|
742 PyObject *arg = NULL; |
|
743 const char *encoding = NULL; |
|
744 const char *errors = NULL; |
|
745 Py_ssize_t count; |
|
746 PyObject *it; |
|
747 PyObject *(*iternext)(PyObject *); |
|
748 |
|
749 if (Py_SIZE(self) != 0) { |
|
750 /* Empty previous contents (yes, do this first of all!) */ |
|
751 if (PyByteArray_Resize((PyObject *)self, 0) < 0) |
|
752 return -1; |
|
753 } |
|
754 |
|
755 /* Parse arguments */ |
|
756 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist, |
|
757 &arg, &encoding, &errors)) |
|
758 return -1; |
|
759 |
|
760 /* Make a quick exit if no first argument */ |
|
761 if (arg == NULL) { |
|
762 if (encoding != NULL || errors != NULL) { |
|
763 PyErr_SetString(PyExc_TypeError, |
|
764 "encoding or errors without sequence argument"); |
|
765 return -1; |
|
766 } |
|
767 return 0; |
|
768 } |
|
769 |
|
770 if (PyBytes_Check(arg)) { |
|
771 PyObject *new, *encoded; |
|
772 if (encoding != NULL) { |
|
773 encoded = PyCodec_Encode(arg, encoding, errors); |
|
774 if (encoded == NULL) |
|
775 return -1; |
|
776 assert(PyBytes_Check(encoded)); |
|
777 } |
|
778 else { |
|
779 encoded = arg; |
|
780 Py_INCREF(arg); |
|
781 } |
|
782 new = bytes_iconcat(self, arg); |
|
783 Py_DECREF(encoded); |
|
784 if (new == NULL) |
|
785 return -1; |
|
786 Py_DECREF(new); |
|
787 return 0; |
|
788 } |
|
789 |
|
790 if (PyUnicode_Check(arg)) { |
|
791 /* Encode via the codec registry */ |
|
792 PyObject *encoded, *new; |
|
793 if (encoding == NULL) { |
|
794 PyErr_SetString(PyExc_TypeError, |
|
795 "unicode argument without an encoding"); |
|
796 return -1; |
|
797 } |
|
798 encoded = PyCodec_Encode(arg, encoding, errors); |
|
799 if (encoded == NULL) |
|
800 return -1; |
|
801 assert(PyBytes_Check(encoded)); |
|
802 new = bytes_iconcat(self, encoded); |
|
803 Py_DECREF(encoded); |
|
804 if (new == NULL) |
|
805 return -1; |
|
806 Py_DECREF(new); |
|
807 return 0; |
|
808 } |
|
809 |
|
810 /* If it's not unicode, there can't be encoding or errors */ |
|
811 if (encoding != NULL || errors != NULL) { |
|
812 PyErr_SetString(PyExc_TypeError, |
|
813 "encoding or errors without a string argument"); |
|
814 return -1; |
|
815 } |
|
816 |
|
817 /* Is it an int? */ |
|
818 count = PyNumber_AsSsize_t(arg, PyExc_ValueError); |
|
819 if (count == -1 && PyErr_Occurred()) |
|
820 PyErr_Clear(); |
|
821 else { |
|
822 if (count < 0) { |
|
823 PyErr_SetString(PyExc_ValueError, "negative count"); |
|
824 return -1; |
|
825 } |
|
826 if (count > 0) { |
|
827 if (PyByteArray_Resize((PyObject *)self, count)) |
|
828 return -1; |
|
829 memset(self->ob_bytes, 0, count); |
|
830 } |
|
831 return 0; |
|
832 } |
|
833 |
|
834 /* Use the buffer API */ |
|
835 if (PyObject_CheckBuffer(arg)) { |
|
836 Py_ssize_t size; |
|
837 Py_buffer view; |
|
838 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0) |
|
839 return -1; |
|
840 size = view.len; |
|
841 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail; |
|
842 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0) |
|
843 goto fail; |
|
844 PyBuffer_Release(&view); |
|
845 return 0; |
|
846 fail: |
|
847 PyBuffer_Release(&view); |
|
848 return -1; |
|
849 } |
|
850 |
|
851 /* XXX Optimize this if the arguments is a list, tuple */ |
|
852 |
|
853 /* Get the iterator */ |
|
854 it = PyObject_GetIter(arg); |
|
855 if (it == NULL) |
|
856 return -1; |
|
857 iternext = *Py_TYPE(it)->tp_iternext; |
|
858 |
|
859 /* Run the iterator to exhaustion */ |
|
860 for (;;) { |
|
861 PyObject *item; |
|
862 int rc, value; |
|
863 |
|
864 /* Get the next item */ |
|
865 item = iternext(it); |
|
866 if (item == NULL) { |
|
867 if (PyErr_Occurred()) { |
|
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) |
|
869 goto error; |
|
870 PyErr_Clear(); |
|
871 } |
|
872 break; |
|
873 } |
|
874 |
|
875 /* Interpret it as an int (__index__) */ |
|
876 rc = _getbytevalue(item, &value); |
|
877 Py_DECREF(item); |
|
878 if (!rc) |
|
879 goto error; |
|
880 |
|
881 /* Append the byte */ |
|
882 if (Py_SIZE(self) < self->ob_alloc) |
|
883 Py_SIZE(self)++; |
|
884 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0) |
|
885 goto error; |
|
886 self->ob_bytes[Py_SIZE(self)-1] = value; |
|
887 } |
|
888 |
|
889 /* Clean up and return success */ |
|
890 Py_DECREF(it); |
|
891 return 0; |
|
892 |
|
893 error: |
|
894 /* Error handling when it != NULL */ |
|
895 Py_DECREF(it); |
|
896 return -1; |
|
897 } |
|
898 |
|
899 /* Mostly copied from string_repr, but without the |
|
900 "smart quote" functionality. */ |
|
901 static PyObject * |
|
902 bytes_repr(PyByteArrayObject *self) |
|
903 { |
|
904 static const char *hexdigits = "0123456789abcdef"; |
|
905 const char *quote_prefix = "bytearray(b"; |
|
906 const char *quote_postfix = ")"; |
|
907 Py_ssize_t length = Py_SIZE(self); |
|
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */ |
|
909 size_t newsize = 14 + 4 * length; |
|
910 PyObject *v; |
|
911 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) { |
|
912 PyErr_SetString(PyExc_OverflowError, |
|
913 "bytearray object is too large to make repr"); |
|
914 return NULL; |
|
915 } |
|
916 v = PyUnicode_FromUnicode(NULL, newsize); |
|
917 if (v == NULL) { |
|
918 return NULL; |
|
919 } |
|
920 else { |
|
921 register Py_ssize_t i; |
|
922 register Py_UNICODE c; |
|
923 register Py_UNICODE *p; |
|
924 int quote; |
|
925 |
|
926 /* Figure out which quote to use; single is preferred */ |
|
927 quote = '\''; |
|
928 { |
|
929 char *test, *start; |
|
930 start = PyByteArray_AS_STRING(self); |
|
931 for (test = start; test < start+length; ++test) { |
|
932 if (*test == '"') { |
|
933 quote = '\''; /* back to single */ |
|
934 goto decided; |
|
935 } |
|
936 else if (*test == '\'') |
|
937 quote = '"'; |
|
938 } |
|
939 decided: |
|
940 ; |
|
941 } |
|
942 |
|
943 p = PyUnicode_AS_UNICODE(v); |
|
944 while (*quote_prefix) |
|
945 *p++ = *quote_prefix++; |
|
946 *p++ = quote; |
|
947 |
|
948 for (i = 0; i < length; i++) { |
|
949 /* There's at least enough room for a hex escape |
|
950 and a closing quote. */ |
|
951 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5); |
|
952 c = self->ob_bytes[i]; |
|
953 if (c == '\'' || c == '\\') |
|
954 *p++ = '\\', *p++ = c; |
|
955 else if (c == '\t') |
|
956 *p++ = '\\', *p++ = 't'; |
|
957 else if (c == '\n') |
|
958 *p++ = '\\', *p++ = 'n'; |
|
959 else if (c == '\r') |
|
960 *p++ = '\\', *p++ = 'r'; |
|
961 else if (c == 0) |
|
962 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0'; |
|
963 else if (c < ' ' || c >= 0x7f) { |
|
964 *p++ = '\\'; |
|
965 *p++ = 'x'; |
|
966 *p++ = hexdigits[(c & 0xf0) >> 4]; |
|
967 *p++ = hexdigits[c & 0xf]; |
|
968 } |
|
969 else |
|
970 *p++ = c; |
|
971 } |
|
972 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1); |
|
973 *p++ = quote; |
|
974 while (*quote_postfix) { |
|
975 *p++ = *quote_postfix++; |
|
976 } |
|
977 *p = '\0'; |
|
978 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) { |
|
979 Py_DECREF(v); |
|
980 return NULL; |
|
981 } |
|
982 return v; |
|
983 } |
|
984 } |
|
985 |
|
986 static PyObject * |
|
987 bytes_str(PyObject *op) |
|
988 { |
|
989 #if 0 |
|
990 if (Py_BytesWarningFlag) { |
|
991 if (PyErr_WarnEx(PyExc_BytesWarning, |
|
992 "str() on a bytearray instance", 1)) |
|
993 return NULL; |
|
994 } |
|
995 return bytes_repr((PyByteArrayObject*)op); |
|
996 #endif |
|
997 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op)); |
|
998 } |
|
999 |
|
1000 static PyObject * |
|
1001 bytes_richcompare(PyObject *self, PyObject *other, int op) |
|
1002 { |
|
1003 Py_ssize_t self_size, other_size; |
|
1004 Py_buffer self_bytes, other_bytes; |
|
1005 PyObject *res; |
|
1006 Py_ssize_t minsize; |
|
1007 int cmp; |
|
1008 |
|
1009 /* Bytes can be compared to anything that supports the (binary) |
|
1010 buffer API. Except that a comparison with Unicode is always an |
|
1011 error, even if the comparison is for equality. */ |
|
1012 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) || |
|
1013 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) { |
|
1014 if (Py_BytesWarningFlag && op == Py_EQ) { |
|
1015 if (PyErr_WarnEx(PyExc_BytesWarning, |
|
1016 "Comparsion between bytearray and string", 1)) |
|
1017 return NULL; |
|
1018 } |
|
1019 |
|
1020 Py_INCREF(Py_NotImplemented); |
|
1021 return Py_NotImplemented; |
|
1022 } |
|
1023 |
|
1024 self_size = _getbuffer(self, &self_bytes); |
|
1025 if (self_size < 0) { |
|
1026 PyErr_Clear(); |
|
1027 Py_INCREF(Py_NotImplemented); |
|
1028 return Py_NotImplemented; |
|
1029 } |
|
1030 |
|
1031 other_size = _getbuffer(other, &other_bytes); |
|
1032 if (other_size < 0) { |
|
1033 PyErr_Clear(); |
|
1034 PyBuffer_Release(&self_bytes); |
|
1035 Py_INCREF(Py_NotImplemented); |
|
1036 return Py_NotImplemented; |
|
1037 } |
|
1038 |
|
1039 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) { |
|
1040 /* Shortcut: if the lengths differ, the objects differ */ |
|
1041 cmp = (op == Py_NE); |
|
1042 } |
|
1043 else { |
|
1044 minsize = self_size; |
|
1045 if (other_size < minsize) |
|
1046 minsize = other_size; |
|
1047 |
|
1048 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize); |
|
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */ |
|
1050 |
|
1051 if (cmp == 0) { |
|
1052 if (self_size < other_size) |
|
1053 cmp = -1; |
|
1054 else if (self_size > other_size) |
|
1055 cmp = 1; |
|
1056 } |
|
1057 |
|
1058 switch (op) { |
|
1059 case Py_LT: cmp = cmp < 0; break; |
|
1060 case Py_LE: cmp = cmp <= 0; break; |
|
1061 case Py_EQ: cmp = cmp == 0; break; |
|
1062 case Py_NE: cmp = cmp != 0; break; |
|
1063 case Py_GT: cmp = cmp > 0; break; |
|
1064 case Py_GE: cmp = cmp >= 0; break; |
|
1065 } |
|
1066 } |
|
1067 |
|
1068 res = cmp ? Py_True : Py_False; |
|
1069 PyBuffer_Release(&self_bytes); |
|
1070 PyBuffer_Release(&other_bytes); |
|
1071 Py_INCREF(res); |
|
1072 return res; |
|
1073 } |
|
1074 |
|
1075 static void |
|
1076 bytes_dealloc(PyByteArrayObject *self) |
|
1077 { |
|
1078 if (self->ob_exports > 0) { |
|
1079 PyErr_SetString(PyExc_SystemError, |
|
1080 "deallocated bytearray object has exported buffers"); |
|
1081 PyErr_Print(); |
|
1082 } |
|
1083 if (self->ob_bytes != 0) { |
|
1084 PyMem_Free(self->ob_bytes); |
|
1085 } |
|
1086 Py_TYPE(self)->tp_free((PyObject *)self); |
|
1087 } |
|
1088 |
|
1089 |
|
1090 /* -------------------------------------------------------------------- */ |
|
1091 /* Methods */ |
|
1092 |
|
1093 #define STRINGLIB_CHAR char |
|
1094 #define STRINGLIB_CMP memcmp |
|
1095 #define STRINGLIB_LEN PyByteArray_GET_SIZE |
|
1096 #define STRINGLIB_STR PyByteArray_AS_STRING |
|
1097 #define STRINGLIB_NEW PyByteArray_FromStringAndSize |
|
1098 #define STRINGLIB_EMPTY nullbytes |
|
1099 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact |
|
1100 #define STRINGLIB_MUTABLE 1 |
|
1101 #define FROM_BYTEARRAY 1 |
|
1102 |
|
1103 #include "stringlib/fastsearch.h" |
|
1104 #include "stringlib/count.h" |
|
1105 #include "stringlib/find.h" |
|
1106 #include "stringlib/partition.h" |
|
1107 #include "stringlib/ctype.h" |
|
1108 #include "stringlib/transmogrify.h" |
|
1109 |
|
1110 |
|
1111 /* The following Py_LOCAL_INLINE and Py_LOCAL functions |
|
1112 were copied from the old char* style string object. */ |
|
1113 |
|
1114 Py_LOCAL_INLINE(void) |
|
1115 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) |
|
1116 { |
|
1117 if (*end > len) |
|
1118 *end = len; |
|
1119 else if (*end < 0) |
|
1120 *end += len; |
|
1121 if (*end < 0) |
|
1122 *end = 0; |
|
1123 if (*start < 0) |
|
1124 *start += len; |
|
1125 if (*start < 0) |
|
1126 *start = 0; |
|
1127 } |
|
1128 |
|
1129 |
|
1130 Py_LOCAL_INLINE(Py_ssize_t) |
|
1131 bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir) |
|
1132 { |
|
1133 PyObject *subobj; |
|
1134 Py_buffer subbuf; |
|
1135 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; |
|
1136 Py_ssize_t res; |
|
1137 |
|
1138 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj, |
|
1139 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
1140 return -2; |
|
1141 if (_getbuffer(subobj, &subbuf) < 0) |
|
1142 return -2; |
|
1143 if (dir > 0) |
|
1144 res = stringlib_find_slice( |
|
1145 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), |
|
1146 subbuf.buf, subbuf.len, start, end); |
|
1147 else |
|
1148 res = stringlib_rfind_slice( |
|
1149 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), |
|
1150 subbuf.buf, subbuf.len, start, end); |
|
1151 PyBuffer_Release(&subbuf); |
|
1152 return res; |
|
1153 } |
|
1154 |
|
1155 PyDoc_STRVAR(find__doc__, |
|
1156 "B.find(sub [,start [,end]]) -> int\n\ |
|
1157 \n\ |
|
1158 Return the lowest index in B where subsection sub is found,\n\ |
|
1159 such that sub is contained within s[start,end]. Optional\n\ |
|
1160 arguments start and end are interpreted as in slice notation.\n\ |
|
1161 \n\ |
|
1162 Return -1 on failure."); |
|
1163 |
|
1164 static PyObject * |
|
1165 bytes_find(PyByteArrayObject *self, PyObject *args) |
|
1166 { |
|
1167 Py_ssize_t result = bytes_find_internal(self, args, +1); |
|
1168 if (result == -2) |
|
1169 return NULL; |
|
1170 return PyInt_FromSsize_t(result); |
|
1171 } |
|
1172 |
|
1173 PyDoc_STRVAR(count__doc__, |
|
1174 "B.count(sub [,start [,end]]) -> int\n\ |
|
1175 \n\ |
|
1176 Return the number of non-overlapping occurrences of subsection sub in\n\ |
|
1177 bytes B[start:end]. Optional arguments start and end are interpreted\n\ |
|
1178 as in slice notation."); |
|
1179 |
|
1180 static PyObject * |
|
1181 bytes_count(PyByteArrayObject *self, PyObject *args) |
|
1182 { |
|
1183 PyObject *sub_obj; |
|
1184 const char *str = PyByteArray_AS_STRING(self); |
|
1185 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
|
1186 Py_buffer vsub; |
|
1187 PyObject *count_obj; |
|
1188 |
|
1189 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, |
|
1190 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
1191 return NULL; |
|
1192 |
|
1193 if (_getbuffer(sub_obj, &vsub) < 0) |
|
1194 return NULL; |
|
1195 |
|
1196 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self)); |
|
1197 |
|
1198 count_obj = PyInt_FromSsize_t( |
|
1199 stringlib_count(str + start, end - start, vsub.buf, vsub.len) |
|
1200 ); |
|
1201 PyBuffer_Release(&vsub); |
|
1202 return count_obj; |
|
1203 } |
|
1204 |
|
1205 |
|
1206 PyDoc_STRVAR(index__doc__, |
|
1207 "B.index(sub [,start [,end]]) -> int\n\ |
|
1208 \n\ |
|
1209 Like B.find() but raise ValueError when the subsection is not found."); |
|
1210 |
|
1211 static PyObject * |
|
1212 bytes_index(PyByteArrayObject *self, PyObject *args) |
|
1213 { |
|
1214 Py_ssize_t result = bytes_find_internal(self, args, +1); |
|
1215 if (result == -2) |
|
1216 return NULL; |
|
1217 if (result == -1) { |
|
1218 PyErr_SetString(PyExc_ValueError, |
|
1219 "subsection not found"); |
|
1220 return NULL; |
|
1221 } |
|
1222 return PyInt_FromSsize_t(result); |
|
1223 } |
|
1224 |
|
1225 |
|
1226 PyDoc_STRVAR(rfind__doc__, |
|
1227 "B.rfind(sub [,start [,end]]) -> int\n\ |
|
1228 \n\ |
|
1229 Return the highest index in B where subsection sub is found,\n\ |
|
1230 such that sub is contained within s[start,end]. Optional\n\ |
|
1231 arguments start and end are interpreted as in slice notation.\n\ |
|
1232 \n\ |
|
1233 Return -1 on failure."); |
|
1234 |
|
1235 static PyObject * |
|
1236 bytes_rfind(PyByteArrayObject *self, PyObject *args) |
|
1237 { |
|
1238 Py_ssize_t result = bytes_find_internal(self, args, -1); |
|
1239 if (result == -2) |
|
1240 return NULL; |
|
1241 return PyInt_FromSsize_t(result); |
|
1242 } |
|
1243 |
|
1244 |
|
1245 PyDoc_STRVAR(rindex__doc__, |
|
1246 "B.rindex(sub [,start [,end]]) -> int\n\ |
|
1247 \n\ |
|
1248 Like B.rfind() but raise ValueError when the subsection is not found."); |
|
1249 |
|
1250 static PyObject * |
|
1251 bytes_rindex(PyByteArrayObject *self, PyObject *args) |
|
1252 { |
|
1253 Py_ssize_t result = bytes_find_internal(self, args, -1); |
|
1254 if (result == -2) |
|
1255 return NULL; |
|
1256 if (result == -1) { |
|
1257 PyErr_SetString(PyExc_ValueError, |
|
1258 "subsection not found"); |
|
1259 return NULL; |
|
1260 } |
|
1261 return PyInt_FromSsize_t(result); |
|
1262 } |
|
1263 |
|
1264 |
|
1265 static int |
|
1266 bytes_contains(PyObject *self, PyObject *arg) |
|
1267 { |
|
1268 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); |
|
1269 if (ival == -1 && PyErr_Occurred()) { |
|
1270 Py_buffer varg; |
|
1271 int pos; |
|
1272 PyErr_Clear(); |
|
1273 if (_getbuffer(arg, &varg) < 0) |
|
1274 return -1; |
|
1275 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self), |
|
1276 varg.buf, varg.len, 0); |
|
1277 PyBuffer_Release(&varg); |
|
1278 return pos >= 0; |
|
1279 } |
|
1280 if (ival < 0 || ival >= 256) { |
|
1281 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); |
|
1282 return -1; |
|
1283 } |
|
1284 |
|
1285 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL; |
|
1286 } |
|
1287 |
|
1288 |
|
1289 /* Matches the end (direction >= 0) or start (direction < 0) of self |
|
1290 * against substr, using the start and end arguments. Returns |
|
1291 * -1 on error, 0 if not found and 1 if found. |
|
1292 */ |
|
1293 Py_LOCAL(int) |
|
1294 _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start, |
|
1295 Py_ssize_t end, int direction) |
|
1296 { |
|
1297 Py_ssize_t len = PyByteArray_GET_SIZE(self); |
|
1298 const char* str; |
|
1299 Py_buffer vsubstr; |
|
1300 int rv = 0; |
|
1301 |
|
1302 str = PyByteArray_AS_STRING(self); |
|
1303 |
|
1304 if (_getbuffer(substr, &vsubstr) < 0) |
|
1305 return -1; |
|
1306 |
|
1307 _adjust_indices(&start, &end, len); |
|
1308 |
|
1309 if (direction < 0) { |
|
1310 /* startswith */ |
|
1311 if (start+vsubstr.len > len) { |
|
1312 goto done; |
|
1313 } |
|
1314 } else { |
|
1315 /* endswith */ |
|
1316 if (end-start < vsubstr.len || start > len) { |
|
1317 goto done; |
|
1318 } |
|
1319 |
|
1320 if (end-vsubstr.len > start) |
|
1321 start = end - vsubstr.len; |
|
1322 } |
|
1323 if (end-start >= vsubstr.len) |
|
1324 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len); |
|
1325 |
|
1326 done: |
|
1327 PyBuffer_Release(&vsubstr); |
|
1328 return rv; |
|
1329 } |
|
1330 |
|
1331 |
|
1332 PyDoc_STRVAR(startswith__doc__, |
|
1333 "B.startswith(prefix [,start [,end]]) -> bool\n\ |
|
1334 \n\ |
|
1335 Return True if B starts with the specified prefix, False otherwise.\n\ |
|
1336 With optional start, test B beginning at that position.\n\ |
|
1337 With optional end, stop comparing B at that position.\n\ |
|
1338 prefix can also be a tuple of strings to try."); |
|
1339 |
|
1340 static PyObject * |
|
1341 bytes_startswith(PyByteArrayObject *self, PyObject *args) |
|
1342 { |
|
1343 Py_ssize_t start = 0; |
|
1344 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
1345 PyObject *subobj; |
|
1346 int result; |
|
1347 |
|
1348 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, |
|
1349 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
1350 return NULL; |
|
1351 if (PyTuple_Check(subobj)) { |
|
1352 Py_ssize_t i; |
|
1353 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
|
1354 result = _bytes_tailmatch(self, |
|
1355 PyTuple_GET_ITEM(subobj, i), |
|
1356 start, end, -1); |
|
1357 if (result == -1) |
|
1358 return NULL; |
|
1359 else if (result) { |
|
1360 Py_RETURN_TRUE; |
|
1361 } |
|
1362 } |
|
1363 Py_RETURN_FALSE; |
|
1364 } |
|
1365 result = _bytes_tailmatch(self, subobj, start, end, -1); |
|
1366 if (result == -1) |
|
1367 return NULL; |
|
1368 else |
|
1369 return PyBool_FromLong(result); |
|
1370 } |
|
1371 |
|
1372 PyDoc_STRVAR(endswith__doc__, |
|
1373 "B.endswith(suffix [,start [,end]]) -> bool\n\ |
|
1374 \n\ |
|
1375 Return True if B ends with the specified suffix, False otherwise.\n\ |
|
1376 With optional start, test B beginning at that position.\n\ |
|
1377 With optional end, stop comparing B at that position.\n\ |
|
1378 suffix can also be a tuple of strings to try."); |
|
1379 |
|
1380 static PyObject * |
|
1381 bytes_endswith(PyByteArrayObject *self, PyObject *args) |
|
1382 { |
|
1383 Py_ssize_t start = 0; |
|
1384 Py_ssize_t end = PY_SSIZE_T_MAX; |
|
1385 PyObject *subobj; |
|
1386 int result; |
|
1387 |
|
1388 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, |
|
1389 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) |
|
1390 return NULL; |
|
1391 if (PyTuple_Check(subobj)) { |
|
1392 Py_ssize_t i; |
|
1393 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { |
|
1394 result = _bytes_tailmatch(self, |
|
1395 PyTuple_GET_ITEM(subobj, i), |
|
1396 start, end, +1); |
|
1397 if (result == -1) |
|
1398 return NULL; |
|
1399 else if (result) { |
|
1400 Py_RETURN_TRUE; |
|
1401 } |
|
1402 } |
|
1403 Py_RETURN_FALSE; |
|
1404 } |
|
1405 result = _bytes_tailmatch(self, subobj, start, end, +1); |
|
1406 if (result == -1) |
|
1407 return NULL; |
|
1408 else |
|
1409 return PyBool_FromLong(result); |
|
1410 } |
|
1411 |
|
1412 |
|
1413 PyDoc_STRVAR(translate__doc__, |
|
1414 "B.translate(table[, deletechars]) -> bytearray\n\ |
|
1415 \n\ |
|
1416 Return a copy of B, where all characters occurring in the\n\ |
|
1417 optional argument deletechars are removed, and the remaining\n\ |
|
1418 characters have been mapped through the given translation\n\ |
|
1419 table, which must be a bytes object of length 256."); |
|
1420 |
|
1421 static PyObject * |
|
1422 bytes_translate(PyByteArrayObject *self, PyObject *args) |
|
1423 { |
|
1424 register char *input, *output; |
|
1425 register const char *table; |
|
1426 register Py_ssize_t i, c; |
|
1427 PyObject *input_obj = (PyObject*)self; |
|
1428 const char *output_start; |
|
1429 Py_ssize_t inlen; |
|
1430 PyObject *result; |
|
1431 int trans_table[256]; |
|
1432 PyObject *tableobj, *delobj = NULL; |
|
1433 Py_buffer vtable, vdel; |
|
1434 |
|
1435 if (!PyArg_UnpackTuple(args, "translate", 1, 2, |
|
1436 &tableobj, &delobj)) |
|
1437 return NULL; |
|
1438 |
|
1439 if (_getbuffer(tableobj, &vtable) < 0) |
|
1440 return NULL; |
|
1441 |
|
1442 if (vtable.len != 256) { |
|
1443 PyErr_SetString(PyExc_ValueError, |
|
1444 "translation table must be 256 characters long"); |
|
1445 result = NULL; |
|
1446 goto done; |
|
1447 } |
|
1448 |
|
1449 if (delobj != NULL) { |
|
1450 if (_getbuffer(delobj, &vdel) < 0) { |
|
1451 result = NULL; |
|
1452 goto done; |
|
1453 } |
|
1454 } |
|
1455 else { |
|
1456 vdel.buf = NULL; |
|
1457 vdel.len = 0; |
|
1458 } |
|
1459 |
|
1460 table = (const char *)vtable.buf; |
|
1461 inlen = PyByteArray_GET_SIZE(input_obj); |
|
1462 result = PyByteArray_FromStringAndSize((char *)NULL, inlen); |
|
1463 if (result == NULL) |
|
1464 goto done; |
|
1465 output_start = output = PyByteArray_AsString(result); |
|
1466 input = PyByteArray_AS_STRING(input_obj); |
|
1467 |
|
1468 if (vdel.len == 0) { |
|
1469 /* If no deletions are required, use faster code */ |
|
1470 for (i = inlen; --i >= 0; ) { |
|
1471 c = Py_CHARMASK(*input++); |
|
1472 *output++ = table[c]; |
|
1473 } |
|
1474 goto done; |
|
1475 } |
|
1476 |
|
1477 for (i = 0; i < 256; i++) |
|
1478 trans_table[i] = Py_CHARMASK(table[i]); |
|
1479 |
|
1480 for (i = 0; i < vdel.len; i++) |
|
1481 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1; |
|
1482 |
|
1483 for (i = inlen; --i >= 0; ) { |
|
1484 c = Py_CHARMASK(*input++); |
|
1485 if (trans_table[c] != -1) |
|
1486 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) |
|
1487 continue; |
|
1488 } |
|
1489 /* Fix the size of the resulting string */ |
|
1490 if (inlen > 0) |
|
1491 PyByteArray_Resize(result, output - output_start); |
|
1492 |
|
1493 done: |
|
1494 PyBuffer_Release(&vtable); |
|
1495 if (delobj != NULL) |
|
1496 PyBuffer_Release(&vdel); |
|
1497 return result; |
|
1498 } |
|
1499 |
|
1500 |
|
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes of
   target, or NULL if there is none.  All arguments are parenthesised so
   that expression arguments expand safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the length bytes at target+offset equal pattern.  The first and
   last bytes are compared before falling back to memcmp on the middle.
   Don't call if length < 2 (length-2 would go negative). */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
|
1514 |
|
1515 |
|
1516 /* Bytes ops must return a string, create a copy */ |
|
1517 Py_LOCAL(PyByteArrayObject *) |
|
1518 return_self(PyByteArrayObject *self) |
|
1519 { |
|
1520 return (PyByteArrayObject *)PyByteArray_FromStringAndSize( |
|
1521 PyByteArray_AS_STRING(self), |
|
1522 PyByteArray_GET_SIZE(self)); |
|
1523 } |
|
1524 |
|
1525 Py_LOCAL_INLINE(Py_ssize_t) |
|
1526 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) |
|
1527 { |
|
1528 Py_ssize_t count=0; |
|
1529 const char *start=target; |
|
1530 const char *end=target+target_len; |
|
1531 |
|
1532 while ( (start=findchar(start, end-start, c)) != NULL ) { |
|
1533 count++; |
|
1534 if (count >= maxcount) |
|
1535 break; |
|
1536 start += 1; |
|
1537 } |
|
1538 return count; |
|
1539 } |
|
1540 |
|
1541 Py_LOCAL(Py_ssize_t) |
|
1542 findstring(const char *target, Py_ssize_t target_len, |
|
1543 const char *pattern, Py_ssize_t pattern_len, |
|
1544 Py_ssize_t start, |
|
1545 Py_ssize_t end, |
|
1546 int direction) |
|
1547 { |
|
1548 if (start < 0) { |
|
1549 start += target_len; |
|
1550 if (start < 0) |
|
1551 start = 0; |
|
1552 } |
|
1553 if (end > target_len) { |
|
1554 end = target_len; |
|
1555 } else if (end < 0) { |
|
1556 end += target_len; |
|
1557 if (end < 0) |
|
1558 end = 0; |
|
1559 } |
|
1560 |
|
1561 /* zero-length substrings always match at the first attempt */ |
|
1562 if (pattern_len == 0) |
|
1563 return (direction > 0) ? start : end; |
|
1564 |
|
1565 end -= pattern_len; |
|
1566 |
|
1567 if (direction < 0) { |
|
1568 for (; end >= start; end--) |
|
1569 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) |
|
1570 return end; |
|
1571 } else { |
|
1572 for (; start <= end; start++) |
|
1573 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) |
|
1574 return start; |
|
1575 } |
|
1576 return -1; |
|
1577 } |
|
1578 |
|
1579 Py_LOCAL_INLINE(Py_ssize_t) |
|
1580 countstring(const char *target, Py_ssize_t target_len, |
|
1581 const char *pattern, Py_ssize_t pattern_len, |
|
1582 Py_ssize_t start, |
|
1583 Py_ssize_t end, |
|
1584 int direction, Py_ssize_t maxcount) |
|
1585 { |
|
1586 Py_ssize_t count=0; |
|
1587 |
|
1588 if (start < 0) { |
|
1589 start += target_len; |
|
1590 if (start < 0) |
|
1591 start = 0; |
|
1592 } |
|
1593 if (end > target_len) { |
|
1594 end = target_len; |
|
1595 } else if (end < 0) { |
|
1596 end += target_len; |
|
1597 if (end < 0) |
|
1598 end = 0; |
|
1599 } |
|
1600 |
|
1601 /* zero-length substrings match everywhere */ |
|
1602 if (pattern_len == 0 || maxcount == 0) { |
|
1603 if (target_len+1 < maxcount) |
|
1604 return target_len+1; |
|
1605 return maxcount; |
|
1606 } |
|
1607 |
|
1608 end -= pattern_len; |
|
1609 if (direction < 0) { |
|
1610 for (; (end >= start); end--) |
|
1611 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { |
|
1612 count++; |
|
1613 if (--maxcount <= 0) break; |
|
1614 end -= pattern_len-1; |
|
1615 } |
|
1616 } else { |
|
1617 for (; (start <= end); start++) |
|
1618 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { |
|
1619 count++; |
|
1620 if (--maxcount <= 0) |
|
1621 break; |
|
1622 start += pattern_len-1; |
|
1623 } |
|
1624 } |
|
1625 return count; |
|
1626 } |
|
1627 |
|
1628 |
|
1629 /* Algorithms for different cases of string replacement */ |
|
1630 |
|
1631 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ |
|
1632 Py_LOCAL(PyByteArrayObject *) |
|
1633 replace_interleave(PyByteArrayObject *self, |
|
1634 const char *to_s, Py_ssize_t to_len, |
|
1635 Py_ssize_t maxcount) |
|
1636 { |
|
1637 char *self_s, *result_s; |
|
1638 Py_ssize_t self_len, result_len; |
|
1639 Py_ssize_t count, i, product; |
|
1640 PyByteArrayObject *result; |
|
1641 |
|
1642 self_len = PyByteArray_GET_SIZE(self); |
|
1643 |
|
1644 /* 1 at the end plus 1 after every character */ |
|
1645 count = self_len+1; |
|
1646 if (maxcount < count) |
|
1647 count = maxcount; |
|
1648 |
|
1649 /* Check for overflow */ |
|
1650 /* result_len = count * to_len + self_len; */ |
|
1651 product = count * to_len; |
|
1652 if (product / to_len != count) { |
|
1653 PyErr_SetString(PyExc_OverflowError, |
|
1654 "replace string is too long"); |
|
1655 return NULL; |
|
1656 } |
|
1657 result_len = product + self_len; |
|
1658 if (result_len < 0) { |
|
1659 PyErr_SetString(PyExc_OverflowError, |
|
1660 "replace string is too long"); |
|
1661 return NULL; |
|
1662 } |
|
1663 |
|
1664 if (! (result = (PyByteArrayObject *) |
|
1665 PyByteArray_FromStringAndSize(NULL, result_len)) ) |
|
1666 return NULL; |
|
1667 |
|
1668 self_s = PyByteArray_AS_STRING(self); |
|
1669 result_s = PyByteArray_AS_STRING(result); |
|
1670 |
|
1671 /* TODO: special case single character, which doesn't need memcpy */ |
|
1672 |
|
1673 /* Lay the first one down (guaranteed this will occur) */ |
|
1674 Py_MEMCPY(result_s, to_s, to_len); |
|
1675 result_s += to_len; |
|
1676 count -= 1; |
|
1677 |
|
1678 for (i=0; i<count; i++) { |
|
1679 *result_s++ = *self_s++; |
|
1680 Py_MEMCPY(result_s, to_s, to_len); |
|
1681 result_s += to_len; |
|
1682 } |
|
1683 |
|
1684 /* Copy the rest of the original string */ |
|
1685 Py_MEMCPY(result_s, self_s, self_len-i); |
|
1686 |
|
1687 return result; |
|
1688 } |
|
1689 |
|
1690 /* Special case for deleting a single character */ |
|
1691 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ |
|
1692 Py_LOCAL(PyByteArrayObject *) |
|
1693 replace_delete_single_character(PyByteArrayObject *self, |
|
1694 char from_c, Py_ssize_t maxcount) |
|
1695 { |
|
1696 char *self_s, *result_s; |
|
1697 char *start, *next, *end; |
|
1698 Py_ssize_t self_len, result_len; |
|
1699 Py_ssize_t count; |
|
1700 PyByteArrayObject *result; |
|
1701 |
|
1702 self_len = PyByteArray_GET_SIZE(self); |
|
1703 self_s = PyByteArray_AS_STRING(self); |
|
1704 |
|
1705 count = countchar(self_s, self_len, from_c, maxcount); |
|
1706 if (count == 0) { |
|
1707 return return_self(self); |
|
1708 } |
|
1709 |
|
1710 result_len = self_len - count; /* from_len == 1 */ |
|
1711 assert(result_len>=0); |
|
1712 |
|
1713 if ( (result = (PyByteArrayObject *) |
|
1714 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) |
|
1715 return NULL; |
|
1716 result_s = PyByteArray_AS_STRING(result); |
|
1717 |
|
1718 start = self_s; |
|
1719 end = self_s + self_len; |
|
1720 while (count-- > 0) { |
|
1721 next = findchar(start, end-start, from_c); |
|
1722 if (next == NULL) |
|
1723 break; |
|
1724 Py_MEMCPY(result_s, start, next-start); |
|
1725 result_s += (next-start); |
|
1726 start = next+1; |
|
1727 } |
|
1728 Py_MEMCPY(result_s, start, end-start); |
|
1729 |
|
1730 return result; |
|
1731 } |
|
1732 |
|
1733 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ |
|
1734 |
|
1735 Py_LOCAL(PyByteArrayObject *) |
|
1736 replace_delete_substring(PyByteArrayObject *self, |
|
1737 const char *from_s, Py_ssize_t from_len, |
|
1738 Py_ssize_t maxcount) |
|
1739 { |
|
1740 char *self_s, *result_s; |
|
1741 char *start, *next, *end; |
|
1742 Py_ssize_t self_len, result_len; |
|
1743 Py_ssize_t count, offset; |
|
1744 PyByteArrayObject *result; |
|
1745 |
|
1746 self_len = PyByteArray_GET_SIZE(self); |
|
1747 self_s = PyByteArray_AS_STRING(self); |
|
1748 |
|
1749 count = countstring(self_s, self_len, |
|
1750 from_s, from_len, |
|
1751 0, self_len, 1, |
|
1752 maxcount); |
|
1753 |
|
1754 if (count == 0) { |
|
1755 /* no matches */ |
|
1756 return return_self(self); |
|
1757 } |
|
1758 |
|
1759 result_len = self_len - (count * from_len); |
|
1760 assert (result_len>=0); |
|
1761 |
|
1762 if ( (result = (PyByteArrayObject *) |
|
1763 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL ) |
|
1764 return NULL; |
|
1765 |
|
1766 result_s = PyByteArray_AS_STRING(result); |
|
1767 |
|
1768 start = self_s; |
|
1769 end = self_s + self_len; |
|
1770 while (count-- > 0) { |
|
1771 offset = findstring(start, end-start, |
|
1772 from_s, from_len, |
|
1773 0, end-start, FORWARD); |
|
1774 if (offset == -1) |
|
1775 break; |
|
1776 next = start + offset; |
|
1777 |
|
1778 Py_MEMCPY(result_s, start, next-start); |
|
1779 |
|
1780 result_s += (next-start); |
|
1781 start = next+from_len; |
|
1782 } |
|
1783 Py_MEMCPY(result_s, start, end-start); |
|
1784 return result; |
|
1785 } |
|
1786 |
|
1787 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ |
|
1788 Py_LOCAL(PyByteArrayObject *) |
|
1789 replace_single_character_in_place(PyByteArrayObject *self, |
|
1790 char from_c, char to_c, |
|
1791 Py_ssize_t maxcount) |
|
1792 { |
|
1793 char *self_s, *result_s, *start, *end, *next; |
|
1794 Py_ssize_t self_len; |
|
1795 PyByteArrayObject *result; |
|
1796 |
|
1797 /* The result string will be the same size */ |
|
1798 self_s = PyByteArray_AS_STRING(self); |
|
1799 self_len = PyByteArray_GET_SIZE(self); |
|
1800 |
|
1801 next = findchar(self_s, self_len, from_c); |
|
1802 |
|
1803 if (next == NULL) { |
|
1804 /* No matches; return the original bytes */ |
|
1805 return return_self(self); |
|
1806 } |
|
1807 |
|
1808 /* Need to make a new bytes */ |
|
1809 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); |
|
1810 if (result == NULL) |
|
1811 return NULL; |
|
1812 result_s = PyByteArray_AS_STRING(result); |
|
1813 Py_MEMCPY(result_s, self_s, self_len); |
|
1814 |
|
1815 /* change everything in-place, starting with this one */ |
|
1816 start = result_s + (next-self_s); |
|
1817 *start = to_c; |
|
1818 start++; |
|
1819 end = result_s + self_len; |
|
1820 |
|
1821 while (--maxcount > 0) { |
|
1822 next = findchar(start, end-start, from_c); |
|
1823 if (next == NULL) |
|
1824 break; |
|
1825 *next = to_c; |
|
1826 start = next+1; |
|
1827 } |
|
1828 |
|
1829 return result; |
|
1830 } |
|
1831 |
|
1832 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ |
|
1833 Py_LOCAL(PyByteArrayObject *) |
|
1834 replace_substring_in_place(PyByteArrayObject *self, |
|
1835 const char *from_s, Py_ssize_t from_len, |
|
1836 const char *to_s, Py_ssize_t to_len, |
|
1837 Py_ssize_t maxcount) |
|
1838 { |
|
1839 char *result_s, *start, *end; |
|
1840 char *self_s; |
|
1841 Py_ssize_t self_len, offset; |
|
1842 PyByteArrayObject *result; |
|
1843 |
|
1844 /* The result bytes will be the same size */ |
|
1845 |
|
1846 self_s = PyByteArray_AS_STRING(self); |
|
1847 self_len = PyByteArray_GET_SIZE(self); |
|
1848 |
|
1849 offset = findstring(self_s, self_len, |
|
1850 from_s, from_len, |
|
1851 0, self_len, FORWARD); |
|
1852 if (offset == -1) { |
|
1853 /* No matches; return the original bytes */ |
|
1854 return return_self(self); |
|
1855 } |
|
1856 |
|
1857 /* Need to make a new bytes */ |
|
1858 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); |
|
1859 if (result == NULL) |
|
1860 return NULL; |
|
1861 result_s = PyByteArray_AS_STRING(result); |
|
1862 Py_MEMCPY(result_s, self_s, self_len); |
|
1863 |
|
1864 /* change everything in-place, starting with this one */ |
|
1865 start = result_s + offset; |
|
1866 Py_MEMCPY(start, to_s, from_len); |
|
1867 start += from_len; |
|
1868 end = result_s + self_len; |
|
1869 |
|
1870 while ( --maxcount > 0) { |
|
1871 offset = findstring(start, end-start, |
|
1872 from_s, from_len, |
|
1873 0, end-start, FORWARD); |
|
1874 if (offset==-1) |
|
1875 break; |
|
1876 Py_MEMCPY(start+offset, to_s, from_len); |
|
1877 start += offset+from_len; |
|
1878 } |
|
1879 |
|
1880 return result; |
|
1881 } |
|
1882 |
|
1883 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ |
|
1884 Py_LOCAL(PyByteArrayObject *) |
|
1885 replace_single_character(PyByteArrayObject *self, |
|
1886 char from_c, |
|
1887 const char *to_s, Py_ssize_t to_len, |
|
1888 Py_ssize_t maxcount) |
|
1889 { |
|
1890 char *self_s, *result_s; |
|
1891 char *start, *next, *end; |
|
1892 Py_ssize_t self_len, result_len; |
|
1893 Py_ssize_t count, product; |
|
1894 PyByteArrayObject *result; |
|
1895 |
|
1896 self_s = PyByteArray_AS_STRING(self); |
|
1897 self_len = PyByteArray_GET_SIZE(self); |
|
1898 |
|
1899 count = countchar(self_s, self_len, from_c, maxcount); |
|
1900 if (count == 0) { |
|
1901 /* no matches, return unchanged */ |
|
1902 return return_self(self); |
|
1903 } |
|
1904 |
|
1905 /* use the difference between current and new, hence the "-1" */ |
|
1906 /* result_len = self_len + count * (to_len-1) */ |
|
1907 product = count * (to_len-1); |
|
1908 if (product / (to_len-1) != count) { |
|
1909 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
|
1910 return NULL; |
|
1911 } |
|
1912 result_len = self_len + product; |
|
1913 if (result_len < 0) { |
|
1914 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
|
1915 return NULL; |
|
1916 } |
|
1917 |
|
1918 if ( (result = (PyByteArrayObject *) |
|
1919 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) |
|
1920 return NULL; |
|
1921 result_s = PyByteArray_AS_STRING(result); |
|
1922 |
|
1923 start = self_s; |
|
1924 end = self_s + self_len; |
|
1925 while (count-- > 0) { |
|
1926 next = findchar(start, end-start, from_c); |
|
1927 if (next == NULL) |
|
1928 break; |
|
1929 |
|
1930 if (next == start) { |
|
1931 /* replace with the 'to' */ |
|
1932 Py_MEMCPY(result_s, to_s, to_len); |
|
1933 result_s += to_len; |
|
1934 start += 1; |
|
1935 } else { |
|
1936 /* copy the unchanged old then the 'to' */ |
|
1937 Py_MEMCPY(result_s, start, next-start); |
|
1938 result_s += (next-start); |
|
1939 Py_MEMCPY(result_s, to_s, to_len); |
|
1940 result_s += to_len; |
|
1941 start = next+1; |
|
1942 } |
|
1943 } |
|
1944 /* Copy the remainder of the remaining bytes */ |
|
1945 Py_MEMCPY(result_s, start, end-start); |
|
1946 |
|
1947 return result; |
|
1948 } |
|
1949 |
|
1950 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ |
|
1951 Py_LOCAL(PyByteArrayObject *) |
|
1952 replace_substring(PyByteArrayObject *self, |
|
1953 const char *from_s, Py_ssize_t from_len, |
|
1954 const char *to_s, Py_ssize_t to_len, |
|
1955 Py_ssize_t maxcount) |
|
1956 { |
|
1957 char *self_s, *result_s; |
|
1958 char *start, *next, *end; |
|
1959 Py_ssize_t self_len, result_len; |
|
1960 Py_ssize_t count, offset, product; |
|
1961 PyByteArrayObject *result; |
|
1962 |
|
1963 self_s = PyByteArray_AS_STRING(self); |
|
1964 self_len = PyByteArray_GET_SIZE(self); |
|
1965 |
|
1966 count = countstring(self_s, self_len, |
|
1967 from_s, from_len, |
|
1968 0, self_len, FORWARD, maxcount); |
|
1969 if (count == 0) { |
|
1970 /* no matches, return unchanged */ |
|
1971 return return_self(self); |
|
1972 } |
|
1973 |
|
1974 /* Check for overflow */ |
|
1975 /* result_len = self_len + count * (to_len-from_len) */ |
|
1976 product = count * (to_len-from_len); |
|
1977 if (product / (to_len-from_len) != count) { |
|
1978 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
|
1979 return NULL; |
|
1980 } |
|
1981 result_len = self_len + product; |
|
1982 if (result_len < 0) { |
|
1983 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); |
|
1984 return NULL; |
|
1985 } |
|
1986 |
|
1987 if ( (result = (PyByteArrayObject *) |
|
1988 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) |
|
1989 return NULL; |
|
1990 result_s = PyByteArray_AS_STRING(result); |
|
1991 |
|
1992 start = self_s; |
|
1993 end = self_s + self_len; |
|
1994 while (count-- > 0) { |
|
1995 offset = findstring(start, end-start, |
|
1996 from_s, from_len, |
|
1997 0, end-start, FORWARD); |
|
1998 if (offset == -1) |
|
1999 break; |
|
2000 next = start+offset; |
|
2001 if (next == start) { |
|
2002 /* replace with the 'to' */ |
|
2003 Py_MEMCPY(result_s, to_s, to_len); |
|
2004 result_s += to_len; |
|
2005 start += from_len; |
|
2006 } else { |
|
2007 /* copy the unchanged old then the 'to' */ |
|
2008 Py_MEMCPY(result_s, start, next-start); |
|
2009 result_s += (next-start); |
|
2010 Py_MEMCPY(result_s, to_s, to_len); |
|
2011 result_s += to_len; |
|
2012 start = next+from_len; |
|
2013 } |
|
2014 } |
|
2015 /* Copy the remainder of the remaining bytes */ |
|
2016 Py_MEMCPY(result_s, start, end-start); |
|
2017 |
|
2018 return result; |
|
2019 } |
|
2020 |
|
2021 |
|
2022 Py_LOCAL(PyByteArrayObject *) |
|
2023 replace(PyByteArrayObject *self, |
|
2024 const char *from_s, Py_ssize_t from_len, |
|
2025 const char *to_s, Py_ssize_t to_len, |
|
2026 Py_ssize_t maxcount) |
|
2027 { |
|
2028 if (maxcount < 0) { |
|
2029 maxcount = PY_SSIZE_T_MAX; |
|
2030 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) { |
|
2031 /* nothing to do; return the original bytes */ |
|
2032 return return_self(self); |
|
2033 } |
|
2034 |
|
2035 if (maxcount == 0 || |
|
2036 (from_len == 0 && to_len == 0)) { |
|
2037 /* nothing to do; return the original bytes */ |
|
2038 return return_self(self); |
|
2039 } |
|
2040 |
|
2041 /* Handle zero-length special cases */ |
|
2042 |
|
2043 if (from_len == 0) { |
|
2044 /* insert the 'to' bytes everywhere. */ |
|
2045 /* >>> "Python".replace("", ".") */ |
|
2046 /* '.P.y.t.h.o.n.' */ |
|
2047 return replace_interleave(self, to_s, to_len, maxcount); |
|
2048 } |
|
2049 |
|
2050 /* Except for "".replace("", "A") == "A" there is no way beyond this */ |
|
2051 /* point for an empty self bytes to generate a non-empty bytes */ |
|
2052 /* Special case so the remaining code always gets a non-empty bytes */ |
|
2053 if (PyByteArray_GET_SIZE(self) == 0) { |
|
2054 return return_self(self); |
|
2055 } |
|
2056 |
|
2057 if (to_len == 0) { |
|
2058 /* delete all occurances of 'from' bytes */ |
|
2059 if (from_len == 1) { |
|
2060 return replace_delete_single_character( |
|
2061 self, from_s[0], maxcount); |
|
2062 } else { |
|
2063 return replace_delete_substring(self, from_s, from_len, maxcount); |
|
2064 } |
|
2065 } |
|
2066 |
|
2067 /* Handle special case where both bytes have the same length */ |
|
2068 |
|
2069 if (from_len == to_len) { |
|
2070 if (from_len == 1) { |
|
2071 return replace_single_character_in_place( |
|
2072 self, |
|
2073 from_s[0], |
|
2074 to_s[0], |
|
2075 maxcount); |
|
2076 } else { |
|
2077 return replace_substring_in_place( |
|
2078 self, from_s, from_len, to_s, to_len, maxcount); |
|
2079 } |
|
2080 } |
|
2081 |
|
2082 /* Otherwise use the more generic algorithms */ |
|
2083 if (from_len == 1) { |
|
2084 return replace_single_character(self, from_s[0], |
|
2085 to_s, to_len, maxcount); |
|
2086 } else { |
|
2087 /* len('from')>=2, len('to')>=1 */ |
|
2088 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); |
|
2089 } |
|
2090 } |
|
2091 |
|
2092 |
|
2093 PyDoc_STRVAR(replace__doc__, |
|
2094 "B.replace(old, new[, count]) -> bytes\n\ |
|
2095 \n\ |
|
2096 Return a copy of B with all occurrences of subsection\n\ |
|
2097 old replaced by new. If the optional argument count is\n\ |
|
2098 given, only the first count occurrences are replaced."); |
|
2099 |
|
2100 static PyObject * |
|
2101 bytes_replace(PyByteArrayObject *self, PyObject *args) |
|
2102 { |
|
2103 Py_ssize_t count = -1; |
|
2104 PyObject *from, *to, *res; |
|
2105 Py_buffer vfrom, vto; |
|
2106 |
|
2107 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) |
|
2108 return NULL; |
|
2109 |
|
2110 if (_getbuffer(from, &vfrom) < 0) |
|
2111 return NULL; |
|
2112 if (_getbuffer(to, &vto) < 0) { |
|
2113 PyBuffer_Release(&vfrom); |
|
2114 return NULL; |
|
2115 } |
|
2116 |
|
2117 res = (PyObject *)replace((PyByteArrayObject *) self, |
|
2118 vfrom.buf, vfrom.len, |
|
2119 vto.buf, vto.len, count); |
|
2120 |
|
2121 PyBuffer_Release(&vfrom); |
|
2122 PyBuffer_Release(&vto); |
|
2123 return res; |
|
2124 } |
|
2125 |
|
2126 |
|
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list length for a split limited to `maxsplit` cuts:
   5 splits gives 6 elements, capped at MAX_PREALLOC.
   The argument is parenthesized so that expression arguments expand safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Build a bytearray from data[left:right] and append it to `list`.
   Uses the caller's locals `str` and `list` and jumps to the caller's
   `onError` label on failure.  Wrapped in braces so the expansion is a
   single compound statement. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }

/* Build a bytearray from data[left:right] and store it as item number
   `count` of `list`, then increment `count`.  While count is below
   MAX_PREALLOC the item is written straight into the preallocated slot
   (the list takes over the reference); after that it is appended.
   Uses the caller's locals `str`, `list`, `count` and the caller's
   `onError` label. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
|
2171 |
|
2172 |
|
2173 Py_LOCAL_INLINE(PyObject *) |
|
2174 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) |
|
2175 { |
|
2176 register Py_ssize_t i, j, count = 0; |
|
2177 PyObject *str; |
|
2178 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
2179 |
|
2180 if (list == NULL) |
|
2181 return NULL; |
|
2182 |
|
2183 i = j = 0; |
|
2184 while ((j < len) && (maxcount-- > 0)) { |
|
2185 for(; j < len; j++) { |
|
2186 /* I found that using memchr makes no difference */ |
|
2187 if (s[j] == ch) { |
|
2188 SPLIT_ADD(s, i, j); |
|
2189 i = j = j + 1; |
|
2190 break; |
|
2191 } |
|
2192 } |
|
2193 } |
|
2194 if (i <= len) { |
|
2195 SPLIT_ADD(s, i, len); |
|
2196 } |
|
2197 FIX_PREALLOC_SIZE(list); |
|
2198 return list; |
|
2199 |
|
2200 onError: |
|
2201 Py_DECREF(list); |
|
2202 return NULL; |
|
2203 } |
|
2204 |
|
2205 |
|
2206 Py_LOCAL_INLINE(PyObject *) |
|
2207 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) |
|
2208 { |
|
2209 register Py_ssize_t i, j, count = 0; |
|
2210 PyObject *str; |
|
2211 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
2212 |
|
2213 if (list == NULL) |
|
2214 return NULL; |
|
2215 |
|
2216 for (i = j = 0; i < len; ) { |
|
2217 /* find a token */ |
|
2218 while (i < len && ISSPACE(s[i])) |
|
2219 i++; |
|
2220 j = i; |
|
2221 while (i < len && !ISSPACE(s[i])) |
|
2222 i++; |
|
2223 if (j < i) { |
|
2224 if (maxcount-- <= 0) |
|
2225 break; |
|
2226 SPLIT_ADD(s, j, i); |
|
2227 while (i < len && ISSPACE(s[i])) |
|
2228 i++; |
|
2229 j = i; |
|
2230 } |
|
2231 } |
|
2232 if (j < len) { |
|
2233 SPLIT_ADD(s, j, len); |
|
2234 } |
|
2235 FIX_PREALLOC_SIZE(list); |
|
2236 return list; |
|
2237 |
|
2238 onError: |
|
2239 Py_DECREF(list); |
|
2240 return NULL; |
|
2241 } |
|
2242 |
|
2243 PyDoc_STRVAR(split__doc__, |
|
2244 "B.split([sep[, maxsplit]]) -> list of bytearray\n\ |
|
2245 \n\ |
|
2246 Return a list of the sections in B, using sep as the delimiter.\n\ |
|
2247 If sep is not given, B is split on ASCII whitespace characters\n\ |
|
2248 (space, tab, return, newline, formfeed, vertical tab).\n\ |
|
2249 If maxsplit is given, at most maxsplit splits are done."); |
|
2250 |
|
2251 static PyObject * |
|
2252 bytes_split(PyByteArrayObject *self, PyObject *args) |
|
2253 { |
|
2254 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j; |
|
2255 Py_ssize_t maxsplit = -1, count = 0; |
|
2256 const char *s = PyByteArray_AS_STRING(self), *sub; |
|
2257 PyObject *list, *str, *subobj = Py_None; |
|
2258 Py_buffer vsub; |
|
2259 #ifdef USE_FAST |
|
2260 Py_ssize_t pos; |
|
2261 #endif |
|
2262 |
|
2263 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) |
|
2264 return NULL; |
|
2265 if (maxsplit < 0) |
|
2266 maxsplit = PY_SSIZE_T_MAX; |
|
2267 |
|
2268 if (subobj == Py_None) |
|
2269 return split_whitespace(s, len, maxsplit); |
|
2270 |
|
2271 if (_getbuffer(subobj, &vsub) < 0) |
|
2272 return NULL; |
|
2273 sub = vsub.buf; |
|
2274 n = vsub.len; |
|
2275 |
|
2276 if (n == 0) { |
|
2277 PyErr_SetString(PyExc_ValueError, "empty separator"); |
|
2278 PyBuffer_Release(&vsub); |
|
2279 return NULL; |
|
2280 } |
|
2281 if (n == 1) { |
|
2282 list = split_char(s, len, sub[0], maxsplit); |
|
2283 PyBuffer_Release(&vsub); |
|
2284 return list; |
|
2285 } |
|
2286 |
|
2287 list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
2288 if (list == NULL) { |
|
2289 PyBuffer_Release(&vsub); |
|
2290 return NULL; |
|
2291 } |
|
2292 |
|
2293 #ifdef USE_FAST |
|
2294 i = j = 0; |
|
2295 while (maxsplit-- > 0) { |
|
2296 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); |
|
2297 if (pos < 0) |
|
2298 break; |
|
2299 j = i+pos; |
|
2300 SPLIT_ADD(s, i, j); |
|
2301 i = j + n; |
|
2302 } |
|
2303 #else |
|
2304 i = j = 0; |
|
2305 while ((j+n <= len) && (maxsplit-- > 0)) { |
|
2306 for (; j+n <= len; j++) { |
|
2307 if (Py_STRING_MATCH(s, j, sub, n)) { |
|
2308 SPLIT_ADD(s, i, j); |
|
2309 i = j = j + n; |
|
2310 break; |
|
2311 } |
|
2312 } |
|
2313 } |
|
2314 #endif |
|
2315 SPLIT_ADD(s, i, len); |
|
2316 FIX_PREALLOC_SIZE(list); |
|
2317 PyBuffer_Release(&vsub); |
|
2318 return list; |
|
2319 |
|
2320 onError: |
|
2321 Py_DECREF(list); |
|
2322 PyBuffer_Release(&vsub); |
|
2323 return NULL; |
|
2324 } |
|
2325 |
|
2326 /* stringlib's partition shares nullbytes in some cases. |
|
2327 undo this, we don't want the nullbytes to be shared. */ |
|
2328 static PyObject * |
|
2329 make_nullbytes_unique(PyObject *result) |
|
2330 { |
|
2331 if (result != NULL) { |
|
2332 int i; |
|
2333 assert(PyTuple_Check(result)); |
|
2334 assert(PyTuple_GET_SIZE(result) == 3); |
|
2335 for (i = 0; i < 3; i++) { |
|
2336 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) { |
|
2337 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0); |
|
2338 if (new == NULL) { |
|
2339 Py_DECREF(result); |
|
2340 result = NULL; |
|
2341 break; |
|
2342 } |
|
2343 Py_DECREF(nullbytes); |
|
2344 PyTuple_SET_ITEM(result, i, new); |
|
2345 } |
|
2346 } |
|
2347 } |
|
2348 return result; |
|
2349 } |
|
2350 |
|
2351 PyDoc_STRVAR(partition__doc__, |
|
2352 "B.partition(sep) -> (head, sep, tail)\n\ |
|
2353 \n\ |
|
2354 Searches for the separator sep in B, and returns the part before it,\n\ |
|
2355 the separator itself, and the part after it. If the separator is not\n\ |
|
2356 found, returns B and two empty bytearray objects."); |
|
2357 |
|
2358 static PyObject * |
|
2359 bytes_partition(PyByteArrayObject *self, PyObject *sep_obj) |
|
2360 { |
|
2361 PyObject *bytesep, *result; |
|
2362 |
|
2363 bytesep = PyByteArray_FromObject(sep_obj); |
|
2364 if (! bytesep) |
|
2365 return NULL; |
|
2366 |
|
2367 result = stringlib_partition( |
|
2368 (PyObject*) self, |
|
2369 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), |
|
2370 bytesep, |
|
2371 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep) |
|
2372 ); |
|
2373 |
|
2374 Py_DECREF(bytesep); |
|
2375 return make_nullbytes_unique(result); |
|
2376 } |
|
2377 |
|
2378 PyDoc_STRVAR(rpartition__doc__, |
|
2379 "B.rpartition(sep) -> (tail, sep, head)\n\ |
|
2380 \n\ |
|
2381 Searches for the separator sep in B, starting at the end of B,\n\ |
|
2382 and returns the part before it, the separator itself, and the\n\ |
|
2383 part after it. If the separator is not found, returns two empty\n\ |
|
2384 bytearray objects and B."); |
|
2385 |
|
2386 static PyObject * |
|
2387 bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj) |
|
2388 { |
|
2389 PyObject *bytesep, *result; |
|
2390 |
|
2391 bytesep = PyByteArray_FromObject(sep_obj); |
|
2392 if (! bytesep) |
|
2393 return NULL; |
|
2394 |
|
2395 result = stringlib_rpartition( |
|
2396 (PyObject*) self, |
|
2397 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), |
|
2398 bytesep, |
|
2399 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep) |
|
2400 ); |
|
2401 |
|
2402 Py_DECREF(bytesep); |
|
2403 return make_nullbytes_unique(result); |
|
2404 } |
|
2405 |
|
2406 Py_LOCAL_INLINE(PyObject *) |
|
2407 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) |
|
2408 { |
|
2409 register Py_ssize_t i, j, count=0; |
|
2410 PyObject *str; |
|
2411 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
2412 |
|
2413 if (list == NULL) |
|
2414 return NULL; |
|
2415 |
|
2416 i = j = len - 1; |
|
2417 while ((i >= 0) && (maxcount-- > 0)) { |
|
2418 for (; i >= 0; i--) { |
|
2419 if (s[i] == ch) { |
|
2420 SPLIT_ADD(s, i + 1, j + 1); |
|
2421 j = i = i - 1; |
|
2422 break; |
|
2423 } |
|
2424 } |
|
2425 } |
|
2426 if (j >= -1) { |
|
2427 SPLIT_ADD(s, 0, j + 1); |
|
2428 } |
|
2429 FIX_PREALLOC_SIZE(list); |
|
2430 if (PyList_Reverse(list) < 0) |
|
2431 goto onError; |
|
2432 |
|
2433 return list; |
|
2434 |
|
2435 onError: |
|
2436 Py_DECREF(list); |
|
2437 return NULL; |
|
2438 } |
|
2439 |
|
2440 Py_LOCAL_INLINE(PyObject *) |
|
2441 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) |
|
2442 { |
|
2443 register Py_ssize_t i, j, count = 0; |
|
2444 PyObject *str; |
|
2445 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); |
|
2446 |
|
2447 if (list == NULL) |
|
2448 return NULL; |
|
2449 |
|
2450 for (i = j = len - 1; i >= 0; ) { |
|
2451 /* find a token */ |
|
2452 while (i >= 0 && ISSPACE(s[i])) |
|
2453 i--; |
|
2454 j = i; |
|
2455 while (i >= 0 && !ISSPACE(s[i])) |
|
2456 i--; |
|
2457 if (j > i) { |
|
2458 if (maxcount-- <= 0) |
|
2459 break; |
|
2460 SPLIT_ADD(s, i + 1, j + 1); |
|
2461 while (i >= 0 && ISSPACE(s[i])) |
|
2462 i--; |
|
2463 j = i; |
|
2464 } |
|
2465 } |
|
2466 if (j >= 0) { |
|
2467 SPLIT_ADD(s, 0, j + 1); |
|
2468 } |
|
2469 FIX_PREALLOC_SIZE(list); |
|
2470 if (PyList_Reverse(list) < 0) |
|
2471 goto onError; |
|
2472 |
|
2473 return list; |
|
2474 |
|
2475 onError: |
|
2476 Py_DECREF(list); |
|
2477 return NULL; |
|
2478 } |
|
2479 |
|
2480 PyDoc_STRVAR(rsplit__doc__, |
|
2481 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\ |
|
2482 \n\ |
|
2483 Return a list of the sections in B, using sep as the delimiter,\n\ |
|
2484 starting at the end of B and working to the front.\n\ |
|
2485 If sep is not given, B is split on ASCII whitespace characters\n\ |
|
2486 (space, tab, return, newline, formfeed, vertical tab).\n\ |
|
2487 If maxsplit is given, at most maxsplit splits are done."); |
|
2488 |
|
2489 static PyObject * |
|
2490 bytes_rsplit(PyByteArrayObject *self, PyObject *args) |
|
2491 { |
|
2492 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j; |
|
2493 Py_ssize_t maxsplit = -1, count = 0; |
|
2494 const char *s = PyByteArray_AS_STRING(self), *sub; |
|
2495 PyObject *list, *str, *subobj = Py_None; |
|
2496 Py_buffer vsub; |
|
2497 |
|
2498 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) |
|
2499 return NULL; |
|
2500 if (maxsplit < 0) |
|
2501 maxsplit = PY_SSIZE_T_MAX; |
|
2502 |
|
2503 if (subobj == Py_None) |
|
2504 return rsplit_whitespace(s, len, maxsplit); |
|
2505 |
|
2506 if (_getbuffer(subobj, &vsub) < 0) |
|
2507 return NULL; |
|
2508 sub = vsub.buf; |
|
2509 n = vsub.len; |
|
2510 |
|
2511 if (n == 0) { |
|
2512 PyErr_SetString(PyExc_ValueError, "empty separator"); |
|
2513 PyBuffer_Release(&vsub); |
|
2514 return NULL; |
|
2515 } |
|
2516 else if (n == 1) { |
|
2517 list = rsplit_char(s, len, sub[0], maxsplit); |
|
2518 PyBuffer_Release(&vsub); |
|
2519 return list; |
|
2520 } |
|
2521 |
|
2522 list = PyList_New(PREALLOC_SIZE(maxsplit)); |
|
2523 if (list == NULL) { |
|
2524 PyBuffer_Release(&vsub); |
|
2525 return NULL; |
|
2526 } |
|
2527 |
|
2528 j = len; |
|
2529 i = j - n; |
|
2530 |
|
2531 while ( (i >= 0) && (maxsplit-- > 0) ) { |
|
2532 for (; i>=0; i--) { |
|
2533 if (Py_STRING_MATCH(s, i, sub, n)) { |
|
2534 SPLIT_ADD(s, i + n, j); |
|
2535 j = i; |
|
2536 i -= n; |
|
2537 break; |
|
2538 } |
|
2539 } |
|
2540 } |
|
2541 SPLIT_ADD(s, 0, j); |
|
2542 FIX_PREALLOC_SIZE(list); |
|
2543 if (PyList_Reverse(list) < 0) |
|
2544 goto onError; |
|
2545 PyBuffer_Release(&vsub); |
|
2546 return list; |
|
2547 |
|
2548 onError: |
|
2549 Py_DECREF(list); |
|
2550 PyBuffer_Release(&vsub); |
|
2551 return NULL; |
|
2552 } |
|
2553 |
|
2554 PyDoc_STRVAR(reverse__doc__, |
|
2555 "B.reverse() -> None\n\ |
|
2556 \n\ |
|
2557 Reverse the order of the values in B in place."); |
|
2558 static PyObject * |
|
2559 bytes_reverse(PyByteArrayObject *self, PyObject *unused) |
|
2560 { |
|
2561 char swap, *head, *tail; |
|
2562 Py_ssize_t i, j, n = Py_SIZE(self); |
|
2563 |
|
2564 j = n / 2; |
|
2565 head = self->ob_bytes; |
|
2566 tail = head + n - 1; |
|
2567 for (i = 0; i < j; i++) { |
|
2568 swap = *head; |
|
2569 *head++ = *tail; |
|
2570 *tail-- = swap; |
|
2571 } |
|
2572 |
|
2573 Py_RETURN_NONE; |
|
2574 } |
|
2575 |
|
2576 PyDoc_STRVAR(insert__doc__, |
|
2577 "B.insert(index, int) -> None\n\ |
|
2578 \n\ |
|
2579 Insert a single item into the bytearray before the given index."); |
|
2580 static PyObject * |
|
2581 bytes_insert(PyByteArrayObject *self, PyObject *args) |
|
2582 { |
|
2583 PyObject *value; |
|
2584 int ival; |
|
2585 Py_ssize_t where, n = Py_SIZE(self); |
|
2586 |
|
2587 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value)) |
|
2588 return NULL; |
|
2589 |
|
2590 if (n == PY_SSIZE_T_MAX) { |
|
2591 PyErr_SetString(PyExc_OverflowError, |
|
2592 "cannot add more objects to bytes"); |
|
2593 return NULL; |
|
2594 } |
|
2595 if (!_getbytevalue(value, &ival)) |
|
2596 return NULL; |
|
2597 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0) |
|
2598 return NULL; |
|
2599 |
|
2600 if (where < 0) { |
|
2601 where += n; |
|
2602 if (where < 0) |
|
2603 where = 0; |
|
2604 } |
|
2605 if (where > n) |
|
2606 where = n; |
|
2607 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where); |
|
2608 self->ob_bytes[where] = ival; |
|
2609 |
|
2610 Py_RETURN_NONE; |
|
2611 } |
|
2612 |
|
2613 PyDoc_STRVAR(append__doc__, |
|
2614 "B.append(int) -> None\n\ |
|
2615 \n\ |
|
2616 Append a single item to the end of B."); |
|
2617 static PyObject * |
|
2618 bytes_append(PyByteArrayObject *self, PyObject *arg) |
|
2619 { |
|
2620 int value; |
|
2621 Py_ssize_t n = Py_SIZE(self); |
|
2622 |
|
2623 if (! _getbytevalue(arg, &value)) |
|
2624 return NULL; |
|
2625 if (n == PY_SSIZE_T_MAX) { |
|
2626 PyErr_SetString(PyExc_OverflowError, |
|
2627 "cannot add more objects to bytes"); |
|
2628 return NULL; |
|
2629 } |
|
2630 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0) |
|
2631 return NULL; |
|
2632 |
|
2633 self->ob_bytes[n] = value; |
|
2634 |
|
2635 Py_RETURN_NONE; |
|
2636 } |
|
2637 |
|
2638 PyDoc_STRVAR(extend__doc__, |
|
2639 "B.extend(iterable int) -> None\n\ |
|
2640 \n\ |
|
2641 Append all the elements from the iterator or sequence to the\n\ |
|
2642 end of B."); |
|
2643 static PyObject * |
|
2644 bytes_extend(PyByteArrayObject *self, PyObject *arg) |
|
2645 { |
|
2646 PyObject *it, *item, *bytes_obj; |
|
2647 Py_ssize_t buf_size = 0, len = 0; |
|
2648 int value; |
|
2649 char *buf; |
|
2650 |
|
2651 /* bytes_setslice code only accepts something supporting PEP 3118. */ |
|
2652 if (PyObject_CheckBuffer(arg)) { |
|
2653 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1) |
|
2654 return NULL; |
|
2655 |
|
2656 Py_RETURN_NONE; |
|
2657 } |
|
2658 |
|
2659 it = PyObject_GetIter(arg); |
|
2660 if (it == NULL) |
|
2661 return NULL; |
|
2662 |
|
2663 /* Try to determine the length of the argument. 32 is abitrary. */ |
|
2664 buf_size = _PyObject_LengthHint(arg, 32); |
|
2665 |
|
2666 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size); |
|
2667 if (bytes_obj == NULL) |
|
2668 return NULL; |
|
2669 buf = PyByteArray_AS_STRING(bytes_obj); |
|
2670 |
|
2671 while ((item = PyIter_Next(it)) != NULL) { |
|
2672 if (! _getbytevalue(item, &value)) { |
|
2673 Py_DECREF(item); |
|
2674 Py_DECREF(it); |
|
2675 Py_DECREF(bytes_obj); |
|
2676 return NULL; |
|
2677 } |
|
2678 buf[len++] = value; |
|
2679 Py_DECREF(item); |
|
2680 |
|
2681 if (len >= buf_size) { |
|
2682 buf_size = len + (len >> 1) + 1; |
|
2683 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) { |
|
2684 Py_DECREF(it); |
|
2685 Py_DECREF(bytes_obj); |
|
2686 return NULL; |
|
2687 } |
|
2688 /* Recompute the `buf' pointer, since the resizing operation may |
|
2689 have invalidated it. */ |
|
2690 buf = PyByteArray_AS_STRING(bytes_obj); |
|
2691 } |
|
2692 } |
|
2693 Py_DECREF(it); |
|
2694 |
|
2695 /* Resize down to exact size. */ |
|
2696 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) { |
|
2697 Py_DECREF(bytes_obj); |
|
2698 return NULL; |
|
2699 } |
|
2700 |
|
2701 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1) |
|
2702 return NULL; |
|
2703 Py_DECREF(bytes_obj); |
|
2704 |
|
2705 Py_RETURN_NONE; |
|
2706 } |
|
2707 |
|
2708 PyDoc_STRVAR(pop__doc__, |
|
2709 "B.pop([index]) -> int\n\ |
|
2710 \n\ |
|
2711 Remove and return a single item from B. If no index\n\ |
|
2712 argument is given, will pop the last value."); |
|
2713 static PyObject * |
|
2714 bytes_pop(PyByteArrayObject *self, PyObject *args) |
|
2715 { |
|
2716 int value; |
|
2717 Py_ssize_t where = -1, n = Py_SIZE(self); |
|
2718 |
|
2719 if (!PyArg_ParseTuple(args, "|n:pop", &where)) |
|
2720 return NULL; |
|
2721 |
|
2722 if (n == 0) { |
|
2723 PyErr_SetString(PyExc_OverflowError, |
|
2724 "cannot pop an empty bytes"); |
|
2725 return NULL; |
|
2726 } |
|
2727 if (where < 0) |
|
2728 where += Py_SIZE(self); |
|
2729 if (where < 0 || where >= Py_SIZE(self)) { |
|
2730 PyErr_SetString(PyExc_IndexError, "pop index out of range"); |
|
2731 return NULL; |
|
2732 } |
|
2733 |
|
2734 value = self->ob_bytes[where]; |
|
2735 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where); |
|
2736 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0) |
|
2737 return NULL; |
|
2738 |
|
2739 return PyInt_FromLong(value); |
|
2740 } |
|
2741 |
|
2742 PyDoc_STRVAR(remove__doc__, |
|
2743 "B.remove(int) -> None\n\ |
|
2744 \n\ |
|
2745 Remove the first occurance of a value in B."); |
|
2746 static PyObject * |
|
2747 bytes_remove(PyByteArrayObject *self, PyObject *arg) |
|
2748 { |
|
2749 int value; |
|
2750 Py_ssize_t where, n = Py_SIZE(self); |
|
2751 |
|
2752 if (! _getbytevalue(arg, &value)) |
|
2753 return NULL; |
|
2754 |
|
2755 for (where = 0; where < n; where++) { |
|
2756 if (self->ob_bytes[where] == value) |
|
2757 break; |
|
2758 } |
|
2759 if (where == n) { |
|
2760 PyErr_SetString(PyExc_ValueError, "value not found in bytes"); |
|
2761 return NULL; |
|
2762 } |
|
2763 |
|
2764 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where); |
|
2765 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0) |
|
2766 return NULL; |
|
2767 |
|
2768 Py_RETURN_NONE; |
|
2769 } |
|
2770 |
|
2771 /* XXX These two helpers could be optimized if argsize == 1 */ |
|
2772 |
|
2773 static Py_ssize_t |
|
2774 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize, |
|
2775 void *argptr, Py_ssize_t argsize) |
|
2776 { |
|
2777 Py_ssize_t i = 0; |
|
2778 while (i < mysize && memchr(argptr, myptr[i], argsize)) |
|
2779 i++; |
|
2780 return i; |
|
2781 } |
|
2782 |
|
2783 static Py_ssize_t |
|
2784 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize, |
|
2785 void *argptr, Py_ssize_t argsize) |
|
2786 { |
|
2787 Py_ssize_t i = mysize - 1; |
|
2788 while (i >= 0 && memchr(argptr, myptr[i], argsize)) |
|
2789 i--; |
|
2790 return i + 1; |
|
2791 } |
|
2792 |
|
2793 PyDoc_STRVAR(strip__doc__, |
|
2794 "B.strip([bytes]) -> bytearray\n\ |
|
2795 \n\ |
|
2796 Strip leading and trailing bytes contained in the argument.\n\ |
|
2797 If the argument is omitted, strip ASCII whitespace."); |
|
2798 static PyObject * |
|
2799 bytes_strip(PyByteArrayObject *self, PyObject *args) |
|
2800 { |
|
2801 Py_ssize_t left, right, mysize, argsize; |
|
2802 void *myptr, *argptr; |
|
2803 PyObject *arg = Py_None; |
|
2804 Py_buffer varg; |
|
2805 if (!PyArg_ParseTuple(args, "|O:strip", &arg)) |
|
2806 return NULL; |
|
2807 if (arg == Py_None) { |
|
2808 argptr = "\t\n\r\f\v "; |
|
2809 argsize = 6; |
|
2810 } |
|
2811 else { |
|
2812 if (_getbuffer(arg, &varg) < 0) |
|
2813 return NULL; |
|
2814 argptr = varg.buf; |
|
2815 argsize = varg.len; |
|
2816 } |
|
2817 myptr = self->ob_bytes; |
|
2818 mysize = Py_SIZE(self); |
|
2819 left = lstrip_helper(myptr, mysize, argptr, argsize); |
|
2820 if (left == mysize) |
|
2821 right = left; |
|
2822 else |
|
2823 right = rstrip_helper(myptr, mysize, argptr, argsize); |
|
2824 if (arg != Py_None) |
|
2825 PyBuffer_Release(&varg); |
|
2826 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left); |
|
2827 } |
|
2828 |
|
2829 PyDoc_STRVAR(lstrip__doc__, |
|
2830 "B.lstrip([bytes]) -> bytearray\n\ |
|
2831 \n\ |
|
2832 Strip leading bytes contained in the argument.\n\ |
|
2833 If the argument is omitted, strip leading ASCII whitespace."); |
|
2834 static PyObject * |
|
2835 bytes_lstrip(PyByteArrayObject *self, PyObject *args) |
|
2836 { |
|
2837 Py_ssize_t left, right, mysize, argsize; |
|
2838 void *myptr, *argptr; |
|
2839 PyObject *arg = Py_None; |
|
2840 Py_buffer varg; |
|
2841 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg)) |
|
2842 return NULL; |
|
2843 if (arg == Py_None) { |
|
2844 argptr = "\t\n\r\f\v "; |
|
2845 argsize = 6; |
|
2846 } |
|
2847 else { |
|
2848 if (_getbuffer(arg, &varg) < 0) |
|
2849 return NULL; |
|
2850 argptr = varg.buf; |
|
2851 argsize = varg.len; |
|
2852 } |
|
2853 myptr = self->ob_bytes; |
|
2854 mysize = Py_SIZE(self); |
|
2855 left = lstrip_helper(myptr, mysize, argptr, argsize); |
|
2856 right = mysize; |
|
2857 if (arg != Py_None) |
|
2858 PyBuffer_Release(&varg); |
|
2859 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left); |
|
2860 } |
|
2861 |
|
2862 PyDoc_STRVAR(rstrip__doc__, |
|
2863 "B.rstrip([bytes]) -> bytearray\n\ |
|
2864 \n\ |
|
2865 Strip trailing bytes contained in the argument.\n\ |
|
2866 If the argument is omitted, strip trailing ASCII whitespace."); |
|
2867 static PyObject * |
|
2868 bytes_rstrip(PyByteArrayObject *self, PyObject *args) |
|
2869 { |
|
2870 Py_ssize_t left, right, mysize, argsize; |
|
2871 void *myptr, *argptr; |
|
2872 PyObject *arg = Py_None; |
|
2873 Py_buffer varg; |
|
2874 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg)) |
|
2875 return NULL; |
|
2876 if (arg == Py_None) { |
|
2877 argptr = "\t\n\r\f\v "; |
|
2878 argsize = 6; |
|
2879 } |
|
2880 else { |
|
2881 if (_getbuffer(arg, &varg) < 0) |
|
2882 return NULL; |
|
2883 argptr = varg.buf; |
|
2884 argsize = varg.len; |
|
2885 } |
|
2886 myptr = self->ob_bytes; |
|
2887 mysize = Py_SIZE(self); |
|
2888 left = 0; |
|
2889 right = rstrip_helper(myptr, mysize, argptr, argsize); |
|
2890 if (arg != Py_None) |
|
2891 PyBuffer_Release(&varg); |
|
2892 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left); |
|
2893 } |
|
2894 |
|
2895 PyDoc_STRVAR(decode_doc, |
|
2896 "B.decode([encoding[, errors]]) -> unicode object.\n\ |
|
2897 \n\ |
|
2898 Decodes B using the codec registered for encoding. encoding defaults\n\ |
|
2899 to the default encoding. errors may be given to set a different error\n\ |
|
2900 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ |
|
2901 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ |
|
2902 as well as any other name registered with codecs.register_error that is\n\ |
|
2903 able to handle UnicodeDecodeErrors."); |
|
2904 |
|
2905 static PyObject * |
|
2906 bytes_decode(PyObject *self, PyObject *args) |
|
2907 { |
|
2908 const char *encoding = NULL; |
|
2909 const char *errors = NULL; |
|
2910 |
|
2911 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) |
|
2912 return NULL; |
|
2913 if (encoding == NULL) |
|
2914 encoding = PyUnicode_GetDefaultEncoding(); |
|
2915 return PyCodec_Decode(self, encoding, errors); |
|
2916 } |
|
2917 |
|
2918 PyDoc_STRVAR(alloc_doc, |
|
2919 "B.__alloc__() -> int\n\ |
|
2920 \n\ |
|
2921 Returns the number of bytes actually allocated."); |
|
2922 |
|
2923 static PyObject * |
|
2924 bytes_alloc(PyByteArrayObject *self) |
|
2925 { |
|
2926 return PyInt_FromSsize_t(self->ob_alloc); |
|
2927 } |
|
2928 |
|
2929 PyDoc_STRVAR(join_doc, |
|
2930 "B.join(iterable_of_bytes) -> bytes\n\ |
|
2931 \n\ |
|
2932 Concatenates any number of bytearray objects, with B in between each pair."); |
|
2933 |
|
2934 static PyObject * |
|
2935 bytes_join(PyByteArrayObject *self, PyObject *it) |
|
2936 { |
|
2937 PyObject *seq; |
|
2938 Py_ssize_t mysize = Py_SIZE(self); |
|
2939 Py_ssize_t i; |
|
2940 Py_ssize_t n; |
|
2941 PyObject **items; |
|
2942 Py_ssize_t totalsize = 0; |
|
2943 PyObject *result; |
|
2944 char *dest; |
|
2945 |
|
2946 seq = PySequence_Fast(it, "can only join an iterable"); |
|
2947 if (seq == NULL) |
|
2948 return NULL; |
|
2949 n = PySequence_Fast_GET_SIZE(seq); |
|
2950 items = PySequence_Fast_ITEMS(seq); |
|
2951 |
|
2952 /* Compute the total size, and check that they are all bytes */ |
|
2953 /* XXX Shouldn't we use _getbuffer() on these items instead? */ |
|
2954 for (i = 0; i < n; i++) { |
|
2955 PyObject *obj = items[i]; |
|
2956 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) { |
|
2957 PyErr_Format(PyExc_TypeError, |
|
2958 "can only join an iterable of bytes " |
|
2959 "(item %ld has type '%.100s')", |
|
2960 /* XXX %ld isn't right on Win64 */ |
|
2961 (long)i, Py_TYPE(obj)->tp_name); |
|
2962 goto error; |
|
2963 } |
|
2964 if (i > 0) |
|
2965 totalsize += mysize; |
|
2966 totalsize += Py_SIZE(obj); |
|
2967 if (totalsize < 0) { |
|
2968 PyErr_NoMemory(); |
|
2969 goto error; |
|
2970 } |
|
2971 } |
|
2972 |
|
2973 /* Allocate the result, and copy the bytes */ |
|
2974 result = PyByteArray_FromStringAndSize(NULL, totalsize); |
|
2975 if (result == NULL) |
|
2976 goto error; |
|
2977 dest = PyByteArray_AS_STRING(result); |
|
2978 for (i = 0; i < n; i++) { |
|
2979 PyObject *obj = items[i]; |
|
2980 Py_ssize_t size = Py_SIZE(obj); |
|
2981 char *buf; |
|
2982 if (PyByteArray_Check(obj)) |
|
2983 buf = PyByteArray_AS_STRING(obj); |
|
2984 else |
|
2985 buf = PyBytes_AS_STRING(obj); |
|
2986 if (i) { |
|
2987 memcpy(dest, self->ob_bytes, mysize); |
|
2988 dest += mysize; |
|
2989 } |
|
2990 memcpy(dest, buf, size); |
|
2991 dest += size; |
|
2992 } |
|
2993 |
|
2994 /* Done */ |
|
2995 Py_DECREF(seq); |
|
2996 return result; |
|
2997 |
|
2998 /* Error handling */ |
|
2999 error: |
|
3000 Py_DECREF(seq); |
|
3001 return NULL; |
|
3002 } |
|
3003 |
|
3004 PyDoc_STRVAR(fromhex_doc, |
|
3005 "bytearray.fromhex(string) -> bytearray\n\ |
|
3006 \n\ |
|
3007 Create a bytearray object from a string of hexadecimal numbers.\n\ |
|
3008 Spaces between two numbers are accepted.\n\ |
|
3009 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef')."); |
|
3010 |
|
3011 static int |
|
3012 hex_digit_to_int(Py_UNICODE c) |
|
3013 { |
|
3014 if (c >= 128) |
|
3015 return -1; |
|
3016 if (ISDIGIT(c)) |
|
3017 return c - '0'; |
|
3018 else { |
|
3019 if (ISUPPER(c)) |
|
3020 c = TOLOWER(c); |
|
3021 if (c >= 'a' && c <= 'f') |
|
3022 return c - 'a' + 10; |
|
3023 } |
|
3024 return -1; |
|
3025 } |
|
3026 |
|
3027 static PyObject * |
|
3028 bytes_fromhex(PyObject *cls, PyObject *args) |
|
3029 { |
|
3030 PyObject *newbytes, *hexobj; |
|
3031 char *buf; |
|
3032 Py_UNICODE *hex; |
|
3033 Py_ssize_t hexlen, byteslen, i, j; |
|
3034 int top, bot; |
|
3035 |
|
3036 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj)) |
|
3037 return NULL; |
|
3038 assert(PyUnicode_Check(hexobj)); |
|
3039 hexlen = PyUnicode_GET_SIZE(hexobj); |
|
3040 hex = PyUnicode_AS_UNICODE(hexobj); |
|
3041 byteslen = hexlen/2; /* This overestimates if there are spaces */ |
|
3042 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen); |
|
3043 if (!newbytes) |
|
3044 return NULL; |
|
3045 buf = PyByteArray_AS_STRING(newbytes); |
|
3046 for (i = j = 0; i < hexlen; i += 2) { |
|
3047 /* skip over spaces in the input */ |
|
3048 while (hex[i] == ' ') |
|
3049 i++; |
|
3050 if (i >= hexlen) |
|
3051 break; |
|
3052 top = hex_digit_to_int(hex[i]); |
|
3053 bot = hex_digit_to_int(hex[i+1]); |
|
3054 if (top == -1 || bot == -1) { |
|
3055 PyErr_Format(PyExc_ValueError, |
|
3056 "non-hexadecimal number found in " |
|
3057 "fromhex() arg at position %zd", i); |
|
3058 goto error; |
|
3059 } |
|
3060 buf[j++] = (top << 4) + bot; |
|
3061 } |
|
3062 if (PyByteArray_Resize(newbytes, j) < 0) |
|
3063 goto error; |
|
3064 return newbytes; |
|
3065 |
|
3066 error: |
|
3067 Py_DECREF(newbytes); |
|
3068 return NULL; |
|
3069 } |
|
3070 |
|
3071 PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); |
|
3072 |
|
3073 static PyObject * |
|
3074 bytes_reduce(PyByteArrayObject *self) |
|
3075 { |
|
3076 PyObject *latin1, *dict; |
|
3077 if (self->ob_bytes) |
|
3078 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes, |
|
3079 Py_SIZE(self), NULL); |
|
3080 else |
|
3081 latin1 = PyUnicode_FromString(""); |
|
3082 |
|
3083 dict = PyObject_GetAttrString((PyObject *)self, "__dict__"); |
|
3084 if (dict == NULL) { |
|
3085 PyErr_Clear(); |
|
3086 dict = Py_None; |
|
3087 Py_INCREF(dict); |
|
3088 } |
|
3089 |
|
3090 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict); |
|
3091 } |
|
3092 |
|
3093 PyDoc_STRVAR(sizeof_doc, |
|
3094 "B.__sizeof__() -> int\n\ |
|
3095 \n\ |
|
3096 Returns the size of B in memory, in bytes"); |
|
3097 static PyObject * |
|
3098 bytes_sizeof(PyByteArrayObject *self) |
|
3099 { |
|
3100 Py_ssize_t res; |
|
3101 |
|
3102 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char); |
|
3103 return PyInt_FromSsize_t(res); |
|
3104 } |
|
3105 |
|
3106 static PySequenceMethods bytes_as_sequence = { |
|
3107 (lenfunc)bytes_length, /* sq_length */ |
|
3108 (binaryfunc)PyByteArray_Concat, /* sq_concat */ |
|
3109 (ssizeargfunc)bytes_repeat, /* sq_repeat */ |
|
3110 (ssizeargfunc)bytes_getitem, /* sq_item */ |
|
3111 0, /* sq_slice */ |
|
3112 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */ |
|
3113 0, /* sq_ass_slice */ |
|
3114 (objobjproc)bytes_contains, /* sq_contains */ |
|
3115 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */ |
|
3116 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */ |
|
3117 }; |
|
3118 |
|
3119 static PyMappingMethods bytes_as_mapping = { |
|
3120 (lenfunc)bytes_length, |
|
3121 (binaryfunc)bytes_subscript, |
|
3122 (objobjargproc)bytes_ass_subscript, |
|
3123 }; |
|
3124 |
|
3125 static PyBufferProcs bytes_as_buffer = { |
|
3126 (readbufferproc)bytes_buffer_getreadbuf, |
|
3127 (writebufferproc)bytes_buffer_getwritebuf, |
|
3128 (segcountproc)bytes_buffer_getsegcount, |
|
3129 (charbufferproc)bytes_buffer_getcharbuf, |
|
3130 (getbufferproc)bytes_getbuffer, |
|
3131 (releasebufferproc)bytes_releasebuffer, |
|
3132 }; |
|
3133 |
|
3134 static PyMethodDef |
|
3135 bytes_methods[] = { |
|
3136 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc}, |
|
3137 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc}, |
|
3138 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc}, |
|
3139 {"append", (PyCFunction)bytes_append, METH_O, append__doc__}, |
|
3140 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, |
|
3141 _Py_capitalize__doc__}, |
|
3142 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, |
|
3143 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__}, |
|
3144 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc}, |
|
3145 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, |
|
3146 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, |
|
3147 expandtabs__doc__}, |
|
3148 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__}, |
|
3149 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, |
|
3150 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, |
|
3151 fromhex_doc}, |
|
3152 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__}, |
|
3153 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__}, |
|
3154 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, |
|
3155 _Py_isalnum__doc__}, |
|
3156 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, |
|
3157 _Py_isalpha__doc__}, |
|
3158 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS, |
|
3159 _Py_isdigit__doc__}, |
|
3160 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS, |
|
3161 _Py_islower__doc__}, |
|
3162 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS, |
|
3163 _Py_isspace__doc__}, |
|
3164 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS, |
|
3165 _Py_istitle__doc__}, |
|
3166 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, |
|
3167 _Py_isupper__doc__}, |
|
3168 {"join", (PyCFunction)bytes_join, METH_O, join_doc}, |
|
3169 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, |
|
3170 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, |
|
3171 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__}, |
|
3172 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__}, |
|
3173 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__}, |
|
3174 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__}, |
|
3175 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__}, |
|
3176 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__}, |
|
3177 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__}, |
|
3178 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__}, |
|
3179 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, |
|
3180 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__}, |
|
3181 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__}, |
|
3182 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, |
|
3183 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__}, |
|
3184 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, |
|
3185 splitlines__doc__}, |
|
3186 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS , |
|
3187 startswith__doc__}, |
|
3188 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__}, |
|
3189 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, |
|
3190 _Py_swapcase__doc__}, |
|
3191 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, |
|
3192 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, |
|
3193 translate__doc__}, |
|
3194 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, |
|
3195 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, |
|
3196 {NULL} |
|
3197 }; |
|
3198 |
|
3199 PyDoc_STRVAR(bytes_doc, |
|
3200 "bytearray(iterable_of_ints) -> bytearray.\n\ |
|
3201 bytearray(string, encoding[, errors]) -> bytearray.\n\ |
|
3202 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\ |
|
3203 bytearray(memory_view) -> bytearray.\n\ |
|
3204 \n\ |
|
3205 Construct an mutable bytearray object from:\n\ |
|
3206 - an iterable yielding integers in range(256)\n\ |
|
3207 - a text string encoded using the specified encoding\n\ |
|
3208 - a bytes or a bytearray object\n\ |
|
3209 - any object implementing the buffer API.\n\ |
|
3210 \n\ |
|
3211 bytearray(int) -> bytearray.\n\ |
|
3212 \n\ |
|
3213 Construct a zero-initialized bytearray of the given length."); |
|
3214 |
|
3215 |
|
3216 static PyObject *bytes_iter(PyObject *seq); |
|
3217 |
|
3218 PyTypeObject PyByteArray_Type = { |
|
3219 PyVarObject_HEAD_INIT(&PyType_Type, 0) |
|
3220 "bytearray", |
|
3221 sizeof(PyByteArrayObject), |
|
3222 0, |
|
3223 (destructor)bytes_dealloc, /* tp_dealloc */ |
|
3224 0, /* tp_print */ |
|
3225 0, /* tp_getattr */ |
|
3226 0, /* tp_setattr */ |
|
3227 0, /* tp_compare */ |
|
3228 (reprfunc)bytes_repr, /* tp_repr */ |
|
3229 0, /* tp_as_number */ |
|
3230 &bytes_as_sequence, /* tp_as_sequence */ |
|
3231 &bytes_as_mapping, /* tp_as_mapping */ |
|
3232 0, /* tp_hash */ |
|
3233 0, /* tp_call */ |
|
3234 bytes_str, /* tp_str */ |
|
3235 PyObject_GenericGetAttr, /* tp_getattro */ |
|
3236 0, /* tp_setattro */ |
|
3237 &bytes_as_buffer, /* tp_as_buffer */ |
|
3238 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | |
|
3239 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */ |
|
3240 bytes_doc, /* tp_doc */ |
|
3241 0, /* tp_traverse */ |
|
3242 0, /* tp_clear */ |
|
3243 (richcmpfunc)bytes_richcompare, /* tp_richcompare */ |
|
3244 0, /* tp_weaklistoffset */ |
|
3245 bytes_iter, /* tp_iter */ |
|
3246 0, /* tp_iternext */ |
|
3247 bytes_methods, /* tp_methods */ |
|
3248 0, /* tp_members */ |
|
3249 0, /* tp_getset */ |
|
3250 0, /* tp_base */ |
|
3251 0, /* tp_dict */ |
|
3252 0, /* tp_descr_get */ |
|
3253 0, /* tp_descr_set */ |
|
3254 0, /* tp_dictoffset */ |
|
3255 (initproc)bytes_init, /* tp_init */ |
|
3256 PyType_GenericAlloc, /* tp_alloc */ |
|
3257 PyType_GenericNew, /* tp_new */ |
|
3258 PyObject_Del, /* tp_free */ |
|
3259 }; |
|
3260 |
|
3261 /*********************** Bytes Iterator ****************************/ |
|
3262 |
|
3263 typedef struct { |
|
3264 PyObject_HEAD |
|
3265 Py_ssize_t it_index; |
|
3266 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */ |
|
3267 } bytesiterobject; |
|
3268 |
|
3269 static void |
|
3270 bytesiter_dealloc(bytesiterobject *it) |
|
3271 { |
|
3272 _PyObject_GC_UNTRACK(it); |
|
3273 Py_XDECREF(it->it_seq); |
|
3274 PyObject_GC_Del(it); |
|
3275 } |
|
3276 |
|
3277 static int |
|
3278 bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg) |
|
3279 { |
|
3280 Py_VISIT(it->it_seq); |
|
3281 return 0; |
|
3282 } |
|
3283 |
|
3284 static PyObject * |
|
3285 bytesiter_next(bytesiterobject *it) |
|
3286 { |
|
3287 PyByteArrayObject *seq; |
|
3288 PyObject *item; |
|
3289 |
|
3290 assert(it != NULL); |
|
3291 seq = it->it_seq; |
|
3292 if (seq == NULL) |
|
3293 return NULL; |
|
3294 assert(PyByteArray_Check(seq)); |
|
3295 |
|
3296 if (it->it_index < PyByteArray_GET_SIZE(seq)) { |
|
3297 item = PyInt_FromLong( |
|
3298 (unsigned char)seq->ob_bytes[it->it_index]); |
|
3299 if (item != NULL) |
|
3300 ++it->it_index; |
|
3301 return item; |
|
3302 } |
|
3303 |
|
3304 Py_DECREF(seq); |
|
3305 it->it_seq = NULL; |
|
3306 return NULL; |
|
3307 } |
|
3308 |
|
3309 static PyObject * |
|
3310 bytesiter_length_hint(bytesiterobject *it) |
|
3311 { |
|
3312 Py_ssize_t len = 0; |
|
3313 if (it->it_seq) |
|
3314 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index; |
|
3315 return PyInt_FromSsize_t(len); |
|
3316 } |
|
3317 |
|
3318 PyDoc_STRVAR(length_hint_doc, |
|
3319 "Private method returning an estimate of len(list(it))."); |
|
3320 |
|
3321 static PyMethodDef bytesiter_methods[] = { |
|
3322 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS, |
|
3323 length_hint_doc}, |
|
3324 {NULL, NULL} /* sentinel */ |
|
3325 }; |
|
3326 |
|
3327 PyTypeObject PyByteArrayIter_Type = { |
|
3328 PyVarObject_HEAD_INIT(&PyType_Type, 0) |
|
3329 "bytearray_iterator", /* tp_name */ |
|
3330 sizeof(bytesiterobject), /* tp_basicsize */ |
|
3331 0, /* tp_itemsize */ |
|
3332 /* methods */ |
|
3333 (destructor)bytesiter_dealloc, /* tp_dealloc */ |
|
3334 0, /* tp_print */ |
|
3335 0, /* tp_getattr */ |
|
3336 0, /* tp_setattr */ |
|
3337 0, /* tp_compare */ |
|
3338 0, /* tp_repr */ |
|
3339 0, /* tp_as_number */ |
|
3340 0, /* tp_as_sequence */ |
|
3341 0, /* tp_as_mapping */ |
|
3342 0, /* tp_hash */ |
|
3343 0, /* tp_call */ |
|
3344 0, /* tp_str */ |
|
3345 PyObject_GenericGetAttr, /* tp_getattro */ |
|
3346 0, /* tp_setattro */ |
|
3347 0, /* tp_as_buffer */ |
|
3348 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ |
|
3349 0, /* tp_doc */ |
|
3350 (traverseproc)bytesiter_traverse, /* tp_traverse */ |
|
3351 0, /* tp_clear */ |
|
3352 0, /* tp_richcompare */ |
|
3353 0, /* tp_weaklistoffset */ |
|
3354 PyObject_SelfIter, /* tp_iter */ |
|
3355 (iternextfunc)bytesiter_next, /* tp_iternext */ |
|
3356 bytesiter_methods, /* tp_methods */ |
|
3357 0, |
|
3358 }; |
|
3359 |
|
3360 static PyObject * |
|
3361 bytes_iter(PyObject *seq) |
|
3362 { |
|
3363 bytesiterobject *it; |
|
3364 |
|
3365 if (!PyByteArray_Check(seq)) { |
|
3366 PyErr_BadInternalCall(); |
|
3367 return NULL; |
|
3368 } |
|
3369 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type); |
|
3370 if (it == NULL) |
|
3371 return NULL; |
|
3372 it->it_index = 0; |
|
3373 Py_INCREF(seq); |
|
3374 it->it_seq = (PyByteArrayObject *)seq; |
|
3375 _PyObject_GC_TRACK(it); |
|
3376 return (PyObject *)it; |
|
3377 } |