|
1 /* |
|
2 |
|
3 python-bz2 - python bz2 library interface |
|
4 |
|
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com> |
|
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved |
|
7 |
|
8 */ |
|
9 |
|
10 #include "Python.h" |
|
11 #include <stdio.h> |
|
12 #include <bzlib.h> |
|
13 #include "structmember.h" |
|
14 |
|
15 #ifdef WITH_THREAD |
|
16 #include "pythread.h" |
|
17 #endif |
|
18 |
|
19 static char __author__[] = |
|
20 "The bz2 python module was written by:\n\ |
|
21 \n\ |
|
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\ |
|
23 "; |
|
24 |
|
/*
 * Py_off_t: the offset type used for positions in the uncompressed stream.
 * Without large-file support it is plain off_t; with it, the first of
 * off_t / fpos_t that is at least 8 bytes wide is chosen.
 * (Selection logic copied from Objects/fileobject.c.)
 */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
|
36 |
|
/* Character buffer of the Python string object v.  The argument is
   parenthesized so that any pointer-valued expression (not only a plain
   identifier) is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode */
#define MODE_CLOSED   0   /* closed: I/O methods raise ValueError */
#define MODE_READ     1   /* open for reading */
#define MODE_READ_EOF 2   /* reading, and BZ_STREAM_END has been reached */
#define MODE_WRITE    3   /* open for writing */

/* True iff v is exactly a BZ2File instance (exact type comparison). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
|
45 |
|
46 |
|
/*
 * Compatibility layer between bzlib flavours.
 *
 * When the header defines BZ_CONFIG_ERROR the BZ2_-prefixed API is used
 * directly and the output counter is split into two 32-bit halves;
 * otherwise the BZ2_ names are mapped onto the unprefixed functions and
 * the counter is the single total_out field.
 *
 * BZS_TOTAL_OUT(bzs) yields the total number of bytes produced by the
 * stream that `bzs` points to.  The argument and the whole expansion are
 * parenthesized so the macro is safe with arbitrary pointer expressions
 * (e.g. `&stream`) and inside larger expressions.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low half can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
|
78 |
|
79 |
|
#ifdef WITH_THREAD
/*
 * Take / drop the per-object lock.
 *
 * Holders of obj->lock release the interpreter lock while doing bzlib I/O
 * (Py_BEGIN_ALLOW_THREADS).  A second thread that blocked on obj->lock
 * while still holding the interpreter lock would therefore deadlock with
 * the holder, which needs the interpreter lock back before it can release
 * obj->lock.  So: try a non-blocking acquire first and, only if that
 * fails, give up the interpreter lock for the duration of the blocking
 * acquire.  Must be used as a statement, with the interpreter lock held.
 */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Single-threaded build: no locking required. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
|
87 |
|
/* Bit flags OR-ed together in f_newlinetypes */
#define NEWLINE_UNKNOWN 0x0     /* no newline encountered so far */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a lone \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
|
93 |
|
94 /* ===================================================================== */ |
|
95 /* Structure definitions. */ |
|
96 |
|
97 typedef struct { |
|
98 PyObject_HEAD |
|
99 PyObject *file; |
|
100 |
|
101 char* f_buf; /* Allocated readahead buffer */ |
|
102 char* f_bufend; /* Points after last occupied position */ |
|
103 char* f_bufptr; /* Current buffer position */ |
|
104 |
|
105 int f_softspace; /* Flag used by 'print' command */ |
|
106 |
|
107 int f_univ_newline; /* Handle any newline convention */ |
|
108 int f_newlinetypes; /* Types of newlines seen */ |
|
109 int f_skipnextlf; /* Skip next \n */ |
|
110 |
|
111 BZFILE *fp; |
|
112 int mode; |
|
113 Py_off_t pos; |
|
114 Py_off_t size; |
|
115 #ifdef WITH_THREAD |
|
116 PyThread_type_lock lock; |
|
117 #endif |
|
118 } BZ2FileObject; |
|
119 |
|
120 typedef struct { |
|
121 PyObject_HEAD |
|
122 bz_stream bzs; |
|
123 int running; |
|
124 #ifdef WITH_THREAD |
|
125 PyThread_type_lock lock; |
|
126 #endif |
|
127 } BZ2CompObject; |
|
128 |
|
129 typedef struct { |
|
130 PyObject_HEAD |
|
131 bz_stream bzs; |
|
132 int running; |
|
133 PyObject *unused_data; |
|
134 #ifdef WITH_THREAD |
|
135 PyThread_type_lock lock; |
|
136 #endif |
|
137 } BZ2DecompObject; |
|
138 |
|
139 /* ===================================================================== */ |
|
140 /* Utility functions. */ |
|
141 |
|
142 static int |
|
143 Util_CatchBZ2Error(int bzerror) |
|
144 { |
|
145 int ret = 0; |
|
146 switch(bzerror) { |
|
147 case BZ_OK: |
|
148 case BZ_STREAM_END: |
|
149 break; |
|
150 |
|
151 #ifdef BZ_CONFIG_ERROR |
|
152 case BZ_CONFIG_ERROR: |
|
153 PyErr_SetString(PyExc_SystemError, |
|
154 "the bz2 library was not compiled " |
|
155 "correctly"); |
|
156 ret = 1; |
|
157 break; |
|
158 #endif |
|
159 |
|
160 case BZ_PARAM_ERROR: |
|
161 PyErr_SetString(PyExc_ValueError, |
|
162 "the bz2 library has received wrong " |
|
163 "parameters"); |
|
164 ret = 1; |
|
165 break; |
|
166 |
|
167 case BZ_MEM_ERROR: |
|
168 PyErr_NoMemory(); |
|
169 ret = 1; |
|
170 break; |
|
171 |
|
172 case BZ_DATA_ERROR: |
|
173 case BZ_DATA_ERROR_MAGIC: |
|
174 PyErr_SetString(PyExc_IOError, "invalid data stream"); |
|
175 ret = 1; |
|
176 break; |
|
177 |
|
178 case BZ_IO_ERROR: |
|
179 PyErr_SetString(PyExc_IOError, "unknown IO error"); |
|
180 ret = 1; |
|
181 break; |
|
182 |
|
183 case BZ_UNEXPECTED_EOF: |
|
184 PyErr_SetString(PyExc_EOFError, |
|
185 "compressed file ended before the " |
|
186 "logical end-of-stream was detected"); |
|
187 ret = 1; |
|
188 break; |
|
189 |
|
190 case BZ_SEQUENCE_ERROR: |
|
191 PyErr_SetString(PyExc_RuntimeError, |
|
192 "wrong sequence of bz2 library " |
|
193 "commands used"); |
|
194 ret = 1; |
|
195 break; |
|
196 } |
|
197 return ret; |
|
198 } |
|
199 |
|
/* Smallest buffer increment: the stdio buffer size, but never below 8 KiB. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Threshold above which buffers grow linearly instead of doubling. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/*
 * Adapted from Python's fileobject.c:new_buffersize().
 *
 * Given the current size of a read buffer, return the size it should be
 * grown to:
 *   - up to SMALLCHUNK:            add SMALLCHUNK
 *   - above that, up to BIGCHUNK:  double
 *   - beyond BIGCHUNK:             add BIGCHUNK
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
|
226 |
|
227 /* This is a hacked version of Python's fileobject.c:get_line(). */ |
|
228 static PyObject * |
|
229 Util_GetLine(BZ2FileObject *f, int n) |
|
230 { |
|
231 char c; |
|
232 char *buf, *end; |
|
233 size_t total_v_size; /* total # of slots in buffer */ |
|
234 size_t used_v_size; /* # used slots in buffer */ |
|
235 size_t increment; /* amount to increment the buffer */ |
|
236 PyObject *v; |
|
237 int bzerror; |
|
238 int bytes_read; |
|
239 int newlinetypes = f->f_newlinetypes; |
|
240 int skipnextlf = f->f_skipnextlf; |
|
241 int univ_newline = f->f_univ_newline; |
|
242 |
|
243 total_v_size = n > 0 ? n : 100; |
|
244 v = PyString_FromStringAndSize((char *)NULL, total_v_size); |
|
245 if (v == NULL) |
|
246 return NULL; |
|
247 |
|
248 buf = BUF(v); |
|
249 end = buf + total_v_size; |
|
250 |
|
251 for (;;) { |
|
252 Py_BEGIN_ALLOW_THREADS |
|
253 while (buf != end) { |
|
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); |
|
255 f->pos++; |
|
256 if (bytes_read == 0) break; |
|
257 if (univ_newline) { |
|
258 if (skipnextlf) { |
|
259 skipnextlf = 0; |
|
260 if (c == '\n') { |
|
261 /* Seeing a \n here with skipnextlf true means we |
|
262 * saw a \r before. |
|
263 */ |
|
264 newlinetypes |= NEWLINE_CRLF; |
|
265 if (bzerror != BZ_OK) break; |
|
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); |
|
267 f->pos++; |
|
268 if (bytes_read == 0) break; |
|
269 } else { |
|
270 newlinetypes |= NEWLINE_CR; |
|
271 } |
|
272 } |
|
273 if (c == '\r') { |
|
274 skipnextlf = 1; |
|
275 c = '\n'; |
|
276 } else if (c == '\n') |
|
277 newlinetypes |= NEWLINE_LF; |
|
278 } |
|
279 *buf++ = c; |
|
280 if (bzerror != BZ_OK || c == '\n') break; |
|
281 } |
|
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf) |
|
283 newlinetypes |= NEWLINE_CR; |
|
284 Py_END_ALLOW_THREADS |
|
285 f->f_newlinetypes = newlinetypes; |
|
286 f->f_skipnextlf = skipnextlf; |
|
287 if (bzerror == BZ_STREAM_END) { |
|
288 f->size = f->pos; |
|
289 f->mode = MODE_READ_EOF; |
|
290 break; |
|
291 } else if (bzerror != BZ_OK) { |
|
292 Util_CatchBZ2Error(bzerror); |
|
293 Py_DECREF(v); |
|
294 return NULL; |
|
295 } |
|
296 if (c == '\n') |
|
297 break; |
|
298 /* Must be because buf == end */ |
|
299 if (n > 0) |
|
300 break; |
|
301 used_v_size = total_v_size; |
|
302 increment = total_v_size >> 2; /* mild exponential growth */ |
|
303 total_v_size += increment; |
|
304 if (total_v_size > INT_MAX) { |
|
305 PyErr_SetString(PyExc_OverflowError, |
|
306 "line is longer than a Python string can hold"); |
|
307 Py_DECREF(v); |
|
308 return NULL; |
|
309 } |
|
310 if (_PyString_Resize(&v, total_v_size) < 0) |
|
311 return NULL; |
|
312 buf = BUF(v) + used_v_size; |
|
313 end = BUF(v) + total_v_size; |
|
314 } |
|
315 |
|
316 used_v_size = buf - BUF(v); |
|
317 if (used_v_size != total_v_size) |
|
318 _PyString_Resize(&v, used_v_size); |
|
319 return v; |
|
320 } |
|
321 |
|
322 /* This is a hacked version of Python's |
|
323 * fileobject.c:Py_UniversalNewlineFread(). */ |
|
324 size_t |
|
325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream, |
|
326 char* buf, size_t n, BZ2FileObject *f) |
|
327 { |
|
328 char *dst = buf; |
|
329 int newlinetypes, skipnextlf; |
|
330 |
|
331 assert(buf != NULL); |
|
332 assert(stream != NULL); |
|
333 |
|
334 if (!f->f_univ_newline) |
|
335 return BZ2_bzRead(bzerror, stream, buf, n); |
|
336 |
|
337 newlinetypes = f->f_newlinetypes; |
|
338 skipnextlf = f->f_skipnextlf; |
|
339 |
|
340 /* Invariant: n is the number of bytes remaining to be filled |
|
341 * in the buffer. |
|
342 */ |
|
343 while (n) { |
|
344 size_t nread; |
|
345 int shortread; |
|
346 char *src = dst; |
|
347 |
|
348 nread = BZ2_bzRead(bzerror, stream, dst, n); |
|
349 assert(nread <= n); |
|
350 n -= nread; /* assuming 1 byte out for each in; will adjust */ |
|
351 shortread = n != 0; /* true iff EOF or error */ |
|
352 while (nread--) { |
|
353 char c = *src++; |
|
354 if (c == '\r') { |
|
355 /* Save as LF and set flag to skip next LF. */ |
|
356 *dst++ = '\n'; |
|
357 skipnextlf = 1; |
|
358 } |
|
359 else if (skipnextlf && c == '\n') { |
|
360 /* Skip LF, and remember we saw CR LF. */ |
|
361 skipnextlf = 0; |
|
362 newlinetypes |= NEWLINE_CRLF; |
|
363 ++n; |
|
364 } |
|
365 else { |
|
366 /* Normal char to be stored in buffer. Also |
|
367 * update the newlinetypes flag if either this |
|
368 * is an LF or the previous char was a CR. |
|
369 */ |
|
370 if (c == '\n') |
|
371 newlinetypes |= NEWLINE_LF; |
|
372 else if (skipnextlf) |
|
373 newlinetypes |= NEWLINE_CR; |
|
374 *dst++ = c; |
|
375 skipnextlf = 0; |
|
376 } |
|
377 } |
|
378 if (shortread) { |
|
379 /* If this is EOF, update type flags. */ |
|
380 if (skipnextlf && *bzerror == BZ_STREAM_END) |
|
381 newlinetypes |= NEWLINE_CR; |
|
382 break; |
|
383 } |
|
384 } |
|
385 f->f_newlinetypes = newlinetypes; |
|
386 f->f_skipnextlf = skipnextlf; |
|
387 return dst - buf; |
|
388 } |
|
389 |
|
390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */ |
|
391 static void |
|
392 Util_DropReadAhead(BZ2FileObject *f) |
|
393 { |
|
394 if (f->f_buf != NULL) { |
|
395 PyMem_Free(f->f_buf); |
|
396 f->f_buf = NULL; |
|
397 } |
|
398 } |
|
399 |
|
400 /* This is a hacked version of Python's fileobject.c:readahead(). */ |
|
401 static int |
|
402 Util_ReadAhead(BZ2FileObject *f, int bufsize) |
|
403 { |
|
404 int chunksize; |
|
405 int bzerror; |
|
406 |
|
407 if (f->f_buf != NULL) { |
|
408 if((f->f_bufend - f->f_bufptr) >= 1) |
|
409 return 0; |
|
410 else |
|
411 Util_DropReadAhead(f); |
|
412 } |
|
413 if (f->mode == MODE_READ_EOF) { |
|
414 f->f_bufptr = f->f_buf; |
|
415 f->f_bufend = f->f_buf; |
|
416 return 0; |
|
417 } |
|
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { |
|
419 PyErr_NoMemory(); |
|
420 return -1; |
|
421 } |
|
422 Py_BEGIN_ALLOW_THREADS |
|
423 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, |
|
424 bufsize, f); |
|
425 Py_END_ALLOW_THREADS |
|
426 f->pos += chunksize; |
|
427 if (bzerror == BZ_STREAM_END) { |
|
428 f->size = f->pos; |
|
429 f->mode = MODE_READ_EOF; |
|
430 } else if (bzerror != BZ_OK) { |
|
431 Util_CatchBZ2Error(bzerror); |
|
432 Util_DropReadAhead(f); |
|
433 return -1; |
|
434 } |
|
435 f->f_bufptr = f->f_buf; |
|
436 f->f_bufend = f->f_buf + chunksize; |
|
437 return 0; |
|
438 } |
|
439 |
|
440 /* This is a hacked version of Python's |
|
441 * fileobject.c:readahead_get_line_skip(). */ |
|
442 static PyStringObject * |
|
443 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize) |
|
444 { |
|
445 PyStringObject* s; |
|
446 char *bufptr; |
|
447 char *buf; |
|
448 int len; |
|
449 |
|
450 if (f->f_buf == NULL) |
|
451 if (Util_ReadAhead(f, bufsize) < 0) |
|
452 return NULL; |
|
453 |
|
454 len = f->f_bufend - f->f_bufptr; |
|
455 if (len == 0) |
|
456 return (PyStringObject *) |
|
457 PyString_FromStringAndSize(NULL, skip); |
|
458 bufptr = memchr(f->f_bufptr, '\n', len); |
|
459 if (bufptr != NULL) { |
|
460 bufptr++; /* Count the '\n' */ |
|
461 len = bufptr - f->f_bufptr; |
|
462 s = (PyStringObject *) |
|
463 PyString_FromStringAndSize(NULL, skip+len); |
|
464 if (s == NULL) |
|
465 return NULL; |
|
466 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); |
|
467 f->f_bufptr = bufptr; |
|
468 if (bufptr == f->f_bufend) |
|
469 Util_DropReadAhead(f); |
|
470 } else { |
|
471 bufptr = f->f_bufptr; |
|
472 buf = f->f_buf; |
|
473 f->f_buf = NULL; /* Force new readahead buffer */ |
|
474 s = Util_ReadAheadGetLineSkip(f, skip+len, |
|
475 bufsize + (bufsize>>2)); |
|
476 if (s == NULL) { |
|
477 PyMem_Free(buf); |
|
478 return NULL; |
|
479 } |
|
480 memcpy(PyString_AS_STRING(s)+skip, bufptr, len); |
|
481 PyMem_Free(buf); |
|
482 } |
|
483 return s; |
|
484 } |
|
485 |
|
486 /* ===================================================================== */ |
|
487 /* Methods of BZ2File. */ |
|
488 |
|
489 PyDoc_STRVAR(BZ2File_read__doc__, |
|
490 "read([size]) -> string\n\ |
|
491 \n\ |
|
492 Read at most size uncompressed bytes, returned as a string. If the size\n\ |
|
493 argument is negative or omitted, read until EOF is reached.\n\ |
|
494 "); |
|
495 |
|
496 /* This is a hacked version of Python's fileobject.c:file_read(). */ |
|
497 static PyObject * |
|
498 BZ2File_read(BZ2FileObject *self, PyObject *args) |
|
499 { |
|
500 long bytesrequested = -1; |
|
501 size_t bytesread, buffersize, chunksize; |
|
502 int bzerror; |
|
503 PyObject *ret = NULL; |
|
504 |
|
505 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) |
|
506 return NULL; |
|
507 |
|
508 ACQUIRE_LOCK(self); |
|
509 switch (self->mode) { |
|
510 case MODE_READ: |
|
511 break; |
|
512 case MODE_READ_EOF: |
|
513 ret = PyString_FromString(""); |
|
514 goto cleanup; |
|
515 case MODE_CLOSED: |
|
516 PyErr_SetString(PyExc_ValueError, |
|
517 "I/O operation on closed file"); |
|
518 goto cleanup; |
|
519 default: |
|
520 PyErr_SetString(PyExc_IOError, |
|
521 "file is not ready for reading"); |
|
522 goto cleanup; |
|
523 } |
|
524 |
|
525 if (bytesrequested < 0) |
|
526 buffersize = Util_NewBufferSize((size_t)0); |
|
527 else |
|
528 buffersize = bytesrequested; |
|
529 if (buffersize > INT_MAX) { |
|
530 PyErr_SetString(PyExc_OverflowError, |
|
531 "requested number of bytes is " |
|
532 "more than a Python string can hold"); |
|
533 goto cleanup; |
|
534 } |
|
535 ret = PyString_FromStringAndSize((char *)NULL, buffersize); |
|
536 if (ret == NULL) |
|
537 goto cleanup; |
|
538 bytesread = 0; |
|
539 |
|
540 for (;;) { |
|
541 Py_BEGIN_ALLOW_THREADS |
|
542 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, |
|
543 BUF(ret)+bytesread, |
|
544 buffersize-bytesread, |
|
545 self); |
|
546 self->pos += chunksize; |
|
547 Py_END_ALLOW_THREADS |
|
548 bytesread += chunksize; |
|
549 if (bzerror == BZ_STREAM_END) { |
|
550 self->size = self->pos; |
|
551 self->mode = MODE_READ_EOF; |
|
552 break; |
|
553 } else if (bzerror != BZ_OK) { |
|
554 Util_CatchBZ2Error(bzerror); |
|
555 Py_DECREF(ret); |
|
556 ret = NULL; |
|
557 goto cleanup; |
|
558 } |
|
559 if (bytesrequested < 0) { |
|
560 buffersize = Util_NewBufferSize(buffersize); |
|
561 if (_PyString_Resize(&ret, buffersize) < 0) |
|
562 goto cleanup; |
|
563 } else { |
|
564 break; |
|
565 } |
|
566 } |
|
567 if (bytesread != buffersize) |
|
568 _PyString_Resize(&ret, bytesread); |
|
569 |
|
570 cleanup: |
|
571 RELEASE_LOCK(self); |
|
572 return ret; |
|
573 } |
|
574 |
|
575 PyDoc_STRVAR(BZ2File_readline__doc__, |
|
576 "readline([size]) -> string\n\ |
|
577 \n\ |
|
578 Return the next line from the file, as a string, retaining newline.\n\ |
|
579 A non-negative size argument will limit the maximum number of bytes to\n\ |
|
580 return (an incomplete line may be returned then). Return an empty\n\ |
|
581 string at EOF.\n\ |
|
582 "); |
|
583 |
|
584 static PyObject * |
|
585 BZ2File_readline(BZ2FileObject *self, PyObject *args) |
|
586 { |
|
587 PyObject *ret = NULL; |
|
588 int sizehint = -1; |
|
589 |
|
590 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint)) |
|
591 return NULL; |
|
592 |
|
593 ACQUIRE_LOCK(self); |
|
594 switch (self->mode) { |
|
595 case MODE_READ: |
|
596 break; |
|
597 case MODE_READ_EOF: |
|
598 ret = PyString_FromString(""); |
|
599 goto cleanup; |
|
600 case MODE_CLOSED: |
|
601 PyErr_SetString(PyExc_ValueError, |
|
602 "I/O operation on closed file"); |
|
603 goto cleanup; |
|
604 default: |
|
605 PyErr_SetString(PyExc_IOError, |
|
606 "file is not ready for reading"); |
|
607 goto cleanup; |
|
608 } |
|
609 |
|
610 if (sizehint == 0) |
|
611 ret = PyString_FromString(""); |
|
612 else |
|
613 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint); |
|
614 |
|
615 cleanup: |
|
616 RELEASE_LOCK(self); |
|
617 return ret; |
|
618 } |
|
619 |
|
620 PyDoc_STRVAR(BZ2File_readlines__doc__, |
|
621 "readlines([size]) -> list\n\ |
|
622 \n\ |
|
623 Call readline() repeatedly and return a list of lines read.\n\ |
|
624 The optional size argument, if given, is an approximate bound on the\n\ |
|
625 total number of bytes in the lines returned.\n\ |
|
626 "); |
|
627 |
|
628 /* This is a hacked version of Python's fileobject.c:file_readlines(). */ |
|
629 static PyObject * |
|
630 BZ2File_readlines(BZ2FileObject *self, PyObject *args) |
|
631 { |
|
632 long sizehint = 0; |
|
633 PyObject *list = NULL; |
|
634 PyObject *line; |
|
635 char small_buffer[SMALLCHUNK]; |
|
636 char *buffer = small_buffer; |
|
637 size_t buffersize = SMALLCHUNK; |
|
638 PyObject *big_buffer = NULL; |
|
639 size_t nfilled = 0; |
|
640 size_t nread; |
|
641 size_t totalread = 0; |
|
642 char *p, *q, *end; |
|
643 int err; |
|
644 int shortread = 0; |
|
645 int bzerror; |
|
646 |
|
647 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) |
|
648 return NULL; |
|
649 |
|
650 ACQUIRE_LOCK(self); |
|
651 switch (self->mode) { |
|
652 case MODE_READ: |
|
653 break; |
|
654 case MODE_READ_EOF: |
|
655 list = PyList_New(0); |
|
656 goto cleanup; |
|
657 case MODE_CLOSED: |
|
658 PyErr_SetString(PyExc_ValueError, |
|
659 "I/O operation on closed file"); |
|
660 goto cleanup; |
|
661 default: |
|
662 PyErr_SetString(PyExc_IOError, |
|
663 "file is not ready for reading"); |
|
664 goto cleanup; |
|
665 } |
|
666 |
|
667 if ((list = PyList_New(0)) == NULL) |
|
668 goto cleanup; |
|
669 |
|
670 for (;;) { |
|
671 Py_BEGIN_ALLOW_THREADS |
|
672 nread = Util_UnivNewlineRead(&bzerror, self->fp, |
|
673 buffer+nfilled, |
|
674 buffersize-nfilled, self); |
|
675 self->pos += nread; |
|
676 Py_END_ALLOW_THREADS |
|
677 if (bzerror == BZ_STREAM_END) { |
|
678 self->size = self->pos; |
|
679 self->mode = MODE_READ_EOF; |
|
680 if (nread == 0) { |
|
681 sizehint = 0; |
|
682 break; |
|
683 } |
|
684 shortread = 1; |
|
685 } else if (bzerror != BZ_OK) { |
|
686 Util_CatchBZ2Error(bzerror); |
|
687 error: |
|
688 Py_DECREF(list); |
|
689 list = NULL; |
|
690 goto cleanup; |
|
691 } |
|
692 totalread += nread; |
|
693 p = memchr(buffer+nfilled, '\n', nread); |
|
694 if (!shortread && p == NULL) { |
|
695 /* Need a larger buffer to fit this line */ |
|
696 nfilled += nread; |
|
697 buffersize *= 2; |
|
698 if (buffersize > INT_MAX) { |
|
699 PyErr_SetString(PyExc_OverflowError, |
|
700 "line is longer than a Python string can hold"); |
|
701 goto error; |
|
702 } |
|
703 if (big_buffer == NULL) { |
|
704 /* Create the big buffer */ |
|
705 big_buffer = PyString_FromStringAndSize( |
|
706 NULL, buffersize); |
|
707 if (big_buffer == NULL) |
|
708 goto error; |
|
709 buffer = PyString_AS_STRING(big_buffer); |
|
710 memcpy(buffer, small_buffer, nfilled); |
|
711 } |
|
712 else { |
|
713 /* Grow the big buffer */ |
|
714 _PyString_Resize(&big_buffer, buffersize); |
|
715 buffer = PyString_AS_STRING(big_buffer); |
|
716 } |
|
717 continue; |
|
718 } |
|
719 end = buffer+nfilled+nread; |
|
720 q = buffer; |
|
721 while (p != NULL) { |
|
722 /* Process complete lines */ |
|
723 p++; |
|
724 line = PyString_FromStringAndSize(q, p-q); |
|
725 if (line == NULL) |
|
726 goto error; |
|
727 err = PyList_Append(list, line); |
|
728 Py_DECREF(line); |
|
729 if (err != 0) |
|
730 goto error; |
|
731 q = p; |
|
732 p = memchr(q, '\n', end-q); |
|
733 } |
|
734 /* Move the remaining incomplete line to the start */ |
|
735 nfilled = end-q; |
|
736 memmove(buffer, q, nfilled); |
|
737 if (sizehint > 0) |
|
738 if (totalread >= (size_t)sizehint) |
|
739 break; |
|
740 if (shortread) { |
|
741 sizehint = 0; |
|
742 break; |
|
743 } |
|
744 } |
|
745 if (nfilled != 0) { |
|
746 /* Partial last line */ |
|
747 line = PyString_FromStringAndSize(buffer, nfilled); |
|
748 if (line == NULL) |
|
749 goto error; |
|
750 if (sizehint > 0) { |
|
751 /* Need to complete the last line */ |
|
752 PyObject *rest = Util_GetLine(self, 0); |
|
753 if (rest == NULL) { |
|
754 Py_DECREF(line); |
|
755 goto error; |
|
756 } |
|
757 PyString_Concat(&line, rest); |
|
758 Py_DECREF(rest); |
|
759 if (line == NULL) |
|
760 goto error; |
|
761 } |
|
762 err = PyList_Append(list, line); |
|
763 Py_DECREF(line); |
|
764 if (err != 0) |
|
765 goto error; |
|
766 } |
|
767 |
|
768 cleanup: |
|
769 RELEASE_LOCK(self); |
|
770 if (big_buffer) { |
|
771 Py_DECREF(big_buffer); |
|
772 } |
|
773 return list; |
|
774 } |
|
775 |
|
776 PyDoc_STRVAR(BZ2File_xreadlines__doc__, |
|
777 "xreadlines() -> self\n\ |
|
778 \n\ |
|
779 For backward compatibility. BZ2File objects now include the performance\n\ |
|
780 optimizations previously implemented in the xreadlines module.\n\ |
|
781 "); |
|
782 |
|
783 PyDoc_STRVAR(BZ2File_write__doc__, |
|
784 "write(data) -> None\n\ |
|
785 \n\ |
|
786 Write the 'data' string to file. Note that due to buffering, close() may\n\ |
|
787 be needed before the file on disk reflects the data written.\n\ |
|
788 "); |
|
789 |
|
790 /* This is a hacked version of Python's fileobject.c:file_write(). */ |
|
791 static PyObject * |
|
792 BZ2File_write(BZ2FileObject *self, PyObject *args) |
|
793 { |
|
794 PyObject *ret = NULL; |
|
795 Py_buffer pbuf; |
|
796 char *buf; |
|
797 int len; |
|
798 int bzerror; |
|
799 |
|
800 if (!PyArg_ParseTuple(args, "s*:write", &pbuf)) |
|
801 return NULL; |
|
802 buf = pbuf.buf; |
|
803 len = pbuf.len; |
|
804 |
|
805 ACQUIRE_LOCK(self); |
|
806 switch (self->mode) { |
|
807 case MODE_WRITE: |
|
808 break; |
|
809 |
|
810 case MODE_CLOSED: |
|
811 PyErr_SetString(PyExc_ValueError, |
|
812 "I/O operation on closed file"); |
|
813 goto cleanup; |
|
814 |
|
815 default: |
|
816 PyErr_SetString(PyExc_IOError, |
|
817 "file is not ready for writing"); |
|
818 goto cleanup; |
|
819 } |
|
820 |
|
821 self->f_softspace = 0; |
|
822 |
|
823 Py_BEGIN_ALLOW_THREADS |
|
824 BZ2_bzWrite (&bzerror, self->fp, buf, len); |
|
825 self->pos += len; |
|
826 Py_END_ALLOW_THREADS |
|
827 |
|
828 if (bzerror != BZ_OK) { |
|
829 Util_CatchBZ2Error(bzerror); |
|
830 goto cleanup; |
|
831 } |
|
832 |
|
833 Py_INCREF(Py_None); |
|
834 ret = Py_None; |
|
835 |
|
836 cleanup: |
|
837 PyBuffer_Release(&pbuf); |
|
838 RELEASE_LOCK(self); |
|
839 return ret; |
|
840 } |
|
841 |
|
842 PyDoc_STRVAR(BZ2File_writelines__doc__, |
|
843 "writelines(sequence_of_strings) -> None\n\ |
|
844 \n\ |
|
845 Write the sequence of strings to the file. Note that newlines are not\n\ |
|
846 added. The sequence can be any iterable object producing strings. This is\n\ |
|
847 equivalent to calling write() for each string.\n\ |
|
848 "); |
|
849 |
|
850 /* This is a hacked version of Python's fileobject.c:file_writelines(). */ |
|
851 static PyObject * |
|
852 BZ2File_writelines(BZ2FileObject *self, PyObject *seq) |
|
853 { |
|
854 #define CHUNKSIZE 1000 |
|
855 PyObject *list = NULL; |
|
856 PyObject *iter = NULL; |
|
857 PyObject *ret = NULL; |
|
858 PyObject *line; |
|
859 int i, j, index, len, islist; |
|
860 int bzerror; |
|
861 |
|
862 ACQUIRE_LOCK(self); |
|
863 switch (self->mode) { |
|
864 case MODE_WRITE: |
|
865 break; |
|
866 |
|
867 case MODE_CLOSED: |
|
868 PyErr_SetString(PyExc_ValueError, |
|
869 "I/O operation on closed file"); |
|
870 goto error; |
|
871 |
|
872 default: |
|
873 PyErr_SetString(PyExc_IOError, |
|
874 "file is not ready for writing"); |
|
875 goto error; |
|
876 } |
|
877 |
|
878 islist = PyList_Check(seq); |
|
879 if (!islist) { |
|
880 iter = PyObject_GetIter(seq); |
|
881 if (iter == NULL) { |
|
882 PyErr_SetString(PyExc_TypeError, |
|
883 "writelines() requires an iterable argument"); |
|
884 goto error; |
|
885 } |
|
886 list = PyList_New(CHUNKSIZE); |
|
887 if (list == NULL) |
|
888 goto error; |
|
889 } |
|
890 |
|
891 /* Strategy: slurp CHUNKSIZE lines into a private list, |
|
892 checking that they are all strings, then write that list |
|
893 without holding the interpreter lock, then come back for more. */ |
|
894 for (index = 0; ; index += CHUNKSIZE) { |
|
895 if (islist) { |
|
896 Py_XDECREF(list); |
|
897 list = PyList_GetSlice(seq, index, index+CHUNKSIZE); |
|
898 if (list == NULL) |
|
899 goto error; |
|
900 j = PyList_GET_SIZE(list); |
|
901 } |
|
902 else { |
|
903 for (j = 0; j < CHUNKSIZE; j++) { |
|
904 line = PyIter_Next(iter); |
|
905 if (line == NULL) { |
|
906 if (PyErr_Occurred()) |
|
907 goto error; |
|
908 break; |
|
909 } |
|
910 PyList_SetItem(list, j, line); |
|
911 } |
|
912 } |
|
913 if (j == 0) |
|
914 break; |
|
915 |
|
916 /* Check that all entries are indeed strings. If not, |
|
917 apply the same rules as for file.write() and |
|
918 convert the rets to strings. This is slow, but |
|
919 seems to be the only way since all conversion APIs |
|
920 could potentially execute Python code. */ |
|
921 for (i = 0; i < j; i++) { |
|
922 PyObject *v = PyList_GET_ITEM(list, i); |
|
923 if (!PyString_Check(v)) { |
|
924 const char *buffer; |
|
925 Py_ssize_t len; |
|
926 if (PyObject_AsCharBuffer(v, &buffer, &len)) { |
|
927 PyErr_SetString(PyExc_TypeError, |
|
928 "writelines() " |
|
929 "argument must be " |
|
930 "a sequence of " |
|
931 "strings"); |
|
932 goto error; |
|
933 } |
|
934 line = PyString_FromStringAndSize(buffer, |
|
935 len); |
|
936 if (line == NULL) |
|
937 goto error; |
|
938 Py_DECREF(v); |
|
939 PyList_SET_ITEM(list, i, line); |
|
940 } |
|
941 } |
|
942 |
|
943 self->f_softspace = 0; |
|
944 |
|
945 /* Since we are releasing the global lock, the |
|
946 following code may *not* execute Python code. */ |
|
947 Py_BEGIN_ALLOW_THREADS |
|
948 for (i = 0; i < j; i++) { |
|
949 line = PyList_GET_ITEM(list, i); |
|
950 len = PyString_GET_SIZE(line); |
|
951 BZ2_bzWrite (&bzerror, self->fp, |
|
952 PyString_AS_STRING(line), len); |
|
953 if (bzerror != BZ_OK) { |
|
954 Py_BLOCK_THREADS |
|
955 Util_CatchBZ2Error(bzerror); |
|
956 goto error; |
|
957 } |
|
958 } |
|
959 Py_END_ALLOW_THREADS |
|
960 |
|
961 if (j < CHUNKSIZE) |
|
962 break; |
|
963 } |
|
964 |
|
965 Py_INCREF(Py_None); |
|
966 ret = Py_None; |
|
967 |
|
968 error: |
|
969 RELEASE_LOCK(self); |
|
970 Py_XDECREF(list); |
|
971 Py_XDECREF(iter); |
|
972 return ret; |
|
973 #undef CHUNKSIZE |
|
974 } |
|
975 |
|
976 PyDoc_STRVAR(BZ2File_seek__doc__, |
|
977 "seek(offset [, whence]) -> None\n\ |
|
978 \n\ |
|
979 Move to new file position. Argument offset is a byte count. Optional\n\ |
|
980 argument whence defaults to 0 (offset from start of file, offset\n\ |
|
981 should be >= 0); other values are 1 (move relative to current position,\n\ |
|
982 positive or negative), and 2 (move relative to end of file, usually\n\ |
|
983 negative, although many platforms allow seeking beyond the end of a file).\n\ |
|
984 \n\ |
|
985 Note that seeking of bz2 files is emulated, and depending on the parameters\n\ |
|
986 the operation may be extremely slow.\n\ |
|
987 "); |
|
988 |
|
989 static PyObject * |
|
990 BZ2File_seek(BZ2FileObject *self, PyObject *args) |
|
991 { |
|
992 int where = 0; |
|
993 PyObject *offobj; |
|
994 Py_off_t offset; |
|
995 char small_buffer[SMALLCHUNK]; |
|
996 char *buffer = small_buffer; |
|
997 size_t buffersize = SMALLCHUNK; |
|
998 Py_off_t bytesread = 0; |
|
999 size_t readsize; |
|
1000 int chunksize; |
|
1001 int bzerror; |
|
1002 PyObject *ret = NULL; |
|
1003 |
|
1004 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) |
|
1005 return NULL; |
|
1006 #if !defined(HAVE_LARGEFILE_SUPPORT) |
|
1007 offset = PyInt_AsLong(offobj); |
|
1008 #else |
|
1009 offset = PyLong_Check(offobj) ? |
|
1010 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj); |
|
1011 #endif |
|
1012 if (PyErr_Occurred()) |
|
1013 return NULL; |
|
1014 |
|
1015 ACQUIRE_LOCK(self); |
|
1016 Util_DropReadAhead(self); |
|
1017 switch (self->mode) { |
|
1018 case MODE_READ: |
|
1019 case MODE_READ_EOF: |
|
1020 break; |
|
1021 |
|
1022 case MODE_CLOSED: |
|
1023 PyErr_SetString(PyExc_ValueError, |
|
1024 "I/O operation on closed file"); |
|
1025 goto cleanup; |
|
1026 |
|
1027 default: |
|
1028 PyErr_SetString(PyExc_IOError, |
|
1029 "seek works only while reading"); |
|
1030 goto cleanup; |
|
1031 } |
|
1032 |
|
1033 if (where == 2) { |
|
1034 if (self->size == -1) { |
|
1035 assert(self->mode != MODE_READ_EOF); |
|
1036 for (;;) { |
|
1037 Py_BEGIN_ALLOW_THREADS |
|
1038 chunksize = Util_UnivNewlineRead( |
|
1039 &bzerror, self->fp, |
|
1040 buffer, buffersize, |
|
1041 self); |
|
1042 self->pos += chunksize; |
|
1043 Py_END_ALLOW_THREADS |
|
1044 |
|
1045 bytesread += chunksize; |
|
1046 if (bzerror == BZ_STREAM_END) { |
|
1047 break; |
|
1048 } else if (bzerror != BZ_OK) { |
|
1049 Util_CatchBZ2Error(bzerror); |
|
1050 goto cleanup; |
|
1051 } |
|
1052 } |
|
1053 self->mode = MODE_READ_EOF; |
|
1054 self->size = self->pos; |
|
1055 bytesread = 0; |
|
1056 } |
|
1057 offset = self->size + offset; |
|
1058 } else if (where == 1) { |
|
1059 offset = self->pos + offset; |
|
1060 } |
|
1061 |
|
1062 /* Before getting here, offset must be the absolute position the file |
|
1063 * pointer should be set to. */ |
|
1064 |
|
1065 if (offset >= self->pos) { |
|
1066 /* we can move forward */ |
|
1067 offset -= self->pos; |
|
1068 } else { |
|
1069 /* we cannot move back, so rewind the stream */ |
|
1070 BZ2_bzReadClose(&bzerror, self->fp); |
|
1071 if (self->fp) { |
|
1072 PyFile_DecUseCount((PyFileObject *)self->file); |
|
1073 self->fp = NULL; |
|
1074 } |
|
1075 if (bzerror != BZ_OK) { |
|
1076 Util_CatchBZ2Error(bzerror); |
|
1077 goto cleanup; |
|
1078 } |
|
1079 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); |
|
1080 if (!ret) |
|
1081 goto cleanup; |
|
1082 Py_DECREF(ret); |
|
1083 ret = NULL; |
|
1084 self->pos = 0; |
|
1085 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file), |
|
1086 0, 0, NULL, 0); |
|
1087 if (self->fp) |
|
1088 PyFile_IncUseCount((PyFileObject *)self->file); |
|
1089 if (bzerror != BZ_OK) { |
|
1090 Util_CatchBZ2Error(bzerror); |
|
1091 goto cleanup; |
|
1092 } |
|
1093 self->mode = MODE_READ; |
|
1094 } |
|
1095 |
|
1096 if (offset <= 0 || self->mode == MODE_READ_EOF) |
|
1097 goto exit; |
|
1098 |
|
1099 /* Before getting here, offset must be set to the number of bytes |
|
1100 * to walk forward. */ |
|
1101 for (;;) { |
|
1102 if (offset-bytesread > buffersize) |
|
1103 readsize = buffersize; |
|
1104 else |
|
1105 /* offset might be wider that readsize, but the result |
|
1106 * of the subtraction is bound by buffersize (see the |
|
1107 * condition above). buffersize is 8192. */ |
|
1108 readsize = (size_t)(offset-bytesread); |
|
1109 Py_BEGIN_ALLOW_THREADS |
|
1110 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, |
|
1111 buffer, readsize, self); |
|
1112 self->pos += chunksize; |
|
1113 Py_END_ALLOW_THREADS |
|
1114 bytesread += chunksize; |
|
1115 if (bzerror == BZ_STREAM_END) { |
|
1116 self->size = self->pos; |
|
1117 self->mode = MODE_READ_EOF; |
|
1118 break; |
|
1119 } else if (bzerror != BZ_OK) { |
|
1120 Util_CatchBZ2Error(bzerror); |
|
1121 goto cleanup; |
|
1122 } |
|
1123 if (bytesread == offset) |
|
1124 break; |
|
1125 } |
|
1126 |
|
1127 exit: |
|
1128 Py_INCREF(Py_None); |
|
1129 ret = Py_None; |
|
1130 |
|
1131 cleanup: |
|
1132 RELEASE_LOCK(self); |
|
1133 return ret; |
|
1134 } |
|
1135 |
|
1136 PyDoc_STRVAR(BZ2File_tell__doc__, |
|
1137 "tell() -> int\n\ |
|
1138 \n\ |
|
1139 Return the current file position, an integer (may be a long integer).\n\ |
|
1140 "); |
|
1141 |
|
1142 static PyObject * |
|
1143 BZ2File_tell(BZ2FileObject *self, PyObject *args) |
|
1144 { |
|
1145 PyObject *ret = NULL; |
|
1146 |
|
1147 if (self->mode == MODE_CLOSED) { |
|
1148 PyErr_SetString(PyExc_ValueError, |
|
1149 "I/O operation on closed file"); |
|
1150 goto cleanup; |
|
1151 } |
|
1152 |
|
1153 #if !defined(HAVE_LARGEFILE_SUPPORT) |
|
1154 ret = PyInt_FromLong(self->pos); |
|
1155 #else |
|
1156 ret = PyLong_FromLongLong(self->pos); |
|
1157 #endif |
|
1158 |
|
1159 cleanup: |
|
1160 return ret; |
|
1161 } |
|
1162 |
|
1163 PyDoc_STRVAR(BZ2File_close__doc__, |
|
1164 "close() -> None or (perhaps) an integer\n\ |
|
1165 \n\ |
|
1166 Close the file. Sets data attribute .closed to true. A closed file\n\ |
|
1167 cannot be used for further I/O operations. close() may be called more\n\ |
|
1168 than once without error.\n\ |
|
1169 "); |
|
1170 |
|
1171 static PyObject * |
|
1172 BZ2File_close(BZ2FileObject *self) |
|
1173 { |
|
1174 PyObject *ret = NULL; |
|
1175 int bzerror = BZ_OK; |
|
1176 |
|
1177 ACQUIRE_LOCK(self); |
|
1178 switch (self->mode) { |
|
1179 case MODE_READ: |
|
1180 case MODE_READ_EOF: |
|
1181 BZ2_bzReadClose(&bzerror, self->fp); |
|
1182 break; |
|
1183 case MODE_WRITE: |
|
1184 BZ2_bzWriteClose(&bzerror, self->fp, |
|
1185 0, NULL, NULL); |
|
1186 break; |
|
1187 } |
|
1188 if (self->fp) { |
|
1189 PyFile_DecUseCount((PyFileObject *)self->file); |
|
1190 self->fp = NULL; |
|
1191 } |
|
1192 self->mode = MODE_CLOSED; |
|
1193 ret = PyObject_CallMethod(self->file, "close", NULL); |
|
1194 if (bzerror != BZ_OK) { |
|
1195 Util_CatchBZ2Error(bzerror); |
|
1196 Py_XDECREF(ret); |
|
1197 ret = NULL; |
|
1198 } |
|
1199 |
|
1200 RELEASE_LOCK(self); |
|
1201 return ret; |
|
1202 } |
|
1203 |
|
1204 static PyObject *BZ2File_getiter(BZ2FileObject *self); |
|
1205 |
|
1206 static PyMethodDef BZ2File_methods[] = { |
|
1207 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__}, |
|
1208 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__}, |
|
1209 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__}, |
|
1210 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__}, |
|
1211 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__}, |
|
1212 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__}, |
|
1213 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__}, |
|
1214 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__}, |
|
1215 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__}, |
|
1216 {NULL, NULL} /* sentinel */ |
|
1217 }; |
|
1218 |
|
1219 |
|
1220 /* ===================================================================== */ |
|
1221 /* Getters and setters of BZ2File. */ |
|
1222 |
|
1223 /* This is a hacked version of Python's fileobject.c:get_newlines(). */ |
|
1224 static PyObject * |
|
1225 BZ2File_get_newlines(BZ2FileObject *self, void *closure) |
|
1226 { |
|
1227 switch (self->f_newlinetypes) { |
|
1228 case NEWLINE_UNKNOWN: |
|
1229 Py_INCREF(Py_None); |
|
1230 return Py_None; |
|
1231 case NEWLINE_CR: |
|
1232 return PyString_FromString("\r"); |
|
1233 case NEWLINE_LF: |
|
1234 return PyString_FromString("\n"); |
|
1235 case NEWLINE_CR|NEWLINE_LF: |
|
1236 return Py_BuildValue("(ss)", "\r", "\n"); |
|
1237 case NEWLINE_CRLF: |
|
1238 return PyString_FromString("\r\n"); |
|
1239 case NEWLINE_CR|NEWLINE_CRLF: |
|
1240 return Py_BuildValue("(ss)", "\r", "\r\n"); |
|
1241 case NEWLINE_LF|NEWLINE_CRLF: |
|
1242 return Py_BuildValue("(ss)", "\n", "\r\n"); |
|
1243 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: |
|
1244 return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); |
|
1245 default: |
|
1246 PyErr_Format(PyExc_SystemError, |
|
1247 "Unknown newlines value 0x%x\n", |
|
1248 self->f_newlinetypes); |
|
1249 return NULL; |
|
1250 } |
|
1251 } |
|
1252 |
|
1253 static PyObject * |
|
1254 BZ2File_get_closed(BZ2FileObject *self, void *closure) |
|
1255 { |
|
1256 return PyInt_FromLong(self->mode == MODE_CLOSED); |
|
1257 } |
|
1258 |
|
1259 static PyObject * |
|
1260 BZ2File_get_mode(BZ2FileObject *self, void *closure) |
|
1261 { |
|
1262 return PyObject_GetAttrString(self->file, "mode"); |
|
1263 } |
|
1264 |
|
1265 static PyObject * |
|
1266 BZ2File_get_name(BZ2FileObject *self, void *closure) |
|
1267 { |
|
1268 return PyObject_GetAttrString(self->file, "name"); |
|
1269 } |
|
1270 |
|
1271 static PyGetSetDef BZ2File_getset[] = { |
|
1272 {"closed", (getter)BZ2File_get_closed, NULL, |
|
1273 "True if the file is closed"}, |
|
1274 {"newlines", (getter)BZ2File_get_newlines, NULL, |
|
1275 "end-of-line convention used in this file"}, |
|
1276 {"mode", (getter)BZ2File_get_mode, NULL, |
|
1277 "file mode ('r', 'w', or 'U')"}, |
|
1278 {"name", (getter)BZ2File_get_name, NULL, |
|
1279 "file name"}, |
|
1280 {NULL} /* Sentinel */ |
|
1281 }; |
|
1282 |
|
1283 |
|
1284 /* ===================================================================== */ |
|
1285 /* Members of BZ2File_Type. */ |
|
1286 |
|
1287 #undef OFF |
|
1288 #define OFF(x) offsetof(BZ2FileObject, x) |
|
1289 |
|
1290 static PyMemberDef BZ2File_members[] = { |
|
1291 {"softspace", T_INT, OFF(f_softspace), 0, |
|
1292 "flag indicating that a space needs to be printed; used by print"}, |
|
1293 {NULL} /* Sentinel */ |
|
1294 }; |
|
1295 |
|
1296 /* ===================================================================== */ |
|
1297 /* Slot definitions for BZ2File_Type. */ |
|
1298 |
|
1299 static int |
|
1300 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) |
|
1301 { |
|
1302 static char *kwlist[] = {"filename", "mode", "buffering", |
|
1303 "compresslevel", 0}; |
|
1304 PyObject *name; |
|
1305 char *mode = "r"; |
|
1306 int buffering = -1; |
|
1307 int compresslevel = 9; |
|
1308 int bzerror; |
|
1309 int mode_char = 0; |
|
1310 |
|
1311 self->size = -1; |
|
1312 |
|
1313 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", |
|
1314 kwlist, &name, &mode, &buffering, |
|
1315 &compresslevel)) |
|
1316 return -1; |
|
1317 |
|
1318 if (compresslevel < 1 || compresslevel > 9) { |
|
1319 PyErr_SetString(PyExc_ValueError, |
|
1320 "compresslevel must be between 1 and 9"); |
|
1321 return -1; |
|
1322 } |
|
1323 |
|
1324 for (;;) { |
|
1325 int error = 0; |
|
1326 switch (*mode) { |
|
1327 case 'r': |
|
1328 case 'w': |
|
1329 if (mode_char) |
|
1330 error = 1; |
|
1331 mode_char = *mode; |
|
1332 break; |
|
1333 |
|
1334 case 'b': |
|
1335 break; |
|
1336 |
|
1337 case 'U': |
|
1338 #ifdef __VMS |
|
1339 self->f_univ_newline = 0; |
|
1340 #else |
|
1341 self->f_univ_newline = 1; |
|
1342 #endif |
|
1343 break; |
|
1344 |
|
1345 default: |
|
1346 error = 1; |
|
1347 break; |
|
1348 } |
|
1349 if (error) { |
|
1350 PyErr_Format(PyExc_ValueError, |
|
1351 "invalid mode char %c", *mode); |
|
1352 return -1; |
|
1353 } |
|
1354 mode++; |
|
1355 if (*mode == '\0') |
|
1356 break; |
|
1357 } |
|
1358 |
|
1359 if (mode_char == 0) { |
|
1360 mode_char = 'r'; |
|
1361 } |
|
1362 |
|
1363 mode = (mode_char == 'r') ? "rb" : "wb"; |
|
1364 |
|
1365 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)", |
|
1366 name, mode, buffering); |
|
1367 if (self->file == NULL) |
|
1368 return -1; |
|
1369 |
|
1370 /* From now on, we have stuff to dealloc, so jump to error label |
|
1371 * instead of returning */ |
|
1372 |
|
1373 #ifdef WITH_THREAD |
|
1374 self->lock = PyThread_allocate_lock(); |
|
1375 if (!self->lock) { |
|
1376 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); |
|
1377 goto error; |
|
1378 } |
|
1379 #endif |
|
1380 |
|
1381 if (mode_char == 'r') |
|
1382 self->fp = BZ2_bzReadOpen(&bzerror, |
|
1383 PyFile_AsFile(self->file), |
|
1384 0, 0, NULL, 0); |
|
1385 else |
|
1386 self->fp = BZ2_bzWriteOpen(&bzerror, |
|
1387 PyFile_AsFile(self->file), |
|
1388 compresslevel, 0, 0); |
|
1389 |
|
1390 if (bzerror != BZ_OK) { |
|
1391 Util_CatchBZ2Error(bzerror); |
|
1392 goto error; |
|
1393 } |
|
1394 PyFile_IncUseCount((PyFileObject *)self->file); |
|
1395 |
|
1396 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE; |
|
1397 |
|
1398 return 0; |
|
1399 |
|
1400 error: |
|
1401 Py_CLEAR(self->file); |
|
1402 #ifdef WITH_THREAD |
|
1403 if (self->lock) { |
|
1404 PyThread_free_lock(self->lock); |
|
1405 self->lock = NULL; |
|
1406 } |
|
1407 #endif |
|
1408 return -1; |
|
1409 } |
|
1410 |
|
1411 static void |
|
1412 BZ2File_dealloc(BZ2FileObject *self) |
|
1413 { |
|
1414 int bzerror; |
|
1415 #ifdef WITH_THREAD |
|
1416 if (self->lock) |
|
1417 PyThread_free_lock(self->lock); |
|
1418 #endif |
|
1419 switch (self->mode) { |
|
1420 case MODE_READ: |
|
1421 case MODE_READ_EOF: |
|
1422 BZ2_bzReadClose(&bzerror, self->fp); |
|
1423 break; |
|
1424 case MODE_WRITE: |
|
1425 BZ2_bzWriteClose(&bzerror, self->fp, |
|
1426 0, NULL, NULL); |
|
1427 break; |
|
1428 } |
|
1429 if (self->fp) { |
|
1430 PyFile_DecUseCount((PyFileObject *)self->file); |
|
1431 self->fp = NULL; |
|
1432 } |
|
1433 Util_DropReadAhead(self); |
|
1434 Py_XDECREF(self->file); |
|
1435 Py_TYPE(self)->tp_free((PyObject *)self); |
|
1436 } |
|
1437 |
|
1438 /* This is a hacked version of Python's fileobject.c:file_getiter(). */ |
|
1439 static PyObject * |
|
1440 BZ2File_getiter(BZ2FileObject *self) |
|
1441 { |
|
1442 if (self->mode == MODE_CLOSED) { |
|
1443 PyErr_SetString(PyExc_ValueError, |
|
1444 "I/O operation on closed file"); |
|
1445 return NULL; |
|
1446 } |
|
1447 Py_INCREF((PyObject*)self); |
|
1448 return (PyObject *)self; |
|
1449 } |
|
1450 |
|
1451 /* This is a hacked version of Python's fileobject.c:file_iternext(). */ |
|
1452 #define READAHEAD_BUFSIZE 8192 |
|
1453 static PyObject * |
|
1454 BZ2File_iternext(BZ2FileObject *self) |
|
1455 { |
|
1456 PyStringObject* ret; |
|
1457 ACQUIRE_LOCK(self); |
|
1458 if (self->mode == MODE_CLOSED) { |
|
1459 RELEASE_LOCK(self); |
|
1460 PyErr_SetString(PyExc_ValueError, |
|
1461 "I/O operation on closed file"); |
|
1462 return NULL; |
|
1463 } |
|
1464 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); |
|
1465 RELEASE_LOCK(self); |
|
1466 if (ret == NULL || PyString_GET_SIZE(ret) == 0) { |
|
1467 Py_XDECREF(ret); |
|
1468 return NULL; |
|
1469 } |
|
1470 return (PyObject *)ret; |
|
1471 } |
|
1472 |
|
1473 /* ===================================================================== */ |
|
1474 /* BZ2File_Type definition. */ |
|
1475 |
|
1476 PyDoc_VAR(BZ2File__doc__) = |
|
1477 PyDoc_STR( |
|
1478 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\ |
|
1479 \n\ |
|
1480 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\ |
|
1481 writing. When opened for writing, the file will be created if it doesn't\n\ |
|
1482 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\ |
|
1483 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\ |
|
1484 is given, must be a number between 1 and 9.\n\ |
|
1485 ") |
|
1486 PyDoc_STR( |
|
1487 "\n\ |
|
1488 Add a 'U' to mode to open the file for input with universal newline\n\ |
|
1489 support. Any line ending in the input file will be seen as a '\\n' in\n\ |
|
1490 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\ |
|
1491 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\ |
|
1492 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\ |
|
1493 newlines are available only when reading.\n\ |
|
1494 ") |
|
1495 ; |
|
1496 |
|
1497 static PyTypeObject BZ2File_Type = { |
|
1498 PyVarObject_HEAD_INIT(NULL, 0) |
|
1499 "bz2.BZ2File", /*tp_name*/ |
|
1500 sizeof(BZ2FileObject), /*tp_basicsize*/ |
|
1501 0, /*tp_itemsize*/ |
|
1502 (destructor)BZ2File_dealloc, /*tp_dealloc*/ |
|
1503 0, /*tp_print*/ |
|
1504 0, /*tp_getattr*/ |
|
1505 0, /*tp_setattr*/ |
|
1506 0, /*tp_compare*/ |
|
1507 0, /*tp_repr*/ |
|
1508 0, /*tp_as_number*/ |
|
1509 0, /*tp_as_sequence*/ |
|
1510 0, /*tp_as_mapping*/ |
|
1511 0, /*tp_hash*/ |
|
1512 0, /*tp_call*/ |
|
1513 0, /*tp_str*/ |
|
1514 PyObject_GenericGetAttr,/*tp_getattro*/ |
|
1515 PyObject_GenericSetAttr,/*tp_setattro*/ |
|
1516 0, /*tp_as_buffer*/ |
|
1517 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
|
1518 BZ2File__doc__, /*tp_doc*/ |
|
1519 0, /*tp_traverse*/ |
|
1520 0, /*tp_clear*/ |
|
1521 0, /*tp_richcompare*/ |
|
1522 0, /*tp_weaklistoffset*/ |
|
1523 (getiterfunc)BZ2File_getiter, /*tp_iter*/ |
|
1524 (iternextfunc)BZ2File_iternext, /*tp_iternext*/ |
|
1525 BZ2File_methods, /*tp_methods*/ |
|
1526 BZ2File_members, /*tp_members*/ |
|
1527 BZ2File_getset, /*tp_getset*/ |
|
1528 0, /*tp_base*/ |
|
1529 0, /*tp_dict*/ |
|
1530 0, /*tp_descr_get*/ |
|
1531 0, /*tp_descr_set*/ |
|
1532 0, /*tp_dictoffset*/ |
|
1533 (initproc)BZ2File_init, /*tp_init*/ |
|
1534 PyType_GenericAlloc, /*tp_alloc*/ |
|
1535 PyType_GenericNew, /*tp_new*/ |
|
1536 _PyObject_Del, /*tp_free*/ |
|
1537 0, /*tp_is_gc*/ |
|
1538 }; |
|
1539 |
|
1540 |
|
1541 /* ===================================================================== */ |
|
1542 /* Methods of BZ2Comp. */ |
|
1543 |
|
1544 PyDoc_STRVAR(BZ2Comp_compress__doc__, |
|
1545 "compress(data) -> string\n\ |
|
1546 \n\ |
|
1547 Provide more data to the compressor object. It will return chunks of\n\ |
|
1548 compressed data whenever possible. When you've finished providing data\n\ |
|
1549 to compress, call the flush() method to finish the compression process,\n\ |
|
1550 and return what is left in the internal buffers.\n\ |
|
1551 "); |
|
1552 |
|
1553 static PyObject * |
|
1554 BZ2Comp_compress(BZ2CompObject *self, PyObject *args) |
|
1555 { |
|
1556 Py_buffer pdata; |
|
1557 char *data; |
|
1558 int datasize; |
|
1559 int bufsize = SMALLCHUNK; |
|
1560 PY_LONG_LONG totalout; |
|
1561 PyObject *ret = NULL; |
|
1562 bz_stream *bzs = &self->bzs; |
|
1563 int bzerror; |
|
1564 |
|
1565 if (!PyArg_ParseTuple(args, "s*:compress", &pdata)) |
|
1566 return NULL; |
|
1567 data = pdata.buf; |
|
1568 datasize = pdata.len; |
|
1569 |
|
1570 if (datasize == 0) { |
|
1571 PyBuffer_Release(&pdata); |
|
1572 return PyString_FromString(""); |
|
1573 } |
|
1574 |
|
1575 ACQUIRE_LOCK(self); |
|
1576 if (!self->running) { |
|
1577 PyErr_SetString(PyExc_ValueError, |
|
1578 "this object was already flushed"); |
|
1579 goto error; |
|
1580 } |
|
1581 |
|
1582 ret = PyString_FromStringAndSize(NULL, bufsize); |
|
1583 if (!ret) |
|
1584 goto error; |
|
1585 |
|
1586 bzs->next_in = data; |
|
1587 bzs->avail_in = datasize; |
|
1588 bzs->next_out = BUF(ret); |
|
1589 bzs->avail_out = bufsize; |
|
1590 |
|
1591 totalout = BZS_TOTAL_OUT(bzs); |
|
1592 |
|
1593 for (;;) { |
|
1594 Py_BEGIN_ALLOW_THREADS |
|
1595 bzerror = BZ2_bzCompress(bzs, BZ_RUN); |
|
1596 Py_END_ALLOW_THREADS |
|
1597 if (bzerror != BZ_RUN_OK) { |
|
1598 Util_CatchBZ2Error(bzerror); |
|
1599 goto error; |
|
1600 } |
|
1601 if (bzs->avail_in == 0) |
|
1602 break; /* no more input data */ |
|
1603 if (bzs->avail_out == 0) { |
|
1604 bufsize = Util_NewBufferSize(bufsize); |
|
1605 if (_PyString_Resize(&ret, bufsize) < 0) { |
|
1606 BZ2_bzCompressEnd(bzs); |
|
1607 goto error; |
|
1608 } |
|
1609 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) |
|
1610 - totalout); |
|
1611 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); |
|
1612 } |
|
1613 } |
|
1614 |
|
1615 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); |
|
1616 |
|
1617 RELEASE_LOCK(self); |
|
1618 PyBuffer_Release(&pdata); |
|
1619 return ret; |
|
1620 |
|
1621 error: |
|
1622 RELEASE_LOCK(self); |
|
1623 PyBuffer_Release(&pdata); |
|
1624 Py_XDECREF(ret); |
|
1625 return NULL; |
|
1626 } |
|
1627 |
|
1628 PyDoc_STRVAR(BZ2Comp_flush__doc__, |
|
1629 "flush() -> string\n\ |
|
1630 \n\ |
|
1631 Finish the compression process and return what is left in internal buffers.\n\ |
|
1632 You must not use the compressor object after calling this method.\n\ |
|
1633 "); |
|
1634 |
|
1635 static PyObject * |
|
1636 BZ2Comp_flush(BZ2CompObject *self) |
|
1637 { |
|
1638 int bufsize = SMALLCHUNK; |
|
1639 PyObject *ret = NULL; |
|
1640 bz_stream *bzs = &self->bzs; |
|
1641 PY_LONG_LONG totalout; |
|
1642 int bzerror; |
|
1643 |
|
1644 ACQUIRE_LOCK(self); |
|
1645 if (!self->running) { |
|
1646 PyErr_SetString(PyExc_ValueError, "object was already " |
|
1647 "flushed"); |
|
1648 goto error; |
|
1649 } |
|
1650 self->running = 0; |
|
1651 |
|
1652 ret = PyString_FromStringAndSize(NULL, bufsize); |
|
1653 if (!ret) |
|
1654 goto error; |
|
1655 |
|
1656 bzs->next_out = BUF(ret); |
|
1657 bzs->avail_out = bufsize; |
|
1658 |
|
1659 totalout = BZS_TOTAL_OUT(bzs); |
|
1660 |
|
1661 for (;;) { |
|
1662 Py_BEGIN_ALLOW_THREADS |
|
1663 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); |
|
1664 Py_END_ALLOW_THREADS |
|
1665 if (bzerror == BZ_STREAM_END) { |
|
1666 break; |
|
1667 } else if (bzerror != BZ_FINISH_OK) { |
|
1668 Util_CatchBZ2Error(bzerror); |
|
1669 goto error; |
|
1670 } |
|
1671 if (bzs->avail_out == 0) { |
|
1672 bufsize = Util_NewBufferSize(bufsize); |
|
1673 if (_PyString_Resize(&ret, bufsize) < 0) |
|
1674 goto error; |
|
1675 bzs->next_out = BUF(ret); |
|
1676 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) |
|
1677 - totalout); |
|
1678 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); |
|
1679 } |
|
1680 } |
|
1681 |
|
1682 if (bzs->avail_out != 0) |
|
1683 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); |
|
1684 |
|
1685 RELEASE_LOCK(self); |
|
1686 return ret; |
|
1687 |
|
1688 error: |
|
1689 RELEASE_LOCK(self); |
|
1690 Py_XDECREF(ret); |
|
1691 return NULL; |
|
1692 } |
|
1693 |
|
1694 static PyMethodDef BZ2Comp_methods[] = { |
|
1695 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS, |
|
1696 BZ2Comp_compress__doc__}, |
|
1697 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS, |
|
1698 BZ2Comp_flush__doc__}, |
|
1699 {NULL, NULL} /* sentinel */ |
|
1700 }; |
|
1701 |
|
1702 |
|
1703 /* ===================================================================== */ |
|
1704 /* Slot definitions for BZ2Comp_Type. */ |
|
1705 |
|
1706 static int |
|
1707 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs) |
|
1708 { |
|
1709 int compresslevel = 9; |
|
1710 int bzerror; |
|
1711 static char *kwlist[] = {"compresslevel", 0}; |
|
1712 |
|
1713 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor", |
|
1714 kwlist, &compresslevel)) |
|
1715 return -1; |
|
1716 |
|
1717 if (compresslevel < 1 || compresslevel > 9) { |
|
1718 PyErr_SetString(PyExc_ValueError, |
|
1719 "compresslevel must be between 1 and 9"); |
|
1720 goto error; |
|
1721 } |
|
1722 |
|
1723 #ifdef WITH_THREAD |
|
1724 self->lock = PyThread_allocate_lock(); |
|
1725 if (!self->lock) { |
|
1726 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); |
|
1727 goto error; |
|
1728 } |
|
1729 #endif |
|
1730 |
|
1731 memset(&self->bzs, 0, sizeof(bz_stream)); |
|
1732 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); |
|
1733 if (bzerror != BZ_OK) { |
|
1734 Util_CatchBZ2Error(bzerror); |
|
1735 goto error; |
|
1736 } |
|
1737 |
|
1738 self->running = 1; |
|
1739 |
|
1740 return 0; |
|
1741 error: |
|
1742 #ifdef WITH_THREAD |
|
1743 if (self->lock) { |
|
1744 PyThread_free_lock(self->lock); |
|
1745 self->lock = NULL; |
|
1746 } |
|
1747 #endif |
|
1748 return -1; |
|
1749 } |
|
1750 |
|
1751 static void |
|
1752 BZ2Comp_dealloc(BZ2CompObject *self) |
|
1753 { |
|
1754 #ifdef WITH_THREAD |
|
1755 if (self->lock) |
|
1756 PyThread_free_lock(self->lock); |
|
1757 #endif |
|
1758 BZ2_bzCompressEnd(&self->bzs); |
|
1759 Py_TYPE(self)->tp_free((PyObject *)self); |
|
1760 } |
|
1761 |
|
1762 |
|
1763 /* ===================================================================== */ |
|
1764 /* BZ2Comp_Type definition. */ |
|
1765 |
|
1766 PyDoc_STRVAR(BZ2Comp__doc__, |
|
1767 "BZ2Compressor([compresslevel=9]) -> compressor object\n\ |
|
1768 \n\ |
|
1769 Create a new compressor object. This object may be used to compress\n\ |
|
1770 data sequentially. If you want to compress data in one shot, use the\n\ |
|
1771 compress() function instead. The compresslevel parameter, if given,\n\ |
|
1772 must be a number between 1 and 9.\n\ |
|
1773 "); |
|
1774 |
|
1775 static PyTypeObject BZ2Comp_Type = { |
|
1776 PyVarObject_HEAD_INIT(NULL, 0) |
|
1777 "bz2.BZ2Compressor", /*tp_name*/ |
|
1778 sizeof(BZ2CompObject), /*tp_basicsize*/ |
|
1779 0, /*tp_itemsize*/ |
|
1780 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/ |
|
1781 0, /*tp_print*/ |
|
1782 0, /*tp_getattr*/ |
|
1783 0, /*tp_setattr*/ |
|
1784 0, /*tp_compare*/ |
|
1785 0, /*tp_repr*/ |
|
1786 0, /*tp_as_number*/ |
|
1787 0, /*tp_as_sequence*/ |
|
1788 0, /*tp_as_mapping*/ |
|
1789 0, /*tp_hash*/ |
|
1790 0, /*tp_call*/ |
|
1791 0, /*tp_str*/ |
|
1792 PyObject_GenericGetAttr,/*tp_getattro*/ |
|
1793 PyObject_GenericSetAttr,/*tp_setattro*/ |
|
1794 0, /*tp_as_buffer*/ |
|
1795 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
|
1796 BZ2Comp__doc__, /*tp_doc*/ |
|
1797 0, /*tp_traverse*/ |
|
1798 0, /*tp_clear*/ |
|
1799 0, /*tp_richcompare*/ |
|
1800 0, /*tp_weaklistoffset*/ |
|
1801 0, /*tp_iter*/ |
|
1802 0, /*tp_iternext*/ |
|
1803 BZ2Comp_methods, /*tp_methods*/ |
|
1804 0, /*tp_members*/ |
|
1805 0, /*tp_getset*/ |
|
1806 0, /*tp_base*/ |
|
1807 0, /*tp_dict*/ |
|
1808 0, /*tp_descr_get*/ |
|
1809 0, /*tp_descr_set*/ |
|
1810 0, /*tp_dictoffset*/ |
|
1811 (initproc)BZ2Comp_init, /*tp_init*/ |
|
1812 PyType_GenericAlloc, /*tp_alloc*/ |
|
1813 PyType_GenericNew, /*tp_new*/ |
|
1814 _PyObject_Del, /*tp_free*/ |
|
1815 0, /*tp_is_gc*/ |
|
1816 }; |
|
1817 |
|
1818 |
|
1819 /* ===================================================================== */ |
|
1820 /* Members of BZ2Decomp. */ |
|
1821 |
|
1822 #undef OFF |
|
1823 #define OFF(x) offsetof(BZ2DecompObject, x) |
|
1824 |
|
1825 static PyMemberDef BZ2Decomp_members[] = { |
|
1826 {"unused_data", T_OBJECT, OFF(unused_data), RO}, |
|
1827 {NULL} /* Sentinel */ |
|
1828 }; |
|
1829 |
|
1830 |
|
1831 /* ===================================================================== */ |
|
1832 /* Methods of BZ2Decomp. */ |
|
1833 |
|
1834 PyDoc_STRVAR(BZ2Decomp_decompress__doc__, |
|
1835 "decompress(data) -> string\n\ |
|
1836 \n\ |
|
1837 Provide more data to the decompressor object. It will return chunks\n\ |
|
1838 of decompressed data whenever possible. If you try to decompress data\n\ |
|
1839 after the end of stream is found, EOFError will be raised. If any data\n\ |
|
1840 was found after the end of stream, it'll be ignored and saved in\n\ |
|
1841 unused_data attribute.\n\ |
|
1842 "); |
|
1843 |
|
1844 static PyObject * |
|
1845 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args) |
|
1846 { |
|
1847 Py_buffer pdata; |
|
1848 char *data; |
|
1849 int datasize; |
|
1850 int bufsize = SMALLCHUNK; |
|
1851 PY_LONG_LONG totalout; |
|
1852 PyObject *ret = NULL; |
|
1853 bz_stream *bzs = &self->bzs; |
|
1854 int bzerror; |
|
1855 |
|
1856 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) |
|
1857 return NULL; |
|
1858 data = pdata.buf; |
|
1859 datasize = pdata.len; |
|
1860 |
|
1861 ACQUIRE_LOCK(self); |
|
1862 if (!self->running) { |
|
1863 PyErr_SetString(PyExc_EOFError, "end of stream was " |
|
1864 "already found"); |
|
1865 goto error; |
|
1866 } |
|
1867 |
|
1868 ret = PyString_FromStringAndSize(NULL, bufsize); |
|
1869 if (!ret) |
|
1870 goto error; |
|
1871 |
|
1872 bzs->next_in = data; |
|
1873 bzs->avail_in = datasize; |
|
1874 bzs->next_out = BUF(ret); |
|
1875 bzs->avail_out = bufsize; |
|
1876 |
|
1877 totalout = BZS_TOTAL_OUT(bzs); |
|
1878 |
|
1879 for (;;) { |
|
1880 Py_BEGIN_ALLOW_THREADS |
|
1881 bzerror = BZ2_bzDecompress(bzs); |
|
1882 Py_END_ALLOW_THREADS |
|
1883 if (bzerror == BZ_STREAM_END) { |
|
1884 if (bzs->avail_in != 0) { |
|
1885 Py_DECREF(self->unused_data); |
|
1886 self->unused_data = |
|
1887 PyString_FromStringAndSize(bzs->next_in, |
|
1888 bzs->avail_in); |
|
1889 } |
|
1890 self->running = 0; |
|
1891 break; |
|
1892 } |
|
1893 if (bzerror != BZ_OK) { |
|
1894 Util_CatchBZ2Error(bzerror); |
|
1895 goto error; |
|
1896 } |
|
1897 if (bzs->avail_in == 0) |
|
1898 break; /* no more input data */ |
|
1899 if (bzs->avail_out == 0) { |
|
1900 bufsize = Util_NewBufferSize(bufsize); |
|
1901 if (_PyString_Resize(&ret, bufsize) < 0) { |
|
1902 BZ2_bzDecompressEnd(bzs); |
|
1903 goto error; |
|
1904 } |
|
1905 bzs->next_out = BUF(ret); |
|
1906 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) |
|
1907 - totalout); |
|
1908 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); |
|
1909 } |
|
1910 } |
|
1911 |
|
1912 if (bzs->avail_out != 0) |
|
1913 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); |
|
1914 |
|
1915 RELEASE_LOCK(self); |
|
1916 PyBuffer_Release(&pdata); |
|
1917 return ret; |
|
1918 |
|
1919 error: |
|
1920 RELEASE_LOCK(self); |
|
1921 PyBuffer_Release(&pdata); |
|
1922 Py_XDECREF(ret); |
|
1923 return NULL; |
|
1924 } |
|
1925 |
|
1926 static PyMethodDef BZ2Decomp_methods[] = { |
|
1927 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__}, |
|
1928 {NULL, NULL} /* sentinel */ |
|
1929 }; |
|
1930 |
|
1931 |
|
1932 /* ===================================================================== */ |
|
1933 /* Slot definitions for BZ2Decomp_Type. */ |
|
1934 |
|
1935 static int |
|
1936 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs) |
|
1937 { |
|
1938 int bzerror; |
|
1939 |
|
1940 if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) |
|
1941 return -1; |
|
1942 |
|
1943 #ifdef WITH_THREAD |
|
1944 self->lock = PyThread_allocate_lock(); |
|
1945 if (!self->lock) { |
|
1946 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); |
|
1947 goto error; |
|
1948 } |
|
1949 #endif |
|
1950 |
|
1951 self->unused_data = PyString_FromString(""); |
|
1952 if (!self->unused_data) |
|
1953 goto error; |
|
1954 |
|
1955 memset(&self->bzs, 0, sizeof(bz_stream)); |
|
1956 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); |
|
1957 if (bzerror != BZ_OK) { |
|
1958 Util_CatchBZ2Error(bzerror); |
|
1959 goto error; |
|
1960 } |
|
1961 |
|
1962 self->running = 1; |
|
1963 |
|
1964 return 0; |
|
1965 |
|
1966 error: |
|
1967 #ifdef WITH_THREAD |
|
1968 if (self->lock) { |
|
1969 PyThread_free_lock(self->lock); |
|
1970 self->lock = NULL; |
|
1971 } |
|
1972 #endif |
|
1973 Py_CLEAR(self->unused_data); |
|
1974 return -1; |
|
1975 } |
|
1976 |
|
1977 static void |
|
1978 BZ2Decomp_dealloc(BZ2DecompObject *self) |
|
1979 { |
|
1980 #ifdef WITH_THREAD |
|
1981 if (self->lock) |
|
1982 PyThread_free_lock(self->lock); |
|
1983 #endif |
|
1984 Py_XDECREF(self->unused_data); |
|
1985 BZ2_bzDecompressEnd(&self->bzs); |
|
1986 Py_TYPE(self)->tp_free((PyObject *)self); |
|
1987 } |
|
1988 |
|
1989 |
|
1990 /* ===================================================================== */ |
|
1991 /* BZ2Decomp_Type definition. */ |
|
1992 |
|
1993 PyDoc_STRVAR(BZ2Decomp__doc__, |
|
1994 "BZ2Decompressor() -> decompressor object\n\ |
|
1995 \n\ |
|
1996 Create a new decompressor object. This object may be used to decompress\n\ |
|
1997 data sequentially. If you want to decompress data in one shot, use the\n\ |
|
1998 decompress() function instead.\n\ |
|
1999 "); |
|
2000 |
|
2001 static PyTypeObject BZ2Decomp_Type = { |
|
2002 PyVarObject_HEAD_INIT(NULL, 0) |
|
2003 "bz2.BZ2Decompressor", /*tp_name*/ |
|
2004 sizeof(BZ2DecompObject), /*tp_basicsize*/ |
|
2005 0, /*tp_itemsize*/ |
|
2006 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/ |
|
2007 0, /*tp_print*/ |
|
2008 0, /*tp_getattr*/ |
|
2009 0, /*tp_setattr*/ |
|
2010 0, /*tp_compare*/ |
|
2011 0, /*tp_repr*/ |
|
2012 0, /*tp_as_number*/ |
|
2013 0, /*tp_as_sequence*/ |
|
2014 0, /*tp_as_mapping*/ |
|
2015 0, /*tp_hash*/ |
|
2016 0, /*tp_call*/ |
|
2017 0, /*tp_str*/ |
|
2018 PyObject_GenericGetAttr,/*tp_getattro*/ |
|
2019 PyObject_GenericSetAttr,/*tp_setattro*/ |
|
2020 0, /*tp_as_buffer*/ |
|
2021 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
|
2022 BZ2Decomp__doc__, /*tp_doc*/ |
|
2023 0, /*tp_traverse*/ |
|
2024 0, /*tp_clear*/ |
|
2025 0, /*tp_richcompare*/ |
|
2026 0, /*tp_weaklistoffset*/ |
|
2027 0, /*tp_iter*/ |
|
2028 0, /*tp_iternext*/ |
|
2029 BZ2Decomp_methods, /*tp_methods*/ |
|
2030 BZ2Decomp_members, /*tp_members*/ |
|
2031 0, /*tp_getset*/ |
|
2032 0, /*tp_base*/ |
|
2033 0, /*tp_dict*/ |
|
2034 0, /*tp_descr_get*/ |
|
2035 0, /*tp_descr_set*/ |
|
2036 0, /*tp_dictoffset*/ |
|
2037 (initproc)BZ2Decomp_init, /*tp_init*/ |
|
2038 PyType_GenericAlloc, /*tp_alloc*/ |
|
2039 PyType_GenericNew, /*tp_new*/ |
|
2040 _PyObject_Del, /*tp_free*/ |
|
2041 0, /*tp_is_gc*/ |
|
2042 }; |
|
2043 |
|
2044 |
|
2045 /* ===================================================================== */ |
|
2046 /* Module functions. */ |
|
2047 |
|
2048 PyDoc_STRVAR(bz2_compress__doc__, |
|
2049 "compress(data [, compresslevel=9]) -> string\n\ |
|
2050 \n\ |
|
2051 Compress data in one shot. If you want to compress data sequentially,\n\ |
|
2052 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\ |
|
2053 given, must be a number between 1 and 9.\n\ |
|
2054 "); |
|
2055 |
|
2056 static PyObject * |
|
2057 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs) |
|
2058 { |
|
2059 int compresslevel=9; |
|
2060 Py_buffer pdata; |
|
2061 char *data; |
|
2062 int datasize; |
|
2063 int bufsize; |
|
2064 PyObject *ret = NULL; |
|
2065 bz_stream _bzs; |
|
2066 bz_stream *bzs = &_bzs; |
|
2067 int bzerror; |
|
2068 static char *kwlist[] = {"data", "compresslevel", 0}; |
|
2069 |
|
2070 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", |
|
2071 kwlist, &pdata, |
|
2072 &compresslevel)) |
|
2073 return NULL; |
|
2074 data = pdata.buf; |
|
2075 datasize = pdata.len; |
|
2076 |
|
2077 if (compresslevel < 1 || compresslevel > 9) { |
|
2078 PyErr_SetString(PyExc_ValueError, |
|
2079 "compresslevel must be between 1 and 9"); |
|
2080 PyBuffer_Release(&pdata); |
|
2081 return NULL; |
|
2082 } |
|
2083 |
|
2084 /* Conforming to bz2 manual, this is large enough to fit compressed |
|
2085 * data in one shot. We will check it later anyway. */ |
|
2086 bufsize = datasize + (datasize/100+1) + 600; |
|
2087 |
|
2088 ret = PyString_FromStringAndSize(NULL, bufsize); |
|
2089 if (!ret) { |
|
2090 PyBuffer_Release(&pdata); |
|
2091 return NULL; |
|
2092 } |
|
2093 |
|
2094 memset(bzs, 0, sizeof(bz_stream)); |
|
2095 |
|
2096 bzs->next_in = data; |
|
2097 bzs->avail_in = datasize; |
|
2098 bzs->next_out = BUF(ret); |
|
2099 bzs->avail_out = bufsize; |
|
2100 |
|
2101 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0); |
|
2102 if (bzerror != BZ_OK) { |
|
2103 Util_CatchBZ2Error(bzerror); |
|
2104 PyBuffer_Release(&pdata); |
|
2105 Py_DECREF(ret); |
|
2106 return NULL; |
|
2107 } |
|
2108 |
|
2109 for (;;) { |
|
2110 Py_BEGIN_ALLOW_THREADS |
|
2111 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); |
|
2112 Py_END_ALLOW_THREADS |
|
2113 if (bzerror == BZ_STREAM_END) { |
|
2114 break; |
|
2115 } else if (bzerror != BZ_FINISH_OK) { |
|
2116 BZ2_bzCompressEnd(bzs); |
|
2117 Util_CatchBZ2Error(bzerror); |
|
2118 PyBuffer_Release(&pdata); |
|
2119 Py_DECREF(ret); |
|
2120 return NULL; |
|
2121 } |
|
2122 if (bzs->avail_out == 0) { |
|
2123 bufsize = Util_NewBufferSize(bufsize); |
|
2124 if (_PyString_Resize(&ret, bufsize) < 0) { |
|
2125 BZ2_bzCompressEnd(bzs); |
|
2126 PyBuffer_Release(&pdata); |
|
2127 Py_DECREF(ret); |
|
2128 return NULL; |
|
2129 } |
|
2130 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); |
|
2131 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); |
|
2132 } |
|
2133 } |
|
2134 |
|
2135 if (bzs->avail_out != 0) |
|
2136 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); |
|
2137 BZ2_bzCompressEnd(bzs); |
|
2138 |
|
2139 PyBuffer_Release(&pdata); |
|
2140 return ret; |
|
2141 } |
|
2142 |
|
2143 PyDoc_STRVAR(bz2_decompress__doc__, |
|
2144 "decompress(data) -> decompressed data\n\ |
|
2145 \n\ |
|
2146 Decompress data in one shot. If you want to decompress data sequentially,\n\ |
|
2147 use an instance of BZ2Decompressor instead.\n\ |
|
2148 "); |
|
2149 |
|
2150 static PyObject * |
|
2151 bz2_decompress(PyObject *self, PyObject *args) |
|
2152 { |
|
2153 Py_buffer pdata; |
|
2154 char *data; |
|
2155 int datasize; |
|
2156 int bufsize = SMALLCHUNK; |
|
2157 PyObject *ret; |
|
2158 bz_stream _bzs; |
|
2159 bz_stream *bzs = &_bzs; |
|
2160 int bzerror; |
|
2161 |
|
2162 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) |
|
2163 return NULL; |
|
2164 data = pdata.buf; |
|
2165 datasize = pdata.len; |
|
2166 |
|
2167 if (datasize == 0) { |
|
2168 PyBuffer_Release(&pdata); |
|
2169 return PyString_FromString(""); |
|
2170 } |
|
2171 |
|
2172 ret = PyString_FromStringAndSize(NULL, bufsize); |
|
2173 if (!ret) { |
|
2174 PyBuffer_Release(&pdata); |
|
2175 return NULL; |
|
2176 } |
|
2177 |
|
2178 memset(bzs, 0, sizeof(bz_stream)); |
|
2179 |
|
2180 bzs->next_in = data; |
|
2181 bzs->avail_in = datasize; |
|
2182 bzs->next_out = BUF(ret); |
|
2183 bzs->avail_out = bufsize; |
|
2184 |
|
2185 bzerror = BZ2_bzDecompressInit(bzs, 0, 0); |
|
2186 if (bzerror != BZ_OK) { |
|
2187 Util_CatchBZ2Error(bzerror); |
|
2188 Py_DECREF(ret); |
|
2189 PyBuffer_Release(&pdata); |
|
2190 return NULL; |
|
2191 } |
|
2192 |
|
2193 for (;;) { |
|
2194 Py_BEGIN_ALLOW_THREADS |
|
2195 bzerror = BZ2_bzDecompress(bzs); |
|
2196 Py_END_ALLOW_THREADS |
|
2197 if (bzerror == BZ_STREAM_END) { |
|
2198 break; |
|
2199 } else if (bzerror != BZ_OK) { |
|
2200 BZ2_bzDecompressEnd(bzs); |
|
2201 Util_CatchBZ2Error(bzerror); |
|
2202 PyBuffer_Release(&pdata); |
|
2203 Py_DECREF(ret); |
|
2204 return NULL; |
|
2205 } |
|
2206 if (bzs->avail_in == 0) { |
|
2207 BZ2_bzDecompressEnd(bzs); |
|
2208 PyErr_SetString(PyExc_ValueError, |
|
2209 "couldn't find end of stream"); |
|
2210 PyBuffer_Release(&pdata); |
|
2211 Py_DECREF(ret); |
|
2212 return NULL; |
|
2213 } |
|
2214 if (bzs->avail_out == 0) { |
|
2215 bufsize = Util_NewBufferSize(bufsize); |
|
2216 if (_PyString_Resize(&ret, bufsize) < 0) { |
|
2217 BZ2_bzDecompressEnd(bzs); |
|
2218 PyBuffer_Release(&pdata); |
|
2219 Py_DECREF(ret); |
|
2220 return NULL; |
|
2221 } |
|
2222 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); |
|
2223 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); |
|
2224 } |
|
2225 } |
|
2226 |
|
2227 if (bzs->avail_out != 0) |
|
2228 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); |
|
2229 BZ2_bzDecompressEnd(bzs); |
|
2230 PyBuffer_Release(&pdata); |
|
2231 |
|
2232 return ret; |
|
2233 } |
|
2234 |
|
2235 static PyMethodDef bz2_methods[] = { |
|
2236 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS, |
|
2237 bz2_compress__doc__}, |
|
2238 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS, |
|
2239 bz2_decompress__doc__}, |
|
2240 {NULL, NULL} /* sentinel */ |
|
2241 }; |
|
2242 |
|
2243 /* ===================================================================== */ |
|
2244 /* Initialization function. */ |
|
2245 |
|
2246 PyDoc_STRVAR(bz2__doc__, |
|
2247 "The python bz2 module provides a comprehensive interface for\n\ |
|
2248 the bz2 compression library. It implements a complete file\n\ |
|
2249 interface, one shot (de)compression functions, and types for\n\ |
|
2250 sequential (de)compression.\n\ |
|
2251 "); |
|
2252 |
|
2253 PyMODINIT_FUNC |
|
2254 initbz2(void) |
|
2255 { |
|
2256 PyObject *m; |
|
2257 |
|
2258 Py_TYPE(&BZ2File_Type) = &PyType_Type; |
|
2259 Py_TYPE(&BZ2Comp_Type) = &PyType_Type; |
|
2260 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type; |
|
2261 |
|
2262 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__); |
|
2263 if (m == NULL) |
|
2264 return; |
|
2265 |
|
2266 PyModule_AddObject(m, "__author__", PyString_FromString(__author__)); |
|
2267 |
|
2268 Py_INCREF(&BZ2File_Type); |
|
2269 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type); |
|
2270 |
|
2271 Py_INCREF(&BZ2Comp_Type); |
|
2272 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type); |
|
2273 |
|
2274 Py_INCREF(&BZ2Decomp_Type); |
|
2275 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type); |
|
2276 } |