|
1 #!/usr/bin/env python |
|
2 # -*- coding: iso-8859-1 -*- |
|
3 #------------------------------------------------------------------- |
|
4 # tarfile.py |
|
5 #------------------------------------------------------------------- |
|
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de> |
|
7 # All rights reserved. |
|
8 # |
|
9 # Permission is hereby granted, free of charge, to any person |
|
10 # obtaining a copy of this software and associated documentation |
|
11 # files (the "Software"), to deal in the Software without |
|
12 # restriction, including without limitation the rights to use, |
|
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
14 # copies of the Software, and to permit persons to whom the |
|
15 # Software is furnished to do so, subject to the following |
|
16 # conditions: |
|
17 # |
|
18 # The above copyright notice and this permission notice shall be |
|
19 # included in all copies or substantial portions of the Software. |
|
20 # |
|
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
28 # OTHER DEALINGS IN THE SOFTWARE. |
|
29 # |
|
30 """Read from and write to tar format archives. |
|
31 """ |
|
32 |
|
33 __version__ = "$Revision: 65514 $" |
|
34 # $Source$ |
|
35 |
|
36 version = "0.9.0" |
|
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)" |
|
38 __date__ = "$Date: 2008-08-04 23:23:07 +0200 (Mo, 04 Aug 2008) $" |
|
39 __cvsid__ = "$Id: tarfile.py 65514 2008-08-04 21:23:07Z brett.cannon $" |
|
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." |
|
41 |
|
42 #--------- |
|
43 # Imports |
|
44 #--------- |
|
45 import sys |
|
46 import os |
|
47 import shutil |
|
48 import stat |
|
49 import errno |
|
50 import time |
|
51 import struct |
|
52 import copy |
|
53 import re |
|
54 import operator |
|
55 |
|
56 if sys.platform == 'mac': |
|
57 # This module needs work for MacOS9, especially in the area of pathname |
|
58 # handling. In many places it is assumed a simple substitution of / by the |
|
59 # local os.path.sep is good enough to convert pathnames, but this does not |
|
60 # work with the mac rooted:path:name versus :nonrooted:path:name syntax |
|
61 raise ImportError, "tarfile does not work for platform==mac" |
|
62 |
|
63 try: |
|
64 import grp, pwd |
|
65 except ImportError: |
|
66 grp = pwd = None |
|
67 |
|
68 # from tarfile import * |
|
69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] |
|
70 |
|
71 #--------------------------------------------------------- |
|
72 # tar constants |
|
73 #--------------------------------------------------------- |
|
74 NUL = "\0" # the null character |
|
75 BLOCKSIZE = 512 # length of processing blocks |
|
76 RECORDSIZE = BLOCKSIZE * 20 # length of records |
|
77 GNU_MAGIC = "ustar \0" # magic gnu tar string |
|
78 POSIX_MAGIC = "ustar\x0000" # magic posix tar string |
|
79 |
|
80 LENGTH_NAME = 100 # maximum length of a filename |
|
81 LENGTH_LINK = 100 # maximum length of a linkname |
|
82 LENGTH_PREFIX = 155 # maximum length of the prefix field |
|
83 |
|
84 REGTYPE = "0" # regular file |
|
85 AREGTYPE = "\0" # regular file |
|
86 LNKTYPE = "1" # link (inside tarfile) |
|
87 SYMTYPE = "2" # symbolic link |
|
88 CHRTYPE = "3" # character special device |
|
89 BLKTYPE = "4" # block special device |
|
90 DIRTYPE = "5" # directory |
|
91 FIFOTYPE = "6" # fifo special device |
|
92 CONTTYPE = "7" # contiguous file |
|
93 |
|
94 GNUTYPE_LONGNAME = "L" # GNU tar longname |
|
95 GNUTYPE_LONGLINK = "K" # GNU tar longlink |
|
96 GNUTYPE_SPARSE = "S" # GNU tar sparse file |
|
97 |
|
98 XHDTYPE = "x" # POSIX.1-2001 extended header |
|
99 XGLTYPE = "g" # POSIX.1-2001 global header |
|
100 SOLARIS_XHDTYPE = "X" # Solaris extended header |
|
101 |
|
102 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format |
|
103 GNU_FORMAT = 1 # GNU tar format |
|
104 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format |
|
105 DEFAULT_FORMAT = GNU_FORMAT |
|
106 |
|
107 #--------------------------------------------------------- |
|
108 # tarfile constants |
|
109 #--------------------------------------------------------- |
|
110 # File types that tarfile supports: |
|
111 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, |
|
112 SYMTYPE, DIRTYPE, FIFOTYPE, |
|
113 CONTTYPE, CHRTYPE, BLKTYPE, |
|
114 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, |
|
115 GNUTYPE_SPARSE) |
|
116 |
|
117 # File types that will be treated as a regular file. |
|
118 REGULAR_TYPES = (REGTYPE, AREGTYPE, |
|
119 CONTTYPE, GNUTYPE_SPARSE) |
|
120 |
|
121 # File types that are part of the GNU tar format. |
|
122 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, |
|
123 GNUTYPE_SPARSE) |
|
124 |
|
125 # Fields from a pax header that override a TarInfo attribute. |
|
126 PAX_FIELDS = ("path", "linkpath", "size", "mtime", |
|
127 "uid", "gid", "uname", "gname") |
|
128 |
|
129 # Fields in a pax header that are numbers, all other fields |
|
130 # are treated as strings. |
|
131 PAX_NUMBER_FIELDS = { |
|
132 "atime": float, |
|
133 "ctime": float, |
|
134 "mtime": float, |
|
135 "uid": int, |
|
136 "gid": int, |
|
137 "size": int |
|
138 } |
|
139 |
|
140 #--------------------------------------------------------- |
|
141 # Bits used in the mode field, values in octal. |
|
142 #--------------------------------------------------------- |
|
143 S_IFLNK = 0120000 # symbolic link |
|
144 S_IFREG = 0100000 # regular file |
|
145 S_IFBLK = 0060000 # block device |
|
146 S_IFDIR = 0040000 # directory |
|
147 S_IFCHR = 0020000 # character device |
|
148 S_IFIFO = 0010000 # fifo |
|
149 |
|
150 TSUID = 04000 # set UID on execution |
|
151 TSGID = 02000 # set GID on execution |
|
152 TSVTX = 01000 # reserved |
|
153 |
|
154 TUREAD = 0400 # read by owner |
|
155 TUWRITE = 0200 # write by owner |
|
156 TUEXEC = 0100 # execute/search by owner |
|
157 TGREAD = 0040 # read by group |
|
158 TGWRITE = 0020 # write by group |
|
159 TGEXEC = 0010 # execute/search by group |
|
160 TOREAD = 0004 # read by other |
|
161 TOWRITE = 0002 # write by other |
|
162 TOEXEC = 0001 # execute/search by other |
|
163 |
|
164 #--------------------------------------------------------- |
|
165 # initialization |
|
166 #--------------------------------------------------------- |
|
167 ENCODING = sys.getfilesystemencoding() |
|
168 if ENCODING is None: |
|
169 ENCODING = sys.getdefaultencoding() |
|
170 |
|
171 #--------------------------------------------------------- |
|
172 # Some useful functions |
|
173 #--------------------------------------------------------- |
|
174 |
|
def stn(s, length):
    """Return s as a fixed-width header field of exactly length characters.

    Input longer than length is truncated; shorter input is padded on
    the right with NUL characters.
    """
    field = s[:length]
    padding = NUL * (length - len(s))
    return field + padding
|
179 |
|
def nts(s):
    """Return the part of a header field that precedes its first NUL.

    A field that contains no NUL character is returned unchanged.
    """
    head, _sep, _rest = s.partition("\0")
    return head
|
188 |
|
189 def nti(s): |
|
190 """Convert a number field to a python number. |
|
191 """ |
|
192 # There are two possible encodings for a number field, see |
|
193 # itn() below. |
|
194 if s[0] != chr(0200): |
|
195 try: |
|
196 n = int(nts(s) or "0", 8) |
|
197 except ValueError: |
|
198 raise HeaderError("invalid header") |
|
199 else: |
|
200 n = 0L |
|
201 for i in xrange(len(s) - 1): |
|
202 n <<= 8 |
|
203 n += ord(s[i + 1]) |
|
204 return n |
|
205 |
|
206 def itn(n, digits=8, format=DEFAULT_FORMAT): |
|
207 """Convert a python number to a number field. |
|
208 """ |
|
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of |
|
210 # octal digits followed by a null-byte, this allows values up to |
|
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than |
|
212 # that if necessary. A leading 0200 byte indicates this particular |
|
213 # encoding, the following digits-1 bytes are a big-endian |
|
214 # representation. This allows values up to (256**(digits-1))-1. |
|
215 if 0 <= n < 8 ** (digits - 1): |
|
216 s = "%0*o" % (digits - 1, n) + NUL |
|
217 else: |
|
218 if format != GNU_FORMAT or n >= 256 ** (digits - 1): |
|
219 raise ValueError("overflow in number field") |
|
220 |
|
221 if n < 0: |
|
222 # XXX We mimic GNU tar's behaviour with negative numbers, |
|
223 # this could raise OverflowError. |
|
224 n = struct.unpack("L", struct.pack("l", n))[0] |
|
225 |
|
226 s = "" |
|
227 for i in xrange(digits - 1): |
|
228 s = chr(n & 0377) + s |
|
229 n >>= 8 |
|
230 s = chr(0200) + s |
|
231 return s |
|
232 |
|
def uts(s, encoding, errors):
    """Encode the unicode object s to a byte string using encoding.

    errors is passed on to s.encode() unless it is the special value
    "utf-8".  That extra handler resembles the -o invalid=UTF-8 option
    of POSIX.1-2001: characters that encoding cannot represent are
    replaced by their UTF-8 representation.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character is not representable: go character by
    # character and fall back to UTF-8 only where it is needed.
    encoded = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        encoded.append(piece)
    return "".join(encoded)
|
252 |
|
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the 512 byte header are summed up, except for the 8
    byte chksum field (offsets 148-155) which counts as if it was
    filled with spaces, i.e. as 8 * 32 = 256.  According to the GNU
    tar sources, some tars (Sun and NeXT) sum the bytes as signed
    chars, which gives a different result as soon as a byte has its
    high bit set.  Therefore both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field between the two summed regions.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
|
265 |
|
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire content is copied with
    shutil.copyfileobj().  Otherwise the data is moved in pieces of
    at most 16 KiB and IOError("end of file reached") is raised as
    soon as src delivers less data than requested.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(nbytes):
        # Move exactly nbytes from src to dst or fail.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise IOError("end of file reached")
        dst.write(chunk)

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)
    while full_chunks > 0:
        transfer(BUFSIZE)
        full_chunks -= 1

    if tail:
        transfer(tail)
    return
|
290 |
|
# Lookup table for filemode(): one inner tuple per character of the
# result string.  Each inner tuple lists (bit mask, character)
# alternatives in priority order, the first mask that is fully set in
# the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"), (S_IFREG, "-"), (S_IFBLK, "b"),
     (S_IFDIR, "d"), (S_IFCHR, "c"), (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"), (TSUID, "S"), (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"), (TSGID, "S"), (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"), (TSVTX, "T"), (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        # "-" is used if none of the masks for this position matches.
        symbol = "-"
        for mask, flag in alternatives:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
|
332 |
|
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
|
337 |
|
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
|
356 |
|
357 #--------------------------- |
|
358 # internal stream interface |
|
359 #--------------------------- |
|
360 class _LowLevelFile: |
|
361 """Low-level file object. Supports reading and writing. |
|
362 It is used instead of a regular file object for streaming |
|
363 access. |
|
364 """ |
|
365 |
|
366 def __init__(self, name, mode): |
|
367 mode = { |
|
368 "r": os.O_RDONLY, |
|
369 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, |
|
370 }[mode] |
|
371 if hasattr(os, "O_BINARY"): |
|
372 mode |= os.O_BINARY |
|
373 self.fd = os.open(name, mode) |
|
374 |
|
375 def close(self): |
|
376 os.close(self.fd) |
|
377 |
|
378 def read(self, size): |
|
379 return os.read(self.fd, size) |
|
380 |
|
381 def write(self, s): |
|
382 os.write(self.fd, s) |
|
383 |
|
384 class _Stream: |
|
385 """Class that serves as an adapter between TarFile and |
|
386 a stream-like object. The stream-like object only |
|
387 needs to have a read() or write() method and is accessed |
|
388 blockwise. Use of gzip or bzip2 compression is possible. |
|
389 A stream-like object could be for example: sys.stdin, |
|
390 sys.stdout, a socket, a tape device etc. |
|
391 |
|
392 _Stream is intended to be used only internally. |
|
393 """ |
|
394 |
|
395 def __init__(self, name, mode, comptype, fileobj, bufsize): |
|
396 """Construct a _Stream object. |
|
397 """ |
|
398 self._extfileobj = True |
|
399 if fileobj is None: |
|
400 fileobj = _LowLevelFile(name, mode) |
|
401 self._extfileobj = False |
|
402 |
|
403 if comptype == '*': |
|
404 # Enable transparent compression detection for the |
|
405 # stream interface |
|
406 fileobj = _StreamProxy(fileobj) |
|
407 comptype = fileobj.getcomptype() |
|
408 |
|
409 self.name = name or "" |
|
410 self.mode = mode |
|
411 self.comptype = comptype |
|
412 self.fileobj = fileobj |
|
413 self.bufsize = bufsize |
|
414 self.buf = "" |
|
415 self.pos = 0L |
|
416 self.closed = False |
|
417 |
|
418 if comptype == "gz": |
|
419 try: |
|
420 import zlib |
|
421 except ImportError: |
|
422 raise CompressionError("zlib module is not available") |
|
423 self.zlib = zlib |
|
424 self.crc = zlib.crc32("") & 0xffffffffL |
|
425 if mode == "r": |
|
426 self._init_read_gz() |
|
427 else: |
|
428 self._init_write_gz() |
|
429 |
|
430 if comptype == "bz2": |
|
431 try: |
|
432 import bz2 |
|
433 except ImportError: |
|
434 raise CompressionError("bz2 module is not available") |
|
435 if mode == "r": |
|
436 self.dbuf = "" |
|
437 self.cmp = bz2.BZ2Decompressor() |
|
438 else: |
|
439 self.cmp = bz2.BZ2Compressor() |
|
440 |
|
441 def __del__(self): |
|
442 if hasattr(self, "closed") and not self.closed: |
|
443 self.close() |
|
444 |
|
445 def _init_write_gz(self): |
|
446 """Initialize for writing with gzip compression. |
|
447 """ |
|
448 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, |
|
449 -self.zlib.MAX_WBITS, |
|
450 self.zlib.DEF_MEM_LEVEL, |
|
451 0) |
|
452 timestamp = struct.pack("<L", long(time.time())) |
|
453 self.__write("\037\213\010\010%s\002\377" % timestamp) |
|
454 if self.name.endswith(".gz"): |
|
455 self.name = self.name[:-3] |
|
456 self.__write(self.name + NUL) |
|
457 |
|
458 def write(self, s): |
|
459 """Write string s to the stream. |
|
460 """ |
|
461 if self.comptype == "gz": |
|
462 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL |
|
463 self.pos += len(s) |
|
464 if self.comptype != "tar": |
|
465 s = self.cmp.compress(s) |
|
466 self.__write(s) |
|
467 |
|
468 def __write(self, s): |
|
469 """Write string s to the stream if a whole new block |
|
470 is ready to be written. |
|
471 """ |
|
472 self.buf += s |
|
473 while len(self.buf) > self.bufsize: |
|
474 self.fileobj.write(self.buf[:self.bufsize]) |
|
475 self.buf = self.buf[self.bufsize:] |
|
476 |
|
477 def close(self): |
|
478 """Close the _Stream object. No operation should be |
|
479 done on it afterwards. |
|
480 """ |
|
481 if self.closed: |
|
482 return |
|
483 |
|
484 if self.mode == "w" and self.comptype != "tar": |
|
485 self.buf += self.cmp.flush() |
|
486 |
|
487 if self.mode == "w" and self.buf: |
|
488 self.fileobj.write(self.buf) |
|
489 self.buf = "" |
|
490 if self.comptype == "gz": |
|
491 # The native zlib crc is an unsigned 32-bit integer, but |
|
492 # the Python wrapper implicitly casts that to a signed C |
|
493 # long. So, on a 32-bit box self.crc may "look negative", |
|
494 # while the same crc on a 64-bit box may "look positive". |
|
495 # To avoid irksome warnings from the `struct` module, force |
|
496 # it to look positive on all boxes. |
|
497 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL)) |
|
498 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) |
|
499 |
|
500 if not self._extfileobj: |
|
501 self.fileobj.close() |
|
502 |
|
503 self.closed = True |
|
504 |
|
505 def _init_read_gz(self): |
|
506 """Initialize for reading a gzip compressed fileobj. |
|
507 """ |
|
508 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) |
|
509 self.dbuf = "" |
|
510 |
|
511 # taken from gzip.GzipFile with some alterations |
|
512 if self.__read(2) != "\037\213": |
|
513 raise ReadError("not a gzip file") |
|
514 if self.__read(1) != "\010": |
|
515 raise CompressionError("unsupported compression method") |
|
516 |
|
517 flag = ord(self.__read(1)) |
|
518 self.__read(6) |
|
519 |
|
520 if flag & 4: |
|
521 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) |
|
522 self.read(xlen) |
|
523 if flag & 8: |
|
524 while True: |
|
525 s = self.__read(1) |
|
526 if not s or s == NUL: |
|
527 break |
|
528 if flag & 16: |
|
529 while True: |
|
530 s = self.__read(1) |
|
531 if not s or s == NUL: |
|
532 break |
|
533 if flag & 2: |
|
534 self.__read(2) |
|
535 |
|
536 def tell(self): |
|
537 """Return the stream's file pointer position. |
|
538 """ |
|
539 return self.pos |
|
540 |
|
541 def seek(self, pos=0): |
|
542 """Set the stream's file pointer to pos. Negative seeking |
|
543 is forbidden. |
|
544 """ |
|
545 if pos - self.pos >= 0: |
|
546 blocks, remainder = divmod(pos - self.pos, self.bufsize) |
|
547 for i in xrange(blocks): |
|
548 self.read(self.bufsize) |
|
549 self.read(remainder) |
|
550 else: |
|
551 raise StreamError("seeking backwards is not allowed") |
|
552 return self.pos |
|
553 |
|
554 def read(self, size=None): |
|
555 """Return the next size number of bytes from the stream. |
|
556 If size is not defined, return all bytes of the stream |
|
557 up to EOF. |
|
558 """ |
|
559 if size is None: |
|
560 t = [] |
|
561 while True: |
|
562 buf = self._read(self.bufsize) |
|
563 if not buf: |
|
564 break |
|
565 t.append(buf) |
|
566 buf = "".join(t) |
|
567 else: |
|
568 buf = self._read(size) |
|
569 self.pos += len(buf) |
|
570 return buf |
|
571 |
|
572 def _read(self, size): |
|
573 """Return size bytes from the stream. |
|
574 """ |
|
575 if self.comptype == "tar": |
|
576 return self.__read(size) |
|
577 |
|
578 c = len(self.dbuf) |
|
579 t = [self.dbuf] |
|
580 while c < size: |
|
581 buf = self.__read(self.bufsize) |
|
582 if not buf: |
|
583 break |
|
584 try: |
|
585 buf = self.cmp.decompress(buf) |
|
586 except IOError: |
|
587 raise ReadError("invalid compressed data") |
|
588 t.append(buf) |
|
589 c += len(buf) |
|
590 t = "".join(t) |
|
591 self.dbuf = t[size:] |
|
592 return t[:size] |
|
593 |
|
594 def __read(self, size): |
|
595 """Return size bytes from stream. If internal buffer is empty, |
|
596 read another block from the stream. |
|
597 """ |
|
598 c = len(self.buf) |
|
599 t = [self.buf] |
|
600 while c < size: |
|
601 buf = self.fileobj.read(self.bufsize) |
|
602 if not buf: |
|
603 break |
|
604 t.append(buf) |
|
605 c += len(buf) |
|
606 t = "".join(t) |
|
607 self.buf = t[size:] |
|
608 return t[:size] |
|
609 # class _Stream |
|
610 |
|
611 class _StreamProxy(object): |
|
612 """Small proxy class that enables transparent compression |
|
613 detection for the Stream interface (mode 'r|*'). |
|
614 """ |
|
615 |
|
616 def __init__(self, fileobj): |
|
617 self.fileobj = fileobj |
|
618 self.buf = self.fileobj.read(BLOCKSIZE) |
|
619 |
|
620 def read(self, size): |
|
621 self.read = self.fileobj.read |
|
622 return self.buf |
|
623 |
|
624 def getcomptype(self): |
|
625 if self.buf.startswith("\037\213\010"): |
|
626 return "gz" |
|
627 if self.buf.startswith("BZh91"): |
|
628 return "bz2" |
|
629 return "tar" |
|
630 |
|
631 def close(self): |
|
632 self.fileobj.close() |
|
633 # class StreamProxy |
|
634 |
|
635 class _BZ2Proxy(object): |
|
636 """Small proxy class that enables external file object |
|
637 support for "r:bz2" and "w:bz2" modes. This is actually |
|
638 a workaround for a limitation in bz2 module's BZ2File |
|
639 class which (unlike gzip.GzipFile) has no support for |
|
640 a file object argument. |
|
641 """ |
|
642 |
|
643 blocksize = 16 * 1024 |
|
644 |
|
645 def __init__(self, fileobj, mode): |
|
646 self.fileobj = fileobj |
|
647 self.mode = mode |
|
648 self.name = getattr(self.fileobj, "name", None) |
|
649 self.init() |
|
650 |
|
651 def init(self): |
|
652 import bz2 |
|
653 self.pos = 0 |
|
654 if self.mode == "r": |
|
655 self.bz2obj = bz2.BZ2Decompressor() |
|
656 self.fileobj.seek(0) |
|
657 self.buf = "" |
|
658 else: |
|
659 self.bz2obj = bz2.BZ2Compressor() |
|
660 |
|
661 def read(self, size): |
|
662 b = [self.buf] |
|
663 x = len(self.buf) |
|
664 while x < size: |
|
665 try: |
|
666 raw = self.fileobj.read(self.blocksize) |
|
667 data = self.bz2obj.decompress(raw) |
|
668 b.append(data) |
|
669 except EOFError: |
|
670 break |
|
671 x += len(data) |
|
672 self.buf = "".join(b) |
|
673 |
|
674 buf = self.buf[:size] |
|
675 self.buf = self.buf[size:] |
|
676 self.pos += len(buf) |
|
677 return buf |
|
678 |
|
679 def seek(self, pos): |
|
680 if pos < self.pos: |
|
681 self.init() |
|
682 self.read(pos - self.pos) |
|
683 |
|
684 def tell(self): |
|
685 return self.pos |
|
686 |
|
687 def write(self, data): |
|
688 self.pos += len(data) |
|
689 raw = self.bz2obj.compress(data) |
|
690 self.fileobj.write(raw) |
|
691 |
|
692 def close(self): |
|
693 if self.mode == "w": |
|
694 raw = self.bz2obj.flush() |
|
695 self.fileobj.write(raw) |
|
696 # class _BZ2Proxy |
|
697 |
|
698 #------------------------ |
|
699 # Extraction file object |
|
700 #------------------------ |
|
701 class _FileInFile(object): |
|
702 """A thin wrapper around an existing file object that |
|
703 provides a part of its data as an individual file |
|
704 object. |
|
705 """ |
|
706 |
|
707 def __init__(self, fileobj, offset, size, sparse=None): |
|
708 self.fileobj = fileobj |
|
709 self.offset = offset |
|
710 self.size = size |
|
711 self.sparse = sparse |
|
712 self.position = 0 |
|
713 |
|
714 def tell(self): |
|
715 """Return the current file position. |
|
716 """ |
|
717 return self.position |
|
718 |
|
719 def seek(self, position): |
|
720 """Seek to a position in the file. |
|
721 """ |
|
722 self.position = position |
|
723 |
|
724 def read(self, size=None): |
|
725 """Read data from the file. |
|
726 """ |
|
727 if size is None: |
|
728 size = self.size - self.position |
|
729 else: |
|
730 size = min(size, self.size - self.position) |
|
731 |
|
732 if self.sparse is None: |
|
733 return self.readnormal(size) |
|
734 else: |
|
735 return self.readsparse(size) |
|
736 |
|
737 def readnormal(self, size): |
|
738 """Read operation for regular files. |
|
739 """ |
|
740 self.fileobj.seek(self.offset + self.position) |
|
741 self.position += size |
|
742 return self.fileobj.read(size) |
|
743 |
|
744 def readsparse(self, size): |
|
745 """Read operation for sparse files. |
|
746 """ |
|
747 data = [] |
|
748 while size > 0: |
|
749 buf = self.readsparsesection(size) |
|
750 if not buf: |
|
751 break |
|
752 size -= len(buf) |
|
753 data.append(buf) |
|
754 return "".join(data) |
|
755 |
|
756 def readsparsesection(self, size): |
|
757 """Read a single section of a sparse file. |
|
758 """ |
|
759 section = self.sparse.find(self.position) |
|
760 |
|
761 if section is None: |
|
762 return "" |
|
763 |
|
764 size = min(size, section.offset + section.size - self.position) |
|
765 |
|
766 if isinstance(section, _data): |
|
767 realpos = section.realpos + self.position - section.offset |
|
768 self.fileobj.seek(self.offset + realpos) |
|
769 self.position += size |
|
770 return self.fileobj.read(size) |
|
771 else: |
|
772 self.position += size |
|
773 return NUL * size |
|
774 #class _FileInFile |
|
775 |
|
776 |
|
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests per read while looking for
    # the end of a line.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up read access to the data of tarinfo inside tarfile."""
        sparse = getattr(tarinfo, "sparse", None)
        self.fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                                   tarinfo.size, sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not returned yet.
        self.buffer = ""

    def _check_closed(self):
        """Raise ValueError if the file object has been closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        self._check_closed()

        # Data left over from readline() is handed out first.
        if size is None:
            data, self.buffer = self.buffer, ""
            data += self.fileobj.read()
        else:
            data = self.buffer[:size]
            self.buffer = self.buffer[size:]
            data += self.fileobj.read(size - len(data))

        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        self._check_closed()

        newline = self.buffer.find("\n")
        if newline < 0:
            # Read ahead until a newline shows up or the data ends.
            chunks = [self.buffer]
            while True:
                chunk = self.fileobj.read(self.blocksize)
                chunks.append(chunk)
                if not chunk or "\n" in chunk:
                    break
            self.buffer = "".join(chunks)
            newline = self.buffer.find("\n")

        if newline >= 0:
            end = newline + 1
        else:
            # no newline found.
            end = len(self.buffer)

        if size != -1:
            end = min(size, end)

        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        return list(self)

    def tell(self):
        """Return the current file position.
        """
        self._check_closed()
        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range of the
        member's data.  Raises ValueError for an unknown whence.
        """
        self._check_closed()

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            moved = self.position + pos
            if pos < 0:
                target = max(moved, 0)
            else:
                target = min(moved, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Read-ahead data belongs to the old position, drop it.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
|
904 |
|
905 #------------------ |
|
906 # Exported Classes |
|
907 #------------------ |
|
908 class TarInfo(object): |
|
909 """Informational class which holds the details about an |
|
910 archive member given by a tar header block. |
|
911 TarInfo objects are returned by TarFile.getmember(), |
|
912 TarFile.getmembers() and TarFile.gettarinfo() and are |
|
913 usually created internally. |
|
914 """ |
|
915 |
|
916 def __init__(self, name=""): |
|
917 """Construct a TarInfo object. name is the optional name |
|
918 of the member. |
|
919 """ |
|
920 self.name = name # member name |
|
921 self.mode = 0644 # file permissions |
|
922 self.uid = 0 # user id |
|
923 self.gid = 0 # group id |
|
924 self.size = 0 # file size |
|
925 self.mtime = 0 # modification time |
|
926 self.chksum = 0 # header checksum |
|
927 self.type = REGTYPE # member type |
|
928 self.linkname = "" # link name |
|
929 self.uname = "root" # user name |
|
930 self.gname = "root" # group name |
|
931 self.devmajor = 0 # device major number |
|
932 self.devminor = 0 # device minor number |
|
933 |
|
934 self.offset = 0 # the tar header starts here |
|
935 self.offset_data = 0 # the file's data starts here |
|
936 |
|
937 self.pax_headers = {} # pax header information |
|
938 |
|
939 # In pax headers the "name" and "linkname" field are called |
|
940 # "path" and "linkpath". |
|
941 def _getpath(self): |
|
942 return self.name |
|
943 def _setpath(self, name): |
|
944 self.name = name |
|
945 path = property(_getpath, _setpath) |
|
946 |
|
947 def _getlinkpath(self): |
|
948 return self.linkname |
|
949 def _setlinkpath(self, linkname): |
|
950 self.linkname = linkname |
|
951 linkpath = property(_getlinkpath, _setlinkpath) |
|
952 |
|
953 def __repr__(self): |
|
954 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) |
|
955 |
|
956 def get_info(self, encoding, errors): |
|
957 """Return the TarInfo's attributes as a dictionary. |
|
958 """ |
|
959 info = { |
|
960 "name": normpath(self.name), |
|
961 "mode": self.mode & 07777, |
|
962 "uid": self.uid, |
|
963 "gid": self.gid, |
|
964 "size": self.size, |
|
965 "mtime": self.mtime, |
|
966 "chksum": self.chksum, |
|
967 "type": self.type, |
|
968 "linkname": normpath(self.linkname) if self.linkname else "", |
|
969 "uname": self.uname, |
|
970 "gname": self.gname, |
|
971 "devmajor": self.devmajor, |
|
972 "devminor": self.devminor |
|
973 } |
|
974 |
|
975 if info["type"] == DIRTYPE and not info["name"].endswith("/"): |
|
976 info["name"] += "/" |
|
977 |
|
978 for key in ("name", "linkname", "uname", "gname"): |
|
979 if type(info[key]) is unicode: |
|
980 info[key] = info[key].encode(encoding, errors) |
|
981 |
|
982 return info |
|
983 |
|
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Serialize this member's header for the requested archive format.

       `format' selects the ustar, GNU or pax header builder; any other
       value raises ValueError. The result is whatever the selected
       create_*_header() method returns.
    """
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding, errors)

    raise ValueError("invalid format")
|
997 |
|
def create_ustar_header(self, info):
    """Build a ustar header block from the `info' dictionary.

       Raises ValueError if the link name exceeds LENGTH_LINK or if a
       long name cannot be split into prefix and name parts.
    """
    info["magic"] = POSIX_MAGIC

    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    name = info["name"]
    if len(name) > LENGTH_NAME:
        # Long names are spread over the prefix and name fields.
        prefix, shortname = self._posix_split_name(name)
        info["prefix"] = prefix
        info["name"] = shortname

    return self._create_header(info, USTAR_FORMAT)
|
1010 |
|
def create_gnu_header(self, info):
    """Build a GNU header sequence from the `info' dictionary.

       Over-long link names and member names are emitted first as
       separate long-link / long-name sequences, followed by the
       regular header block.
    """
    info["magic"] = GNU_MAGIC

    pieces = []
    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        pieces.append(self._create_gnu_long_header(linkname, GNUTYPE_LONGLINK))

    name = info["name"]
    if len(name) > LENGTH_NAME:
        pieces.append(self._create_gnu_long_header(name, GNUTYPE_LONGNAME))

    pieces.append(self._create_header(info, GNU_FORMAT))
    return "".join(pieces)
|
1024 |
|
def create_pax_header(self, info, encoding, errors):
    """Build a ustar header block, preceded by a pax extended header
       sequence when some value does not fit into the plain header.

       Records already present in self.pax_headers take priority over
       the values found in `info'.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # (info key, pax keyword, maximum field length)
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in string_fields:
        if keyword in pax_headers:
            # An explicit pax record wins.
            continue

        text = info[field].decode(encoding, errors)

        # A record is needed when the text is not pure ASCII or when
        # the encoded value is longer than the header field.
        try:
            text.encode("ascii")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(info[field]) > limit

        if needs_record:
            pax_headers[keyword] = text

    # (info key, number of octal digits in the header field)
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in pax_headers:
            # An explicit pax record wins; zero the field to avoid overflow.
            info[field] = 0
        else:
            number = info[field]
            in_range = 0 <= number < 8 ** (digits - 1)
            if not in_range or isinstance(number, float):
                pax_headers[field] = unicode(number)
                info[field] = 0

    # Only emit the extended header when there is something to store.
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers)
    else:
        extended = ""

    return extended + self._create_header(info, USTAR_FORMAT)
|
1075 |
|
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Build a pax global header sequence holding the `pax_headers'
       records, i.e. a generic pax header of type XGLTYPE.
    """
    build = cls._create_pax_generic_header
    return build(pax_headers, type=XGLTYPE)
|
1081 |
|
def _posix_split_name(self, name):
    """Split an over-long name at a "/" into (prefix, name).

       The split point is the last slash within the first
       LENGTH_PREFIX + 1 characters. ValueError is raised when there is
       no usable slash or the remaining name part is still longer than
       LENGTH_NAME.
    """
    head = name[:LENGTH_PREFIX + 1]
    cut = head.rfind("/")

    # cut == -1: no slash at all; cut == 0: the prefix would be empty.
    if cut <= 0:
        raise ValueError("name is too long")

    prefix = head[:cut]
    remainder = name[cut + 1:]
    if len(remainder) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, remainder
|
1096 |
|
1097 @staticmethod |
|
1098 def _create_header(info, format): |
|
1099 """Return a header block. info is a dictionary with file |
|
1100 information, format must be one of the *_FORMAT constants. |
|
1101 """ |
|
1102 parts = [ |
|
1103 stn(info.get("name", ""), 100), |
|
1104 itn(info.get("mode", 0) & 07777, 8, format), |
|
1105 itn(info.get("uid", 0), 8, format), |
|
1106 itn(info.get("gid", 0), 8, format), |
|
1107 itn(info.get("size", 0), 12, format), |
|
1108 itn(info.get("mtime", 0), 12, format), |
|
1109 " ", # checksum field |
|
1110 info.get("type", REGTYPE), |
|
1111 stn(info.get("linkname", ""), 100), |
|
1112 stn(info.get("magic", POSIX_MAGIC), 8), |
|
1113 stn(info.get("uname", "root"), 32), |
|
1114 stn(info.get("gname", "root"), 32), |
|
1115 itn(info.get("devmajor", 0), 8, format), |
|
1116 itn(info.get("devminor", 0), 8, format), |
|
1117 stn(info.get("prefix", ""), 155) |
|
1118 ] |
|
1119 |
|
1120 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) |
|
1121 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] |
|
1122 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] |
|
1123 return buf |
|
1124 |
|
@staticmethod
def _create_payload(payload):
    """Pad `payload' with NUL bytes up to the next multiple of
       BLOCKSIZE and return it; an already aligned payload is
       returned unchanged.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        return payload + NUL * (BLOCKSIZE - overhang)
    return payload
|
1134 |
|
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Build the GNU long-name / long-link sequence for `name': a
       "././@LongLink" header of the given `type' followed by the
       NUL-terminated name, padded to whole blocks.
    """
    payload = name + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(payload),
        "magic": GNU_MAGIC,
    }

    # Extended header block first, then the name blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(payload)
|
1151 |
|
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Build a POSIX.1-2001 extended (default) or global header
       sequence from the keyword/value pairs in `pax_headers'. The
       values must be unicode objects.
    """
    lines = []
    for keyword, value in pax_headers.iteritems():
        key = keyword.encode("utf8")
        val = value.encode("utf8")
        base = len(key) + len(val) + 3   # ' ' + '=' + '\n'

        # The length prefix counts its own digits, so iterate until
        # the total no longer changes.
        total = 0
        candidate = base + len(str(total))
        while candidate != total:
            total = candidate
            candidate = base + len(str(total))

        lines.append("%d %s=%s\n" % (total, key, val))
    records = "".join(lines)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
|
1183 |
|
@classmethod
def frombuf(cls, buf):
    """Create an instance of cls from one header block `buf'.

       Raises HeaderError when the block has the wrong length,
       consists only of NUL bytes, or fails the checksum test.
    """
    if len(buf) != BLOCKSIZE:
        raise HeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise HeaderError("empty header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise HeaderError("bad checksum")

    obj = cls()
    obj.buf = buf

    # (attribute, start, stop, converter), decoded in header order.
    leading = (
        ("name", 0, 100, nts),
        ("mode", 100, 108, nti),
        ("uid", 108, 116, nti),
        ("gid", 116, 124, nti),
        ("size", 124, 136, nti),
        ("mtime", 136, 148, nti),
    )
    for attr, start, stop, convert in leading:
        setattr(obj, attr, convert(buf[start:stop]))

    obj.chksum = chksum
    obj.type = buf[156:157]

    trailing = (
        ("linkname", 157, 257, nts),
        ("uname", 265, 297, nts),
        ("gname", 297, 329, nts),
        ("devmajor", 329, 337, nti),
        ("devminor", 337, 345, nti),
    )
    for attr, start, stop, convert in trailing:
        setattr(obj, attr, convert(buf[start:stop]))

    prefix = nts(buf[345:500])

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE:
        if obj.name.endswith("/"):
            obj.type = DIRTYPE

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
|
1227 |
|
@classmethod
def fromtarfile(cls, tarfile):
    """Read the next header block from tarfile.fileobj and return the
       member it describes, or None when nothing could be read.
    """
    stream = tarfile.fileobj
    block = stream.read(BLOCKSIZE)
    if not block:
        return None

    member = cls.frombuf(block)
    # The header started one block before the current position.
    member.offset = stream.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
|
1239 |
|
1240 #-------------------------------------------------------------------------- |
|
1241 # The following are methods that are called depending on the type of a |
|
1242 # member. The entry point is _proc_member() which can be overridden in a |
|
1243 # subclass to add custom _proc_*() methods. A _proc_*() method MUST |
|
1244 # implement the following |
|
1245 # operations: |
|
1246 # 1. Set self.offset_data to the position where the data blocks begin, |
|
1247 # if there is data that follows. |
|
1248 # 2. Set tarfile.offset to the position where the next member's header will |
|
1249 # begin. |
|
1250 # 3. Return self or another valid TarInfo object. |
|
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that matches self.type and
       return its result.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
|
1263 |
|
def _proc_builtin(self, tarfile):
    """Handle a builtin member type; members of unknown type are
       treated like regular files.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    next_header = data_start
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # The data blocks of this member have to be skipped.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Apply the global pax header information saved on the archive.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
|
1280 |
|
def _proc_gnulong(self, tarfile):
    """Handle a GNU longname or longlink member: read the long string
       from the data blocks and attach it to the member that follows.
    """
    payload = tarfile.fileobj.read(self._block(self.size))

    # The real member is described by the header that comes next.
    member = self.fromtarfile(tarfile)
    if member is None:
        raise HeaderError("missing subsequent header")

    # The combined entry starts where the long header started.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(payload)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(payload)

    return member
|
1301 |
|
1302 def _proc_sparse(self, tarfile): |
|
1303 """Process a GNU sparse header plus extra headers. |
|
1304 """ |
|
1305 buf = self.buf |
|
1306 sp = _ringbuffer() |
|
1307 pos = 386 |
|
1308 lastpos = 0L |
|
1309 realpos = 0L |
|
1310 # There are 4 possible sparse structs in the |
|
1311 # first header. |
|
1312 for i in xrange(4): |
|
1313 try: |
|
1314 offset = nti(buf[pos:pos + 12]) |
|
1315 numbytes = nti(buf[pos + 12:pos + 24]) |
|
1316 except ValueError: |
|
1317 break |
|
1318 if offset > lastpos: |
|
1319 sp.append(_hole(lastpos, offset - lastpos)) |
|
1320 sp.append(_data(offset, numbytes, realpos)) |
|
1321 realpos += numbytes |
|
1322 lastpos = offset + numbytes |
|
1323 pos += 24 |
|
1324 |
|
1325 isextended = ord(buf[482]) |
|
1326 origsize = nti(buf[483:495]) |
|
1327 |
|
1328 # If the isextended flag is given, |
|
1329 # there are extra headers to process. |
|
1330 while isextended == 1: |
|
1331 buf = tarfile.fileobj.read(BLOCKSIZE) |
|
1332 pos = 0 |
|
1333 for i in xrange(21): |
|
1334 try: |
|
1335 offset = nti(buf[pos:pos + 12]) |
|
1336 numbytes = nti(buf[pos + 12:pos + 24]) |
|
1337 except ValueError: |
|
1338 break |
|
1339 if offset > lastpos: |
|
1340 sp.append(_hole(lastpos, offset - lastpos)) |
|
1341 sp.append(_data(offset, numbytes, realpos)) |
|
1342 realpos += numbytes |
|
1343 lastpos = offset + numbytes |
|
1344 pos += 24 |
|
1345 isextended = ord(buf[504]) |
|
1346 |
|
1347 if lastpos < origsize: |
|
1348 sp.append(_hole(lastpos, origsize - lastpos)) |
|
1349 |
|
1350 self.sparse = sp |
|
1351 |
|
1352 self.offset_data = tarfile.fileobj.tell() |
|
1353 tarfile.offset = self.offset_data + self._block(self.size) |
|
1354 self.size = origsize |
|
1355 |
|
1356 return self |
|
1357 |
|
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.

       The records are merged into tarfile.pax_headers (global
       header) or into a private copy of it (extended header) and
       the member that follows is returned. Raises HeaderError if a
       record claims a length of zero or if an extended header is
       not followed by another header.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero-length record would never advance pos and keep
            # this loop spinning forever on a malformed archive.
            raise HeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    member = self.fromtarfile(tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        if member is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object with the extended header info.
        member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                offset += member._block(member.size)
            tarfile.offset = offset

    return member
|
1415 |
|
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this member with the values found in
       `pax_headers' (from an earlier extended or global header) and
       keep a copy of the records in self.pax_headers.
    """
    for keyword, raw in pax_headers.iteritems():
        if keyword in PAX_FIELDS:
            if keyword == "path":
                raw = raw.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                convert = PAX_NUMBER_FIELDS[keyword]
                try:
                    value = convert(raw)
                except ValueError:
                    # Unparsable numbers fall back to zero.
                    value = 0
            else:
                value = uts(raw, encoding, errors)

            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
|
1438 |
|
def _block(self, count):
    """Return `count' rounded up to a whole number of blocks of
       BLOCKSIZE bytes, e.g. _block(834) => 1024.
    """
    full, partial = divmod(count, BLOCKSIZE)
    needed = full + 1 if partial else full
    return needed * BLOCKSIZE
|
1447 |
|
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    member_type = self.type
    return member_type in REGULAR_TYPES
|
def isfile(self):
    """Alias of isreg()."""
    regular = self.isreg()
    return regular
|
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    member_type = self.type
    return member_type == DIRTYPE
|
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    member_type = self.type
    return member_type == SYMTYPE
|
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    member_type = self.type
    return member_type == LNKTYPE
|
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    member_type = self.type
    return member_type == CHRTYPE
|
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    member_type = self.type
    return member_type == BLKTYPE
|
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    member_type = self.type
    return member_type == FIFOTYPE
|
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    member_type = self.type
    return member_type == GNUTYPE_SPARSE
|
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
|
1468 # class TarInfo |
|
1469 |
|
1470 class TarFile(object): |
|
1471 """The TarFile Class provides an interface to tar archives. |
|
1472 """ |
|
1473 |
|
1474 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) |
|
1475 |
|
1476 dereference = False # If true, add content of linked file to the |
|
1477 # tar file, else the link. |
|
1478 |
|
1479 ignore_zeros = False # If true, skips empty or invalid blocks and |
|
1480 # continues processing. |
|
1481 |
|
1482 errorlevel = 0 # If 0, fatal errors only appear in debug |
|
1483 # messages (if debug >= 0). If > 0, errors |
|
1484 # are passed to the caller as exceptions. |
|
1485 |
|
1486 format = DEFAULT_FORMAT # The format to use when creating an archive. |
|
1487 |
|
1488 encoding = ENCODING # Encoding for 8-bit character strings. |
|
1489 |
|
1490 errors = None # Error handler for unicode conversion. |
|
1491 |
|
1492 tarinfo = TarInfo # The default TarInfo class to use. |
|
1493 |
|
1494 fileobject = ExFileObject # The default ExFileObject class to use. |
|
1495 |
|
1496 def __init__(self, name=None, mode="r", fileobj=None, format=None, |
|
1497 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, |
|
1498 errors=None, pax_headers=None, debug=None, errorlevel=None): |
|
1499 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to |
|
1500 read from an existing archive, 'a' to append data to an existing |
|
1501 file or 'w' to create a new file overwriting an existing one. `mode' |
|
1502 defaults to 'r'. |
|
1503 If `fileobj' is given, it is used for reading or writing data. If it |
|
1504 can be determined, `mode' is overridden by `fileobj's mode. |
|
1505 `fileobj' is not closed, when TarFile is closed. |
|
1506 """ |
|
1507 if len(mode) > 1 or mode not in "raw": |
|
1508 raise ValueError("mode must be 'r', 'a' or 'w'") |
|
1509 self.mode = mode |
|
1510 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] |
|
1511 |
|
1512 if not fileobj: |
|
1513 if self.mode == "a" and not os.path.exists(name): |
|
1514 # Create nonexistent files in append mode. |
|
1515 self.mode = "w" |
|
1516 self._mode = "wb" |
|
1517 fileobj = bltn_open(name, self._mode) |
|
1518 self._extfileobj = False |
|
1519 else: |
|
1520 if name is None and hasattr(fileobj, "name"): |
|
1521 name = fileobj.name |
|
1522 if hasattr(fileobj, "mode"): |
|
1523 self._mode = fileobj.mode |
|
1524 self._extfileobj = True |
|
1525 self.name = os.path.abspath(name) if name else None |
|
1526 self.fileobj = fileobj |
|
1527 |
|
1528 # Init attributes. |
|
1529 if format is not None: |
|
1530 self.format = format |
|
1531 if tarinfo is not None: |
|
1532 self.tarinfo = tarinfo |
|
1533 if dereference is not None: |
|
1534 self.dereference = dereference |
|
1535 if ignore_zeros is not None: |
|
1536 self.ignore_zeros = ignore_zeros |
|
1537 if encoding is not None: |
|
1538 self.encoding = encoding |
|
1539 |
|
1540 if errors is not None: |
|
1541 self.errors = errors |
|
1542 elif mode == "r": |
|
1543 self.errors = "utf-8" |
|
1544 else: |
|
1545 self.errors = "strict" |
|
1546 |
|
1547 if pax_headers is not None and self.format == PAX_FORMAT: |
|
1548 self.pax_headers = pax_headers |
|
1549 else: |
|
1550 self.pax_headers = {} |
|
1551 |
|
1552 if debug is not None: |
|
1553 self.debug = debug |
|
1554 if errorlevel is not None: |
|
1555 self.errorlevel = errorlevel |
|
1556 |
|
1557 # Init datastructures. |
|
1558 self.closed = False |
|
1559 self.members = [] # list of members as TarInfo objects |
|
1560 self._loaded = False # flag if all members have been read |
|
1561 self.offset = self.fileobj.tell() |
|
1562 # current position in the archive file |
|
1563 self.inodes = {} # dictionary caching the inodes of |
|
1564 # archive members already added |
|
1565 |
|
1566 if self.mode == "r": |
|
1567 self.firstmember = None |
|
1568 self.firstmember = self.next() |
|
1569 |
|
1570 if self.mode == "a": |
|
1571 # Move to the end of the archive, |
|
1572 # before the first empty block. |
|
1573 self.firstmember = None |
|
1574 while True: |
|
1575 if self.next() is None: |
|
1576 if self.offset > 0: |
|
1577 self.fileobj.seek(- BLOCKSIZE, 1) |
|
1578 break |
|
1579 |
|
1580 if self.mode in "aw": |
|
1581 self._loaded = True |
|
1582 |
|
1583 if self.pax_headers: |
|
1584 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) |
|
1585 self.fileobj.write(buf) |
|
1586 self.offset += len(buf) |
|
1587 |
|
def _getposix(self):
    """Getter of the deprecated `posix' flag: True when the ustar
       format is selected.
    """
    uses_ustar = self.format == USTAR_FORMAT
    return uses_ustar

def _setposix(self, value):
    """Setter of the deprecated `posix' flag: issue a
       DeprecationWarning and select the ustar format for a true
       value, the GNU format otherwise.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(fget=_getposix, fset=_setposix)
|
1598 |
|
1599 #-------------------------------------------------------------------------- |
|
1600 # Below are the classmethods which act as alternate constructors to the |
|
1601 # TarFile class. The open() method is the only one that is needed for |
|
1602 # public use; it is the "super"-constructor and is able to select an |
|
1603 # adequate "sub"-constructor for a particular compression using the mapping |
|
1604 # from OPEN_METH. |
|
1605 # |
|
1606 # This concept allows one to subclass TarFile without losing the comfort of |
|
1607 # the super-constructor. A sub-constructor is registered and made available |
|
1608 # by adding it to the mapping in OPEN_METH. |
|
1609 |
|
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError when neither `name' nor `fileobj' is given or
       the mode is not recognized, CompressionError for an unknown
       compression type and ReadError when no registered open method
       can read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file so the next method
                # starts from the same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test would accept "rw" and
        # build a stream object before the mode is rejected.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
|
1682 |
|
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError
       is raised. All arguments are handed on to the class
       constructor.
    """
    # Exact comparison; a substring test would accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
|
1690 |
|
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError
       if an IOError occurs while opening the archive. A file that
       was opened here is closed again when opening fails; a
       `fileobj' supplied by the caller is never closed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj),
            **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure must not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
|
1716 |
|
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       an IOError or EOFError (truncated stream) occurs while opening
       the archive. A BZ2File that was opened here is closed again
       when opening fails.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure must not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
|
1741 |
|
1742 # All *open() methods are registered here. |
|
1743 OPEN_METH = { |
|
1744 "tar": "taropen", # uncompressed tar |
|
1745 "gz": "gzopen", # gzip compressed tar |
|
1746 "bz2": "bz2open" # bzip2 compressed tar |
|
1747 } |
|
1748 |
|
1749 #-------------------------------------------------------------------------- |
|
1750 # The public methods which TarFile provides: |
|
1751 |
|
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file is closed (unless it was supplied by the
       caller) and the TarFile is marked closed even if writing the
       final blocks fails; the error is still passed on.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even when the final writes failed.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
|
1771 |
|
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
|
1782 |
|
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires scanning the whole archive first.
    self._load()
    return self.members
|
1792 |
|
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
|
1798 |
|
1799 def gettarinfo(self, name=None, arcname=None, fileobj=None): |
|
1800 """Create a TarInfo object for either the file `name' or the file |
|
1801 object `fileobj' (using os.fstat on its file descriptor). You can |
|
1802 modify some of the TarInfo's attributes before you add it using |
|
1803 addfile(). If given, `arcname' specifies an alternative name for the |
|
1804 file in the archive. |
|
1805 """ |
|
1806 self._check("aw") |
|
1807 |
|
1808 # When fileobj is given, replace name by |
|
1809 # fileobj's real name. |
|
1810 if fileobj is not None: |
|
1811 name = fileobj.name |
|
1812 |
|
1813 # Building the name of the member in the archive. |
|
1814 # Backward slashes are converted to forward slashes, |
|
1815 # Absolute paths are turned to relative paths. |
|
1816 if arcname is None: |
|
1817 arcname = name |
|
1818 arcname = normpath(arcname) |
|
1819 drv, arcname = os.path.splitdrive(arcname) |
|
1820 while arcname[0:1] == "/": |
|
1821 arcname = arcname[1:] |
|
1822 |
|
1823 # Now, fill the TarInfo object with |
|
1824 # information specific for the file. |
|
1825 tarinfo = self.tarinfo() |
|
1826 tarinfo.tarfile = self |
|
1827 |
|
1828 # Use os.stat or os.lstat, depending on platform |
|
1829 # and if symlinks shall be resolved. |
|
1830 if fileobj is None: |
|
1831 if hasattr(os, "lstat") and not self.dereference: |
|
1832 statres = os.lstat(name) |
|
1833 else: |
|
1834 statres = os.stat(name) |
|
1835 else: |
|
1836 statres = os.fstat(fileobj.fileno()) |
|
1837 linkname = "" |
|
1838 |
|
1839 stmd = statres.st_mode |
|
1840 if stat.S_ISREG(stmd): |
|
1841 inode = (statres.st_ino, statres.st_dev) |
|
1842 if not self.dereference and statres.st_nlink > 1 and \ |
|
1843 inode in self.inodes and arcname != self.inodes[inode]: |
|
1844 # Is it a hardlink to an already |
|
1845 # archived file? |
|
1846 type = LNKTYPE |
|
1847 linkname = self.inodes[inode] |
|
1848 else: |
|
1849 # The inode is added only if its valid. |
|
1850 # For win32 it is always 0. |
|
1851 type = REGTYPE |
|
1852 if inode[0]: |
|
1853 self.inodes[inode] = arcname |
|
1854 elif stat.S_ISDIR(stmd): |
|
1855 type = DIRTYPE |
|
1856 elif stat.S_ISFIFO(stmd): |
|
1857 type = FIFOTYPE |
|
1858 elif stat.S_ISLNK(stmd): |
|
1859 type = SYMTYPE |
|
1860 linkname = os.readlink(name) |
|
1861 elif stat.S_ISCHR(stmd): |
|
1862 type = CHRTYPE |
|
1863 elif stat.S_ISBLK(stmd): |
|
1864 type = BLKTYPE |
|
1865 else: |
|
1866 return None |
|
1867 |
|
1868 # Fill the TarInfo object with all |
|
1869 # information we can get. |
|
1870 tarinfo.name = arcname |
|
1871 tarinfo.mode = stmd |
|
1872 tarinfo.uid = statres.st_uid |
|
1873 tarinfo.gid = statres.st_gid |
|
1874 if stat.S_ISREG(stmd): |
|
1875 tarinfo.size = statres.st_size |
|
1876 else: |
|
1877 tarinfo.size = 0L |
|
1878 tarinfo.mtime = statres.st_mtime |
|
1879 tarinfo.type = type |
|
1880 tarinfo.linkname = linkname |
|
1881 if pwd: |
|
1882 try: |
|
1883 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] |
|
1884 except KeyError: |
|
1885 pass |
|
1886 if grp: |
|
1887 try: |
|
1888 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] |
|
1889 except KeyError: |
|
1890 pass |
|
1891 |
|
1892 if type in (CHRTYPE, BLKTYPE): |
|
1893 if hasattr(os, "major") and hasattr(os, "minor"): |
|
1894 tarinfo.devmajor = os.major(statres.st_rdev) |
|
1895 tarinfo.devminor = os.minor(statres.st_rdev) |
|
1896 return tarinfo |
|
1897 |
|
1898 def list(self, verbose=True): |
|
1899 """Print a table of contents to sys.stdout. If `verbose' is False, only |
|
1900 the names of the members are printed. If it is True, an `ls -l'-like |
|
1901 output is produced. |
|
1902 """ |
|
1903 self._check() |
|
1904 |
|
1905 for tarinfo in self: |
|
1906 if verbose: |
|
1907 print filemode(tarinfo.mode), |
|
1908 print "%s/%s" % (tarinfo.uname or tarinfo.uid, |
|
1909 tarinfo.gname or tarinfo.gid), |
|
1910 if tarinfo.ischr() or tarinfo.isblk(): |
|
1911 print "%10s" % ("%d,%d" \ |
|
1912 % (tarinfo.devmajor, tarinfo.devminor)), |
|
1913 else: |
|
1914 print "%10d" % tarinfo.size, |
|
1915 print "%d-%02d-%02d %02d:%02d:%02d" \ |
|
1916 % time.localtime(tarinfo.mtime)[:6], |
|
1917 |
|
1918 print tarinfo.name + ("/" if tarinfo.isdir() else ""), |
|
1919 |
|
1920 if verbose: |
|
1921 if tarinfo.issym(): |
|
1922 print "->", tarinfo.linkname, |
|
1923 if tarinfo.islnk(): |
|
1924 print "link to", tarinfo.linkname, |
|
1925 print |
|
1926 |
|
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.

       Nothing is added when `name' is excluded, when its absolute path
       equals the name of the archive itself, or when gettarinfo()
       returns None for it (unsupported file type). The archive must be
       open in mode 'a' or 'w'.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Only its entries are added, "." itself
    # gets no member of its own.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the file handle even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
|
1983 |
|
def addfile(self, tarinfo, fileobj=None):
    """Append a copy of the TarInfo object `tarinfo' to the archive.

       When `fileobj' is given, tarinfo.size bytes are read from it and
       written after the header, padded with NUL bytes to a multiple of
       the block size. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the stored member is independent of the
    # caller's object.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        nblocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Fill up the last, partially used block.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(member)
|
2009 |
|
def extractall(self, path=".", members=None):
    """Extract every member of the archive below `path' (the current
       working directory by default).

       Directories are first created with mode 0700; their owner,
       modification time and permissions are set once all members have
       been extracted. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    pending_dirs = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode. The untouched
            # original is remembered for the second pass below.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Walk the directories in reverse name order, so that an entry like
    # "a/b" is handled before "a".
    by_name = sorted(pending_dirs, key=operator.attrgetter('name'))

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in reversed(by_name):
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
|
2046 |
|
def extract(self, member, path=""):
    """Extract one member from the archive, using its full name, below
       `path' (the current working directory by default). Its file
       information is extracted as accurately as possible. `member' may
       be a filename or a TarInfo object.

       Depending on self.errorlevel, EnvironmentError (errorlevel > 0)
       and ExtractError (errorlevel > 1) are re-raised; otherwise they
       are only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
              else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
|
2079 |
|
def extractfile(self, member):
    """Return a file-like object for a member of the archive. `member'
       may be a filename or a TarInfo object.

       Regular files, and members whose type is not in SUPPORTED_TYPES,
       yield a file object for their own data. For a hard or symbolic
       link the file object of the link's target is returned. For
       everything else (directory, chrdev, blkdev, etc.) the result is
       None. StreamError is raised for a link when the archive is read
       from a _Stream.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
              else member

    # If a member's type is unknown, it is treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory,
        # chrdev, blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
|
2118 |
|
def _extract_member(self, tarinfo, targetpath):
    """Create the file system object described by the TarInfo object
       `tarinfo' at `targetpath', then apply owner, mode and
       modification time to it.
    """
    # Build the destination pathname: drop one trailing slash and
    # normalize the rest for the platform.
    if targetpath.endswith("/"):
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method for the member's type. The methods may be
    # replaced in a subclass.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
|
2161 |
|
2162 #-------------------------------------------------------------------------- |
|
2163 # Below are the different file methods. They are called via |
|
2164 # _extract_member() when extract() is called. They can be replaced in a |
|
2165 # subclass to implement other functionality. |
|
2166 |
|
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that already
       exists is left alone.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as err:
        if err.errno == errno.EEXIST:
            return
        raise
|
2177 |
|
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to a new file called
       `targetpath'.

       The data is obtained from extractfile() and copied to
       `targetpath', which is opened in mode 'wb'. Both file objects
       are closed even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
|
2186 |
|
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
|
2194 |
|
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. ExtractError is raised when
       the os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
|
2202 |
|
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called `targetpath' with
       the device numbers stored in `tarinfo'. ExtractError is raised
       when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the file type bit of the node.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
|
2217 |
|
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called `targetpath'. If it cannot
       be created (platform limitation), we try to make a copy of the
       referenced file instead of a link.

       The fallback is entered on AttributeError: first the link target
       is looked up in the archive and extracted to `targetpath'; if
       that fails, the target is copied from the file system. IOError is
       raised when the copy fails as well.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # tarinfo._link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # The target of a symbolic link is taken relative to the
            # directory of the link itself.
            source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                           source))

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
|
2244 |
|
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

       Nothing happens unless the pwd module is available and the
       effective user id is 0. User and group are looked up by name
       first, then by numeric id; the ids of the current process are the
       last resort. ExtractError is raised when changing the owner
       fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
|
2272 |
|
def chmod(self, tarinfo, targetpath):
    """Set the permissions of `targetpath' to tarinfo.mode. Nothing is
       done when the os module has no chmod(); ExtractError is raised
       when the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
|
2281 |
|
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. ExtractError is raised when the call fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        stamp = (tarinfo.mtime, tarinfo.mtime)
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
|
2295 |
|
2296 #-------------------------------------------------------------------------- |
|
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A member stored in self.firstmember is handed out first.
       ReadError is raised when an invalid header is found at offset 0
       and ignore_zeros is not set.
    """
    self._check("ra")

    first = self.firstmember
    if first is not None:
        self.firstmember = None
        return first

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
            if tarinfo is None:
                return None
            self.members.append(tarinfo)
            return tarinfo
        except HeaderError as e:
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the invalid block and try the following one.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
|
2329 |
|
2330 #-------------------------------------------------------------------------- |
|
2331 # Little helper methods: |
|
2332 |
|
def _getmember(self, name, tarinfo=None):
    """Search the members from bottom to top for one called `name' and
       return it, or None if there is none. If `tarinfo' is given, only
       the members in front of it are searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        stop = len(members)
    else:
        stop = members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
|
2348 |
|
def _load(self):
    """Call next() until it returns None, i.e. read through the entire
       archive looking for readable members, then mark the archive as
       loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
|
2358 |
|
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or if, with `mode' given,
       the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
|
2367 |
|
def __iter__(self):
    """Return an iterator over the members: a plain list iterator once
       the archive is completely loaded, a TarIter object otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
|
2375 |
|
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr unless `level' is higher than the
       debug level of the TarFile (self.debug).
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
|
2381 # class TarFile |
|
2382 |
|
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Start iterating over `tarfile' at its first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, or raise StopIteration when there is
           none left. While the TarFile is not completely loaded its
           next() method is used, and the TarFile is marked as _loaded
           once that returns nothing.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Therefore the
        # member list is indexed directly once everything is loaded.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
|
2418 |
|
2419 # Helper classes for sparse file support |
|
class _section:
    """Common base of _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The start of the range is inclusive, its end is exclusive.
        return self.offset <= offset and offset < self.offset + self.size
|
2428 |
|
class _data(_section):
    """A data section of a sparse file. In addition to offset and size
       it stores `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
|
2435 |
|
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section.
    """
|
2440 |
|
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The position of the last hit is remembered in self.idx and the
       next search starts from there, wrapping around at the end of the
       list.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching from the remembered position,
           for which `offset in item' is true, and remember where it was
           found. Return None when no item matches, which includes the
           case of an empty buffer.
        """
        count = len(self)
        if count == 0:
            # Nothing to search; indexing below would raise IndexError.
            return None

        # A remembered position that lies behind the end of the list
        # (e.g. after items were removed) restarts the search at 0.
        start = self.idx
        if start >= count:
            start = 0

        idx = start
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == count:
                idx = 0
            if idx == start:
                # End of File
                return None
        self.idx = idx
        return item
|
2461 |
|
2462 #--------------------------------------------- |
|
2463 # zipfile compatible TarFile class |
|
2464 #--------------------------------------------- |
|
# Compression constants; the values equal those of the zipfile module.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzip compressed
           (TAR_GZIPPED) archive. ValueError is raised for any other
           `compression' value. A py3k warning is issued because the
           class has been removed in Python 3.0.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode.startswith("r"):
            # Give every member the attribute names a zipfile.ZipInfo
            # object would have.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called `name'.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called `name'.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally as
           `arcname'. `compress_type' is ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member. Its name and modification
           time are taken from `zinfo' (filename and date_time).
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        member = TarInfo(zinfo.filename)
        member.size = len(bytes)
        member.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(member, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
|
2514 #class TarFileCompat |
|
2515 |
|
2516 #-------------------- |
|
2517 # exported functions |
|
2518 #-------------------- |
|
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened through the module-level name `open',
       which is bound to TarFile.open below, and closed again at once.
       Only TarError is turned into False; other exceptions propagate.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True

# The order of the two assignments matters: the builtin open() has to be
# saved as bltn_open before the module-level name `open' is rebound to
# TarFile.open.
bltn_open = open
open = TarFile.open