symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/tarfile.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 #!/usr/bin/env python
       
     2 # -*- coding: iso-8859-1 -*-
       
     3 #-------------------------------------------------------------------
       
     4 # tarfile.py
       
     5 #-------------------------------------------------------------------
       
     6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
       
     7 # All rights reserved.
       
     8 #
       
     9 # Permission  is  hereby granted,  free  of charge,  to  any person
       
    10 # obtaining a  copy of  this software  and associated documentation
       
    11 # files  (the  "Software"),  to   deal  in  the  Software   without
       
    12 # restriction,  including  without limitation  the  rights to  use,
       
    13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    14 # copies  of  the  Software,  and to  permit  persons  to  whom the
       
    15 # Software  is  furnished  to  do  so,  subject  to  the  following
       
    16 # conditions:
       
    17 #
       
    18 # The above copyright  notice and this  permission notice shall  be
       
    19 # included in all copies or substantial portions of the Software.
       
    20 #
       
    21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
       
    22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
       
    23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
       
    24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
       
    25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
       
    26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
       
    27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
       
    28 # OTHER DEALINGS IN THE SOFTWARE.
       
    29 #
       
    30 """Read from and write to tar format archives.
       
    31 """
       
    32 
       
    33 __version__ = "$Revision: 65514 $"
       
    34 # $Source$
       
    35 
       
    36 version     = "0.9.0"
       
    37 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
       
    38 __date__    = "$Date: 2008-08-04 23:23:07 +0200 (Mo, 04 Aug 2008) $"
       
    39 __cvsid__   = "$Id: tarfile.py 65514 2008-08-04 21:23:07Z brett.cannon $"
       
    40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
       
    41 
       
    42 #---------
       
    43 # Imports
       
    44 #---------
       
    45 import sys
       
    46 import os
       
    47 import shutil
       
    48 import stat
       
    49 import errno
       
    50 import time
       
    51 import struct
       
    52 import copy
       
    53 import re
       
    54 import operator
       
    55 
       
    56 if sys.platform == 'mac':
       
    57     # This module needs work for MacOS9, especially in the area of pathname
       
    58     # handling. In many places it is assumed a simple substitution of / by the
       
    59     # local os.path.sep is good enough to convert pathnames, but this does not
       
    60     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
       
    61     raise ImportError, "tarfile does not work for platform==mac"
       
    62 
       
    63 try:
       
    64     import grp, pwd
       
    65 except ImportError:
       
    66     grp = pwd = None
       
    67 
       
    68 # from tarfile import *
       
    69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
       
    70 
       
    71 #---------------------------------------------------------
       
    72 # tar constants
       
    73 #---------------------------------------------------------
       
    74 NUL = "\0"                      # the null character
       
    75 BLOCKSIZE = 512                 # length of processing blocks
       
    76 RECORDSIZE = BLOCKSIZE * 20     # length of records
       
    77 GNU_MAGIC = "ustar  \0"         # magic gnu tar string
       
    78 POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
       
    79 
       
    80 LENGTH_NAME = 100               # maximum length of a filename
       
    81 LENGTH_LINK = 100               # maximum length of a linkname
       
    82 LENGTH_PREFIX = 155             # maximum length of the prefix field
       
    83 
       
    84 REGTYPE = "0"                   # regular file
       
    85 AREGTYPE = "\0"                 # regular file
       
    86 LNKTYPE = "1"                   # link (inside tarfile)
       
    87 SYMTYPE = "2"                   # symbolic link
       
    88 CHRTYPE = "3"                   # character special device
       
    89 BLKTYPE = "4"                   # block special device
       
    90 DIRTYPE = "5"                   # directory
       
    91 FIFOTYPE = "6"                  # fifo special device
       
    92 CONTTYPE = "7"                  # contiguous file
       
    93 
       
    94 GNUTYPE_LONGNAME = "L"          # GNU tar longname
       
    95 GNUTYPE_LONGLINK = "K"          # GNU tar longlink
       
    96 GNUTYPE_SPARSE = "S"            # GNU tar sparse file
       
    97 
       
    98 XHDTYPE = "x"                   # POSIX.1-2001 extended header
       
    99 XGLTYPE = "g"                   # POSIX.1-2001 global header
       
   100 SOLARIS_XHDTYPE = "X"           # Solaris extended header
       
   101 
       
   102 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
       
   103 GNU_FORMAT = 1                  # GNU tar format
       
   104 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
       
   105 DEFAULT_FORMAT = GNU_FORMAT
       
   106 
       
   107 #---------------------------------------------------------
       
   108 # tarfile constants
       
   109 #---------------------------------------------------------
       
   110 # File types that tarfile supports:
       
   111 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
       
   112                    SYMTYPE, DIRTYPE, FIFOTYPE,
       
   113                    CONTTYPE, CHRTYPE, BLKTYPE,
       
   114                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
       
   115                    GNUTYPE_SPARSE)
       
   116 
       
   117 # File types that will be treated as a regular file.
       
   118 REGULAR_TYPES = (REGTYPE, AREGTYPE,
       
   119                  CONTTYPE, GNUTYPE_SPARSE)
       
   120 
       
   121 # File types that are part of the GNU tar format.
       
   122 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
       
   123              GNUTYPE_SPARSE)
       
   124 
       
   125 # Fields from a pax header that override a TarInfo attribute.
       
   126 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
       
   127               "uid", "gid", "uname", "gname")
       
   128 
       
   129 # Fields in a pax header that are numbers, all other fields
       
   130 # are treated as strings.
       
   131 PAX_NUMBER_FIELDS = {
       
   132     "atime": float,
       
   133     "ctime": float,
       
   134     "mtime": float,
       
   135     "uid": int,
       
   136     "gid": int,
       
   137     "size": int
       
   138 }
       
   139 
       
   140 #---------------------------------------------------------
       
   141 # Bits used in the mode field, values in octal.
       
   142 #---------------------------------------------------------
       
   143 S_IFLNK = 0120000        # symbolic link
       
   144 S_IFREG = 0100000        # regular file
       
   145 S_IFBLK = 0060000        # block device
       
   146 S_IFDIR = 0040000        # directory
       
   147 S_IFCHR = 0020000        # character device
       
   148 S_IFIFO = 0010000        # fifo
       
   149 
       
   150 TSUID   = 04000          # set UID on execution
       
   151 TSGID   = 02000          # set GID on execution
       
   152 TSVTX   = 01000          # reserved
       
   153 
       
   154 TUREAD  = 0400           # read by owner
       
   155 TUWRITE = 0200           # write by owner
       
   156 TUEXEC  = 0100           # execute/search by owner
       
   157 TGREAD  = 0040           # read by group
       
   158 TGWRITE = 0020           # write by group
       
   159 TGEXEC  = 0010           # execute/search by group
       
   160 TOREAD  = 0004           # read by other
       
   161 TOWRITE = 0002           # write by other
       
   162 TOEXEC  = 0001           # execute/search by other
       
   163 
       
   164 #---------------------------------------------------------
       
   165 # initialization
       
   166 #---------------------------------------------------------
       
   167 ENCODING = sys.getfilesystemencoding()
       
   168 if ENCODING is None:
       
   169     ENCODING = sys.getdefaultencoding()
       
   170 
       
   171 #---------------------------------------------------------
       
   172 # Some useful functions
       
   173 #---------------------------------------------------------
       
   174 
       
   175 def stn(s, length):
       
   176     """Convert a python string to a null-terminated string buffer.
       
   177     """
       
   178     return s[:length] + (length - len(s)) * NUL
       
   179 
       
   180 def nts(s):
       
   181     """Convert a null-terminated string field to a python string.
       
   182     """
       
   183     # Use the string up to the first null char.
       
   184     p = s.find("\0")
       
   185     if p == -1:
       
   186         return s
       
   187     return s[:p]
       
   188 
       
   189 def nti(s):
       
   190     """Convert a number field to a python number.
       
   191     """
       
   192     # There are two possible encodings for a number field, see
       
   193     # itn() below.
       
   194     if s[0] != chr(0200):
       
   195         try:
       
   196             n = int(nts(s) or "0", 8)
       
   197         except ValueError:
       
   198             raise HeaderError("invalid header")
       
   199     else:
       
   200         n = 0L
       
   201         for i in xrange(len(s) - 1):
       
   202             n <<= 8
       
   203             n += ord(s[i + 1])
       
   204     return n
       
   205 
       
   206 def itn(n, digits=8, format=DEFAULT_FORMAT):
       
   207     """Convert a python number to a number field.
       
   208     """
       
   209     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
       
   210     # octal digits followed by a null-byte, this allows values up to
       
   211     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
       
   212     # that if necessary. A leading 0200 byte indicates this particular
       
   213     # encoding, the following digits-1 bytes are a big-endian
       
   214     # representation. This allows values up to (256**(digits-1))-1.
       
   215     if 0 <= n < 8 ** (digits - 1):
       
   216         s = "%0*o" % (digits - 1, n) + NUL
       
   217     else:
       
   218         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
       
   219             raise ValueError("overflow in number field")
       
   220 
       
   221         if n < 0:
       
   222             # XXX We mimic GNU tar's behaviour with negative numbers,
       
   223             # this could raise OverflowError.
       
   224             n = struct.unpack("L", struct.pack("l", n))[0]
       
   225 
       
   226         s = ""
       
   227         for i in xrange(digits - 1):
       
   228             s = chr(n & 0377) + s
       
   229             n >>= 8
       
   230         s = chr(0200) + s
       
   231     return s
       
   232 
       
   233 def uts(s, encoding, errors):
       
   234     """Convert a unicode object to a string.
       
   235     """
       
   236     if errors == "utf-8":
       
   237         # An extra error handler similar to the -o invalid=UTF-8 option
       
   238         # in POSIX.1-2001. Replace untranslatable characters with their
       
   239         # UTF-8 representation.
       
   240         try:
       
   241             return s.encode(encoding, "strict")
       
   242         except UnicodeEncodeError:
       
   243             x = []
       
   244             for c in s:
       
   245                 try:
       
   246                     x.append(c.encode(encoding, "strict"))
       
   247                 except UnicodeEncodeError:
       
   248                     x.append(c.encode("utf8"))
       
   249             return "".join(x)
       
   250     else:
       
   251         return s.encode(encoding, errors)
       
   252 
       
   253 def calc_chksums(buf):
       
   254     """Calculate the checksum for a member's header by summing up all
       
   255        characters except for the chksum field which is treated as if
       
   256        it was filled with spaces. According to the GNU tar sources,
       
   257        some tars (Sun and NeXT) calculate chksum with signed char,
       
   258        which will be different if there are chars in the buffer with
       
   259        the high bit set. So we calculate two checksums, unsigned and
       
   260        signed.
       
   261     """
       
   262     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
       
   263     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
       
   264     return unsigned_chksum, signed_chksum
       
   265 
       
   266 def copyfileobj(src, dst, length=None):
       
   267     """Copy length bytes from fileobj src to fileobj dst.
       
   268        If length is None, copy the entire content.
       
   269     """
       
   270     if length == 0:
       
   271         return
       
   272     if length is None:
       
   273         shutil.copyfileobj(src, dst)
       
   274         return
       
   275 
       
   276     BUFSIZE = 16 * 1024
       
   277     blocks, remainder = divmod(length, BUFSIZE)
       
   278     for b in xrange(blocks):
       
   279         buf = src.read(BUFSIZE)
       
   280         if len(buf) < BUFSIZE:
       
   281             raise IOError("end of file reached")
       
   282         dst.write(buf)
       
   283 
       
   284     if remainder != 0:
       
   285         buf = src.read(remainder)
       
   286         if len(buf) < remainder:
       
   287             raise IOError("end of file reached")
       
   288         dst.write(buf)
       
   289     return
       
   290 
       
   291 filemode_table = (
       
   292     ((S_IFLNK,      "l"),
       
   293      (S_IFREG,      "-"),
       
   294      (S_IFBLK,      "b"),
       
   295      (S_IFDIR,      "d"),
       
   296      (S_IFCHR,      "c"),
       
   297      (S_IFIFO,      "p")),
       
   298 
       
   299     ((TUREAD,       "r"),),
       
   300     ((TUWRITE,      "w"),),
       
   301     ((TUEXEC|TSUID, "s"),
       
   302      (TSUID,        "S"),
       
   303      (TUEXEC,       "x")),
       
   304 
       
   305     ((TGREAD,       "r"),),
       
   306     ((TGWRITE,      "w"),),
       
   307     ((TGEXEC|TSGID, "s"),
       
   308      (TSGID,        "S"),
       
   309      (TGEXEC,       "x")),
       
   310 
       
   311     ((TOREAD,       "r"),),
       
   312     ((TOWRITE,      "w"),),
       
   313     ((TOEXEC|TSVTX, "t"),
       
   314      (TSVTX,        "T"),
       
   315      (TOEXEC,       "x"))
       
   316 )
       
   317 
       
   318 def filemode(mode):
       
   319     """Convert a file's mode to a string of the form
       
   320        -rwxrwxrwx.
       
   321        Used by TarFile.list()
       
   322     """
       
   323     perm = []
       
   324     for table in filemode_table:
       
   325         for bit, char in table:
       
   326             if mode & bit == bit:
       
   327                 perm.append(char)
       
   328                 break
       
   329         else:
       
   330             perm.append("-")
       
   331     return "".join(perm)
       
   332 
       
   333 if os.sep != "/":
       
   334     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
       
   335 else:
       
   336     normpath = os.path.normpath
       
   337 
       
   338 class TarError(Exception):
       
   339     """Base exception."""
       
   340     pass
       
   341 class ExtractError(TarError):
       
   342     """General exception for extract errors."""
       
   343     pass
       
   344 class ReadError(TarError):
       
   345     """Exception for unreadble tar archives."""
       
   346     pass
       
   347 class CompressionError(TarError):
       
   348     """Exception for unavailable compression methods."""
       
   349     pass
       
   350 class StreamError(TarError):
       
   351     """Exception for unsupported operations on stream-like TarFiles."""
       
   352     pass
       
   353 class HeaderError(TarError):
       
   354     """Exception for invalid headers."""
       
   355     pass
       
   356 
       
   357 #---------------------------
       
   358 # internal stream interface
       
   359 #---------------------------
       
   360 class _LowLevelFile:
       
   361     """Low-level file object. Supports reading and writing.
       
   362        It is used instead of a regular file object for streaming
       
   363        access.
       
   364     """
       
   365 
       
   366     def __init__(self, name, mode):
       
   367         mode = {
       
   368             "r": os.O_RDONLY,
       
   369             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
       
   370         }[mode]
       
   371         if hasattr(os, "O_BINARY"):
       
   372             mode |= os.O_BINARY
       
   373         self.fd = os.open(name, mode)
       
   374 
       
   375     def close(self):
       
   376         os.close(self.fd)
       
   377 
       
   378     def read(self, size):
       
   379         return os.read(self.fd, size)
       
   380 
       
   381     def write(self, s):
       
   382         os.write(self.fd, s)
       
   383 
       
   384 class _Stream:
       
   385     """Class that serves as an adapter between TarFile and
       
   386        a stream-like object.  The stream-like object only
       
   387        needs to have a read() or write() method and is accessed
       
   388        blockwise.  Use of gzip or bzip2 compression is possible.
       
   389        A stream-like object could be for example: sys.stdin,
       
   390        sys.stdout, a socket, a tape device etc.
       
   391 
       
   392        _Stream is intended to be used only internally.
       
   393     """
       
   394 
       
   395     def __init__(self, name, mode, comptype, fileobj, bufsize):
       
   396         """Construct a _Stream object.
       
   397         """
       
   398         self._extfileobj = True
       
   399         if fileobj is None:
       
   400             fileobj = _LowLevelFile(name, mode)
       
   401             self._extfileobj = False
       
   402 
       
   403         if comptype == '*':
       
   404             # Enable transparent compression detection for the
       
   405             # stream interface
       
   406             fileobj = _StreamProxy(fileobj)
       
   407             comptype = fileobj.getcomptype()
       
   408 
       
   409         self.name     = name or ""
       
   410         self.mode     = mode
       
   411         self.comptype = comptype
       
   412         self.fileobj  = fileobj
       
   413         self.bufsize  = bufsize
       
   414         self.buf      = ""
       
   415         self.pos      = 0L
       
   416         self.closed   = False
       
   417 
       
   418         if comptype == "gz":
       
   419             try:
       
   420                 import zlib
       
   421             except ImportError:
       
   422                 raise CompressionError("zlib module is not available")
       
   423             self.zlib = zlib
       
   424             self.crc = zlib.crc32("") & 0xffffffffL
       
   425             if mode == "r":
       
   426                 self._init_read_gz()
       
   427             else:
       
   428                 self._init_write_gz()
       
   429 
       
   430         if comptype == "bz2":
       
   431             try:
       
   432                 import bz2
       
   433             except ImportError:
       
   434                 raise CompressionError("bz2 module is not available")
       
   435             if mode == "r":
       
   436                 self.dbuf = ""
       
   437                 self.cmp = bz2.BZ2Decompressor()
       
   438             else:
       
   439                 self.cmp = bz2.BZ2Compressor()
       
   440 
       
   441     def __del__(self):
       
   442         if hasattr(self, "closed") and not self.closed:
       
   443             self.close()
       
   444 
       
   445     def _init_write_gz(self):
       
   446         """Initialize for writing with gzip compression.
       
   447         """
       
   448         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
       
   449                                             -self.zlib.MAX_WBITS,
       
   450                                             self.zlib.DEF_MEM_LEVEL,
       
   451                                             0)
       
   452         timestamp = struct.pack("<L", long(time.time()))
       
   453         self.__write("\037\213\010\010%s\002\377" % timestamp)
       
   454         if self.name.endswith(".gz"):
       
   455             self.name = self.name[:-3]
       
   456         self.__write(self.name + NUL)
       
   457 
       
   458     def write(self, s):
       
   459         """Write string s to the stream.
       
   460         """
       
   461         if self.comptype == "gz":
       
   462             self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
       
   463         self.pos += len(s)
       
   464         if self.comptype != "tar":
       
   465             s = self.cmp.compress(s)
       
   466         self.__write(s)
       
   467 
       
   468     def __write(self, s):
       
   469         """Write string s to the stream if a whole new block
       
   470            is ready to be written.
       
   471         """
       
   472         self.buf += s
       
   473         while len(self.buf) > self.bufsize:
       
   474             self.fileobj.write(self.buf[:self.bufsize])
       
   475             self.buf = self.buf[self.bufsize:]
       
   476 
       
   477     def close(self):
       
   478         """Close the _Stream object. No operation should be
       
   479            done on it afterwards.
       
   480         """
       
   481         if self.closed:
       
   482             return
       
   483 
       
   484         if self.mode == "w" and self.comptype != "tar":
       
   485             self.buf += self.cmp.flush()
       
   486 
       
   487         if self.mode == "w" and self.buf:
       
   488             self.fileobj.write(self.buf)
       
   489             self.buf = ""
       
   490             if self.comptype == "gz":
       
   491                 # The native zlib crc is an unsigned 32-bit integer, but
       
   492                 # the Python wrapper implicitly casts that to a signed C
       
   493                 # long.  So, on a 32-bit box self.crc may "look negative",
       
   494                 # while the same crc on a 64-bit box may "look positive".
       
   495                 # To avoid irksome warnings from the `struct` module, force
       
   496                 # it to look positive on all boxes.
       
   497                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
       
   498                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
       
   499 
       
   500         if not self._extfileobj:
       
   501             self.fileobj.close()
       
   502 
       
   503         self.closed = True
       
   504 
       
   505     def _init_read_gz(self):
       
   506         """Initialize for reading a gzip compressed fileobj.
       
   507         """
       
   508         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
       
   509         self.dbuf = ""
       
   510 
       
   511         # taken from gzip.GzipFile with some alterations
       
   512         if self.__read(2) != "\037\213":
       
   513             raise ReadError("not a gzip file")
       
   514         if self.__read(1) != "\010":
       
   515             raise CompressionError("unsupported compression method")
       
   516 
       
   517         flag = ord(self.__read(1))
       
   518         self.__read(6)
       
   519 
       
   520         if flag & 4:
       
   521             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
       
   522             self.read(xlen)
       
   523         if flag & 8:
       
   524             while True:
       
   525                 s = self.__read(1)
       
   526                 if not s or s == NUL:
       
   527                     break
       
   528         if flag & 16:
       
   529             while True:
       
   530                 s = self.__read(1)
       
   531                 if not s or s == NUL:
       
   532                     break
       
   533         if flag & 2:
       
   534             self.__read(2)
       
   535 
       
   536     def tell(self):
       
   537         """Return the stream's file pointer position.
       
   538         """
       
   539         return self.pos
       
   540 
       
   541     def seek(self, pos=0):
       
   542         """Set the stream's file pointer to pos. Negative seeking
       
   543            is forbidden.
       
   544         """
       
   545         if pos - self.pos >= 0:
       
   546             blocks, remainder = divmod(pos - self.pos, self.bufsize)
       
   547             for i in xrange(blocks):
       
   548                 self.read(self.bufsize)
       
   549             self.read(remainder)
       
   550         else:
       
   551             raise StreamError("seeking backwards is not allowed")
       
   552         return self.pos
       
   553 
       
   554     def read(self, size=None):
       
   555         """Return the next size number of bytes from the stream.
       
   556            If size is not defined, return all bytes of the stream
       
   557            up to EOF.
       
   558         """
       
   559         if size is None:
       
   560             t = []
       
   561             while True:
       
   562                 buf = self._read(self.bufsize)
       
   563                 if not buf:
       
   564                     break
       
   565                 t.append(buf)
       
   566             buf = "".join(t)
       
   567         else:
       
   568             buf = self._read(size)
       
   569         self.pos += len(buf)
       
   570         return buf
       
   571 
       
   572     def _read(self, size):
       
   573         """Return size bytes from the stream.
       
   574         """
       
   575         if self.comptype == "tar":
       
   576             return self.__read(size)
       
   577 
       
   578         c = len(self.dbuf)
       
   579         t = [self.dbuf]
       
   580         while c < size:
       
   581             buf = self.__read(self.bufsize)
       
   582             if not buf:
       
   583                 break
       
   584             try:
       
   585                 buf = self.cmp.decompress(buf)
       
   586             except IOError:
       
   587                 raise ReadError("invalid compressed data")
       
   588             t.append(buf)
       
   589             c += len(buf)
       
   590         t = "".join(t)
       
   591         self.dbuf = t[size:]
       
   592         return t[:size]
       
   593 
       
   594     def __read(self, size):
       
   595         """Return size bytes from stream. If internal buffer is empty,
       
   596            read another block from the stream.
       
   597         """
       
   598         c = len(self.buf)
       
   599         t = [self.buf]
       
   600         while c < size:
       
   601             buf = self.fileobj.read(self.bufsize)
       
   602             if not buf:
       
   603                 break
       
   604             t.append(buf)
       
   605             c += len(buf)
       
   606         t = "".join(t)
       
   607         self.buf = t[size:]
       
   608         return t[:size]
       
   609 # class _Stream
       
   610 
       
   611 class _StreamProxy(object):
       
   612     """Small proxy class that enables transparent compression
       
   613        detection for the Stream interface (mode 'r|*').
       
   614     """
       
   615 
       
   616     def __init__(self, fileobj):
       
   617         self.fileobj = fileobj
       
   618         self.buf = self.fileobj.read(BLOCKSIZE)
       
   619 
       
   620     def read(self, size):
       
   621         self.read = self.fileobj.read
       
   622         return self.buf
       
   623 
       
   624     def getcomptype(self):
       
   625         if self.buf.startswith("\037\213\010"):
       
   626             return "gz"
       
   627         if self.buf.startswith("BZh91"):
       
   628             return "bz2"
       
   629         return "tar"
       
   630 
       
   631     def close(self):
       
   632         self.fileobj.close()
       
   633 # class StreamProxy
       
   634 
       
   635 class _BZ2Proxy(object):
       
   636     """Small proxy class that enables external file object
       
   637        support for "r:bz2" and "w:bz2" modes. This is actually
       
   638        a workaround for a limitation in bz2 module's BZ2File
       
   639        class which (unlike gzip.GzipFile) has no support for
       
   640        a file object argument.
       
   641     """
       
   642 
       
   643     blocksize = 16 * 1024
       
   644 
       
   645     def __init__(self, fileobj, mode):
       
   646         self.fileobj = fileobj
       
   647         self.mode = mode
       
   648         self.name = getattr(self.fileobj, "name", None)
       
   649         self.init()
       
   650 
       
   651     def init(self):
       
   652         import bz2
       
   653         self.pos = 0
       
   654         if self.mode == "r":
       
   655             self.bz2obj = bz2.BZ2Decompressor()
       
   656             self.fileobj.seek(0)
       
   657             self.buf = ""
       
   658         else:
       
   659             self.bz2obj = bz2.BZ2Compressor()
       
   660 
       
   661     def read(self, size):
       
   662         b = [self.buf]
       
   663         x = len(self.buf)
       
   664         while x < size:
       
   665             try:
       
   666                 raw = self.fileobj.read(self.blocksize)
       
   667                 data = self.bz2obj.decompress(raw)
       
   668                 b.append(data)
       
   669             except EOFError:
       
   670                 break
       
   671             x += len(data)
       
   672         self.buf = "".join(b)
       
   673 
       
   674         buf = self.buf[:size]
       
   675         self.buf = self.buf[size:]
       
   676         self.pos += len(buf)
       
   677         return buf
       
   678 
       
   679     def seek(self, pos):
       
   680         if pos < self.pos:
       
   681             self.init()
       
   682         self.read(pos - self.pos)
       
   683 
       
   684     def tell(self):
       
   685         return self.pos
       
   686 
       
   687     def write(self, data):
       
   688         self.pos += len(data)
       
   689         raw = self.bz2obj.compress(data)
       
   690         self.fileobj.write(raw)
       
   691 
       
   692     def close(self):
       
   693         if self.mode == "w":
       
   694             raw = self.bz2obj.flush()
       
   695             self.fileobj.write(raw)
       
   696 # class _BZ2Proxy
       
   697 
       
   698 #------------------------
       
   699 # Extraction file object
       
   700 #------------------------
       
   701 class _FileInFile(object):
       
   702     """A thin wrapper around an existing file object that
       
   703        provides a part of its data as an individual file
       
   704        object.
       
   705     """
       
   706 
       
   707     def __init__(self, fileobj, offset, size, sparse=None):
       
   708         self.fileobj = fileobj
       
   709         self.offset = offset
       
   710         self.size = size
       
   711         self.sparse = sparse
       
   712         self.position = 0
       
   713 
       
   714     def tell(self):
       
   715         """Return the current file position.
       
   716         """
       
   717         return self.position
       
   718 
       
   719     def seek(self, position):
       
   720         """Seek to a position in the file.
       
   721         """
       
   722         self.position = position
       
   723 
       
   724     def read(self, size=None):
       
   725         """Read data from the file.
       
   726         """
       
   727         if size is None:
       
   728             size = self.size - self.position
       
   729         else:
       
   730             size = min(size, self.size - self.position)
       
   731 
       
   732         if self.sparse is None:
       
   733             return self.readnormal(size)
       
   734         else:
       
   735             return self.readsparse(size)
       
   736 
       
   737     def readnormal(self, size):
       
   738         """Read operation for regular files.
       
   739         """
       
   740         self.fileobj.seek(self.offset + self.position)
       
   741         self.position += size
       
   742         return self.fileobj.read(size)
       
   743 
       
   744     def readsparse(self, size):
       
   745         """Read operation for sparse files.
       
   746         """
       
   747         data = []
       
   748         while size > 0:
       
   749             buf = self.readsparsesection(size)
       
   750             if not buf:
       
   751                 break
       
   752             size -= len(buf)
       
   753             data.append(buf)
       
   754         return "".join(data)
       
   755 
       
   756     def readsparsesection(self, size):
       
   757         """Read a single section of a sparse file.
       
   758         """
       
   759         section = self.sparse.find(self.position)
       
   760 
       
   761         if section is None:
       
   762             return ""
       
   763 
       
   764         size = min(size, section.offset + section.size - self.position)
       
   765 
       
   766         if isinstance(section, _data):
       
   767             realpos = section.realpos + self.position - section.offset
       
   768             self.fileobj.seek(self.offset + realpos)
       
   769             self.position += size
       
   770             return self.fileobj.read(size)
       
   771         else:
       
   772             self.position += size
       
   773             return NUL * size
       
   774 #class _FileInFile
       
   775 
       
   776 
       
   777 class ExFileObject(object):
       
   778     """File-like object for reading an archive member.
       
   779        Is returned by TarFile.extractfile().
       
   780     """
       
   781     blocksize = 1024
       
   782 
       
   783     def __init__(self, tarfile, tarinfo):
       
   784         self.fileobj = _FileInFile(tarfile.fileobj,
       
   785                                    tarinfo.offset_data,
       
   786                                    tarinfo.size,
       
   787                                    getattr(tarinfo, "sparse", None))
       
   788         self.name = tarinfo.name
       
   789         self.mode = "r"
       
   790         self.closed = False
       
   791         self.size = tarinfo.size
       
   792 
       
   793         self.position = 0
       
   794         self.buffer = ""
       
   795 
       
   796     def read(self, size=None):
       
   797         """Read at most size bytes from the file. If size is not
       
   798            present or None, read all data until EOF is reached.
       
   799         """
       
   800         if self.closed:
       
   801             raise ValueError("I/O operation on closed file")
       
   802 
       
   803         buf = ""
       
   804         if self.buffer:
       
   805             if size is None:
       
   806                 buf = self.buffer
       
   807                 self.buffer = ""
       
   808             else:
       
   809                 buf = self.buffer[:size]
       
   810                 self.buffer = self.buffer[size:]
       
   811 
       
   812         if size is None:
       
   813             buf += self.fileobj.read()
       
   814         else:
       
   815             buf += self.fileobj.read(size - len(buf))
       
   816 
       
   817         self.position += len(buf)
       
   818         return buf
       
   819 
       
   820     def readline(self, size=-1):
       
   821         """Read one entire line from the file. If size is present
       
   822            and non-negative, return a string with at most that
       
   823            size, which may be an incomplete line.
       
   824         """
       
   825         if self.closed:
       
   826             raise ValueError("I/O operation on closed file")
       
   827 
       
   828         if "\n" in self.buffer:
       
   829             pos = self.buffer.find("\n") + 1
       
   830         else:
       
   831             buffers = [self.buffer]
       
   832             while True:
       
   833                 buf = self.fileobj.read(self.blocksize)
       
   834                 buffers.append(buf)
       
   835                 if not buf or "\n" in buf:
       
   836                     self.buffer = "".join(buffers)
       
   837                     pos = self.buffer.find("\n") + 1
       
   838                     if pos == 0:
       
   839                         # no newline found.
       
   840                         pos = len(self.buffer)
       
   841                     break
       
   842 
       
   843         if size != -1:
       
   844             pos = min(size, pos)
       
   845 
       
   846         buf = self.buffer[:pos]
       
   847         self.buffer = self.buffer[pos:]
       
   848         self.position += len(buf)
       
   849         return buf
       
   850 
       
   851     def readlines(self):
       
   852         """Return a list with all remaining lines.
       
   853         """
       
   854         result = []
       
   855         while True:
       
   856             line = self.readline()
       
   857             if not line: break
       
   858             result.append(line)
       
   859         return result
       
   860 
       
   861     def tell(self):
       
   862         """Return the current file position.
       
   863         """
       
   864         if self.closed:
       
   865             raise ValueError("I/O operation on closed file")
       
   866 
       
   867         return self.position
       
   868 
       
   869     def seek(self, pos, whence=os.SEEK_SET):
       
   870         """Seek to a position in the file.
       
   871         """
       
   872         if self.closed:
       
   873             raise ValueError("I/O operation on closed file")
       
   874 
       
   875         if whence == os.SEEK_SET:
       
   876             self.position = min(max(pos, 0), self.size)
       
   877         elif whence == os.SEEK_CUR:
       
   878             if pos < 0:
       
   879                 self.position = max(self.position + pos, 0)
       
   880             else:
       
   881                 self.position = min(self.position + pos, self.size)
       
   882         elif whence == os.SEEK_END:
       
   883             self.position = max(min(self.size + pos, self.size), 0)
       
   884         else:
       
   885             raise ValueError("Invalid argument")
       
   886 
       
   887         self.buffer = ""
       
   888         self.fileobj.seek(self.position)
       
   889 
       
   890     def close(self):
       
   891         """Close the file object.
       
   892         """
       
   893         self.closed = True
       
   894 
       
   895     def __iter__(self):
       
   896         """Get an iterator over the file's lines.
       
   897         """
       
   898         while True:
       
   899             line = self.readline()
       
   900             if not line:
       
   901                 break
       
   902             yield line
       
   903 #class ExFileObject
       
   904 
       
   905 #------------------
       
   906 # Exported Classes
       
   907 #------------------
       
   908 class TarInfo(object):
       
   909     """Informational class which holds the details about an
       
   910        archive member given by a tar header block.
       
   911        TarInfo objects are returned by TarFile.getmember(),
       
   912        TarFile.getmembers() and TarFile.gettarinfo() and are
       
   913        usually created internally.
       
   914     """
       
   915 
       
   916     def __init__(self, name=""):
       
   917         """Construct a TarInfo object. name is the optional name
       
   918            of the member.
       
   919         """
       
   920         self.name = name        # member name
       
   921         self.mode = 0644        # file permissions
       
   922         self.uid = 0            # user id
       
   923         self.gid = 0            # group id
       
   924         self.size = 0           # file size
       
   925         self.mtime = 0          # modification time
       
   926         self.chksum = 0         # header checksum
       
   927         self.type = REGTYPE     # member type
       
   928         self.linkname = ""      # link name
       
   929         self.uname = "root"     # user name
       
   930         self.gname = "root"     # group name
       
   931         self.devmajor = 0       # device major number
       
   932         self.devminor = 0       # device minor number
       
   933 
       
   934         self.offset = 0         # the tar header starts here
       
   935         self.offset_data = 0    # the file's data starts here
       
   936 
       
   937         self.pax_headers = {}   # pax header information
       
   938 
       
    # In pax headers the "name" and "linkname" fields are called
    # "path" and "linkpath". The "path" property is an alias for
    # the "name" attribute.
    def _getpath(self):
        """Return the member name.
        """
        return self.name
    def _setpath(self, name):
        """Set the member name.
        """
        self.name = name
    path = property(_getpath, _setpath)
       
   946 
       
    # The "linkpath" property is an alias for the "linkname"
    # attribute.
    def _getlinkpath(self):
        """Return the link name.
        """
        return self.linkname
    def _setlinkpath(self, linkname):
        """Set the link name.
        """
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)
       
   952 
       
   953     def __repr__(self):
       
   954         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
       
   955 
       
   956     def get_info(self, encoding, errors):
       
   957         """Return the TarInfo's attributes as a dictionary.
       
   958         """
       
   959         info = {
       
   960             "name":     normpath(self.name),
       
   961             "mode":     self.mode & 07777,
       
   962             "uid":      self.uid,
       
   963             "gid":      self.gid,
       
   964             "size":     self.size,
       
   965             "mtime":    self.mtime,
       
   966             "chksum":   self.chksum,
       
   967             "type":     self.type,
       
   968             "linkname": normpath(self.linkname) if self.linkname else "",
       
   969             "uname":    self.uname,
       
   970             "gname":    self.gname,
       
   971             "devmajor": self.devmajor,
       
   972             "devminor": self.devminor
       
   973         }
       
   974 
       
   975         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
       
   976             info["name"] += "/"
       
   977 
       
   978         for key in ("name", "linkname", "uname", "gname"):
       
   979             if type(info[key]) is unicode:
       
   980                 info[key] = info[key].encode(encoding, errors)
       
   981 
       
   982         return info
       
   983 
       
   984     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
       
   985         """Return a tar header as a string of 512 byte blocks.
       
   986         """
       
   987         info = self.get_info(encoding, errors)
       
   988 
       
   989         if format == USTAR_FORMAT:
       
   990             return self.create_ustar_header(info)
       
   991         elif format == GNU_FORMAT:
       
   992             return self.create_gnu_header(info)
       
   993         elif format == PAX_FORMAT:
       
   994             return self.create_pax_header(info, encoding, errors)
       
   995         else:
       
   996             raise ValueError("invalid format")
       
   997 
       
   998     def create_ustar_header(self, info):
       
   999         """Return the object as a ustar header block.
       
  1000         """
       
  1001         info["magic"] = POSIX_MAGIC
       
  1002 
       
  1003         if len(info["linkname"]) > LENGTH_LINK:
       
  1004             raise ValueError("linkname is too long")
       
  1005 
       
  1006         if len(info["name"]) > LENGTH_NAME:
       
  1007             info["prefix"], info["name"] = self._posix_split_name(info["name"])
       
  1008 
       
  1009         return self._create_header(info, USTAR_FORMAT)
       
  1010 
       
  1011     def create_gnu_header(self, info):
       
  1012         """Return the object as a GNU header block sequence.
       
  1013         """
       
  1014         info["magic"] = GNU_MAGIC
       
  1015 
       
  1016         buf = ""
       
  1017         if len(info["linkname"]) > LENGTH_LINK:
       
  1018             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
       
  1019 
       
  1020         if len(info["name"]) > LENGTH_NAME:
       
  1021             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
       
  1022 
       
  1023         return buf + self._create_header(info, GNU_FORMAT)
       
  1024 
       
  1025     def create_pax_header(self, info, encoding, errors):
       
  1026         """Return the object as a ustar header block. If it cannot be
       
  1027            represented this way, prepend a pax extended header sequence
       
  1028            with supplement information.
       
  1029         """
       
  1030         info["magic"] = POSIX_MAGIC
       
  1031         pax_headers = self.pax_headers.copy()
       
  1032 
       
  1033         # Test string fields for values that exceed the field length or cannot
       
  1034         # be represented in ASCII encoding.
       
  1035         for name, hname, length in (
       
  1036                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
       
  1037                 ("uname", "uname", 32), ("gname", "gname", 32)):
       
  1038 
       
  1039             if hname in pax_headers:
       
  1040                 # The pax header has priority.
       
  1041                 continue
       
  1042 
       
  1043             val = info[name].decode(encoding, errors)
       
  1044 
       
  1045             # Try to encode the string as ASCII.
       
  1046             try:
       
  1047                 val.encode("ascii")
       
  1048             except UnicodeEncodeError:
       
  1049                 pax_headers[hname] = val
       
  1050                 continue
       
  1051 
       
  1052             if len(info[name]) > length:
       
  1053                 pax_headers[hname] = val
       
  1054 
       
  1055         # Test number fields for values that exceed the field limit or values
       
  1056         # that like to be stored as float.
       
  1057         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
       
  1058             if name in pax_headers:
       
  1059                 # The pax header has priority. Avoid overflow.
       
  1060                 info[name] = 0
       
  1061                 continue
       
  1062 
       
  1063             val = info[name]
       
  1064             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
       
  1065                 pax_headers[name] = unicode(val)
       
  1066                 info[name] = 0
       
  1067 
       
  1068         # Create a pax extended header if necessary.
       
  1069         if pax_headers:
       
  1070             buf = self._create_pax_generic_header(pax_headers)
       
  1071         else:
       
  1072             buf = ""
       
  1073 
       
  1074         return buf + self._create_header(info, USTAR_FORMAT)
       
  1075 
       
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
           pax_headers is handed to _create_pax_generic_header()
           together with the XGLTYPE header type.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
       
  1081 
       
  1082     def _posix_split_name(self, name):
       
  1083         """Split a name longer than 100 chars into a prefix
       
  1084            and a name part.
       
  1085         """
       
  1086         prefix = name[:LENGTH_PREFIX + 1]
       
  1087         while prefix and prefix[-1] != "/":
       
  1088             prefix = prefix[:-1]
       
  1089 
       
  1090         name = name[len(prefix):]
       
  1091         prefix = prefix[:-1]
       
  1092 
       
  1093         if not prefix or len(name) > LENGTH_NAME:
       
  1094             raise ValueError("name is too long")
       
  1095         return prefix, name
       
  1096 
       
  1097     @staticmethod
       
  1098     def _create_header(info, format):
       
  1099         """Return a header block. info is a dictionary with file
       
  1100            information, format must be one of the *_FORMAT constants.
       
  1101         """
       
  1102         parts = [
       
  1103             stn(info.get("name", ""), 100),
       
  1104             itn(info.get("mode", 0) & 07777, 8, format),
       
  1105             itn(info.get("uid", 0), 8, format),
       
  1106             itn(info.get("gid", 0), 8, format),
       
  1107             itn(info.get("size", 0), 12, format),
       
  1108             itn(info.get("mtime", 0), 12, format),
       
  1109             "        ", # checksum field
       
  1110             info.get("type", REGTYPE),
       
  1111             stn(info.get("linkname", ""), 100),
       
  1112             stn(info.get("magic", POSIX_MAGIC), 8),
       
  1113             stn(info.get("uname", "root"), 32),
       
  1114             stn(info.get("gname", "root"), 32),
       
  1115             itn(info.get("devmajor", 0), 8, format),
       
  1116             itn(info.get("devminor", 0), 8, format),
       
  1117             stn(info.get("prefix", ""), 155)
       
  1118         ]
       
  1119 
       
  1120         buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
       
  1121         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
       
  1122         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
       
  1123         return buf
       
  1124 
       
  1125     @staticmethod
       
  1126     def _create_payload(payload):
       
  1127         """Return the string payload filled with zero bytes
       
  1128            up to the next 512 byte border.
       
  1129         """
       
  1130         blocks, remainder = divmod(len(payload), BLOCKSIZE)
       
  1131         if remainder > 0:
       
  1132             payload += (BLOCKSIZE - remainder) * NUL
       
  1133         return payload
       
  1134 
       
  1135     @classmethod
       
  1136     def _create_gnu_long_header(cls, name, type):
       
  1137         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       
  1138            for name.
       
  1139         """
       
  1140         name += NUL
       
  1141 
       
  1142         info = {}
       
  1143         info["name"] = "././@LongLink"
       
  1144         info["type"] = type
       
  1145         info["size"] = len(name)
       
  1146         info["magic"] = GNU_MAGIC
       
  1147 
       
  1148         # create extended header + name blocks.
       
  1149         return cls._create_header(info, USTAR_FORMAT) + \
       
  1150                 cls._create_payload(name)
       
  1151 
       
  1152     @classmethod
       
  1153     def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
       
  1154         """Return a POSIX.1-2001 extended or global header sequence
       
  1155            that contains a list of keyword, value pairs. The values
       
  1156            must be unicode objects.
       
  1157         """
       
  1158         records = []
       
  1159         for keyword, value in pax_headers.iteritems():
       
  1160             keyword = keyword.encode("utf8")
       
  1161             value = value.encode("utf8")
       
  1162             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
       
  1163             n = p = 0
       
  1164             while True:
       
  1165                 n = l + len(str(p))
       
  1166                 if n == p:
       
  1167                     break
       
  1168                 p = n
       
  1169             records.append("%d %s=%s\n" % (p, keyword, value))
       
  1170         records = "".join(records)
       
  1171 
       
  1172         # We use a hardcoded "././@PaxHeader" name like star does
       
  1173         # instead of the one that POSIX recommends.
       
  1174         info = {}
       
  1175         info["name"] = "././@PaxHeader"
       
  1176         info["type"] = type
       
  1177         info["size"] = len(records)
       
  1178         info["magic"] = POSIX_MAGIC
       
  1179 
       
  1180         # Create pax header + record blocks.
       
  1181         return cls._create_header(info, USTAR_FORMAT) + \
       
  1182                 cls._create_payload(records)
       
  1183 
       
  1184     @classmethod
       
  1185     def frombuf(cls, buf):
       
  1186         """Construct a TarInfo object from a 512 byte string buffer.
       
  1187         """
       
  1188         if len(buf) != BLOCKSIZE:
       
  1189             raise HeaderError("truncated header")
       
  1190         if buf.count(NUL) == BLOCKSIZE:
       
  1191             raise HeaderError("empty header")
       
  1192 
       
  1193         chksum = nti(buf[148:156])
       
  1194         if chksum not in calc_chksums(buf):
       
  1195             raise HeaderError("bad checksum")
       
  1196 
       
  1197         obj = cls()
       
  1198         obj.buf = buf
       
  1199         obj.name = nts(buf[0:100])
       
  1200         obj.mode = nti(buf[100:108])
       
  1201         obj.uid = nti(buf[108:116])
       
  1202         obj.gid = nti(buf[116:124])
       
  1203         obj.size = nti(buf[124:136])
       
  1204         obj.mtime = nti(buf[136:148])
       
  1205         obj.chksum = chksum
       
  1206         obj.type = buf[156:157]
       
  1207         obj.linkname = nts(buf[157:257])
       
  1208         obj.uname = nts(buf[265:297])
       
  1209         obj.gname = nts(buf[297:329])
       
  1210         obj.devmajor = nti(buf[329:337])
       
  1211         obj.devminor = nti(buf[337:345])
       
  1212         prefix = nts(buf[345:500])
       
  1213 
       
  1214         # Old V7 tar format represents a directory as a regular
       
  1215         # file with a trailing slash.
       
  1216         if obj.type == AREGTYPE and obj.name.endswith("/"):
       
  1217             obj.type = DIRTYPE
       
  1218 
       
  1219         # Remove redundant slashes from directories.
       
  1220         if obj.isdir():
       
  1221             obj.name = obj.name.rstrip("/")
       
  1222 
       
  1223         # Reconstruct a ustar longname.
       
  1224         if prefix and obj.type not in GNU_TYPES:
       
  1225             obj.name = prefix + "/" + obj.name
       
  1226         return obj
       
  1227 
       
  1228     @classmethod
       
  1229     def fromtarfile(cls, tarfile):
       
  1230         """Return the next TarInfo object from TarFile object
       
  1231            tarfile.
       
  1232         """
       
  1233         buf = tarfile.fileobj.read(BLOCKSIZE)
       
  1234         if not buf:
       
  1235             return
       
  1236         obj = cls.frombuf(buf)
       
  1237         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
       
  1238         return obj._proc_member(tarfile)
       
  1239 
       
  1240     #--------------------------------------------------------------------------
       
  1241     # The following are methods that are called depending on the type of a
       
  1242     # member. The entry point is _proc_member() which can be overridden in a
       
  1243     # subclass to add custom _proc_*() methods. A _proc_*() method MUST
       
  1244     # implement the following
       
  1245     # operations:
       
  1246     # 1. Set self.offset_data to the position where the data blocks begin,
       
  1247     #    if there is data that follows.
       
  1248     # 2. Set tarfile.offset to the position where the next member's header will
       
  1249     #    begin.
       
  1250     # 3. Return self or another valid TarInfo object.
       
  1251     def _proc_member(self, tarfile):
       
  1252         """Choose the right processing method depending on
       
  1253            the type and call it.
       
  1254         """
       
  1255         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
       
  1256             return self._proc_gnulong(tarfile)
       
  1257         elif self.type == GNUTYPE_SPARSE:
       
  1258             return self._proc_sparse(tarfile)
       
  1259         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
       
  1260             return self._proc_pax(tarfile)
       
  1261         else:
       
  1262             return self._proc_builtin(tarfile)
       
  1263 
       
  1264     def _proc_builtin(self, tarfile):
       
  1265         """Process a builtin type or an unknown type which
       
  1266            will be treated as a regular file.
       
  1267         """
       
  1268         self.offset_data = tarfile.fileobj.tell()
       
  1269         offset = self.offset_data
       
  1270         if self.isreg() or self.type not in SUPPORTED_TYPES:
       
  1271             # Skip the following data blocks.
       
  1272             offset += self._block(self.size)
       
  1273         tarfile.offset = offset
       
  1274 
       
  1275         # Patch the TarInfo object with saved global
       
  1276         # header information.
       
  1277         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
       
  1278 
       
  1279         return self
       
  1280 
       
  1281     def _proc_gnulong(self, tarfile):
       
  1282         """Process the blocks that hold a GNU longname
       
  1283            or longlink member.
       
  1284         """
       
  1285         buf = tarfile.fileobj.read(self._block(self.size))
       
  1286 
       
  1287         # Fetch the next header and process it.
       
  1288         next = self.fromtarfile(tarfile)
       
  1289         if next is None:
       
  1290             raise HeaderError("missing subsequent header")
       
  1291 
       
  1292         # Patch the TarInfo object from the next header with
       
  1293         # the longname information.
       
  1294         next.offset = self.offset
       
  1295         if self.type == GNUTYPE_LONGNAME:
       
  1296             next.name = nts(buf)
       
  1297         elif self.type == GNUTYPE_LONGLINK:
       
  1298             next.linkname = nts(buf)
       
  1299 
       
  1300         return next
       
  1301 
       
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           The sparse structs of the header are turned into a
           sequence of _hole and _data sections which is stored in
           self.sparse. self.size is replaced by the original file
           size that is recorded in the header. Returns self.
        """
        buf = self.buf
        sp = _ringbuffer()
        # Each sparse struct is 24 bytes long: 12 bytes for the
        # offset followed by 12 bytes for the number of bytes.
        pos = 386
        # lastpos is the end of the previous data section in the
        # original file, realpos the running total of the bytes of
        # all data sections seen so far.
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                # Stop at the first struct that cannot be parsed.
                break
            if offset > lastpos:
                # The gap before this data section is a hole.
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Up to 21 sparse structs are read from an extra header.
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            # The extra header has its own isextended flag.
            isextended = ord(buf[504])

        # Whatever is left up to the original size is a hole.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still is the size from the header here, it is
        # needed to find the position of the next header.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self
       
  1357 
       
  def _proc_pax(self, tarfile):
      """Process an extended or global header as described in
         POSIX.1-2001.

         The records of the header are parsed into a dictionary. For a
         global header (XGLTYPE) they update tarfile.pax_headers in place,
         otherwise they go into a copy of it. The following header is
         then read and returned; for an extended header it is patched
         with the parsed information first.

         Raises HeaderError if a record claims a length of zero or if an
         extended header is not followed by another header.
      """
      # Read the header information.
      buf = tarfile.fileobj.read(self._block(self.size))

      # A pax header stores supplemental information for either
      # the following file (extended) or all following files
      # (global).
      if self.type == XGLTYPE:
          pax_headers = tarfile.pax_headers
      else:
          pax_headers = tarfile.pax_headers.copy()

      # Parse pax header information. A record looks like that:
      # "%d %s=%s\n" % (length, keyword, value). length is the size
      # of the complete record including the length field itself and
      # the newline. keyword and value are both UTF-8 encoded strings.
      regex = re.compile(r"(\d+) ([^=]+)=", re.U)
      pos = 0
      while True:
          match = regex.match(buf, pos)
          if not match:
              break

          length, keyword = match.groups()
          length = int(length)
          if length == 0:
              # A zero-length record would never advance pos, so the
              # same record would be matched forever.
              raise HeaderError("invalid header")
          value = buf[match.end(2) + 1:match.start(1) + length - 1]

          keyword = keyword.decode("utf8")
          value = value.decode("utf8")

          pax_headers[keyword] = value
          pos += length

      # Fetch the next header.
      next = self.fromtarfile(tarfile)

      if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
          if next is None:
              raise HeaderError("missing subsequent header")

          # Patch the TarInfo object with the extended header info.
          next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
          next.offset = self.offset

          if "size" in pax_headers:
              # If the extended header replaces the size field,
              # we need to recalculate the offset where the next
              # header starts.
              offset = next.offset_data
              if next.isreg() or next.type not in SUPPORTED_TYPES:
                  offset += next._block(next.size)
              tarfile.offset = offset

      return next
       
  1415 
       
  def _apply_pax_info(self, pax_headers, encoding, errors):
      """Overwrite attributes of this object with the values found in
         the pax extended or global header dictionary `pax_headers'.

         Only keywords listed in PAX_FIELDS are applied. Values of
         keywords in PAX_NUMBER_FIELDS are converted with the associated
         callable (0 if that raises ValueError), all other values are
         passed through uts() with `encoding' and `errors'. A copy of
         `pax_headers' is kept in self.pax_headers.
      """
      for field, raw in pax_headers.iteritems():
          if field in PAX_FIELDS:
              if field == "path":
                  raw = raw.rstrip("/")

              if field not in PAX_NUMBER_FIELDS:
                  converted = uts(raw, encoding, errors)
              else:
                  try:
                      converted = PAX_NUMBER_FIELDS[field](raw)
                  except ValueError:
                      converted = 0

              setattr(self, field, converted)

      self.pax_headers = pax_headers.copy()
       
  1438 
       
  def _block(self, count):
      """Return `count' rounded up to the next multiple of BLOCKSIZE,
         e.g. _block(834) => 1024.
      """
      remainder = count % BLOCKSIZE
      if remainder:
          # Pad the partially filled last block.
          count += BLOCKSIZE - remainder
      return count
       
  1447 
       
  def isreg(self):
      """Return True if the type of this member is in REGULAR_TYPES."""
      return self.type in REGULAR_TYPES
  def isfile(self):
      """Return the result of isreg()."""
      return self.isreg()
  def isdir(self):
      """Return True if the type of this member is DIRTYPE."""
      return self.type == DIRTYPE
  def issym(self):
      """Return True if the type of this member is SYMTYPE."""
      return self.type == SYMTYPE
  def islnk(self):
      """Return True if the type of this member is LNKTYPE."""
      return self.type == LNKTYPE
  def ischr(self):
      """Return True if the type of this member is CHRTYPE."""
      return self.type == CHRTYPE
  def isblk(self):
      """Return True if the type of this member is BLKTYPE."""
      return self.type == BLKTYPE
  def isfifo(self):
      """Return True if the type of this member is FIFOTYPE."""
      return self.type == FIFOTYPE
  def issparse(self):
      """Return True if the type of this member is GNUTYPE_SPARSE."""
      return self.type == GNUTYPE_SPARSE
  def isdev(self):
      """Return True if the type of this member is CHRTYPE, BLKTYPE or
         FIFOTYPE.
      """
      return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
       
  1468 # class TarInfo
       
  1469 
       
  1470 class TarFile(object):
       
  1471     """The TarFile Class provides an interface to tar archives.
       
  1472     """
       
  1473 
       
  1474     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
       
  1475 
       
  1476     dereference = False         # If true, add content of linked file to the
       
  1477                                 # tar file, else the link.
       
  1478 
       
  1479     ignore_zeros = False        # If true, skips empty or invalid blocks and
       
  1480                                 # continues processing.
       
  1481 
       
  1482     errorlevel = 0              # If 0, fatal errors only appear in debug
       
  1483                                 # messages (if debug >= 0). If > 0, errors
       
  1484                                 # are passed to the caller as exceptions.
       
  1485 
       
  1486     format = DEFAULT_FORMAT     # The format to use when creating an archive.
       
  1487 
       
  1488     encoding = ENCODING         # Encoding for 8-bit character strings.
       
  1489 
       
  1490     errors = None               # Error handler for unicode conversion.
       
  1491 
       
  1492     tarinfo = TarInfo           # The default TarInfo class to use.
       
  1493 
       
  1494     fileobject = ExFileObject   # The default ExFileObject class to use.
       
  1495 
       
  def __init__(self, name=None, mode="r", fileobj=None, format=None,
          tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
          errors=None, pax_headers=None, debug=None, errorlevel=None):
      """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
         read from an existing archive, 'a' to append data to an existing
         file or 'w' to create a new file overwriting an existing one. `mode'
         defaults to 'r'.
         If `fileobj' is given, it is used for reading or writing data. If it
         can be determined, `mode' is overridden by `fileobj's mode.
         `fileobj' is not closed, when TarFile is closed.

         The remaining arguments override the class attributes of the same
         name when they are not None. `pax_headers' is only used if the
         format is PAX_FORMAT.

         Raises ValueError if `mode' is not one of 'r', 'a' or 'w'. If
         positioning in the archive fails, a file that was opened here is
         closed again before the exception is passed on.
      """
      # An exact lookup is used instead of a substring test, so that
      # e.g. the empty string is rejected with a ValueError as well.
      modes = {"r": "rb", "a": "r+b", "w": "wb"}
      if mode not in modes:
          raise ValueError("mode must be 'r', 'a' or 'w'")
      self.mode = mode
      self._mode = modes[mode]

      if not fileobj:
          if self.mode == "a" and not os.path.exists(name):
              # Create nonexistent files in append mode.
              self.mode = "w"
              self._mode = "wb"
          fileobj = bltn_open(name, self._mode)
          self._extfileobj = False
      else:
          if name is None and hasattr(fileobj, "name"):
              name = fileobj.name
          if hasattr(fileobj, "mode"):
              self._mode = fileobj.mode
          self._extfileobj = True
      self.name = os.path.abspath(name) if name else None
      self.fileobj = fileobj

      # Init attributes.
      if format is not None:
          self.format = format
      if tarinfo is not None:
          self.tarinfo = tarinfo
      if dereference is not None:
          self.dereference = dereference
      if ignore_zeros is not None:
          self.ignore_zeros = ignore_zeros
      if encoding is not None:
          self.encoding = encoding

      if errors is not None:
          self.errors = errors
      elif mode == "r":
          self.errors = "utf-8"
      else:
          self.errors = "strict"

      if pax_headers is not None and self.format == PAX_FORMAT:
          self.pax_headers = pax_headers
      else:
          self.pax_headers = {}

      if debug is not None:
          self.debug = debug
      if errorlevel is not None:
          self.errorlevel = errorlevel

      # Init datastructures.
      self.closed = False
      self.members = []       # list of members as TarInfo objects
      self._loaded = False    # flag if all members have been read
      self.offset = self.fileobj.tell()
                              # current position in the archive file
      self.inodes = {}        # dictionary caching the inodes of
                              # archive members already added

      try:
          if self.mode == "r":
              self.firstmember = None
              self.firstmember = self.next()

          if self.mode == "a":
              # Move to the end of the archive,
              # before the first empty block.
              self.firstmember = None
              while True:
                  if self.next() is None:
                      if self.offset > 0:
                          self.fileobj.seek(- BLOCKSIZE, 1)
                      break

          if self.mode in ("a", "w"):
              self._loaded = True

              if self.pax_headers:
                  buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                  self.fileobj.write(buf)
                  self.offset += len(buf)
      except:
          # The caller never gets hold of this object, so nobody else
          # could close a file that was opened above. The exception is
          # always re-raised.
          if not self._extfileobj:
              self.fileobj.close()
          self.closed = True
          raise
       
  1587 
       
  def _getposix(self):
      """Return True if the archive format is USTAR_FORMAT."""
      return self.format == USTAR_FORMAT
  def _setposix(self, value):
      """Deprecated setter: emit a DeprecationWarning, then select
         USTAR_FORMAT for a true `value' and GNU_FORMAT otherwise.
      """
      import warnings
      warnings.warn("use the format attribute instead", DeprecationWarning)
      self.format = USTAR_FORMAT if value else GNU_FORMAT
  posix = property(_getposix, _setposix)
       
  1598 
       
  1599     #--------------------------------------------------------------------------
       
  1600     # Below are the classmethods which act as alternate constructors to the
       
  1601     # TarFile class. The open() method is the only one that is needed for
       
  1602     # public use; it is the "super"-constructor and is able to select an
       
  1603     # adequate "sub"-constructor for a particular compression using the mapping
       
  1604     # from OPEN_METH.
       
  1605     #
       
  1606     # This concept allows one to subclass TarFile without losing the comfort of
       
  1607     # the super-constructor. A sub-constructor is registered and made available
       
  1608     # by adding it to the mapping in OPEN_METH.
       
  1609 
       
  @classmethod
  def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
      """Open a tar archive for reading, writing or appending. Return
         an appropriate TarFile class.

         mode:
         'r' or 'r:*' open for reading with transparent compression
         'r:'         open for reading exclusively uncompressed
         'r:gz'       open for reading with gzip compression
         'r:bz2'      open for reading with bzip2 compression
         'a' or 'a:'  open for appending, creating the file if necessary
         'w' or 'w:'  open for writing without compression
         'w:gz'       open for writing with gzip compression
         'w:bz2'      open for writing with bzip2 compression

         'r|*'        open a stream of tar blocks with transparent compression
         'r|'         open an uncompressed stream of tar blocks for reading
         'r|gz'       open a gzip compressed stream of tar blocks
         'r|bz2'      open a bzip2 compressed stream of tar blocks
         'w|'         open an uncompressed stream for writing
         'w|gz'       open a gzip compressed stream for writing
         'w|bz2'      open a bzip2 compressed stream for writing

         Raises ValueError if neither `name' nor `fileobj' is given or if
         `mode' is not understood, CompressionError for an unknown
         compression type and ReadError if none of the registered
         *open() methods is able to read the archive.
      """

      if not name and not fileobj:
          raise ValueError("nothing to open")

      if mode in ("r", "r:*"):
          # Find out which *open() is appropriate for opening the file.
          for comptype in cls.OPEN_METH:
              func = getattr(cls, cls.OPEN_METH[comptype])
              if fileobj is not None:
                  saved_pos = fileobj.tell()
              try:
                  return func(name, "r", fileobj, **kwargs)
              except (ReadError, CompressionError):
                  # Rewind a caller-supplied file object, so that the
                  # next candidate starts at the same position.
                  if fileobj is not None:
                      fileobj.seek(saved_pos)
                  continue
          raise ReadError("file could not be opened successfully")

      elif ":" in mode:
          filemode, comptype = mode.split(":", 1)
          filemode = filemode or "r"
          comptype = comptype or "tar"

          # Select the *open() function according to
          # given compression.
          if comptype in cls.OPEN_METH:
              func = getattr(cls, cls.OPEN_METH[comptype])
          else:
              raise CompressionError("unknown compression type %r" % comptype)
          return func(name, filemode, fileobj, **kwargs)

      elif "|" in mode:
          filemode, comptype = mode.split("|", 1)
          filemode = filemode or "r"
          comptype = comptype or "tar"

          # Compare against the exact values; a substring test would
          # let "rw" slip through.
          if filemode not in ("r", "w"):
              raise ValueError("mode must be 'r' or 'w'")

          t = cls(name, filemode,
                  _Stream(name, filemode, comptype, fileobj, bufsize),
                  **kwargs)
          t._extfileobj = False
          return t

      elif mode in ("a", "w"):
          # Exact comparison: "" and "aw" end up in the ValueError below.
          return cls.taropen(name, mode, fileobj, **kwargs)

      raise ValueError("undiscernible mode")
       
  1682 
       
  @classmethod
  def taropen(cls, name, mode="r", fileobj=None, **kwargs):
      """Open uncompressed tar archive name for reading or writing.

         The arguments are passed on to the constructor of `cls'. Raises
         ValueError if `mode' is not one of 'r', 'a' or 'w'.
      """
      # An exact comparison also rejects the empty string, which a
      # substring test against "raw" would accept.
      if mode not in ("r", "a", "w"):
          raise ValueError("mode must be 'r', 'a' or 'w'")
      return cls(name, mode, fileobj, **kwargs)
       
  1690 
       
  @classmethod
  def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
      """Open gzip compressed tar archive name for reading or writing.
         Appending is not allowed.

         If `fileobj' is None, the file `name' is opened here in binary
         mode; it is closed again if the archive cannot be opened.
         Raises ValueError for a `mode' other than 'r' or 'w',
         CompressionError if the gzip module is not usable and ReadError
         if opening the archive fails with an IOError.
      """
      # An exact comparison also rejects the empty string, which a
      # substring test against "rw" would accept.
      if mode not in ("r", "w"):
          raise ValueError("mode must be 'r' or 'w'")

      try:
          import gzip
          gzip.GzipFile
      except (ImportError, AttributeError):
          raise CompressionError("gzip module is not available")

      opened_here = fileobj is None
      if opened_here:
          fileobj = bltn_open(name, mode + "b")

      try:
          t = cls.taropen(name, mode,
              gzip.GzipFile(name, mode, compresslevel, fileobj),
              **kwargs)
      except IOError:
          if opened_here:
              fileobj.close()
          raise ReadError("not a gzip file")
      except:
          # Any other failure: do not leak the file opened above.
          # The exception is always re-raised.
          if opened_here:
              fileobj.close()
          raise
      t._extfileobj = False
      return t
       
  1716 
       
  @classmethod
  def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
      """Open bzip2 compressed tar archive name for reading or writing.
         Appending is not allowed.

         A given `fileobj' is wrapped in a _BZ2Proxy, otherwise the file
         `name' is opened as a bz2.BZ2File; the latter is closed again if
         the archive cannot be opened. Raises ValueError for a `mode'
         other than 'r' or 'w', CompressionError if the bz2 module cannot
         be imported and ReadError if opening the archive fails with an
         IOError.
      """
      # An exact comparison also rejects the empty string, which a
      # substring test against "rw" would accept.
      if mode not in ("r", "w"):
          raise ValueError("mode must be 'r' or 'w'.")

      try:
          import bz2
      except ImportError:
          raise CompressionError("bz2 module is not available")

      opened_here = fileobj is None
      if opened_here:
          fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
      else:
          fileobj = _BZ2Proxy(fileobj, mode)

      try:
          t = cls.taropen(name, mode, fileobj, **kwargs)
      except IOError:
          if opened_here:
              fileobj.close()
          raise ReadError("not a bzip2 file")
      except:
          # Any other failure: do not leak the BZ2File opened above.
          # The exception is always re-raised.
          if opened_here:
              fileobj.close()
          raise
      t._extfileobj = False
      return t
       
  1741 
       
  1742     # All *open() methods are registered here.
       
  1743     OPEN_METH = {
       
  1744         "tar": "taropen",   # uncompressed tar
       
  1745         "gz":  "gzopen",    # gzip compressed tar
       
  1746         "bz2": "bz2open"    # bzip2 compressed tar
       
  1747     }
       
  1748 
       
  1749     #--------------------------------------------------------------------------
       
  1750     # The public methods which TarFile provides:
       
  1751 
       
  def close(self):
      """Close the TarFile. In write-mode, two finishing zero blocks are
         appended to the archive.

         Calling close() on an already closed TarFile does nothing. The
         underlying file object is closed unless it was supplied by the
         caller, even if writing the finishing blocks fails.
      """
      if self.closed:
          return

      # Mark the object closed first, so that a failure below cannot
      # lead to the finishing blocks being written a second time.
      self.closed = True
      try:
          if self.mode in ("a", "w"):
              self.fileobj.write(NUL * (BLOCKSIZE * 2))
              self.offset += (BLOCKSIZE * 2)
              # fill up the end with zero-blocks
              # (like option -b20 for tar does)
              blocks, remainder = divmod(self.offset, RECORDSIZE)
              if remainder > 0:
                  self.fileobj.write(NUL * (RECORDSIZE - remainder))
      finally:
          if not self._extfileobj:
              self.fileobj.close()
       
  1771 
       
  def getmember(self, name):
      """Return a TarInfo object for member `name'. If `name' can not be
         found in the archive, KeyError is raised. If a member occurs more
         than once in the archive, its last occurrence is assumed to be the
         most up-to-date version.
      """
      found = self._getmember(name)
      if found is not None:
          return found
      raise KeyError("filename %r not found" % name)
       
  1782 
       
  def getmembers(self):
      """Return the members of the archive as a list of TarInfo objects. The
         list has the same order as the members in the archive.
      """
      self._check()
      if self._loaded:
          return self.members
      # A complete list of members requires a scan of the whole archive.
      self._load()
      return self.members
       
  1792 
       
  def getnames(self):
      """Return the names of the archive members as a list, in the same
         order as the list returned by getmembers().
      """
      names = []
      for member in self.getmembers():
          names.append(member.name)
      return names
       
  1798 
       
  1799     def gettarinfo(self, name=None, arcname=None, fileobj=None):
       
  1800         """Create a TarInfo object for either the file `name' or the file
       
  1801            object `fileobj' (using os.fstat on its file descriptor). You can
       
  1802            modify some of the TarInfo's attributes before you add it using
       
  1803            addfile(). If given, `arcname' specifies an alternative name for the
       
  1804            file in the archive.
       
  1805         """
       
  1806         self._check("aw")
       
  1807 
       
  1808         # When fileobj is given, replace name by
       
  1809         # fileobj's real name.
       
  1810         if fileobj is not None:
       
  1811             name = fileobj.name
       
  1812 
       
  1813         # Building the name of the member in the archive.
       
  1814         # Backward slashes are converted to forward slashes,
       
  1815         # Absolute paths are turned to relative paths.
       
  1816         if arcname is None:
       
  1817             arcname = name
       
  1818         arcname = normpath(arcname)
       
  1819         drv, arcname = os.path.splitdrive(arcname)
       
  1820         while arcname[0:1] == "/":
       
  1821             arcname = arcname[1:]
       
  1822 
       
  1823         # Now, fill the TarInfo object with
       
  1824         # information specific for the file.
       
  1825         tarinfo = self.tarinfo()
       
  1826         tarinfo.tarfile = self
       
  1827 
       
  1828         # Use os.stat or os.lstat, depending on platform
       
  1829         # and if symlinks shall be resolved.
       
  1830         if fileobj is None:
       
  1831             if hasattr(os, "lstat") and not self.dereference:
       
  1832                 statres = os.lstat(name)
       
  1833             else:
       
  1834                 statres = os.stat(name)
       
  1835         else:
       
  1836             statres = os.fstat(fileobj.fileno())
       
  1837         linkname = ""
       
  1838 
       
  1839         stmd = statres.st_mode
       
  1840         if stat.S_ISREG(stmd):
       
  1841             inode = (statres.st_ino, statres.st_dev)
       
  1842             if not self.dereference and statres.st_nlink > 1 and \
       
  1843                     inode in self.inodes and arcname != self.inodes[inode]:
       
  1844                 # Is it a hardlink to an already
       
  1845                 # archived file?
       
  1846                 type = LNKTYPE
       
  1847                 linkname = self.inodes[inode]
       
  1848             else:
       
  1849                 # The inode is added only if its valid.
       
  1850                 # For win32 it is always 0.
       
  1851                 type = REGTYPE
       
  1852                 if inode[0]:
       
  1853                     self.inodes[inode] = arcname
       
  1854         elif stat.S_ISDIR(stmd):
       
  1855             type = DIRTYPE
       
  1856         elif stat.S_ISFIFO(stmd):
       
  1857             type = FIFOTYPE
       
  1858         elif stat.S_ISLNK(stmd):
       
  1859             type = SYMTYPE
       
  1860             linkname = os.readlink(name)
       
  1861         elif stat.S_ISCHR(stmd):
       
  1862             type = CHRTYPE
       
  1863         elif stat.S_ISBLK(stmd):
       
  1864             type = BLKTYPE
       
  1865         else:
       
  1866             return None
       
  1867 
       
  1868         # Fill the TarInfo object with all
       
  1869         # information we can get.
       
  1870         tarinfo.name = arcname
       
  1871         tarinfo.mode = stmd
       
  1872         tarinfo.uid = statres.st_uid
       
  1873         tarinfo.gid = statres.st_gid
       
  1874         if stat.S_ISREG(stmd):
       
  1875             tarinfo.size = statres.st_size
       
  1876         else:
       
  1877             tarinfo.size = 0L
       
  1878         tarinfo.mtime = statres.st_mtime
       
  1879         tarinfo.type = type
       
  1880         tarinfo.linkname = linkname
       
  1881         if pwd:
       
  1882             try:
       
  1883                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
       
  1884             except KeyError:
       
  1885                 pass
       
  1886         if grp:
       
  1887             try:
       
  1888                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
       
  1889             except KeyError:
       
  1890                 pass
       
  1891 
       
  1892         if type in (CHRTYPE, BLKTYPE):
       
  1893             if hasattr(os, "major") and hasattr(os, "minor"):
       
  1894                 tarinfo.devmajor = os.major(statres.st_rdev)
       
  1895                 tarinfo.devminor = os.minor(statres.st_rdev)
       
  1896         return tarinfo
       
  1897 
       
  1898     def list(self, verbose=True):
       
  1899         """Print a table of contents to sys.stdout. If `verbose' is False, only
       
  1900            the names of the members are printed. If it is True, an `ls -l'-like
       
  1901            output is produced.
       
  1902         """
       
  1903         self._check()
       
  1904 
       
  1905         for tarinfo in self:
       
  1906             if verbose:
       
  1907                 print filemode(tarinfo.mode),
       
  1908                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
       
  1909                                  tarinfo.gname or tarinfo.gid),
       
  1910                 if tarinfo.ischr() or tarinfo.isblk():
       
  1911                     print "%10s" % ("%d,%d" \
       
  1912                                     % (tarinfo.devmajor, tarinfo.devminor)),
       
  1913                 else:
       
  1914                     print "%10d" % tarinfo.size,
       
  1915                 print "%d-%02d-%02d %02d:%02d:%02d" \
       
  1916                       % time.localtime(tarinfo.mtime)[:6],
       
  1917 
       
  1918             print tarinfo.name + ("/" if tarinfo.isdir() else ""),
       
  1919 
       
  1920             if verbose:
       
  1921                 if tarinfo.issym():
       
  1922                     print "->", tarinfo.linkname,
       
  1923                 if tarinfo.islnk():
       
  1924                     print "link to", tarinfo.linkname,
       
  1925             print
       
  1926 
       
  def add(self, name, arcname=None, recursive=True, exclude=None):
      """Add the file `name' to the archive. `name' may be any type of file
         (directory, fifo, symbolic link, etc.). If given, `arcname'
         specifies an alternative name for the file in the archive.
         Directories are added recursively by default. This can be avoided by
         setting `recursive' to False. `exclude' is a function that should
         return True for each filename to be excluded.

         Excluded files, the archive itself and files of an unsupported
         type are skipped with a debug message.
      """
      self._check("aw")

      if arcname is None:
          arcname = name

      # Exclude pathnames.
      if exclude is not None and exclude(name):
          self._dbg(2, "tarfile: Excluded %r" % name)
          return

      # Skip if somebody tries to archive the archive...
      if self.name is not None and os.path.abspath(name) == self.name:
          self._dbg(2, "tarfile: Skipped %r" % name)
          return

      # Special case: The user wants to add the current
      # working directory.
      if name == ".":
          if recursive:
              if arcname == ".":
                  arcname = ""
              for f in os.listdir(name):
                  self.add(f, os.path.join(arcname, f), recursive, exclude)
          return

      self._dbg(1, name)

      # Create a TarInfo object from the file.
      tarinfo = self.gettarinfo(name, arcname)

      if tarinfo is None:
          self._dbg(1, "tarfile: Unsupported type %r" % name)
          return

      # Append the tar header and data to the archive.
      if tarinfo.isreg():
          f = bltn_open(name, "rb")
          try:
              self.addfile(tarinfo, f)
          finally:
              # Release the file even if writing to the archive fails.
              f.close()

      elif tarinfo.isdir():
          self.addfile(tarinfo)
          if recursive:
              for f in os.listdir(name):
                  self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)

      else:
          self.addfile(tarinfo)
       
  1983 
       
  1984     def addfile(self, tarinfo, fileobj=None):
       
  1985         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       
  1986            given, tarinfo.size bytes are read from it and added to the archive.
       
  1987            You can create TarInfo objects using gettarinfo().
       
  1988            On Windows platforms, `fileobj' should always be opened with mode
       
  1989            'rb' to avoid irritation about the file size.
       
  1990         """
       
  1991         self._check("aw")
       
  1992 
       
  1993         tarinfo = copy.copy(tarinfo)
       
  1994 
       
  1995         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
       
  1996         self.fileobj.write(buf)
       
  1997         self.offset += len(buf)
       
  1998 
       
  1999         # If there's data to follow, append it.
       
  2000         if fileobj is not None:
       
  2001             copyfileobj(fileobj, self.fileobj, tarinfo.size)
       
  2002             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
       
  2003             if remainder > 0:
       
  2004                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
       
  2005                 blocks += 1
       
  2006             self.offset += blocks * BLOCKSIZE
       
  2007 
       
  2008         self.members.append(tarinfo)
       
  2009 
       
  2010     def extractall(self, path=".", members=None):
       
  2011         """Extract all members from the archive to the current working
       
  2012            directory and set owner, modification time and permissions on
       
  2013            directories afterwards. `path' specifies a different directory
       
  2014            to extract to. `members' is optional and must be a subset of the
       
  2015            list returned by getmembers().
       
  2016         """
       
  2017         directories = []
       
  2018 
       
  2019         if members is None:
       
  2020             members = self
       
  2021 
       
  2022         for tarinfo in members:
       
  2023             if tarinfo.isdir():
       
  2024                 # Extract directories with a safe mode.
       
  2025                 directories.append(tarinfo)
       
  2026                 tarinfo = copy.copy(tarinfo)
       
  2027                 tarinfo.mode = 0700
       
  2028             self.extract(tarinfo, path)
       
  2029 
       
  2030         # Reverse sort directories.
       
  2031         directories.sort(key=operator.attrgetter('name'))
       
  2032         directories.reverse()
       
  2033 
       
  2034         # Set correct owner, mtime and filemode on directories.
       
  2035         for tarinfo in directories:
       
  2036             dirpath = os.path.join(path, tarinfo.name)
       
  2037             try:
       
  2038                 self.chown(tarinfo, dirpath)
       
  2039                 self.utime(tarinfo, dirpath)
       
  2040                 self.chmod(tarinfo, dirpath)
       
  2041             except ExtractError, e:
       
  2042                 if self.errorlevel > 1:
       
  2043                     raise
       
  2044                 else:
       
  2045                     self._dbg(1, "tarfile: %s" % e)
       
  2046 
       
  2047     def extract(self, member, path=""):
       
  2048         """Extract a member from the archive to the current working directory,
       
  2049            using its full name. Its file information is extracted as accurately
       
  2050            as possible. `member' may be a filename or a TarInfo object. You can
       
  2051            specify a different directory using `path'.
       
  2052         """
       
  2053         self._check("r")
       
  2054 
       
  2055         if isinstance(member, basestring):
       
  2056             tarinfo = self.getmember(member)
       
  2057         else:
       
  2058             tarinfo = member
       
  2059 
       
  2060         # Prepare the link target for makelink().
       
  2061         if tarinfo.islnk():
       
  2062             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
       
  2063 
       
  2064         try:
       
  2065             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
       
  2066         except EnvironmentError, e:
       
  2067             if self.errorlevel > 0:
       
  2068                 raise
       
  2069             else:
       
  2070                 if e.filename is None:
       
  2071                     self._dbg(1, "tarfile: %s" % e.strerror)
       
  2072                 else:
       
  2073                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
       
  2074         except ExtractError, e:
       
  2075             if self.errorlevel > 1:
       
  2076                 raise
       
  2077             else:
       
  2078                 self._dbg(1, "tarfile: %s" % e)
       
  2079 
       
  2080     def extractfile(self, member):
       
  2081         """Extract a member from the archive as a file object. `member' may be
       
  2082            a filename or a TarInfo object. If `member' is a regular file, a
       
  2083            file-like object is returned. If `member' is a link, a file-like
       
  2084            object is constructed from the link's target. If `member' is none of
       
  2085            the above, None is returned.
       
  2086            The file-like object is read-only and provides the following
       
  2087            methods: read(), readline(), readlines(), seek() and tell()
       
  2088         """
       
  2089         self._check("r")
       
  2090 
       
  2091         if isinstance(member, basestring):
       
  2092             tarinfo = self.getmember(member)
       
  2093         else:
       
  2094             tarinfo = member
       
  2095 
       
  2096         if tarinfo.isreg():
       
  2097             return self.fileobject(self, tarinfo)
       
  2098 
       
  2099         elif tarinfo.type not in SUPPORTED_TYPES:
       
  2100             # If a member's type is unknown, it is treated as a
       
  2101             # regular file.
       
  2102             return self.fileobject(self, tarinfo)
       
  2103 
       
  2104         elif tarinfo.islnk() or tarinfo.issym():
       
  2105             if isinstance(self.fileobj, _Stream):
       
  2106                 # A small but ugly workaround for the case that someone tries
       
  2107                 # to extract a (sym)link as a file-object from a non-seekable
       
  2108                 # stream of tar blocks.
       
  2109                 raise StreamError("cannot extract (sym)link as file object")
       
  2110             else:
       
  2111                 # A (sym)link's file object is its target's file object.
       
  2112                 return self.extractfile(self._getmember(tarinfo.linkname,
       
  2113                                                         tarinfo))
       
  2114         else:
       
  2115             # If there's no data associated with the member (directory, chrdev,
       
  2116             # blkdev, etc.), return None instead of a file object.
       
  2117             return None
       
  2118 
       
  2119     def _extract_member(self, tarinfo, targetpath):
       
  2120         """Extract the TarInfo object tarinfo to a physical
       
  2121            file called targetpath.
       
  2122         """
       
  2123         # Fetch the TarInfo object for the given name
       
  2124         # and build the destination pathname, replacing
       
  2125         # forward slashes to platform specific separators.
       
  2126         if targetpath[-1:] == "/":
       
  2127             targetpath = targetpath[:-1]
       
  2128         targetpath = os.path.normpath(targetpath)
       
  2129 
       
  2130         # Create all upper directories.
       
  2131         upperdirs = os.path.dirname(targetpath)
       
  2132         if upperdirs and not os.path.exists(upperdirs):
       
  2133             # Create directories that are not part of the archive with
       
  2134             # default permissions.
       
  2135             os.makedirs(upperdirs)
       
  2136 
       
  2137         if tarinfo.islnk() or tarinfo.issym():
       
  2138             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
       
  2139         else:
       
  2140             self._dbg(1, tarinfo.name)
       
  2141 
       
  2142         if tarinfo.isreg():
       
  2143             self.makefile(tarinfo, targetpath)
       
  2144         elif tarinfo.isdir():
       
  2145             self.makedir(tarinfo, targetpath)
       
  2146         elif tarinfo.isfifo():
       
  2147             self.makefifo(tarinfo, targetpath)
       
  2148         elif tarinfo.ischr() or tarinfo.isblk():
       
  2149             self.makedev(tarinfo, targetpath)
       
  2150         elif tarinfo.islnk() or tarinfo.issym():
       
  2151             self.makelink(tarinfo, targetpath)
       
  2152         elif tarinfo.type not in SUPPORTED_TYPES:
       
  2153             self.makeunknown(tarinfo, targetpath)
       
  2154         else:
       
  2155             self.makefile(tarinfo, targetpath)
       
  2156 
       
  2157         self.chown(tarinfo, targetpath)
       
  2158         if not tarinfo.issym():
       
  2159             self.chmod(tarinfo, targetpath)
       
  2160             self.utime(tarinfo, targetpath)
       
  2161 
       
  2162     #--------------------------------------------------------------------------
       
  2163     # Below are the different file methods. They are called via
       
  2164     # _extract_member() when extract() is called. They can be replaced in a
       
  2165     # subclass to implement other functionality.
       
  2166 
       
  2167     def makedir(self, tarinfo, targetpath):
       
  2168         """Make a directory called targetpath.
       
  2169         """
       
  2170         try:
       
  2171             # Use a safe mode for the directory, the real mode is set
       
  2172             # later in _extract_member().
       
  2173             os.mkdir(targetpath, 0700)
       
  2174         except EnvironmentError, e:
       
  2175             if e.errno != errno.EEXIST:
       
  2176                 raise
       
  2177 
       
  2178     def makefile(self, tarinfo, targetpath):
       
  2179         """Make a file called targetpath.
       
  2180         """
       
  2181         source = self.extractfile(tarinfo)
       
  2182         target = bltn_open(targetpath, "wb")
       
  2183         copyfileobj(source, target)
       
  2184         source.close()
       
  2185         target.close()
       
  2186 
       
  2187     def makeunknown(self, tarinfo, targetpath):
       
  2188         """Make a file from a TarInfo object with an unknown type
       
  2189            at targetpath.
       
  2190         """
       
  2191         self.makefile(tarinfo, targetpath)
       
  2192         self._dbg(1, "tarfile: Unknown file type %r, " \
       
  2193                      "extracted as regular file." % tarinfo.type)
       
  2194 
       
  2195     def makefifo(self, tarinfo, targetpath):
       
  2196         """Make a fifo called targetpath.
       
  2197         """
       
  2198         if hasattr(os, "mkfifo"):
       
  2199             os.mkfifo(targetpath)
       
  2200         else:
       
  2201             raise ExtractError("fifo not supported by system")
       
  2202 
       
  2203     def makedev(self, tarinfo, targetpath):
       
  2204         """Make a character or block device called targetpath.
       
  2205         """
       
  2206         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
       
  2207             raise ExtractError("special devices not supported by system")
       
  2208 
       
  2209         mode = tarinfo.mode
       
  2210         if tarinfo.isblk():
       
  2211             mode |= stat.S_IFBLK
       
  2212         else:
       
  2213             mode |= stat.S_IFCHR
       
  2214 
       
  2215         os.mknod(targetpath, mode,
       
  2216                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
       
  2217 
       
  2218     def makelink(self, tarinfo, targetpath):
       
  2219         """Make a (symbolic) link called targetpath. If it cannot be created
       
  2220           (platform limitation), we try to make a copy of the referenced file
       
  2221           instead of a link.
       
  2222         """
       
  2223         linkpath = tarinfo.linkname
       
  2224         try:
       
  2225             if tarinfo.issym():
       
  2226                 os.symlink(linkpath, targetpath)
       
  2227             else:
       
  2228                 # See extract().
       
  2229                 os.link(tarinfo._link_target, targetpath)
       
  2230         except AttributeError:
       
  2231             if tarinfo.issym():
       
  2232                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
       
  2233                                         linkpath)
       
  2234                 linkpath = normpath(linkpath)
       
  2235 
       
  2236             try:
       
  2237                 self._extract_member(self.getmember(linkpath), targetpath)
       
  2238             except (EnvironmentError, KeyError), e:
       
  2239                 linkpath = os.path.normpath(linkpath)
       
  2240                 try:
       
  2241                     shutil.copy2(linkpath, targetpath)
       
  2242                 except EnvironmentError, e:
       
  2243                     raise IOError("link could not be created")
       
  2244 
       
  2245     def chown(self, tarinfo, targetpath):
       
  2246         """Set owner of targetpath according to tarinfo.
       
  2247         """
       
  2248         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
       
  2249             # We have to be root to do so.
       
  2250             try:
       
  2251                 g = grp.getgrnam(tarinfo.gname)[2]
       
  2252             except KeyError:
       
  2253                 try:
       
  2254                     g = grp.getgrgid(tarinfo.gid)[2]
       
  2255                 except KeyError:
       
  2256                     g = os.getgid()
       
  2257             try:
       
  2258                 u = pwd.getpwnam(tarinfo.uname)[2]
       
  2259             except KeyError:
       
  2260                 try:
       
  2261                     u = pwd.getpwuid(tarinfo.uid)[2]
       
  2262                 except KeyError:
       
  2263                     u = os.getuid()
       
  2264             try:
       
  2265                 if tarinfo.issym() and hasattr(os, "lchown"):
       
  2266                     os.lchown(targetpath, u, g)
       
  2267                 else:
       
  2268                     if sys.platform != "os2emx":
       
  2269                         os.chown(targetpath, u, g)
       
  2270             except EnvironmentError, e:
       
  2271                 raise ExtractError("could not change owner")
       
  2272 
       
  2273     def chmod(self, tarinfo, targetpath):
       
  2274         """Set file permissions of targetpath according to tarinfo.
       
  2275         """
       
  2276         if hasattr(os, 'chmod'):
       
  2277             try:
       
  2278                 os.chmod(targetpath, tarinfo.mode)
       
  2279             except EnvironmentError, e:
       
  2280                 raise ExtractError("could not change mode")
       
  2281 
       
  2282     def utime(self, tarinfo, targetpath):
       
  2283         """Set modification time of targetpath according to tarinfo.
       
  2284         """
       
  2285         if not hasattr(os, 'utime'):
       
  2286             return
       
  2287         if sys.platform == "win32" and tarinfo.isdir():
       
  2288             # According to msdn.microsoft.com, it is an error (EACCES)
       
  2289             # to use utime() on directories.
       
  2290             return
       
  2291         try:
       
  2292             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
       
  2293         except EnvironmentError, e:
       
  2294             raise ExtractError("could not change modification time")
       
  2295 
       
  2296     #--------------------------------------------------------------------------
       
  2297     def next(self):
       
  2298         """Return the next member of the archive as a TarInfo object, when
       
  2299            TarFile is opened for reading. Return None if there is no more
       
  2300            available.
       
  2301         """
       
  2302         self._check("ra")
       
  2303         if self.firstmember is not None:
       
  2304             m = self.firstmember
       
  2305             self.firstmember = None
       
  2306             return m
       
  2307 
       
  2308         # Read the next block.
       
  2309         self.fileobj.seek(self.offset)
       
  2310         while True:
       
  2311             try:
       
  2312                 tarinfo = self.tarinfo.fromtarfile(self)
       
  2313                 if tarinfo is None:
       
  2314                     return
       
  2315                 self.members.append(tarinfo)
       
  2316 
       
  2317             except HeaderError, e:
       
  2318                 if self.ignore_zeros:
       
  2319                     self._dbg(2, "0x%X: %s" % (self.offset, e))
       
  2320                     self.offset += BLOCKSIZE
       
  2321                     continue
       
  2322                 else:
       
  2323                     if self.offset == 0:
       
  2324                         raise ReadError(str(e))
       
  2325                     return None
       
  2326             break
       
  2327 
       
  2328         return tarinfo
       
  2329 
       
  2330     #--------------------------------------------------------------------------
       
  2331     # Little helper methods:
       
  2332 
       
  2333     def _getmember(self, name, tarinfo=None):
       
  2334         """Find an archive member by name from bottom to top.
       
  2335            If tarinfo is given, it is used as the starting point.
       
  2336         """
       
  2337         # Ensure that all members have been loaded.
       
  2338         members = self.getmembers()
       
  2339 
       
  2340         if tarinfo is None:
       
  2341             end = len(members)
       
  2342         else:
       
  2343             end = members.index(tarinfo)
       
  2344 
       
  2345         for i in xrange(end - 1, -1, -1):
       
  2346             if name == members[i].name:
       
  2347                 return members[i]
       
  2348 
       
  2349     def _load(self):
       
  2350         """Read through the entire archive file and look for readable
       
  2351            members.
       
  2352         """
       
  2353         while True:
       
  2354             tarinfo = self.next()
       
  2355             if tarinfo is None:
       
  2356                 break
       
  2357         self._loaded = True
       
  2358 
       
  2359     def _check(self, mode=None):
       
  2360         """Check if TarFile is still open, and if the operation's mode
       
  2361            corresponds to TarFile's mode.
       
  2362         """
       
  2363         if self.closed:
       
  2364             raise IOError("%s is closed" % self.__class__.__name__)
       
  2365         if mode is not None and self.mode not in mode:
       
  2366             raise IOError("bad operation for mode %r" % self.mode)
       
  2367 
       
  2368     def __iter__(self):
       
  2369         """Provide an iterator object.
       
  2370         """
       
  2371         if self._loaded:
       
  2372             return iter(self.members)
       
  2373         else:
       
  2374             return TarIter(self)
       
  2375 
       
  2376     def _dbg(self, level, msg):
       
  2377         """Write debugging output to sys.stderr.
       
  2378         """
       
  2379         if level <= self.debug:
       
  2380             print >> sys.stderr, msg
       
  2381 # class TarFile
       
  2382 
       
  2383 class TarIter:
       
  2384     """Iterator Class.
       
  2385 
       
  2386        for tarinfo in TarFile(...):
       
  2387            suite...
       
  2388     """
       
  2389 
       
  2390     def __init__(self, tarfile):
       
  2391         """Construct a TarIter object.
       
  2392         """
       
  2393         self.tarfile = tarfile
       
  2394         self.index = 0
       
  2395     def __iter__(self):
       
  2396         """Return iterator object.
       
  2397         """
       
  2398         return self
       
  2399     def next(self):
       
  2400         """Return the next item using TarFile's next() method.
       
  2401            When all members have been read, set TarFile as _loaded.
       
  2402         """
       
  2403         # Fix for SF #1100429: Under rare circumstances it can
       
  2404         # happen that getmembers() is called during iteration,
       
  2405         # which will cause TarIter to stop prematurely.
       
  2406         if not self.tarfile._loaded:
       
  2407             tarinfo = self.tarfile.next()
       
  2408             if not tarinfo:
       
  2409                 self.tarfile._loaded = True
       
  2410                 raise StopIteration
       
  2411         else:
       
  2412             try:
       
  2413                 tarinfo = self.tarfile.members[self.index]
       
  2414             except IndexError:
       
  2415                 raise StopIteration
       
  2416         self.index += 1
       
  2417         return tarinfo
       
  2418 
       
  2419 # Helper classes for sparse file support
       
  2420 class _section:
       
  2421     """Base class for _data and _hole.
       
  2422     """
       
  2423     def __init__(self, offset, size):
       
  2424         self.offset = offset
       
  2425         self.size = size
       
  2426     def __contains__(self, offset):
       
  2427         return self.offset <= offset < self.offset + self.size
       
  2428 
       
  2429 class _data(_section):
       
  2430     """Represent a data section in a sparse file.
       
  2431     """
       
  2432     def __init__(self, offset, size, realpos):
       
  2433         _section.__init__(self, offset, size)
       
  2434         self.realpos = realpos
       
  2435 
       
  2436 class _hole(_section):
       
  2437     """Represent a hole section in a sparse file.
       
  2438     """
       
  2439     pass
       
  2440 
       
  2441 class _ringbuffer(list):
       
  2442     """Ringbuffer class which increases performance
       
  2443        over a regular list.
       
  2444     """
       
  2445     def __init__(self):
       
  2446         self.idx = 0
       
  2447     def find(self, offset):
       
  2448         idx = self.idx
       
  2449         while True:
       
  2450             item = self[idx]
       
  2451             if offset in item:
       
  2452                 break
       
  2453             idx += 1
       
  2454             if idx == len(self):
       
  2455                 idx = 0
       
  2456             if idx == self.idx:
       
  2457                 # End of File
       
  2458                 return None
       
  2459         self.idx = idx
       
  2460         return item
       
  2461 
       
  2462 #---------------------------------------------
       
  2463 # zipfile compatible TarFile class
       
  2464 #---------------------------------------------
       
  2465 TAR_PLAIN = 0           # zipfile.ZIP_STORED
       
  2466 TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
       
  2467 class TarFileCompat:
       
  2468     """TarFile class compatible with standard module zipfile's
       
  2469        ZipFile class.
       
  2470     """
       
  2471     def __init__(self, file, mode="r", compression=TAR_PLAIN):
       
  2472         from warnings import warnpy3k
       
  2473         warnpy3k("the TarFileCompat class has been removed in Python 3.0",
       
  2474                 stacklevel=2)
       
  2475         if compression == TAR_PLAIN:
       
  2476             self.tarfile = TarFile.taropen(file, mode)
       
  2477         elif compression == TAR_GZIPPED:
       
  2478             self.tarfile = TarFile.gzopen(file, mode)
       
  2479         else:
       
  2480             raise ValueError("unknown compression constant")
       
  2481         if mode[0:1] == "r":
       
  2482             members = self.tarfile.getmembers()
       
  2483             for m in members:
       
  2484                 m.filename = m.name
       
  2485                 m.file_size = m.size
       
  2486                 m.date_time = time.gmtime(m.mtime)[:6]
       
  2487     def namelist(self):
       
  2488         return map(lambda m: m.name, self.infolist())
       
  2489     def infolist(self):
       
  2490         return filter(lambda m: m.type in REGULAR_TYPES,
       
  2491                       self.tarfile.getmembers())
       
  2492     def printdir(self):
       
  2493         self.tarfile.list()
       
  2494     def testzip(self):
       
  2495         return
       
  2496     def getinfo(self, name):
       
  2497         return self.tarfile.getmember(name)
       
  2498     def read(self, name):
       
  2499         return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
       
  2500     def write(self, filename, arcname=None, compress_type=None):
       
  2501         self.tarfile.add(filename, arcname)
       
  2502     def writestr(self, zinfo, bytes):
       
  2503         try:
       
  2504             from cStringIO import StringIO
       
  2505         except ImportError:
       
  2506             from StringIO import StringIO
       
  2507         import calendar
       
  2508         tinfo = TarInfo(zinfo.filename)
       
  2509         tinfo.size = len(bytes)
       
  2510         tinfo.mtime = calendar.timegm(zinfo.date_time)
       
  2511         self.tarfile.addfile(tinfo, StringIO(bytes))
       
  2512     def close(self):
       
  2513         self.tarfile.close()
       
  2514 #class TarFileCompat
       
  2515 
       
  2516 #--------------------
       
  2517 # exported functions
       
  2518 #--------------------
       
  2519 def is_tarfile(name):
       
  2520     """Return True if name points to a tar archive that we
       
  2521        are able to handle, else return False.
       
  2522     """
       
  2523     try:
       
  2524         t = open(name)
       
  2525         t.close()
       
  2526         return True
       
  2527     except TarError:
       
  2528         return False
       
  2529 
       
# Keep a reference to the open() that was in effect up to this point under
# the name bltn_open, because the next line rebinds the module-level name
# `open' to TarFile.open.
bltn_open = open
open = TarFile.open