python-2.5.2/win32/Lib/zipfile.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """
       
     2 Read and write ZIP files.
       
     3 """
       
     4 import struct, os, time, sys
       
     5 import binascii, cStringIO
       
     6 
       
     7 try:
       
     8     import zlib # We may need its compression method
       
     9 except ImportError:
       
    10     zlib = None
       
    11 
       
# Public names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
       
    14 
       
    15 class BadZipfile(Exception):
       
    16     pass
       
    17 
       
    18 
       
    19 class LargeZipFile(Exception):
       
    20     """
       
    21     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
       
    22     and those extensions are disabled.
       
    23     """
       
    24 
       
error = BadZipfile      # The exception raised by this module

# Largest value (2**31 - 1) accepted for sizes and offsets before the ZIP64
# extensions are needed; compared against in ZipInfo.FileHeader() and
# ZipFile._writecheck().
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.  Each "struct*"
# format is paired with the "string*" magic number that starts the record.
# The end-of-archive format unpacks 8 fields; the archive comment and the
# record offset are appended to the list afterwards by _EndRecData().
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLL"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
       
    45 
       
    46 
       
# indexes of entries in the central directory structure, i.e. positions in
# the tuple returned by struct.unpack(structCentralDir, ...)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
       
    67 
       
# indexes of entries in the local file header structure, i.e. positions in
# the tuple returned by struct.unpack(structFileHeader, ...)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
       
    81 
       
    82 def is_zipfile(filename):
       
    83     """Quickly see if file is a ZIP file by checking the magic number."""
       
    84     try:
       
    85         fpin = open(filename, "rb")
       
    86         endrec = _EndRecData(fpin)
       
    87         fpin.close()
       
    88         if endrec:
       
    89             return True                 # file has correct magic number
       
    90     except IOError:
       
    91         pass
       
    92     return False
       
    93 
       
    94 def _EndRecData64(fpin, offset, endrec):
       
    95     """
       
    96     Read the ZIP64 end-of-archive records and use that to update endrec
       
    97     """
       
    98     locatorSize = struct.calcsize(structEndArchive64Locator)
       
    99     fpin.seek(offset - locatorSize, 2)
       
   100     data = fpin.read(locatorSize)
       
   101     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
       
   102     if sig != stringEndArchive64Locator:
       
   103         return endrec
       
   104 
       
   105     if diskno != 0 or disks != 1:
       
   106         raise BadZipfile("zipfiles that span multiple disks are not supported")
       
   107 
       
   108     # Assume no 'zip64 extensible data'
       
   109     endArchiveSize = struct.calcsize(structEndArchive64)
       
   110     fpin.seek(offset - locatorSize - endArchiveSize, 2)
       
   111     data = fpin.read(endArchiveSize)
       
   112     sig, sz, create_version, read_version, disk_num, disk_dir, \
       
   113             dircount, dircount2, dirsize, diroffset = \
       
   114             struct.unpack(structEndArchive64, data)
       
   115     if sig != stringEndArchive64:
       
   116         return endrec
       
   117 
       
   118     # Update the original endrec using data from the ZIP64 record
       
   119     endrec[1] = disk_num
       
   120     endrec[2] = disk_dir
       
   121     endrec[3] = dircount
       
   122     endrec[4] = dircount2
       
   123     endrec[5] = dirsize
       
   124     endrec[6] = diroffset
       
   125     return endrec
       
   126 
       
   127 
       
   128 def _EndRecData(fpin):
       
   129     """Return data from the "End of Central Directory" record, or None.
       
   130 
       
   131     The data is a list of the nine items in the ZIP "End of central dir"
       
   132     record followed by a tenth item, the file seek offset of this record."""
       
   133     fpin.seek(-22, 2)               # Assume no archive comment.
       
   134     filesize = fpin.tell() + 22     # Get file size
       
   135     data = fpin.read()
       
   136     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
       
   137         endrec = struct.unpack(structEndArchive, data)
       
   138         endrec = list(endrec)
       
   139         endrec.append("")               # Append the archive comment
       
   140         endrec.append(filesize - 22)    # Append the record start offset
       
   141         if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
       
   142             return _EndRecData64(fpin, -22, endrec)
       
   143         return endrec
       
   144     # Search the last END_BLOCK bytes of the file for the record signature.
       
   145     # The comment is appended to the ZIP file and has a 16 bit length.
       
   146     # So the comment may be up to 64K long.  We limit the search for the
       
   147     # signature to a few Kbytes at the end of the file for efficiency.
       
   148     # also, the signature must not appear in the comment.
       
   149     END_BLOCK = min(filesize, 1024 * 4)
       
   150     fpin.seek(filesize - END_BLOCK, 0)
       
   151     data = fpin.read()
       
   152     start = data.rfind(stringEndArchive)
       
   153     if start >= 0:     # Correct signature string was found
       
   154         endrec = struct.unpack(structEndArchive, data[start:start+22])
       
   155         endrec = list(endrec)
       
   156         comment = data[start+22:]
       
   157         if endrec[7] == len(comment):     # Comment length checks out
       
   158             # Append the archive comment and start offset
       
   159             endrec.append(comment)
       
   160             endrec.append(filesize - END_BLOCK + start)
       
   161             if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
       
   162                 return _EndRecData64(fpin, - END_BLOCK + start, endrec)
       
   163             return endrec
       
   164     return      # Error, return None
       
   165 
       
   166 
       
   167 class ZipInfo (object):
       
   168     """Class with attributes describing each file in the ZIP archive."""
       
   169 
       
   170     __slots__ = (
       
   171             'orig_filename',
       
   172             'filename',
       
   173             'date_time',
       
   174             'compress_type',
       
   175             'comment',
       
   176             'extra',
       
   177             'create_system',
       
   178             'create_version',
       
   179             'extract_version',
       
   180             'reserved',
       
   181             'flag_bits',
       
   182             'volume',
       
   183             'internal_attr',
       
   184             'external_attr',
       
   185             'header_offset',
       
   186             'CRC',
       
   187             'compress_size',
       
   188             'file_size',
       
   189         )
       
   190 
       
   191     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
       
   192         self.orig_filename = filename   # Original file name in archive
       
   193 
       
   194         # Terminate the file name at the first null byte.  Null bytes in file
       
   195         # names are used as tricks by viruses in archives.
       
   196         null_byte = filename.find(chr(0))
       
   197         if null_byte >= 0:
       
   198             filename = filename[0:null_byte]
       
   199         # This is used to ensure paths in generated ZIP files always use
       
   200         # forward slashes as the directory separator, as required by the
       
   201         # ZIP format specification.
       
   202         if os.sep != "/" and os.sep in filename:
       
   203             filename = filename.replace(os.sep, "/")
       
   204 
       
   205         self.filename = filename        # Normalized file name
       
   206         self.date_time = date_time      # year, month, day, hour, min, sec
       
   207         # Standard values:
       
   208         self.compress_type = ZIP_STORED # Type of compression for the file
       
   209         self.comment = ""               # Comment for each file
       
   210         self.extra = ""                 # ZIP extra data
       
   211         if sys.platform == 'win32':
       
   212             self.create_system = 0          # System which created ZIP archive
       
   213         else:
       
   214             # Assume everything else is unix-y
       
   215             self.create_system = 3          # System which created ZIP archive
       
   216         self.create_version = 20        # Version which created ZIP archive
       
   217         self.extract_version = 20       # Version needed to extract archive
       
   218         self.reserved = 0               # Must be zero
       
   219         self.flag_bits = 0              # ZIP flag bits
       
   220         self.volume = 0                 # Volume number of file header
       
   221         self.internal_attr = 0          # Internal attributes
       
   222         self.external_attr = 0          # External file attributes
       
   223         # Other attributes are set by class ZipFile:
       
   224         # header_offset         Byte offset to the file header
       
   225         # CRC                   CRC-32 of the uncompressed file
       
   226         # compress_size         Size of the compressed file
       
   227         # file_size             Size of the uncompressed file
       
   228 
       
   229     def FileHeader(self):
       
   230         """Return the per-file header as a string."""
       
   231         dt = self.date_time
       
   232         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
       
   233         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
       
   234         if self.flag_bits & 0x08:
       
   235             # Set these to zero because we write them after the file data
       
   236             CRC = compress_size = file_size = 0
       
   237         else:
       
   238             CRC = self.CRC
       
   239             compress_size = self.compress_size
       
   240             file_size = self.file_size
       
   241 
       
   242         extra = self.extra
       
   243 
       
   244         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
       
   245             # File is larger than what fits into a 4 byte integer,
       
   246             # fall back to the ZIP64 extension
       
   247             fmt = '<hhqq'
       
   248             extra = extra + struct.pack(fmt,
       
   249                     1, struct.calcsize(fmt)-4, file_size, compress_size)
       
   250             file_size = 0xffffffff # -1
       
   251             compress_size = 0xffffffff # -1
       
   252             self.extract_version = max(45, self.extract_version)
       
   253             self.create_version = max(45, self.extract_version)
       
   254 
       
   255         header = struct.pack(structFileHeader, stringFileHeader,
       
   256                  self.extract_version, self.reserved, self.flag_bits,
       
   257                  self.compress_type, dostime, dosdate, CRC,
       
   258                  compress_size, file_size,
       
   259                  len(self.filename), len(extra))
       
   260         return header + self.filename + extra
       
   261 
       
   262     def _decodeExtra(self):
       
   263         # Try to decode the extra field.
       
   264         extra = self.extra
       
   265         unpack = struct.unpack
       
   266         while extra:
       
   267             tp, ln = unpack('<hh', extra[:4])
       
   268             if tp == 1:
       
   269                 if ln >= 24:
       
   270                     counts = unpack('<qqq', extra[4:28])
       
   271                 elif ln == 16:
       
   272                     counts = unpack('<qq', extra[4:20])
       
   273                 elif ln == 8:
       
   274                     counts = unpack('<q', extra[4:12])
       
   275                 elif ln == 0:
       
   276                     counts = ()
       
   277                 else:
       
   278                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
       
   279 
       
   280                 idx = 0
       
   281 
       
   282                 # ZIP64 extension (large files and/or large archives)
       
   283                 if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
       
   284                     self.file_size = counts[idx]
       
   285                     idx += 1
       
   286 
       
   287                 if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
       
   288                     self.compress_size = counts[idx]
       
   289                     idx += 1
       
   290 
       
   291                 if self.header_offset == -1 or self.header_offset == 0xffffffffL:
       
   292                     old = self.header_offset
       
   293                     self.header_offset = counts[idx]
       
   294                     idx+=1
       
   295 
       
   296             extra = extra[ln+4:]
       
   297 
       
   298 
       
   299 class ZipFile:
       
   300     """ Class with methods to open, read, write, close, list zip files.
       
   301 
       
    302     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
       
   303 
       
   304     file: Either the path to the file, or a file-like object.
       
   305           If it is a path, the file will be opened and closed by ZipFile.
       
   306     mode: The mode can be either read "r", write "w" or append "a".
       
   307     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
       
   308     allowZip64: if True ZipFile will create files with ZIP64 extensions when
       
   309                 needed, otherwise it will raise an exception when this would
       
   310                 be necessary.
       
   311 
       
   312     """
       
   313 
       
   314     fp = None                   # Set here since __del__ checks it
       
   315 
       
   316     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
       
   317         """Open the ZIP file with mode read "r", write "w" or append "a"."""
       
   318         self._allowZip64 = allowZip64
       
   319         self._didModify = False
       
   320         if compression == ZIP_STORED:
       
   321             pass
       
   322         elif compression == ZIP_DEFLATED:
       
   323             if not zlib:
       
   324                 raise RuntimeError,\
       
   325                       "Compression requires the (missing) zlib module"
       
   326         else:
       
   327             raise RuntimeError, "That compression method is not supported"
       
   328         self.debug = 0  # Level of printing: 0 through 3
       
   329         self.NameToInfo = {}    # Find file info given name
       
   330         self.filelist = []      # List of ZipInfo instances for archive
       
   331         self.compression = compression  # Method of compression
       
   332         self.mode = key = mode.replace('b', '')[0]
       
   333 
       
   334         # Check if we were passed a file-like object
       
   335         if isinstance(file, basestring):
       
   336             self._filePassed = 0
       
   337             self.filename = file
       
   338             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
       
   339             self.fp = open(file, modeDict[mode])
       
   340         else:
       
   341             self._filePassed = 1
       
   342             self.fp = file
       
   343             self.filename = getattr(file, 'name', None)
       
   344 
       
   345         if key == 'r':
       
   346             self._GetContents()
       
   347         elif key == 'w':
       
   348             pass
       
   349         elif key == 'a':
       
   350             try:                        # See if file is a zip file
       
   351                 self._RealGetContents()
       
   352                 # seek to start of directory and overwrite
       
   353                 self.fp.seek(self.start_dir, 0)
       
   354             except BadZipfile:          # file is not a zip file, just append
       
   355                 self.fp.seek(0, 2)
       
   356         else:
       
   357             if not self._filePassed:
       
   358                 self.fp.close()
       
   359                 self.fp = None
       
   360             raise RuntimeError, 'Mode must be "r", "w" or "a"'
       
   361 
       
   362     def _GetContents(self):
       
   363         """Read the directory, making sure we close the file if the format
       
   364         is bad."""
       
   365         try:
       
   366             self._RealGetContents()
       
   367         except BadZipfile:
       
   368             if not self._filePassed:
       
   369                 self.fp.close()
       
   370                 self.fp = None
       
   371             raise
       
   372 
       
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-archive record with _EndRecData(), reads the
        central directory and fills self.filelist and self.NameToInfo with
        one ZipInfo per entry.  Also sets self.comment and self.start_dir.
        Raises BadZipfile when no end record is found or a directory entry
        does not start with the central directory magic number.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        if self.debug > 1:
            print endrec
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        if endrec[9] > ZIP64_LIMIT:
            # 56 and 20 are the sizes of the ZIP64 end-of-archive record and
            # of its locator (see structEndArchive64 and
            # structEndArchive64Locator), which sit between the directory
            # and the regular end record.
            x = endrec[9] - size_cd - 56 - 20
        else:
            x = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = x - offset_cd
        if self.debug > 2:
            print "given, inferred, offset", offset_cd, x, concat
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on "fp" is an in-memory copy of the central directory,
        # not the archive file.
        fp = cStringIO.StringIO(data)
        total = 0       # directory bytes consumed so far
        while total < size_cd:
            # 46 is the size of the fixed part of a directory entry
            # (structCentralDir).
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile, "Bad magic number for central directory"
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print centdir
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            # (note: this reuses the name "x" that held an offset above)
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            # Fields 1-11 and 15-17 of the entry, in _CD_* index order;
            # t and d are the packed time and date.
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            # Replace 0xFFFFFFFF placeholders with the ZIP64 values before
            # the header offset is adjusted for any prepended data.
            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print "total", total
       
   430 
       
   431 
       
   432     def namelist(self):
       
   433         """Return a list of file names in the archive."""
       
   434         l = []
       
   435         for data in self.filelist:
       
   436             l.append(data.filename)
       
   437         return l
       
   438 
       
   439     def infolist(self):
       
   440         """Return a list of class ZipInfo instances for files in the
       
   441         archive."""
       
   442         return self.filelist
       
   443 
       
   444     def printdir(self):
       
   445         """Print a table of contents for the zip file."""
       
   446         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
       
   447         for zinfo in self.filelist:
       
   448             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
       
   449             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
       
   450 
       
   451     def testzip(self):
       
   452         """Read all the files and check the CRC."""
       
   453         for zinfo in self.filelist:
       
   454             try:
       
   455                 self.read(zinfo.filename)       # Check CRC-32
       
   456             except BadZipfile:
       
   457                 return zinfo.filename
       
   458 
       
   459 
       
   460     def getinfo(self, name):
       
   461         """Return the instance of ZipInfo given 'name'."""
       
   462         return self.NameToInfo[name]
       
   463 
       
   464     def read(self, name):
       
   465         """Return file bytes (as a string) for name."""
       
   466         if self.mode not in ("r", "a"):
       
   467             raise RuntimeError, 'read() requires mode "r" or "a"'
       
   468         if not self.fp:
       
   469             raise RuntimeError, \
       
   470                   "Attempt to read ZIP archive that was already closed"
       
   471         zinfo = self.getinfo(name)
       
   472         filepos = self.fp.tell()
       
   473 
       
   474         self.fp.seek(zinfo.header_offset, 0)
       
   475 
       
   476         # Skip the file header:
       
   477         fheader = self.fp.read(30)
       
   478         if fheader[0:4] != stringFileHeader:
       
   479             raise BadZipfile, "Bad magic number for file header"
       
   480 
       
   481         fheader = struct.unpack(structFileHeader, fheader)
       
   482         fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
       
   483         if fheader[_FH_EXTRA_FIELD_LENGTH]:
       
   484             self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
       
   485 
       
   486         if fname != zinfo.orig_filename:
       
   487             raise BadZipfile, \
       
   488                       'File name in directory "%s" and header "%s" differ.' % (
       
   489                           zinfo.orig_filename, fname)
       
   490 
       
   491         bytes = self.fp.read(zinfo.compress_size)
       
   492         self.fp.seek(filepos, 0)
       
   493         if zinfo.compress_type == ZIP_STORED:
       
   494             pass
       
   495         elif zinfo.compress_type == ZIP_DEFLATED:
       
   496             if not zlib:
       
   497                 raise RuntimeError, \
       
   498                       "De-compression requires the (missing) zlib module"
       
   499             # zlib compress/decompress code by Jeremy Hylton of CNRI
       
   500             dc = zlib.decompressobj(-15)
       
   501             bytes = dc.decompress(bytes)
       
   502             # need to feed in unused pad byte so that zlib won't choke
       
   503             ex = dc.decompress('Z') + dc.flush()
       
   504             if ex:
       
   505                 bytes = bytes + ex
       
   506         else:
       
   507             raise BadZipfile, \
       
   508                   "Unsupported compression method %d for file %s" % \
       
   509             (zinfo.compress_type, name)
       
   510         crc = binascii.crc32(bytes)
       
   511         if crc != zinfo.CRC:
       
   512             raise BadZipfile, "Bad CRC-32 for file %s" % name
       
   513         return bytes
       
   514 
       
   515     def _writecheck(self, zinfo):
       
   516         """Check for errors before writing a file to the archive."""
       
   517         if zinfo.filename in self.NameToInfo:
       
   518             if self.debug:      # Warning for duplicate names
       
   519                 print "Duplicate name:", zinfo.filename
       
   520         if self.mode not in ("w", "a"):
       
   521             raise RuntimeError, 'write() requires mode "w" or "a"'
       
   522         if not self.fp:
       
   523             raise RuntimeError, \
       
   524                   "Attempt to write ZIP archive that was already closed"
       
   525         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
       
   526             raise RuntimeError, \
       
   527                   "Compression requires the (missing) zlib module"
       
   528         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
       
   529             raise RuntimeError, \
       
   530                   "That compression method is not supported"
       
   531         if zinfo.file_size > ZIP64_LIMIT:
       
   532             if not self._allowZip64:
       
   533                 raise LargeZipFile("Filesize would require ZIP64 extensions")
       
   534         if zinfo.header_offset > ZIP64_LIMIT:
       
   535             if not self._allowZip64:
       
   536                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
       
   537 
       
   538     def write(self, filename, arcname=None, compress_type=None):
       
   539         """Put the bytes from filename into the archive under the name
       
   540         arcname."""
       
   541         st = os.stat(filename)
       
   542         mtime = time.localtime(st.st_mtime)
       
   543         date_time = mtime[0:6]
       
   544         # Create ZipInfo instance to store file information
       
   545         if arcname is None:
       
   546             arcname = filename
       
   547         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
       
   548         while arcname[0] in (os.sep, os.altsep):
       
   549             arcname = arcname[1:]
       
   550         zinfo = ZipInfo(arcname, date_time)
       
   551         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
       
   552         if compress_type is None:
       
   553             zinfo.compress_type = self.compression
       
   554         else:
       
   555             zinfo.compress_type = compress_type
       
   556 
       
   557         zinfo.file_size = st.st_size
       
   558         zinfo.flag_bits = 0x00
       
   559         zinfo.header_offset = self.fp.tell()    # Start of header bytes
       
   560 
       
   561         self._writecheck(zinfo)
       
   562         self._didModify = True
       
   563         fp = open(filename, "rb")
       
   564         # Must overwrite CRC and sizes with correct data later
       
   565         zinfo.CRC = CRC = 0
       
   566         zinfo.compress_size = compress_size = 0
       
   567         zinfo.file_size = file_size = 0
       
   568         self.fp.write(zinfo.FileHeader())
       
   569         if zinfo.compress_type == ZIP_DEFLATED:
       
   570             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
       
   571                  zlib.DEFLATED, -15)
       
   572         else:
       
   573             cmpr = None
       
   574         while 1:
       
   575             buf = fp.read(1024 * 8)
       
   576             if not buf:
       
   577                 break
       
   578             file_size = file_size + len(buf)
       
   579             CRC = binascii.crc32(buf, CRC)
       
   580             if cmpr:
       
   581                 buf = cmpr.compress(buf)
       
   582                 compress_size = compress_size + len(buf)
       
   583             self.fp.write(buf)
       
   584         fp.close()
       
   585         if cmpr:
       
   586             buf = cmpr.flush()
       
   587             compress_size = compress_size + len(buf)
       
   588             self.fp.write(buf)
       
   589             zinfo.compress_size = compress_size
       
   590         else:
       
   591             zinfo.compress_size = file_size
       
   592         zinfo.CRC = CRC
       
   593         zinfo.file_size = file_size
       
   594         # Seek backwards and write CRC and file sizes
       
   595         position = self.fp.tell()       # Preserve current position in file
       
   596         self.fp.seek(zinfo.header_offset + 14, 0)
       
   597         self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
       
   598               zinfo.file_size))
       
   599         self.fp.seek(position, 0)
       
   600         self.filelist.append(zinfo)
       
   601         self.NameToInfo[zinfo.filename] = zinfo
       
   602 
       
   603     def writestr(self, zinfo_or_arcname, bytes):
       
   604         """Write a file into the archive.  The contents is the string
       
   605         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
       
   606         the name of the file in the archive."""
       
   607         if not isinstance(zinfo_or_arcname, ZipInfo):
       
   608             zinfo = ZipInfo(filename=zinfo_or_arcname,
       
   609                             date_time=time.localtime(time.time())[:6])
       
   610             zinfo.compress_type = self.compression
       
   611         else:
       
   612             zinfo = zinfo_or_arcname
       
   613         zinfo.file_size = len(bytes)            # Uncompressed size
       
   614         zinfo.header_offset = self.fp.tell()    # Start of header bytes
       
   615         self._writecheck(zinfo)
       
   616         self._didModify = True
       
   617         zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
       
   618         if zinfo.compress_type == ZIP_DEFLATED:
       
   619             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
       
   620                  zlib.DEFLATED, -15)
       
   621             bytes = co.compress(bytes) + co.flush()
       
   622             zinfo.compress_size = len(bytes)    # Compressed size
       
   623         else:
       
   624             zinfo.compress_size = zinfo.file_size
       
   625         zinfo.header_offset = self.fp.tell()    # Start of header bytes
       
   626         self.fp.write(zinfo.FileHeader())
       
   627         self.fp.write(bytes)
       
   628         self.fp.flush()
       
   629         if zinfo.flag_bits & 0x08:
       
   630             # Write CRC and file sizes after the file data
       
   631             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
       
   632                   zinfo.file_size))
       
   633         self.filelist.append(zinfo)
       
   634         self.NameToInfo[zinfo.filename] = zinfo
       
   635 
       
   636     def __del__(self):
       
   637         """Call the "close()" method in case the user forgot."""
       
   638         self.close()
       
   639 
       
   640     def close(self):
       
   641         """Close the file, and for mode "w" and "a" write the ending
       
   642         records."""
       
   643         if self.fp is None:
       
   644             return
       
   645 
       
   646         if self.mode in ("w", "a") and self._didModify: # write ending records
       
   647             count = 0
       
   648             pos1 = self.fp.tell()
       
   649             for zinfo in self.filelist:         # write central directory
       
   650                 count = count + 1
       
   651                 dt = zinfo.date_time
       
   652                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
       
   653                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
       
   654                 extra = []
       
   655                 if zinfo.file_size > ZIP64_LIMIT \
       
   656                         or zinfo.compress_size > ZIP64_LIMIT:
       
   657                     extra.append(zinfo.file_size)
       
   658                     extra.append(zinfo.compress_size)
       
   659                     file_size = 0xffffffff #-1
       
   660                     compress_size = 0xffffffff #-1
       
   661                 else:
       
   662                     file_size = zinfo.file_size
       
   663                     compress_size = zinfo.compress_size
       
   664 
       
   665                 if zinfo.header_offset > ZIP64_LIMIT:
       
   666                     extra.append(zinfo.header_offset)
       
   667                     header_offset = -1  # struct "l" format:  32 one bits
       
   668                 else:
       
   669                     header_offset = zinfo.header_offset
       
   670 
       
   671                 extra_data = zinfo.extra
       
   672                 if extra:
       
   673                     # Append a ZIP64 field to the extra's
       
   674                     extra_data = struct.pack(
       
   675                             '<hh' + 'q'*len(extra),
       
   676                             1, 8*len(extra), *extra) + extra_data
       
   677 
       
   678                     extract_version = max(45, zinfo.extract_version)
       
   679                     create_version = max(45, zinfo.create_version)
       
   680                 else:
       
   681                     extract_version = zinfo.extract_version
       
   682                     create_version = zinfo.create_version
       
   683 
       
   684                 centdir = struct.pack(structCentralDir,
       
   685                   stringCentralDir, create_version,
       
   686                   zinfo.create_system, extract_version, zinfo.reserved,
       
   687                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
       
   688                   zinfo.CRC, compress_size, file_size,
       
   689                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
       
   690                   0, zinfo.internal_attr, zinfo.external_attr,
       
   691                   header_offset)
       
   692                 self.fp.write(centdir)
       
   693                 self.fp.write(zinfo.filename)
       
   694                 self.fp.write(extra_data)
       
   695                 self.fp.write(zinfo.comment)
       
   696 
       
   697             pos2 = self.fp.tell()
       
   698             # Write end-of-zip-archive record
       
   699             if pos1 > ZIP64_LIMIT:
       
   700                 # Need to write the ZIP64 end-of-archive records
       
   701                 zip64endrec = struct.pack(
       
   702                         structEndArchive64, stringEndArchive64,
       
   703                         44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
       
   704                 self.fp.write(zip64endrec)
       
   705 
       
   706                 zip64locrec = struct.pack(
       
   707                         structEndArchive64Locator,
       
   708                         stringEndArchive64Locator, 0, pos2, 1)
       
   709                 self.fp.write(zip64locrec)
       
   710 
       
   711                 # XXX Why is `pos3` computed next?  It's never referenced.
       
   712                 pos3 = self.fp.tell()
       
   713                 endrec = struct.pack(structEndArchive, stringEndArchive,
       
   714                             0, 0, count, count, pos2 - pos1, -1, 0)
       
   715                 self.fp.write(endrec)
       
   716 
       
   717             else:
       
   718                 endrec = struct.pack(structEndArchive, stringEndArchive,
       
   719                          0, 0, count, count, pos2 - pos1, pos1, 0)
       
   720                 self.fp.write(endrec)
       
   721             self.fp.flush()
       
   722         if not self._filePassed:
       
   723             self.fp.close()
       
   724         self.fp = None
       
   725 
       
   726 
       
   727 class PyZipFile(ZipFile):
       
   728     """Class to create ZIP archives with Python library files and packages."""
       
   729 
       
   730     def writepy(self, pathname, basename = ""):
       
   731         """Add all files from "pathname" to the ZIP archive.
       
   732 
       
   733         If pathname is a package directory, search the directory and
       
   734         all package subdirectories recursively for all *.py and enter
       
   735         the modules into the archive.  If pathname is a plain
       
   736         directory, listdir *.py and enter all modules.  Else, pathname
       
   737         must be a Python *.py file and the module will be put into the
       
   738         archive.  Added modules are always module.pyo or module.pyc.
       
   739         This method will compile the module.py into module.pyc if
       
   740         necessary.
       
   741         """
       
   742         dir, name = os.path.split(pathname)
       
   743         if os.path.isdir(pathname):
       
   744             initname = os.path.join(pathname, "__init__.py")
       
   745             if os.path.isfile(initname):
       
   746                 # This is a package directory, add it
       
   747                 if basename:
       
   748                     basename = "%s/%s" % (basename, name)
       
   749                 else:
       
   750                     basename = name
       
   751                 if self.debug:
       
   752                     print "Adding package in", pathname, "as", basename
       
   753                 fname, arcname = self._get_codename(initname[0:-3], basename)
       
   754                 if self.debug:
       
   755                     print "Adding", arcname
       
   756                 self.write(fname, arcname)
       
   757                 dirlist = os.listdir(pathname)
       
   758                 dirlist.remove("__init__.py")
       
   759                 # Add all *.py files and package subdirectories
       
   760                 for filename in dirlist:
       
   761                     path = os.path.join(pathname, filename)
       
   762                     root, ext = os.path.splitext(filename)
       
   763                     if os.path.isdir(path):
       
   764                         if os.path.isfile(os.path.join(path, "__init__.py")):
       
   765                             # This is a package directory, add it
       
   766                             self.writepy(path, basename)  # Recursive call
       
   767                     elif ext == ".py":
       
   768                         fname, arcname = self._get_codename(path[0:-3],
       
   769                                          basename)
       
   770                         if self.debug:
       
   771                             print "Adding", arcname
       
   772                         self.write(fname, arcname)
       
   773             else:
       
   774                 # This is NOT a package directory, add its files at top level
       
   775                 if self.debug:
       
   776                     print "Adding files from directory", pathname
       
   777                 for filename in os.listdir(pathname):
       
   778                     path = os.path.join(pathname, filename)
       
   779                     root, ext = os.path.splitext(filename)
       
   780                     if ext == ".py":
       
   781                         fname, arcname = self._get_codename(path[0:-3],
       
   782                                          basename)
       
   783                         if self.debug:
       
   784                             print "Adding", arcname
       
   785                         self.write(fname, arcname)
       
   786         else:
       
   787             if pathname[-3:] != ".py":
       
   788                 raise RuntimeError, \
       
   789                       'Files added with writepy() must end with ".py"'
       
   790             fname, arcname = self._get_codename(pathname[0:-3], basename)
       
   791             if self.debug:
       
   792                 print "Adding file", arcname
       
   793             self.write(fname, arcname)
       
   794 
       
   795     def _get_codename(self, pathname, basename):
       
   796         """Return (filename, archivename) for the path.
       
   797 
       
   798         Given a module name path, return the correct file path and
       
   799         archive name, compiling if necessary.  For example, given
       
   800         /python/lib/string, return (/python/lib/string.pyc, string).
       
   801         """
       
   802         file_py  = pathname + ".py"
       
   803         file_pyc = pathname + ".pyc"
       
   804         file_pyo = pathname + ".pyo"
       
   805         if os.path.isfile(file_pyo) and \
       
   806                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
       
   807             fname = file_pyo    # Use .pyo file
       
   808         elif not os.path.isfile(file_pyc) or \
       
   809              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
       
   810             import py_compile
       
   811             if self.debug:
       
   812                 print "Compiling", file_py
       
   813             try:
       
   814                 py_compile.compile(file_py, file_pyc, None, True)
       
   815             except py_compile.PyCompileError,err:
       
   816                 print err.msg
       
   817             fname = file_pyc
       
   818         else:
       
   819             fname = file_pyc
       
   820         archivename = os.path.split(fname)[1]
       
   821         if basename:
       
   822             archivename = "%s/%s" % (basename, archivename)
       
   823         return (fname, archivename)
       
   824 
       
   825 
       
   826 def main(args = None):
       
   827     import textwrap
       
   828     USAGE=textwrap.dedent("""\
       
   829         Usage:
       
   830             zipfile.py -l zipfile.zip        # Show listing of a zipfile
       
   831             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
       
   832             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
       
   833             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
       
   834         """)
       
   835     if args is None:
       
   836         args = sys.argv[1:]
       
   837 
       
   838     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
       
   839         print USAGE
       
   840         sys.exit(1)
       
   841 
       
   842     if args[0] == '-l':
       
   843         if len(args) != 2:
       
   844             print USAGE
       
   845             sys.exit(1)
       
   846         zf = ZipFile(args[1], 'r')
       
   847         zf.printdir()
       
   848         zf.close()
       
   849 
       
   850     elif args[0] == '-t':
       
   851         if len(args) != 2:
       
   852             print USAGE
       
   853             sys.exit(1)
       
   854         zf = ZipFile(args[1], 'r')
       
   855         zf.testzip()
       
   856         print "Done testing"
       
   857 
       
   858     elif args[0] == '-e':
       
   859         if len(args) != 3:
       
   860             print USAGE
       
   861             sys.exit(1)
       
   862 
       
   863         zf = ZipFile(args[1], 'r')
       
   864         out = args[2]
       
   865         for path in zf.namelist():
       
   866             if path.startswith('./'):
       
   867                 tgt = os.path.join(out, path[2:])
       
   868             else:
       
   869                 tgt = os.path.join(out, path)
       
   870 
       
   871             tgtdir = os.path.dirname(tgt)
       
   872             if not os.path.exists(tgtdir):
       
   873                 os.makedirs(tgtdir)
       
   874             fp = open(tgt, 'wb')
       
   875             fp.write(zf.read(path))
       
   876             fp.close()
       
   877         zf.close()
       
   878 
       
   879     elif args[0] == '-c':
       
   880         if len(args) < 3:
       
   881             print USAGE
       
   882             sys.exit(1)
       
   883 
       
   884         def addToZip(zf, path, zippath):
       
   885             if os.path.isfile(path):
       
   886                 zf.write(path, zippath, ZIP_DEFLATED)
       
   887             elif os.path.isdir(path):
       
   888                 for nm in os.listdir(path):
       
   889                     addToZip(zf,
       
   890                             os.path.join(path, nm), os.path.join(zippath, nm))
       
   891             # else: ignore
       
   892 
       
   893         zf = ZipFile(args[1], 'w', allowZip64=True)
       
   894         for src in args[2:]:
       
   895             addToZip(zf, src, os.path.basename(src))
       
   896 
       
   897         zf.close()
       
   898 
       
   899 if __name__ == "__main__":
       
   900     main()