symbian-qemu-0.9.1-12/python-2.6.1/Lib/gzip.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """Functions that read and write gzipped files.
       
     2 
       
     3 The user of the file doesn't have to worry about the compression,
       
     4 but random access is not allowed."""
       
     5 
       
     6 # based on Andrew Kuchling's minigzip.py distributed with the zlib module
       
     7 
       
     8 import struct, sys, time
       
     9 import zlib
       
    10 import __builtin__
       
    11 
       
    12 __all__ = ["GzipFile","open"]
       
    13 
       
    14 FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
       
    15 
       
    16 READ, WRITE = 1, 2
       
    17 
       
    18 def write32u(output, value):
       
    19     # The L format writes the bit pattern correctly whether signed
       
    20     # or unsigned.
       
    21     output.write(struct.pack("<L", value))
       
    22 
       
    23 def read32(input):
       
    24     return struct.unpack("<I", input.read(4))[0]
       
    25 
       
    26 def open(filename, mode="rb", compresslevel=9):
       
    27     """Shorthand for GzipFile(filename, mode, compresslevel).
       
    28 
       
    29     The filename argument is required; mode defaults to 'rb'
       
    30     and compresslevel defaults to 9.
       
    31 
       
    32     """
       
    33     return GzipFile(filename, mode, compresslevel)
       
    34 
       
    35 class GzipFile:
       
    36     """The GzipFile class simulates most of the methods of a file object with
       
    37     the exception of the readinto() and truncate() methods.
       
    38 
       
    39     """
       
    40 
       
    41     myfileobj = None
       
    42     max_read_chunk = 10 * 1024 * 1024   # 10Mb
       
    43 
       
    44     def __init__(self, filename=None, mode=None,
       
    45                  compresslevel=9, fileobj=None):
       
    46         """Constructor for the GzipFile class.
       
    47 
       
    48         At least one of fileobj and filename must be given a
       
    49         non-trivial value.
       
    50 
       
    51         The new class instance is based on fileobj, which can be a regular
       
    52         file, a StringIO object, or any other object which simulates a file.
       
    53         It defaults to None, in which case filename is opened to provide
       
    54         a file object.
       
    55 
       
    56         When fileobj is not None, the filename argument is only used to be
       
    57         included in the gzip file header, which may includes the original
       
    58         filename of the uncompressed file.  It defaults to the filename of
       
    59         fileobj, if discernible; otherwise, it defaults to the empty string,
       
    60         and in this case the original filename is not included in the header.
       
    61 
       
    62         The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
       
    63         depending on whether the file will be read or written.  The default
       
    64         is the mode of fileobj if discernible; otherwise, the default is 'rb'.
       
    65         Be aware that only the 'rb', 'ab', and 'wb' values should be used
       
    66         for cross-platform portability.
       
    67 
       
    68         The compresslevel argument is an integer from 1 to 9 controlling the
       
    69         level of compression; 1 is fastest and produces the least compression,
       
    70         and 9 is slowest and produces the most compression.  The default is 9.
       
    71 
       
    72         """
       
    73 
       
    74         # guarantee the file is opened in binary mode on platforms
       
    75         # that care about that sort of thing
       
    76         if mode and 'b' not in mode:
       
    77             mode += 'b'
       
    78         if fileobj is None:
       
    79             fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
       
    80         if filename is None:
       
    81             if hasattr(fileobj, 'name'): filename = fileobj.name
       
    82             else: filename = ''
       
    83         if mode is None:
       
    84             if hasattr(fileobj, 'mode'): mode = fileobj.mode
       
    85             else: mode = 'rb'
       
    86 
       
    87         if mode[0:1] == 'r':
       
    88             self.mode = READ
       
    89             # Set flag indicating start of a new member
       
    90             self._new_member = True
       
    91             self.extrabuf = ""
       
    92             self.extrasize = 0
       
    93             self.name = filename
       
    94             # Starts small, scales exponentially
       
    95             self.min_readsize = 100
       
    96 
       
    97         elif mode[0:1] == 'w' or mode[0:1] == 'a':
       
    98             self.mode = WRITE
       
    99             self._init_write(filename)
       
   100             self.compress = zlib.compressobj(compresslevel,
       
   101                                              zlib.DEFLATED,
       
   102                                              -zlib.MAX_WBITS,
       
   103                                              zlib.DEF_MEM_LEVEL,
       
   104                                              0)
       
   105         else:
       
   106             raise IOError, "Mode " + mode + " not supported"
       
   107 
       
   108         self.fileobj = fileobj
       
   109         self.offset = 0
       
   110 
       
   111         if self.mode == WRITE:
       
   112             self._write_gzip_header()
       
   113 
       
   114     @property
       
   115     def filename(self):
       
   116         import warnings
       
   117         warnings.warn("use the name attribute", DeprecationWarning)
       
   118         if self.mode == WRITE and self.name[-3:] != ".gz":
       
   119             return self.name + ".gz"
       
   120         return self.name
       
   121 
       
   122     def __repr__(self):
       
   123         s = repr(self.fileobj)
       
   124         return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
       
   125 
       
   126     def _init_write(self, filename):
       
   127         self.name = filename
       
   128         self.crc = zlib.crc32("") & 0xffffffffL
       
   129         self.size = 0
       
   130         self.writebuf = []
       
   131         self.bufsize = 0
       
   132 
       
   133     def _write_gzip_header(self):
       
   134         self.fileobj.write('\037\213')             # magic header
       
   135         self.fileobj.write('\010')                 # compression method
       
   136         fname = self.name
       
   137         if fname.endswith(".gz"):
       
   138             fname = fname[:-3]
       
   139         flags = 0
       
   140         if fname:
       
   141             flags = FNAME
       
   142         self.fileobj.write(chr(flags))
       
   143         write32u(self.fileobj, long(time.time()))
       
   144         self.fileobj.write('\002')
       
   145         self.fileobj.write('\377')
       
   146         if fname:
       
   147             self.fileobj.write(fname + '\000')
       
   148 
       
   149     def _init_read(self):
       
   150         self.crc = zlib.crc32("") & 0xffffffffL
       
   151         self.size = 0
       
   152 
       
   153     def _read_gzip_header(self):
       
   154         magic = self.fileobj.read(2)
       
   155         if magic != '\037\213':
       
   156             raise IOError, 'Not a gzipped file'
       
   157         method = ord( self.fileobj.read(1) )
       
   158         if method != 8:
       
   159             raise IOError, 'Unknown compression method'
       
   160         flag = ord( self.fileobj.read(1) )
       
   161         # modtime = self.fileobj.read(4)
       
   162         # extraflag = self.fileobj.read(1)
       
   163         # os = self.fileobj.read(1)
       
   164         self.fileobj.read(6)
       
   165 
       
   166         if flag & FEXTRA:
       
   167             # Read & discard the extra field, if present
       
   168             xlen = ord(self.fileobj.read(1))
       
   169             xlen = xlen + 256*ord(self.fileobj.read(1))
       
   170             self.fileobj.read(xlen)
       
   171         if flag & FNAME:
       
   172             # Read and discard a null-terminated string containing the filename
       
   173             while True:
       
   174                 s = self.fileobj.read(1)
       
   175                 if not s or s=='\000':
       
   176                     break
       
   177         if flag & FCOMMENT:
       
   178             # Read and discard a null-terminated string containing a comment
       
   179             while True:
       
   180                 s = self.fileobj.read(1)
       
   181                 if not s or s=='\000':
       
   182                     break
       
   183         if flag & FHCRC:
       
   184             self.fileobj.read(2)     # Read & discard the 16-bit header CRC
       
   185 
       
   186 
       
   187     def write(self,data):
       
   188         if self.mode != WRITE:
       
   189             import errno
       
   190             raise IOError(errno.EBADF, "write() on read-only GzipFile object")
       
   191 
       
   192         if self.fileobj is None:
       
   193             raise ValueError, "write() on closed GzipFile object"
       
   194         if len(data) > 0:
       
   195             self.size = self.size + len(data)
       
   196             self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
       
   197             self.fileobj.write( self.compress.compress(data) )
       
   198             self.offset += len(data)
       
   199 
       
   200     def read(self, size=-1):
       
   201         if self.mode != READ:
       
   202             import errno
       
   203             raise IOError(errno.EBADF, "read() on write-only GzipFile object")
       
   204 
       
   205         if self.extrasize <= 0 and self.fileobj is None:
       
   206             return ''
       
   207 
       
   208         readsize = 1024
       
   209         if size < 0:        # get the whole thing
       
   210             try:
       
   211                 while True:
       
   212                     self._read(readsize)
       
   213                     readsize = min(self.max_read_chunk, readsize * 2)
       
   214             except EOFError:
       
   215                 size = self.extrasize
       
   216         else:               # just get some more of it
       
   217             try:
       
   218                 while size > self.extrasize:
       
   219                     self._read(readsize)
       
   220                     readsize = min(self.max_read_chunk, readsize * 2)
       
   221             except EOFError:
       
   222                 if size > self.extrasize:
       
   223                     size = self.extrasize
       
   224 
       
   225         chunk = self.extrabuf[:size]
       
   226         self.extrabuf = self.extrabuf[size:]
       
   227         self.extrasize = self.extrasize - size
       
   228 
       
   229         self.offset += size
       
   230         return chunk
       
   231 
       
   232     def _unread(self, buf):
       
   233         self.extrabuf = buf + self.extrabuf
       
   234         self.extrasize = len(buf) + self.extrasize
       
   235         self.offset -= len(buf)
       
   236 
       
   237     def _read(self, size=1024):
       
   238         if self.fileobj is None:
       
   239             raise EOFError, "Reached EOF"
       
   240 
       
   241         if self._new_member:
       
   242             # If the _new_member flag is set, we have to
       
   243             # jump to the next member, if there is one.
       
   244             #
       
   245             # First, check if we're at the end of the file;
       
   246             # if so, it's time to stop; no more members to read.
       
   247             pos = self.fileobj.tell()   # Save current position
       
   248             self.fileobj.seek(0, 2)     # Seek to end of file
       
   249             if pos == self.fileobj.tell():
       
   250                 raise EOFError, "Reached EOF"
       
   251             else:
       
   252                 self.fileobj.seek( pos ) # Return to original position
       
   253 
       
   254             self._init_read()
       
   255             self._read_gzip_header()
       
   256             self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
       
   257             self._new_member = False
       
   258 
       
   259         # Read a chunk of data from the file
       
   260         buf = self.fileobj.read(size)
       
   261 
       
   262         # If the EOF has been reached, flush the decompression object
       
   263         # and mark this object as finished.
       
   264 
       
   265         if buf == "":
       
   266             uncompress = self.decompress.flush()
       
   267             self._read_eof()
       
   268             self._add_read_data( uncompress )
       
   269             raise EOFError, 'Reached EOF'
       
   270 
       
   271         uncompress = self.decompress.decompress(buf)
       
   272         self._add_read_data( uncompress )
       
   273 
       
   274         if self.decompress.unused_data != "":
       
   275             # Ending case: we've come to the end of a member in the file,
       
   276             # so seek back to the start of the unused data, finish up
       
   277             # this member, and read a new gzip header.
       
   278             # (The number of bytes to seek back is the length of the unused
       
   279             # data, minus 8 because _read_eof() will rewind a further 8 bytes)
       
   280             self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
       
   281 
       
   282             # Check the CRC and file size, and set the flag so we read
       
   283             # a new member on the next call
       
   284             self._read_eof()
       
   285             self._new_member = True
       
   286 
       
   287     def _add_read_data(self, data):
       
   288         self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
       
   289         self.extrabuf = self.extrabuf + data
       
   290         self.extrasize = self.extrasize + len(data)
       
   291         self.size = self.size + len(data)
       
   292 
       
   293     def _read_eof(self):
       
   294         # We've read to the end of the file, so we have to rewind in order
       
   295         # to reread the 8 bytes containing the CRC and the file size.
       
   296         # We check the that the computed CRC and size of the
       
   297         # uncompressed data matches the stored values.  Note that the size
       
   298         # stored is the true file size mod 2**32.
       
   299         self.fileobj.seek(-8, 1)
       
   300         crc32 = read32(self.fileobj)
       
   301         isize = read32(self.fileobj)  # may exceed 2GB
       
   302         if crc32 != self.crc:
       
   303             raise IOError("CRC check failed %s != %s" % (hex(crc32),
       
   304                                                          hex(self.crc)))
       
   305         elif isize != (self.size & 0xffffffffL):
       
   306             raise IOError, "Incorrect length of data produced"
       
   307 
       
   308     def close(self):
       
   309         if self.fileobj is None:
       
   310             return
       
   311         if self.mode == WRITE:
       
   312             self.fileobj.write(self.compress.flush())
       
   313             write32u(self.fileobj, self.crc)
       
   314             # self.size may exceed 2GB, or even 4GB
       
   315             write32u(self.fileobj, self.size & 0xffffffffL)
       
   316             self.fileobj = None
       
   317         elif self.mode == READ:
       
   318             self.fileobj = None
       
   319         if self.myfileobj:
       
   320             self.myfileobj.close()
       
   321             self.myfileobj = None
       
   322 
       
   323     def __del__(self):
       
   324         try:
       
   325             if (self.myfileobj is None and
       
   326                 self.fileobj is None):
       
   327                 return
       
   328         except AttributeError:
       
   329             return
       
   330         self.close()
       
   331 
       
   332     def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
       
   333         if self.mode == WRITE:
       
   334             # Ensure the compressor's buffer is flushed
       
   335             self.fileobj.write(self.compress.flush(zlib_mode))
       
   336         self.fileobj.flush()
       
   337 
       
   338     def fileno(self):
       
   339         """Invoke the underlying file object's fileno() method.
       
   340 
       
   341         This will raise AttributeError if the underlying file object
       
   342         doesn't support fileno().
       
   343         """
       
   344         return self.fileobj.fileno()
       
   345 
       
   346     def isatty(self):
       
   347         return False
       
   348 
       
   349     def tell(self):
       
   350         return self.offset
       
   351 
       
   352     def rewind(self):
       
   353         '''Return the uncompressed stream file position indicator to the
       
   354         beginning of the file'''
       
   355         if self.mode != READ:
       
   356             raise IOError("Can't rewind in write mode")
       
   357         self.fileobj.seek(0)
       
   358         self._new_member = True
       
   359         self.extrabuf = ""
       
   360         self.extrasize = 0
       
   361         self.offset = 0
       
   362 
       
   363     def seek(self, offset, whence=0):
       
   364         if whence:
       
   365             if whence == 1:
       
   366                 offset = self.offset + offset
       
   367             else:
       
   368                 raise ValueError('Seek from end not supported')
       
   369         if self.mode == WRITE:
       
   370             if offset < self.offset:
       
   371                 raise IOError('Negative seek in write mode')
       
   372             count = offset - self.offset
       
   373             for i in range(count // 1024):
       
   374                 self.write(1024 * '\0')
       
   375             self.write((count % 1024) * '\0')
       
   376         elif self.mode == READ:
       
   377             if offset < self.offset:
       
   378                 # for negative seek, rewind and do positive seek
       
   379                 self.rewind()
       
   380             count = offset - self.offset
       
   381             for i in range(count // 1024):
       
   382                 self.read(1024)
       
   383             self.read(count % 1024)
       
   384 
       
   385     def readline(self, size=-1):
       
   386         if size < 0:
       
   387             size = sys.maxint
       
   388             readsize = self.min_readsize
       
   389         else:
       
   390             readsize = size
       
   391         bufs = []
       
   392         while size != 0:
       
   393             c = self.read(readsize)
       
   394             i = c.find('\n')
       
   395 
       
   396             # We set i=size to break out of the loop under two
       
   397             # conditions: 1) there's no newline, and the chunk is
       
   398             # larger than size, or 2) there is a newline, but the
       
   399             # resulting line would be longer than 'size'.
       
   400             if (size <= i) or (i == -1 and len(c) > size):
       
   401                 i = size - 1
       
   402 
       
   403             if i >= 0 or c == '':
       
   404                 bufs.append(c[:i + 1])    # Add portion of last chunk
       
   405                 self._unread(c[i + 1:])   # Push back rest of chunk
       
   406                 break
       
   407 
       
   408             # Append chunk to list, decrease 'size',
       
   409             bufs.append(c)
       
   410             size = size - len(c)
       
   411             readsize = min(size, readsize * 2)
       
   412         if readsize > self.min_readsize:
       
   413             self.min_readsize = min(readsize, self.min_readsize * 2, 512)
       
   414         return ''.join(bufs) # Return resulting line
       
   415 
       
   416     def readlines(self, sizehint=0):
       
   417         # Negative numbers result in reading all the lines
       
   418         if sizehint <= 0:
       
   419             sizehint = sys.maxint
       
   420         L = []
       
   421         while sizehint > 0:
       
   422             line = self.readline()
       
   423             if line == "":
       
   424                 break
       
   425             L.append(line)
       
   426             sizehint = sizehint - len(line)
       
   427 
       
   428         return L
       
   429 
       
   430     def writelines(self, L):
       
   431         for line in L:
       
   432             self.write(line)
       
   433 
       
   434     def __iter__(self):
       
   435         return self
       
   436 
       
   437     def next(self):
       
   438         line = self.readline()
       
   439         if line:
       
   440             return line
       
   441         else:
       
   442             raise StopIteration
       
   443 
       
   444 
       
   445 def _test():
       
   446     # Act like gzip; with -d, act like gunzip.
       
   447     # The input file is not deleted, however, nor are any other gzip
       
   448     # options or features supported.
       
   449     args = sys.argv[1:]
       
   450     decompress = args and args[0] == "-d"
       
   451     if decompress:
       
   452         args = args[1:]
       
   453     if not args:
       
   454         args = ["-"]
       
   455     for arg in args:
       
   456         if decompress:
       
   457             if arg == "-":
       
   458                 f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
       
   459                 g = sys.stdout
       
   460             else:
       
   461                 if arg[-3:] != ".gz":
       
   462                     print "filename doesn't end in .gz:", repr(arg)
       
   463                     continue
       
   464                 f = open(arg, "rb")
       
   465                 g = __builtin__.open(arg[:-3], "wb")
       
   466         else:
       
   467             if arg == "-":
       
   468                 f = sys.stdin
       
   469                 g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
       
   470             else:
       
   471                 f = __builtin__.open(arg, "rb")
       
   472                 g = open(arg + ".gz", "wb")
       
   473         while True:
       
   474             chunk = f.read(1024)
       
   475             if not chunk:
       
   476                 break
       
   477             g.write(chunk)
       
   478         if g is not sys.stdout:
       
   479             g.close()
       
   480         if f is not sys.stdin:
       
   481             f.close()
       
   482 
       
   483 if __name__ == '__main__':
       
   484     _test()