srcanamdw/codescanner/pyinstaller/archive.py
changeset 1 22878952f6e2
equal deleted inserted replaced
0:509e4801c378 1:22878952f6e2
       
     1 # Copyright (C) 2005, Giovanni Bajo
       
     2 # Based on previous work under copyright (c) 2002 McMillan Enterprises, Inc.
       
     3 #
       
     4 # This program is free software; you can redistribute it and/or
       
     5 # modify it under the terms of the GNU General Public License
       
     6 # as published by the Free Software Foundation; either version 2
       
     7 # of the License, or (at your option) any later version.
       
     8 #
       
     9 # In addition to the permissions in the GNU General Public License, the
       
    10 # authors give you unlimited permission to link or embed the compiled
       
    11 # version of this file into combinations with other programs, and to
       
    12 # distribute those combinations without any restriction coming from the
       
    13 # use of this file. (The General Public License restrictions do apply in
       
    14 # other respects; for example, they cover modification of the file, and
       
    15 # distribution when not linked into a combine executable.)
       
    16 #
       
    17 # This program is distributed in the hope that it will be useful,
       
    18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       
    20 # GNU General Public License for more details.
       
    21 #
       
    22 # You should have received a copy of the GNU General Public License
       
    23 # along with this program; if not, write to the Free Software
       
    24 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
       
    25 
       
    26 # subclasses may not need marshal or struct, but since they're
       
    27 # builtin, importing is safe.
       
    28 #
       
    29 # While an Archive is really an abstraction for any "filesystem
       
    30 # within a file", it is tuned for use with imputil.FuncImporter.
       
    31 # This assumes it contains python code objects, indexed by the
       
     32 # internal name (ie, no '.py').
       
    33 # See carchive.py for a more general archive (contains anything)
       
    34 # that can be understood by a C program.
       
    35 
       
# Debug flag; switched to 1 at the end of this section when "-vi" is given
# on the command line.
_verbose = 0
# listdir / environ taken from a builtin platform module (see the loop below).
# Both stay None when none of the candidate modules is built into the
# interpreter.
_listdir = None
_environ = None

# **NOTE** This module is used during bootstrap. Import *ONLY* builtin modules.
import marshal
import struct
import imp
import sys

# The entries of imp.get_suffixes() whose type field (index 2) is
# imp.C_EXTENSION.
_c_suffixes = filter(lambda x: x[2] == imp.C_EXTENSION, imp.get_suffixes())

# Pick listdir/environ from the first of these modules that appears in
# sys.builtin_module_names; the loop stops at the first match.
for nm in ('nt', 'posix', 'dos', 'os2', 'mac'):
    if nm in sys.builtin_module_names:
        mod = __import__(nm)
        _listdir = mod.listdir
        _environ = mod.environ
        break

# versuffix is the major and minor version numbers run together.
if hasattr(sys, 'version_info'):
    versuffix = '%d%d'%(sys.version_info[0],sys.version_info[1])
else:
    # No sys.version_info on this interpreter: parse sys.version by hand.
    # dot1 / dot2 end up as the indexes of the first and second '.'.
    vers = sys.version
    dot1 = dot2 = 0
    for i in range(len(vers)):
        if vers[i] == '.':
            if dot1:
                dot2 = i
                break
            else:
                dot1 = i
    else:
        # the loop ran out without finding a second '.': use the rest of the string
        dot2 = len(vers)
    # text before the first dot + text between the first dot and dot2
    versuffix = '%s%s' % (vers[:dot1], vers[dot1+1:dot2])

# "-vi" anywhere after the program name turns the debug flag on.
if "-vi" in sys.argv[1:]:
    _verbose = 1
       
    73 
       
    74 class Archive:
       
    75     """ A base class for a repository of python code objects.
       
    76         The extract method is used by imputil.ArchiveImporter
       
    77         to get code objects by name (fully qualified name), so
       
    78         an enduser "import a.b" would become
       
    79           extract('a.__init__')
       
    80           extract('a.b')
       
    81     """
       
    82     MAGIC = 'PYL\0'
       
    83     HDRLEN = 12        # default is MAGIC followed by python's magic, int pos of toc
       
    84     TOCPOS = 8
       
    85     TRLLEN = 0        # default - no trailer
       
    86     TOCTMPLT = {}     #
       
    87     os = None
       
    88     _bincache = None
       
    89     def __init__(self, path=None, start=0):
       
    90         "Initialize an Archive. If path is omitted, it will be an empty Archive."
       
    91         self.toc = None
       
    92         self.path = path
       
    93         self.start = start
       
    94         import imp
       
    95         self.pymagic = imp.get_magic()
       
    96         if path is not None:
       
    97             self.lib = open(self.path, 'rb')
       
    98             self.checkmagic()
       
    99             self.loadtoc()
       
   100 
       
   101             ####### Sub-methods of __init__ - override as needed #############
       
   102     def checkmagic(self):
       
   103         """ Overridable.
       
   104             Check to see if the file object self.lib actually has a file
       
   105             we understand.
       
   106         """
       
   107         self.lib.seek(self.start)	#default - magic is at start of file
       
   108         if self.lib.read(len(self.MAGIC)) != self.MAGIC:
       
   109             raise RuntimeError, "%s is not a valid %s archive file" \
       
   110               % (self.path, self.__class__.__name__)
       
   111         if self.lib.read(len(self.pymagic)) != self.pymagic:
       
   112             raise RuntimeError, "%s has version mismatch to dll" % (self.path)
       
   113         self.lib.read(4)
       
   114 
       
   115     def loadtoc(self):
       
   116         """ Overridable.
       
   117             Default: After magic comes an int (4 byte native) giving the
       
   118             position of the TOC within self.lib.
       
   119             Default: The TOC is a marshal-able string.
       
   120         """
       
   121         self.lib.seek(self.start + self.TOCPOS)
       
   122         (offset,) = struct.unpack('=i', self.lib.read(4))
       
   123         self.lib.seek(self.start + offset)
       
   124         self.toc = marshal.load(self.lib)
       
   125 
       
   126         ######## This is what is called by FuncImporter #######
       
   127         ## Since an Archive is flat, we ignore parent and modname.
       
   128         #XXX obsolete - imputil only code
       
   129         ##  def get_code(self, parent, modname, fqname):
       
   130         ####    if _verbose:
       
   131         ####      print "I: get_code(%s, %s, %s, %s)" % (self, parent, modname, fqname)
       
   132         ##    iname = fqname
       
   133         ##    if parent:
       
   134         ##        iname = '%s.%s' % (parent.__dict__.get('__iname__', parent.__name__), modname)
       
   135         ####        if _verbose:
       
   136         ####            print "I: get_code: iname is %s" % iname
       
   137         ##    rslt = self.extract(iname) # None if not found, (ispkg, code) otherwise
       
   138         ####    if _verbose:
       
   139         ####        print 'I: get_code: rslt', rslt
       
   140         ##    if rslt is None:
       
   141         ####      if _verbose:
       
   142         ####          print 'I: get_code: importer', getattr(parent, "__importer__", None),'self',self
       
   143         ##      # check the cache if there is no parent or self is the parents importer
       
   144         ##      if parent is None or getattr(parent, "__importer__", None) is self:
       
   145         ####            if _verbose:
       
   146         ####                print 'I: get_code: cached 1',iname
       
   147         ##            file, desc = Archive._bincache.get(iname, (None, None))
       
   148         ####            if _verbose:
       
   149         ####                print 'I: get_code: file',file,'desc',desc
       
   150         ##            if file:
       
   151         ##              try:
       
   152         ##                fp = open(file, desc[1])
       
   153         ##              except IOError:
       
   154         ##                pass
       
   155         ##              else:
       
   156         ##                module = imp.load_module(fqname, fp, file, desc)
       
   157         ##                if _verbose:
       
   158         ##                    print "I: import %s found %s" % (fqname, file)
       
   159         ##                return 0, module, {'__file__':file}
       
   160         ##      if _verbose:
       
   161         ##          print "I: import %s failed" % fqname
       
   162         ##
       
   163         ##      return None
       
   164         ##
       
   165         ##    ispkg, code = rslt
       
   166         ##    values = {'__file__' : code.co_filename, '__iname__' : iname}
       
   167         ##    if ispkg:
       
   168         ##      values['__path__'] = [fqname]
       
   169         ##    if _verbose:
       
   170         ##        print "I: import %s found %s" % (fqname, iname)
       
   171         ##    return ispkg, code, values
       
   172 
       
   173         ####### Core method - Override as needed  #########
       
   174     def extract(self, name):
       
   175         """ Get the object corresponding to name, or None.
       
   176             For use with imputil ArchiveImporter, object is a python code object.
       
   177             'name' is the name as specified in an 'import name'.
       
   178             'import a.b' will become:
       
   179             extract('a') (return None because 'a' is not a code object)
       
   180             extract('a.__init__') (return a code object)
       
   181             extract('a.b') (return a code object)
       
   182             Default implementation:
       
   183               self.toc is a dict
       
   184               self.toc[name] is pos
       
   185               self.lib has the code object marshal-ed at pos
       
   186         """
       
   187         ispkg, pos = self.toc.get(name, (0,None))
       
   188         if pos is None:
       
   189             return None
       
   190         self.lib.seek(self.start + pos)
       
   191         return ispkg, marshal.load(self.lib)
       
   192 
       
   193         ########################################################################
       
   194         # Informational methods
       
   195 
       
   196     def contents(self):
       
   197         """Return a list of the contents
       
   198            Default implementation assumes self.toc is a dict like object.
       
   199            Not required by ArchiveImporter.
       
   200         """
       
   201         return self.toc.keys()
       
   202 
       
   203         ########################################################################
       
   204         # Building
       
   205 
       
   206         ####### Top level method - shouldn't need overriding #######
       
   207     def build(self, path, lTOC):
       
   208         """Create an archive file of name 'path'.
       
   209            lTOC is a 'logical TOC' - a list of (name, path, ...)
       
   210            where name is the internal name, eg 'a'
       
   211            and path is a file to get the object from, eg './a.pyc'.
       
   212         """
       
   213         self.path = path
       
   214         self.lib = open(path, 'wb')
       
   215         #reserve space for the header
       
   216         if self.HDRLEN:
       
   217             self.lib.write('\0'*self.HDRLEN)
       
   218 
       
   219             #create an empty toc
       
   220 
       
   221         if type(self.TOCTMPLT) == type({}):
       
   222             self.toc = {}
       
   223         else:       # assume callable
       
   224             self.toc = self.TOCTMPLT()
       
   225 
       
   226         for tocentry in lTOC:
       
   227             self.add(tocentry)   # the guts of the archive
       
   228 
       
   229         tocpos = self.lib.tell()
       
   230         self.save_toc(tocpos)
       
   231         if self.TRLLEN:
       
   232             self.save_trailer(tocpos)
       
   233         if self.HDRLEN:
       
   234             self.update_headers(tocpos)
       
   235         self.lib.close()
       
   236 
       
   237 
       
   238         ####### manages keeping the internal TOC and the guts in sync #######
       
   239     def add(self, entry):
       
   240         """Override this to influence the mechanics of the Archive.
       
   241            Assumes entry is a seq beginning with (nm, pth, ...) where
       
   242            nm is the key by which we'll be asked for the object.
       
   243            pth is the name of where we find the object. Overrides of
       
   244            get_obj_from can make use of further elements in entry.
       
   245         """
       
   246         if self.os is None:
       
   247             import os
       
   248             self.os = os
       
   249         nm = entry[0]
       
   250         pth = entry[1]
       
   251         pynm, ext = self.os.path.splitext(self.os.path.basename(pth))
       
   252         ispkg = pynm == '__init__'
       
   253         assert ext in ('.pyc', '.pyo')
       
   254         self.toc[nm] = (ispkg, self.lib.tell())
       
   255         f = open(entry[1], 'rb')
       
   256         f.seek(8)	#skip magic and timestamp
       
   257         self.lib.write(f.read())
       
   258 
       
   259     def save_toc(self, tocpos):
       
   260         """Default - toc is a dict
       
   261            Gets marshaled to self.lib
       
   262         """
       
   263         marshal.dump(self.toc, self.lib)
       
   264 
       
   265     def save_trailer(self, tocpos):
       
   266         """Default - not used"""
       
   267         pass
       
   268 
       
   269     def update_headers(self, tocpos):
       
   270         """Default - MAGIC + Python's magic + tocpos"""
       
   271         self.lib.seek(self.start)
       
   272         self.lib.write(self.MAGIC)
       
   273         self.lib.write(self.pymagic)
       
   274         self.lib.write(struct.pack('=i', tocpos))
       
   275 
       
   276 class DummyZlib:
       
   277     def decompress(self, data):
       
   278         return data
       
   279     def compress(self, data, lvl):
       
   280         return data
       
   281 
       
   282 import iu
       
   283 ##############################################################
       
   284 #
       
   285 # ZlibArchive - an archive with compressed entries
       
   286 #
       
   287 class ZlibArchive(Archive):
       
   288     MAGIC = 'PYZ\0'
       
   289     TOCPOS = 8
       
   290     HDRLEN = 16
       
   291     TRLLEN = 0
       
   292     TOCTMPLT = {}
       
   293     LEVEL = 9
       
   294 
       
   295     def __init__(self, path=None, offset=None, level=9):
       
   296         if path is None:
       
   297             offset = 0
       
   298         elif offset is None:
       
   299             for i in range(len(path)-1, -1, -1):
       
   300                 if path[i] == '?':
       
   301                     offset = int(path[i+1:])
       
   302                     path = path[:i]
       
   303                     break
       
   304             else:
       
   305                 offset = 0
       
   306         self.LEVEL = level
       
   307         Archive.__init__(self, path, offset)
       
   308         # dynamic import so not imported if not needed
       
   309         global zlib
       
   310         if self.LEVEL:
       
   311             try:
       
   312                 import zlib
       
   313             except ImportError:
       
   314                 zlib = DummyZlib()
       
   315         else:
       
   316             zlib = DummyZlib()
       
   317 
       
   318 
       
   319     def extract(self, name):
       
   320         (ispkg, pos, lngth) = self.toc.get(name, (0, None, 0))
       
   321         if pos is None:
       
   322             return None
       
   323         self.lib.seek(self.start + pos)
       
   324         try:
       
   325             co = marshal.loads(zlib.decompress(self.lib.read(lngth)))
       
   326         except EOFError:
       
   327             raise ImportError, "PYZ entry '%s' failed to unmarshal" % name
       
   328         return ispkg, co
       
   329 
       
   330     def add(self, entry):
       
   331         if self.os is None:
       
   332             import os
       
   333             self.os = os
       
   334         nm = entry[0]
       
   335         pth = entry[1]
       
   336         base, ext = self.os.path.splitext(self.os.path.basename(pth))
       
   337         ispkg = base == '__init__'
       
   338         try:
       
   339             txt = open(pth[:-1], 'r').read()+'\n'
       
   340         except (IOError, OSError):
       
   341             try:
       
   342                 f = open(pth, 'rb')
       
   343                 f.seek(8)	#skip magic and timestamp
       
   344                 bytecode = f.read()
       
   345                 marshal.loads(bytecode).co_filename # to make sure it's valid
       
   346                 obj = zlib.compress(bytecode, self.LEVEL)
       
   347             except (IOError, ValueError, EOFError, AttributeError):
       
   348                 raise ValueError("bad bytecode in %s and no source" % pth)
       
   349         else:
       
   350             txt = iu._string_replace(txt, '\r\n', '\n')
       
   351             try:
       
   352                 co = compile(txt, "%s/%s" % (self.path, nm), 'exec')
       
   353             except SyntaxError, e:
       
   354                 print "Syntax error in", pth[:-1]
       
   355                 print e.args
       
   356                 raise
       
   357             obj = zlib.compress(marshal.dumps(co), self.LEVEL)
       
   358         self.toc[nm] = (ispkg, self.lib.tell(), len(obj))
       
   359         self.lib.write(obj)
       
   360     def update_headers(self, tocpos):
       
   361         """add level"""
       
   362         Archive.update_headers(self, tocpos)
       
   363         self.lib.write(struct.pack('!i', self.LEVEL))
       
   364     def checkmagic(self):
       
   365         Archive.checkmagic(self)
       
   366         self.LEVEL = struct.unpack('!i', self.lib.read(4))[0]
       
   367 
       
   368 class PYZOwner(iu.Owner):
       
   369     def __init__(self, path):
       
   370         self.pyz = ZlibArchive(path)
       
   371         iu.Owner.__init__(self, path)
       
   372     def getmod(self, nm, newmod=imp.new_module):
       
   373         rslt = self.pyz.extract(nm)
       
   374         if rslt is None:
       
   375             return None
       
   376         ispkg, co = rslt
       
   377         mod = newmod(nm)
       
   378         try:
       
   379             mod.__file__ = co.co_filename
       
   380         except AttributeError:
       
   381             raise ImportError, "PYZ entry '%s' (%s) is not a valid code object" % (nm, repr(co))
       
   382         if ispkg:
       
   383             if _environ.has_key('_MEIPASS2'):
       
   384                 localpath = _environ['_MEIPASS2'][:-1]
       
   385             else:
       
   386                 localpath = iu._os_path_dirname(self.path)
       
   387             mod.__path__ = [self.path, localpath, iu._os_path_dirname(mod.__file__)]
       
   388             #print "PYZOwner setting %s's __path__: %s" % (nm, mod.__path__)
       
   389             importer = iu.PathImportDirector(mod.__path__,
       
   390                                               {self.path:PkgInPYZImporter(nm, self),
       
   391                                                localpath:ExtInPkgImporter(localpath, nm)},
       
   392                                               [iu.DirOwner])
       
   393             mod.__importsub__ = importer.getmod
       
   394         mod.__co__ = co
       
   395         return mod
       
   396 
       
   397 class PkgInPYZImporter:
       
   398     def __init__(self, name, owner):
       
   399         self.name = name
       
   400         self.owner = owner
       
   401     def getmod(self, nm):
       
   402         #print "PkgInPYZImporter.getmod %s -> %s" % (nm, self.name+'.'+nm)
       
   403         return self.owner.getmod(self.name+'.'+nm)
       
   404 class ExtInPkgImporter(iu.DirOwner):
       
   405     def __init__(self, path, prefix):
       
   406         iu.DirOwner.__init__(self, path)
       
   407         self.prefix = prefix
       
   408     def getmod(self, nm):
       
   409         return iu.DirOwner.getmod(self, self.prefix+'.'+nm)
       
   410 
       
   411         #XXX this should also get moved out
       
   412         ##iu._globalownertypes.insert(0, PYZOwner)
       
   413         ##iu.ImportManager().install()