symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/urllib.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """Open an arbitrary URL.
       
     2 
       
     3 See the following document for more info on URLs:
       
     4 "Names and Addresses, URIs, URLs, URNs, URCs", at
       
     5 http://www.w3.org/pub/WWW/Addressing/Overview.html
       
     6 
       
     7 See also the HTTP spec (from which the error codes are derived):
       
     8 "HTTP - Hypertext Transfer Protocol", at
       
     9 http://www.w3.org/pub/WWW/Protocols/
       
    10 
       
    11 Related standards and specs:
       
    12 - RFC1808: the "relative URL" spec. (authoritative status)
       
    13 - RFC1738 - the "URL standard". (authoritative status)
       
    14 - RFC1630 - the "URI spec". (informational status)
       
    15 
       
    16 The object returned by URLopener().open(file) will differ per
       
     17 protocol.  All you know is that it has methods read(), readline(),
       
    18 readlines(), fileno(), close() and info().  The read*(), fileno()
       
    19 and close() methods work like those of open files.
       
    20 The info() method returns a mimetools.Message object which can be
       
    21 used to query various info about the object, if available.
       
    22 (mimetools.Message objects are queried with the getheader() method.)
       
    23 """
       
    24 
       
    25 import string
       
    26 import socket
       
    27 import os
       
    28 import time
       
    29 import sys
       
    30 from urlparse import urljoin as basejoin
       
    31 import warnings
       
    32 
       
    33 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
       
    34            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
       
    35            "urlencode", "url2pathname", "pathname2url", "splittag",
       
    36            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
       
    37            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
       
    38            "splitnport", "splitquery", "splitattr", "splitvalue",
       
    39            "getproxies"]
       
    40 
       
    41 __version__ = '1.17'    # XXX This version is not always updated :-(
       
    42 
       
    43 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
       
    44 
       
    45 # Helper for non-unix systems
       
    46 if os.name == 'mac':
       
    47     from macurl2path import url2pathname, pathname2url
       
    48 elif os.name == 'nt':
       
    49     from nturl2path import url2pathname, pathname2url
       
    50 elif os.name == 'riscos':
       
    51     from rourl2path import url2pathname, pathname2url
       
    52 else:
       
    def url2pathname(pathname):
        """Turn the path of a relative 'file' URL into a file system path.

        Generic fallback for platforms without a dedicated converter
        module: the path is simply percent-decoded with unquote().
        Not recommended for general use.
        """
        decoded = unquote(pathname)
        return decoded
       
    57 
       
    def pathname2url(pathname):
        """Turn a file system path into the path of a relative 'file' URL.

        Generic fallback for platforms without a dedicated converter
        module: the path is simply percent-encoded with quote().
        Not recommended for general use.
        """
        encoded = quote(pathname)
        return encoded
       
    62 
       
    63 # This really consists of two pieces:
       
    64 # (1) a class which handles opening of all sorts of URLs
       
    65 #     (plus assorted utilities etc.)
       
    66 # (2) a set of functions for parsing URLs
       
    67 # XXX Should these be separated out into different modules?
       
    68 
       
    69 
       
    70 # Shortcut for basic usage
       
    71 _urlopener = None
       
    72 def urlopen(url, data=None, proxies=None):
       
    73     """Create a file-like object for the specified URL to read from."""
       
    74     from warnings import warnpy3k
       
    75     warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
       
    76                         "favor of urllib2.urlopen()", stacklevel=2)
       
    77 
       
    78     global _urlopener
       
    79     if proxies is not None:
       
    80         opener = FancyURLopener(proxies=proxies)
       
    81     elif not _urlopener:
       
    82         opener = FancyURLopener()
       
    83         _urlopener = opener
       
    84     else:
       
    85         opener = _urlopener
       
    86     if data is None:
       
    87         return opener.open(url)
       
    88     else:
       
    89         return opener.open(url, data)
       
    90 def urlretrieve(url, filename=None, reporthook=None, data=None):
       
    91     global _urlopener
       
    92     if not _urlopener:
       
    93         _urlopener = FancyURLopener()
       
    94     return _urlopener.retrieve(url, filename, reporthook, data)
       
    95 def urlcleanup():
       
    96     if _urlopener:
       
    97         _urlopener.cleanup()
       
    98 
       
    99 # check for SSL
       
   100 try:
       
   101     import ssl
       
   102 except:
       
   103     _have_ssl = False
       
   104 else:
       
   105     _have_ssl = True
       
   106 
       
   107 # exception raised when downloaded size does not match content-length
       
   108 class ContentTooShortError(IOError):
       
   109     def __init__(self, message, content):
       
   110         IOError.__init__(self, message)
       
   111         self.content = content
       
   112 
       
   113 ftpcache = {}
       
   114 class URLopener:
       
   115     """Class to open URLs.
       
   116     This is a class rather than just a subroutine because we may need
       
   117     more than one set of global protocol-specific options.
       
   118     Note -- this is a base class for those who don't want the
       
   119     automatic handling of errors type 302 (relocated) and 401
       
   120     (authorization needed)."""
       
   121 
       
   122     __tempfiles = None
       
   123 
       
   124     version = "Python-urllib/%s" % __version__
       
   125 
       
   126     # Constructor
       
   127     def __init__(self, proxies=None, **x509):
       
   128         if proxies is None:
       
   129             proxies = getproxies()
       
   130         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
       
   131         self.proxies = proxies
       
   132         self.key_file = x509.get('key_file')
       
   133         self.cert_file = x509.get('cert_file')
       
   134         self.addheaders = [('User-Agent', self.version)]
       
   135         self.__tempfiles = []
       
   136         self.__unlink = os.unlink # See cleanup()
       
   137         self.tempcache = None
       
   138         # Undocumented feature: if you assign {} to tempcache,
       
   139         # it is used to cache files retrieved with
       
   140         # self.retrieve().  This is not enabled by default
       
   141         # since it does not work for changing documents (and I
       
   142         # haven't got the logic to check expiration headers
       
   143         # yet).
       
   144         self.ftpcache = ftpcache
       
   145         # Undocumented feature: you can use a different
       
   146         # ftp cache by assigning to the .ftpcache member;
       
   147         # in case you want logically independent URL openers
       
   148         # XXX This is not threadsafe.  Bah.
       
   149 
       
   150     def __del__(self):
       
   151         self.close()
       
   152 
       
   153     def close(self):
       
   154         self.cleanup()
       
   155 
       
   156     def cleanup(self):
       
   157         # This code sometimes runs when the rest of this module
       
   158         # has already been deleted, so it can't use any globals
       
   159         # or import anything.
       
   160         if self.__tempfiles:
       
   161             for file in self.__tempfiles:
       
   162                 try:
       
   163                     self.__unlink(file)
       
   164                 except OSError:
       
   165                     pass
       
   166             del self.__tempfiles[:]
       
   167         if self.tempcache:
       
   168             self.tempcache.clear()
       
   169 
       
   170     def addheader(self, *args):
       
   171         """Add a header to be used by the HTTP interface only
       
   172         e.g. u.addheader('Accept', 'sound/basic')"""
       
   173         self.addheaders.append(args)
       
   174 
       
   175     # External interface
       
   176     def open(self, fullurl, data=None):
       
   177         """Use URLopener().open(file) instead of open(file, 'r')."""
       
   178         fullurl = unwrap(toBytes(fullurl))
       
   179         if self.tempcache and fullurl in self.tempcache:
       
   180             filename, headers = self.tempcache[fullurl]
       
   181             fp = open(filename, 'rb')
       
   182             return addinfourl(fp, headers, fullurl)
       
   183         urltype, url = splittype(fullurl)
       
   184         if not urltype:
       
   185             urltype = 'file'
       
   186         if urltype in self.proxies:
       
   187             proxy = self.proxies[urltype]
       
   188             urltype, proxyhost = splittype(proxy)
       
   189             host, selector = splithost(proxyhost)
       
   190             url = (host, fullurl) # Signal special case to open_*()
       
   191         else:
       
   192             proxy = None
       
   193         name = 'open_' + urltype
       
   194         self.type = urltype
       
   195         name = name.replace('-', '_')
       
   196         if not hasattr(self, name):
       
   197             if proxy:
       
   198                 return self.open_unknown_proxy(proxy, fullurl, data)
       
   199             else:
       
   200                 return self.open_unknown(fullurl, data)
       
   201         try:
       
   202             if data is None:
       
   203                 return getattr(self, name)(url)
       
   204             else:
       
   205                 return getattr(self, name)(url, data)
       
   206         except socket.error, msg:
       
   207             raise IOError, ('socket error', msg), sys.exc_info()[2]
       
   208 
       
   209     def open_unknown(self, fullurl, data=None):
       
   210         """Overridable interface to open unknown URL type."""
       
   211         type, url = splittype(fullurl)
       
   212         raise IOError, ('url error', 'unknown url type', type)
       
   213 
       
   214     def open_unknown_proxy(self, proxy, fullurl, data=None):
       
   215         """Overridable interface to open unknown URL type."""
       
   216         type, url = splittype(fullurl)
       
   217         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
       
   218 
       
   219     # External interface
       
   220     def retrieve(self, url, filename=None, reporthook=None, data=None):
       
   221         """retrieve(url) returns (filename, headers) for a local object
       
   222         or (tempfilename, headers) for a remote object."""
       
   223         url = unwrap(toBytes(url))
       
   224         if self.tempcache and url in self.tempcache:
       
   225             return self.tempcache[url]
       
   226         type, url1 = splittype(url)
       
   227         if filename is None and (not type or type == 'file'):
       
   228             try:
       
   229                 fp = self.open_local_file(url1)
       
   230                 hdrs = fp.info()
       
   231                 del fp
       
   232                 return url2pathname(splithost(url1)[1]), hdrs
       
   233             except IOError, msg:
       
   234                 pass
       
   235         fp = self.open(url, data)
       
   236         headers = fp.info()
       
   237         if filename:
       
   238             tfp = open(filename, 'wb')
       
   239         else:
       
   240             import tempfile
       
   241             garbage, path = splittype(url)
       
   242             garbage, path = splithost(path or "")
       
   243             path, garbage = splitquery(path or "")
       
   244             path, garbage = splitattr(path or "")
       
   245             suffix = os.path.splitext(path)[1]
       
   246             (fd, filename) = tempfile.mkstemp(suffix)
       
   247             self.__tempfiles.append(filename)
       
   248             tfp = os.fdopen(fd, 'wb')
       
   249         result = filename, headers
       
   250         if self.tempcache is not None:
       
   251             self.tempcache[url] = result
       
   252         bs = 1024*8
       
   253         size = -1
       
   254         read = 0
       
   255         blocknum = 0
       
   256         if reporthook:
       
   257             if "content-length" in headers:
       
   258                 size = int(headers["Content-Length"])
       
   259             reporthook(blocknum, bs, size)
       
   260         while 1:
       
   261             block = fp.read(bs)
       
   262             if block == "":
       
   263                 break
       
   264             read += len(block)
       
   265             tfp.write(block)
       
   266             blocknum += 1
       
   267             if reporthook:
       
   268                 reporthook(blocknum, bs, size)
       
   269         fp.close()
       
   270         tfp.close()
       
   271         del fp
       
   272         del tfp
       
   273 
       
   274         # raise exception if actual size does not match content-length header
       
   275         if size >= 0 and read < size:
       
   276             raise ContentTooShortError("retrieval incomplete: got only %i out "
       
   277                                        "of %i bytes" % (read, size), result)
       
   278 
       
   279         return result
       
   280 
       
   281     # Each method named open_<type> knows how to open that type of URL
       
   282 
       
   283     def open_http(self, url, data=None):
       
   284         """Use HTTP protocol."""
       
   285         import httplib
       
   286         user_passwd = None
       
   287         proxy_passwd= None
       
   288         if isinstance(url, str):
       
   289             host, selector = splithost(url)
       
   290             if host:
       
   291                 user_passwd, host = splituser(host)
       
   292                 host = unquote(host)
       
   293             realhost = host
       
   294         else:
       
   295             host, selector = url
       
   296             # check whether the proxy contains authorization information
       
   297             proxy_passwd, host = splituser(host)
       
   298             # now we proceed with the url we want to obtain
       
   299             urltype, rest = splittype(selector)
       
   300             url = rest
       
   301             user_passwd = None
       
   302             if urltype.lower() != 'http':
       
   303                 realhost = None
       
   304             else:
       
   305                 realhost, rest = splithost(rest)
       
   306                 if realhost:
       
   307                     user_passwd, realhost = splituser(realhost)
       
   308                 if user_passwd:
       
   309                     selector = "%s://%s%s" % (urltype, realhost, rest)
       
   310                 if proxy_bypass(realhost):
       
   311                     host = realhost
       
   312 
       
   313             #print "proxy via http:", host, selector
       
   314         if not host: raise IOError, ('http error', 'no host given')
       
   315 
       
   316         if proxy_passwd:
       
   317             import base64
       
   318             proxy_auth = base64.b64encode(proxy_passwd).strip()
       
   319         else:
       
   320             proxy_auth = None
       
   321 
       
   322         if user_passwd:
       
   323             import base64
       
   324             auth = base64.b64encode(user_passwd).strip()
       
   325         else:
       
   326             auth = None
       
   327         h = httplib.HTTP(host)
       
   328         if data is not None:
       
   329             h.putrequest('POST', selector)
       
   330             h.putheader('Content-Type', 'application/x-www-form-urlencoded')
       
   331             h.putheader('Content-Length', '%d' % len(data))
       
   332         else:
       
   333             h.putrequest('GET', selector)
       
   334         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
       
   335         if auth: h.putheader('Authorization', 'Basic %s' % auth)
       
   336         if realhost: h.putheader('Host', realhost)
       
   337         for args in self.addheaders: h.putheader(*args)
       
   338         h.endheaders()
       
   339         if data is not None:
       
   340             h.send(data)
       
   341         errcode, errmsg, headers = h.getreply()
       
   342         fp = h.getfile()
       
   343         if errcode == -1:
       
   344             if fp: fp.close()
       
   345             # something went wrong with the HTTP status line
       
   346             raise IOError, ('http protocol error', 0,
       
   347                             'got a bad status line', None)
       
   348         # According to RFC 2616, "2xx" code indicates that the client's
       
   349         # request was successfully received, understood, and accepted.
       
   350         if (200 <= errcode < 300):
       
   351             return addinfourl(fp, headers, "http:" + url, errcode)
       
   352         else:
       
   353             if data is None:
       
   354                 return self.http_error(url, fp, errcode, errmsg, headers)
       
   355             else:
       
   356                 return self.http_error(url, fp, errcode, errmsg, headers, data)
       
   357 
       
   358     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
       
   359         """Handle http errors.
       
   360         Derived class can override this, or provide specific handlers
       
   361         named http_error_DDD where DDD is the 3-digit error code."""
       
   362         # First check if there's a specific handler for this error
       
   363         name = 'http_error_%d' % errcode
       
   364         if hasattr(self, name):
       
   365             method = getattr(self, name)
       
   366             if data is None:
       
   367                 result = method(url, fp, errcode, errmsg, headers)
       
   368             else:
       
   369                 result = method(url, fp, errcode, errmsg, headers, data)
       
   370             if result: return result
       
   371         return self.http_error_default(url, fp, errcode, errmsg, headers)
       
   372 
       
   373     def http_error_default(self, url, fp, errcode, errmsg, headers):
       
   374         """Default error handler: close the connection and raise IOError."""
       
   375         void = fp.read()
       
   376         fp.close()
       
   377         raise IOError, ('http error', errcode, errmsg, headers)
       
   378 
       
   379     if _have_ssl:
       
   380         def open_https(self, url, data=None):
       
   381             """Use HTTPS protocol."""
       
   382 
       
   383             import httplib
       
   384             user_passwd = None
       
   385             proxy_passwd = None
       
   386             if isinstance(url, str):
       
   387                 host, selector = splithost(url)
       
   388                 if host:
       
   389                     user_passwd, host = splituser(host)
       
   390                     host = unquote(host)
       
   391                 realhost = host
       
   392             else:
       
   393                 host, selector = url
       
   394                 # here, we determine, whether the proxy contains authorization information
       
   395                 proxy_passwd, host = splituser(host)
       
   396                 urltype, rest = splittype(selector)
       
   397                 url = rest
       
   398                 user_passwd = None
       
   399                 if urltype.lower() != 'https':
       
   400                     realhost = None
       
   401                 else:
       
   402                     realhost, rest = splithost(rest)
       
   403                     if realhost:
       
   404                         user_passwd, realhost = splituser(realhost)
       
   405                     if user_passwd:
       
   406                         selector = "%s://%s%s" % (urltype, realhost, rest)
       
   407                 #print "proxy via https:", host, selector
       
   408             if not host: raise IOError, ('https error', 'no host given')
       
   409             if proxy_passwd:
       
   410                 import base64
       
   411                 proxy_auth = base64.b64encode(proxy_passwd).strip()
       
   412             else:
       
   413                 proxy_auth = None
       
   414             if user_passwd:
       
   415                 import base64
       
   416                 auth = base64.b64encode(user_passwd).strip()
       
   417             else:
       
   418                 auth = None
       
   419             h = httplib.HTTPS(host, 0,
       
   420                               key_file=self.key_file,
       
   421                               cert_file=self.cert_file)
       
   422             if data is not None:
       
   423                 h.putrequest('POST', selector)
       
   424                 h.putheader('Content-Type',
       
   425                             'application/x-www-form-urlencoded')
       
   426                 h.putheader('Content-Length', '%d' % len(data))
       
   427             else:
       
   428                 h.putrequest('GET', selector)
       
   429             if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
       
   430             if auth: h.putheader('Authorization', 'Basic %s' % auth)
       
   431             if realhost: h.putheader('Host', realhost)
       
   432             for args in self.addheaders: h.putheader(*args)
       
   433             h.endheaders()
       
   434             if data is not None:
       
   435                 h.send(data)
       
   436             errcode, errmsg, headers = h.getreply()
       
   437             fp = h.getfile()
       
   438             if errcode == -1:
       
   439                 if fp: fp.close()
       
   440                 # something went wrong with the HTTP status line
       
   441                 raise IOError, ('http protocol error', 0,
       
   442                                 'got a bad status line', None)
       
   443             # According to RFC 2616, "2xx" code indicates that the client's
       
   444             # request was successfully received, understood, and accepted.
       
   445             if (200 <= errcode < 300):
       
   446                 return addinfourl(fp, headers, "https:" + url, errcode)
       
   447             else:
       
   448                 if data is None:
       
   449                     return self.http_error(url, fp, errcode, errmsg, headers)
       
   450                 else:
       
   451                     return self.http_error(url, fp, errcode, errmsg, headers,
       
   452                                            data)
       
   453 
       
   454     def open_file(self, url):
       
   455         """Use local file or FTP depending on form of URL."""
       
   456         if not isinstance(url, str):
       
   457             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
       
   458         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
       
   459             return self.open_ftp(url)
       
   460         else:
       
   461             return self.open_local_file(url)
       
   462 
       
   463     def open_local_file(self, url):
       
   464         """Use local file."""
       
   465         import mimetypes, mimetools, email.utils
       
   466         try:
       
   467             from cStringIO import StringIO
       
   468         except ImportError:
       
   469             from StringIO import StringIO
       
   470         host, file = splithost(url)
       
   471         localname = url2pathname(file)
       
   472         try:
       
   473             stats = os.stat(localname)
       
   474         except OSError, e:
       
   475             raise IOError(e.errno, e.strerror, e.filename)
       
   476         size = stats.st_size
       
   477         modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
       
   478         mtype = mimetypes.guess_type(url)[0]
       
   479         headers = mimetools.Message(StringIO(
       
   480             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
       
   481             (mtype or 'text/plain', size, modified)))
       
   482         if not host:
       
   483             urlfile = file
       
   484             if file[:1] == '/':
       
   485                 urlfile = 'file://' + file
       
   486             return addinfourl(open(localname, 'rb'),
       
   487                               headers, urlfile)
       
   488         host, port = splitport(host)
       
   489         if not port \
       
   490            and socket.gethostbyname(host) in (localhost(), thishost()):
       
   491             urlfile = file
       
   492             if file[:1] == '/':
       
   493                 urlfile = 'file://' + file
       
   494             return addinfourl(open(localname, 'rb'),
       
   495                               headers, urlfile)
       
   496         raise IOError, ('local file error', 'not on local host')
       
   497 
       
   498     def open_ftp(self, url):
       
   499         """Use FTP protocol."""
       
   500         if not isinstance(url, str):
       
   501             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
       
   502         import mimetypes, mimetools
       
   503         try:
       
   504             from cStringIO import StringIO
       
   505         except ImportError:
       
   506             from StringIO import StringIO
       
   507         host, path = splithost(url)
       
   508         if not host: raise IOError, ('ftp error', 'no host given')
       
   509         host, port = splitport(host)
       
   510         user, host = splituser(host)
       
   511         if user: user, passwd = splitpasswd(user)
       
   512         else: passwd = None
       
   513         host = unquote(host)
       
   514         user = unquote(user or '')
       
   515         passwd = unquote(passwd or '')
       
   516         host = socket.gethostbyname(host)
       
   517         if not port:
       
   518             import ftplib
       
   519             port = ftplib.FTP_PORT
       
   520         else:
       
   521             port = int(port)
       
   522         path, attrs = splitattr(path)
       
   523         path = unquote(path)
       
   524         dirs = path.split('/')
       
   525         dirs, file = dirs[:-1], dirs[-1]
       
   526         if dirs and not dirs[0]: dirs = dirs[1:]
       
   527         if dirs and not dirs[0]: dirs[0] = '/'
       
   528         key = user, host, port, '/'.join(dirs)
       
   529         # XXX thread unsafe!
       
   530         if len(self.ftpcache) > MAXFTPCACHE:
       
   531             # Prune the cache, rather arbitrarily
       
   532             for k in self.ftpcache.keys():
       
   533                 if k != key:
       
   534                     v = self.ftpcache[k]
       
   535                     del self.ftpcache[k]
       
   536                     v.close()
       
   537         try:
       
   538             if not key in self.ftpcache:
       
   539                 self.ftpcache[key] = \
       
   540                     ftpwrapper(user, passwd, host, port, dirs)
       
   541             if not file: type = 'D'
       
   542             else: type = 'I'
       
   543             for attr in attrs:
       
   544                 attr, value = splitvalue(attr)
       
   545                 if attr.lower() == 'type' and \
       
   546                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
       
   547                     type = value.upper()
       
   548             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
       
   549             mtype = mimetypes.guess_type("ftp:" + url)[0]
       
   550             headers = ""
       
   551             if mtype:
       
   552                 headers += "Content-Type: %s\n" % mtype
       
   553             if retrlen is not None and retrlen >= 0:
       
   554                 headers += "Content-Length: %d\n" % retrlen
       
   555             headers = mimetools.Message(StringIO(headers))
       
   556             return addinfourl(fp, headers, "ftp:" + url)
       
   557         except ftperrors(), msg:
       
   558             raise IOError, ('ftp error', msg), sys.exc_info()[2]
       
   559 
       
   560     def open_data(self, url, data=None):
       
   561         """Use "data" URL."""
       
   562         if not isinstance(url, str):
       
   563             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
       
   564         # ignore POSTed data
       
   565         #
       
   566         # syntax of data URLs:
       
   567         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
       
   568         # mediatype := [ type "/" subtype ] *( ";" parameter )
       
   569         # data      := *urlchar
       
   570         # parameter := attribute "=" value
       
   571         import mimetools
       
   572         try:
       
   573             from cStringIO import StringIO
       
   574         except ImportError:
       
   575             from StringIO import StringIO
       
   576         try:
       
   577             [type, data] = url.split(',', 1)
       
   578         except ValueError:
       
   579             raise IOError, ('data error', 'bad data URL')
       
   580         if not type:
       
   581             type = 'text/plain;charset=US-ASCII'
       
   582         semi = type.rfind(';')
       
   583         if semi >= 0 and '=' not in type[semi:]:
       
   584             encoding = type[semi+1:]
       
   585             type = type[:semi]
       
   586         else:
       
   587             encoding = ''
       
   588         msg = []
       
   589         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
       
   590                                             time.gmtime(time.time())))
       
   591         msg.append('Content-type: %s' % type)
       
   592         if encoding == 'base64':
       
   593             import base64
       
   594             data = base64.decodestring(data)
       
   595         else:
       
   596             data = unquote(data)
       
   597         msg.append('Content-Length: %d' % len(data))
       
   598         msg.append('')
       
   599         msg.append(data)
       
   600         msg = '\n'.join(msg)
       
   601         f = StringIO(msg)
       
   602         headers = mimetools.Message(f, 0)
       
   603         #f.fileno = None     # needed for addinfourl
       
   604         return addinfourl(f, headers, url)
       
   605 
       
   606 
       
   607 class FancyURLopener(URLopener):
       
   608     """Derived class with handlers for errors we can handle (perhaps)."""
       
   609 
       
   610     def __init__(self, *args, **kwargs):
       
   611         URLopener.__init__(self, *args, **kwargs)
       
   612         self.auth_cache = {}
       
   613         self.tries = 0
       
   614         self.maxtries = 10
       
   615 
       
   616     def http_error_default(self, url, fp, errcode, errmsg, headers):
       
   617         """Default error handling -- don't raise an exception."""
       
   618         return addinfourl(fp, headers, "http:" + url, errcode)
       
   619 
       
   620     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
       
   621         """Error 302 -- relocated (temporarily)."""
       
   622         self.tries += 1
       
   623         if self.maxtries and self.tries >= self.maxtries:
       
   624             if hasattr(self, "http_error_500"):
       
   625                 meth = self.http_error_500
       
   626             else:
       
   627                 meth = self.http_error_default
       
   628             self.tries = 0
       
   629             return meth(url, fp, 500,
       
   630                         "Internal Server Error: Redirect Recursion", headers)
       
   631         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
       
   632                                         data)
       
   633         self.tries = 0
       
   634         return result
       
   635 
       
   636     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
       
   637         if 'location' in headers:
       
   638             newurl = headers['location']
       
   639         elif 'uri' in headers:
       
   640             newurl = headers['uri']
       
   641         else:
       
   642             return
       
   643         void = fp.read()
       
   644         fp.close()
       
   645         # In case the server sent a relative URL, join with original:
       
   646         newurl = basejoin(self.type + ":" + url, newurl)
       
   647         return self.open(newurl)
       
   648 
       
   649     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
       
   650         """Error 301 -- also relocated (permanently)."""
       
   651         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
       
   652 
       
   653     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
       
   654         """Error 303 -- also relocated (essentially identical to 302)."""
       
   655         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
       
   656 
       
   657     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
       
   658         """Error 307 -- relocated, but turn POST into error."""
       
   659         if data is None:
       
   660             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
       
   661         else:
       
   662             return self.http_error_default(url, fp, errcode, errmsg, headers)
       
   663 
       
   664     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
       
   665         """Error 401 -- authentication required.
       
   666         This function supports Basic authentication only."""
       
   667         if not 'www-authenticate' in headers:
       
   668             URLopener.http_error_default(self, url, fp,
       
   669                                          errcode, errmsg, headers)
       
   670         stuff = headers['www-authenticate']
       
   671         import re
       
   672         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
       
   673         if not match:
       
   674             URLopener.http_error_default(self, url, fp,
       
   675                                          errcode, errmsg, headers)
       
   676         scheme, realm = match.groups()
       
   677         if scheme.lower() != 'basic':
       
   678             URLopener.http_error_default(self, url, fp,
       
   679                                          errcode, errmsg, headers)
       
   680         name = 'retry_' + self.type + '_basic_auth'
       
   681         if data is None:
       
   682             return getattr(self,name)(url, realm)
       
   683         else:
       
   684             return getattr(self,name)(url, realm, data)
       
   685 
       
   686     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
       
   687         """Error 407 -- proxy authentication required.
       
   688         This function supports Basic authentication only."""
       
   689         if not 'proxy-authenticate' in headers:
       
   690             URLopener.http_error_default(self, url, fp,
       
   691                                          errcode, errmsg, headers)
       
   692         stuff = headers['proxy-authenticate']
       
   693         import re
       
   694         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
       
   695         if not match:
       
   696             URLopener.http_error_default(self, url, fp,
       
   697                                          errcode, errmsg, headers)
       
   698         scheme, realm = match.groups()
       
   699         if scheme.lower() != 'basic':
       
   700             URLopener.http_error_default(self, url, fp,
       
   701                                          errcode, errmsg, headers)
       
   702         name = 'retry_proxy_' + self.type + '_basic_auth'
       
   703         if data is None:
       
   704             return getattr(self,name)(url, realm)
       
   705         else:
       
   706             return getattr(self,name)(url, realm, data)
       
   707 
       
   708     def retry_proxy_http_basic_auth(self, url, realm, data=None):
       
   709         host, selector = splithost(url)
       
   710         newurl = 'http://' + host + selector
       
   711         proxy = self.proxies['http']
       
   712         urltype, proxyhost = splittype(proxy)
       
   713         proxyhost, proxyselector = splithost(proxyhost)
       
   714         i = proxyhost.find('@') + 1
       
   715         proxyhost = proxyhost[i:]
       
   716         user, passwd = self.get_user_passwd(proxyhost, realm, i)
       
   717         if not (user or passwd): return None
       
   718         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
       
   719         self.proxies['http'] = 'http://' + proxyhost + proxyselector
       
   720         if data is None:
       
   721             return self.open(newurl)
       
   722         else:
       
   723             return self.open(newurl, data)
       
   724 
       
   725     def retry_proxy_https_basic_auth(self, url, realm, data=None):
       
   726         host, selector = splithost(url)
       
   727         newurl = 'https://' + host + selector
       
   728         proxy = self.proxies['https']
       
   729         urltype, proxyhost = splittype(proxy)
       
   730         proxyhost, proxyselector = splithost(proxyhost)
       
   731         i = proxyhost.find('@') + 1
       
   732         proxyhost = proxyhost[i:]
       
   733         user, passwd = self.get_user_passwd(proxyhost, realm, i)
       
   734         if not (user or passwd): return None
       
   735         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
       
   736         self.proxies['https'] = 'https://' + proxyhost + proxyselector
       
   737         if data is None:
       
   738             return self.open(newurl)
       
   739         else:
       
   740             return self.open(newurl, data)
       
   741 
       
   742     def retry_http_basic_auth(self, url, realm, data=None):
       
   743         host, selector = splithost(url)
       
   744         i = host.find('@') + 1
       
   745         host = host[i:]
       
   746         user, passwd = self.get_user_passwd(host, realm, i)
       
   747         if not (user or passwd): return None
       
   748         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
       
   749         newurl = 'http://' + host + selector
       
   750         if data is None:
       
   751             return self.open(newurl)
       
   752         else:
       
   753             return self.open(newurl, data)
       
   754 
       
   755     def retry_https_basic_auth(self, url, realm, data=None):
       
   756         host, selector = splithost(url)
       
   757         i = host.find('@') + 1
       
   758         host = host[i:]
       
   759         user, passwd = self.get_user_passwd(host, realm, i)
       
   760         if not (user or passwd): return None
       
   761         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
       
   762         newurl = 'https://' + host + selector
       
   763         if data is None:
       
   764             return self.open(newurl)
       
   765         else:
       
   766             return self.open(newurl, data)
       
   767 
       
   768     def get_user_passwd(self, host, realm, clear_cache = 0):
       
   769         key = realm + '@' + host.lower()
       
   770         if key in self.auth_cache:
       
   771             if clear_cache:
       
   772                 del self.auth_cache[key]
       
   773             else:
       
   774                 return self.auth_cache[key]
       
   775         user, passwd = self.prompt_user_passwd(host, realm)
       
   776         if user or passwd: self.auth_cache[key] = (user, passwd)
       
   777         return user, passwd
       
   778 
       
   779     def prompt_user_passwd(self, host, realm):
       
   780         """Override this in a GUI environment!"""
       
   781         import getpass
       
   782         try:
       
   783             user = raw_input("Enter username for %s at %s: " % (realm,
       
   784                                                                 host))
       
   785             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
       
   786                 (user, realm, host))
       
   787             return user, passwd
       
   788         except KeyboardInterrupt:
       
   789             print
       
   790             return None, None
       
   791 
       
   792 
       
   793 # Utility functions
       
   794 
       
   795 _localhost = None
       
   796 def localhost():
       
   797     """Return the IP address of the magic hostname 'localhost'."""
       
   798     global _localhost
       
   799     if _localhost is None:
       
   800         _localhost = socket.gethostbyname('localhost')
       
   801     return _localhost
       
   802 
       
   803 _thishost = None
       
   804 def thishost():
       
   805     """Return the IP address of the current host."""
       
   806     global _thishost
       
   807     if _thishost is None:
       
   808         _thishost = socket.gethostbyname(socket.gethostname())
       
   809     return _thishost
       
   810 
       
   811 _ftperrors = None
       
   812 def ftperrors():
       
   813     """Return the set of errors raised by the FTP class."""
       
   814     global _ftperrors
       
   815     if _ftperrors is None:
       
   816         import ftplib
       
   817         _ftperrors = ftplib.all_errors
       
   818     return _ftperrors
       
   819 
       
   820 _noheaders = None
       
   821 def noheaders():
       
   822     """Return an empty mimetools.Message object."""
       
   823     global _noheaders
       
   824     if _noheaders is None:
       
   825         import mimetools
       
   826         try:
       
   827             from cStringIO import StringIO
       
   828         except ImportError:
       
   829             from StringIO import StringIO
       
   830         _noheaders = mimetools.Message(StringIO(), 0)
       
   831         _noheaders.fp.close()   # Recycle file descriptor
       
   832     return _noheaders
       
   833 
       
   834 
       
   835 # Utility classes
       
   836 
       
   837 class ftpwrapper:
       
   838     """Class used by open_ftp() for cache of open FTP connections."""
       
   839 
       
   840     def __init__(self, user, passwd, host, port, dirs,
       
   841                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
       
   842         self.user = user
       
   843         self.passwd = passwd
       
   844         self.host = host
       
   845         self.port = port
       
   846         self.dirs = dirs
       
   847         self.timeout = timeout
       
   848         self.init()
       
   849 
       
   850     def init(self):
       
   851         import ftplib
       
   852         self.busy = 0
       
   853         self.ftp = ftplib.FTP()
       
   854         self.ftp.connect(self.host, self.port, self.timeout)
       
   855         self.ftp.login(self.user, self.passwd)
       
   856         for dir in self.dirs:
       
   857             self.ftp.cwd(dir)
       
   858 
       
   859     def retrfile(self, file, type):
       
   860         import ftplib
       
   861         self.endtransfer()
       
   862         if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
       
   863         else: cmd = 'TYPE ' + type; isdir = 0
       
   864         try:
       
   865             self.ftp.voidcmd(cmd)
       
   866         except ftplib.all_errors:
       
   867             self.init()
       
   868             self.ftp.voidcmd(cmd)
       
   869         conn = None
       
   870         if file and not isdir:
       
   871             # Try to retrieve as a file
       
   872             try:
       
   873                 cmd = 'RETR ' + file
       
   874                 conn = self.ftp.ntransfercmd(cmd)
       
   875             except ftplib.error_perm, reason:
       
   876                 if str(reason)[:3] != '550':
       
   877                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
       
   878         if not conn:
       
   879             # Set transfer mode to ASCII!
       
   880             self.ftp.voidcmd('TYPE A')
       
   881             # Try a directory listing. Verify that directory exists.
       
   882             if file:
       
   883                 pwd = self.ftp.pwd()
       
   884                 try:
       
   885                     try:
       
   886                         self.ftp.cwd(file)
       
   887                     except ftplib.error_perm, reason:
       
   888                         raise IOError, ('ftp error', reason), sys.exc_info()[2]
       
   889                 finally:
       
   890                     self.ftp.cwd(pwd)
       
   891                 cmd = 'LIST ' + file
       
   892             else:
       
   893                 cmd = 'LIST'
       
   894             conn = self.ftp.ntransfercmd(cmd)
       
   895         self.busy = 1
       
   896         # Pass back both a suitably decorated object and a retrieval length
       
   897         return (addclosehook(conn[0].makefile('rb'),
       
   898                              self.endtransfer), conn[1])
       
   899     def endtransfer(self):
       
   900         if not self.busy:
       
   901             return
       
   902         self.busy = 0
       
   903         try:
       
   904             self.ftp.voidresp()
       
   905         except ftperrors():
       
   906             pass
       
   907 
       
   908     def close(self):
       
   909         self.endtransfer()
       
   910         try:
       
   911             self.ftp.close()
       
   912         except ftperrors():
       
   913             pass
       
   914 
       
   915 class addbase:
       
   916     """Base class for addinfo and addclosehook."""
       
   917 
       
   918     def __init__(self, fp):
       
   919         self.fp = fp
       
   920         self.read = self.fp.read
       
   921         self.readline = self.fp.readline
       
   922         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
       
   923         if hasattr(self.fp, "fileno"):
       
   924             self.fileno = self.fp.fileno
       
   925         else:
       
   926             self.fileno = lambda: None
       
   927         if hasattr(self.fp, "__iter__"):
       
   928             self.__iter__ = self.fp.__iter__
       
   929             if hasattr(self.fp, "next"):
       
   930                 self.next = self.fp.next
       
   931 
       
   932     def __repr__(self):
       
   933         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
       
   934                                              id(self), self.fp)
       
   935 
       
   936     def close(self):
       
   937         self.read = None
       
   938         self.readline = None
       
   939         self.readlines = None
       
   940         self.fileno = None
       
   941         if self.fp: self.fp.close()
       
   942         self.fp = None
       
   943 
       
   944 class addclosehook(addbase):
       
   945     """Class to add a close hook to an open file."""
       
   946 
       
   947     def __init__(self, fp, closehook, *hookargs):
       
   948         addbase.__init__(self, fp)
       
   949         self.closehook = closehook
       
   950         self.hookargs = hookargs
       
   951 
       
   952     def close(self):
       
   953         addbase.close(self)
       
   954         if self.closehook:
       
   955             self.closehook(*self.hookargs)
       
   956             self.closehook = None
       
   957             self.hookargs = None
       
   958 
       
   959 class addinfo(addbase):
       
   960     """class to add an info() method to an open file."""
       
   961 
       
   962     def __init__(self, fp, headers):
       
   963         addbase.__init__(self, fp)
       
   964         self.headers = headers
       
   965 
       
   966     def info(self):
       
   967         return self.headers
       
   968 
       
   969 class addinfourl(addbase):
       
   970     """class to add info() and geturl() methods to an open file."""
       
   971 
       
   972     def __init__(self, fp, headers, url, code=None):
       
   973         addbase.__init__(self, fp)
       
   974         self.headers = headers
       
   975         self.url = url
       
   976         self.code = code
       
   977 
       
   978     def info(self):
       
   979         return self.headers
       
   980 
       
   981     def getcode(self):
       
   982         return self.code
       
   983 
       
   984     def geturl(self):
       
   985         return self.url
       
   986 
       
   987 
       
   988 # Utilities to parse URLs (most of these return None for missing parts):
       
   989 # unwrap('<URL:type://host/path>') --> 'type://host/path'
       
   990 # splittype('type:opaquestring') --> 'type', 'opaquestring'
       
   991 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
       
   992 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
       
   993 # splitpasswd('user:passwd') -> 'user', 'passwd'
       
   994 # splitport('host:port') --> 'host', 'port'
       
   995 # splitquery('/path?query') --> '/path', 'query'
       
   996 # splittag('/path#tag') --> '/path', 'tag'
       
   997 # splitattr('/path;attr1=value1;attr2=value2;...') ->
       
   998 #   '/path', ['attr1=value1', 'attr2=value2', ...]
       
   999 # splitvalue('attr=value') --> 'attr', 'value'
       
  1000 # unquote('abc%20def') -> 'abc def'
       
  1001 # quote('abc def') -> 'abc%20def'
       
  1002 
       
  1003 try:
       
  1004     unicode
       
  1005 except NameError:
       
  1006     def _is_unicode(x):
       
  1007         return 0
       
  1008 else:
       
  1009     def _is_unicode(x):
       
  1010         return isinstance(x, unicode)
       
  1011 
       
  1012 def toBytes(url):
       
  1013     """toBytes(u"URL") --> 'URL'."""
       
  1014     # Most URL schemes require ASCII. If that changes, the conversion
       
  1015     # can be relaxed
       
  1016     if _is_unicode(url):
       
  1017         try:
       
  1018             url = url.encode("ASCII")
       
  1019         except UnicodeError:
       
  1020             raise UnicodeError("URL " + repr(url) +
       
  1021                                " contains non-ASCII characters")
       
  1022     return url
       
  1023 
       
  1024 def unwrap(url):
       
  1025     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
       
  1026     url = url.strip()
       
  1027     if url[:1] == '<' and url[-1:] == '>':
       
  1028         url = url[1:-1].strip()
       
  1029     if url[:4] == 'URL:': url = url[4:].strip()
       
  1030     return url
       
  1031 
       
  1032 _typeprog = None
       
  1033 def splittype(url):
       
  1034     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
       
  1035     global _typeprog
       
  1036     if _typeprog is None:
       
  1037         import re
       
  1038         _typeprog = re.compile('^([^/:]+):')
       
  1039 
       
  1040     match = _typeprog.match(url)
       
  1041     if match:
       
  1042         scheme = match.group(1)
       
  1043         return scheme.lower(), url[len(scheme) + 1:]
       
  1044     return None, url
       
  1045 
       
  1046 _hostprog = None
       
  1047 def splithost(url):
       
  1048     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
       
  1049     global _hostprog
       
  1050     if _hostprog is None:
       
  1051         import re
       
  1052         _hostprog = re.compile('^//([^/?]*)(.*)$')
       
  1053 
       
  1054     match = _hostprog.match(url)
       
  1055     if match: return match.group(1, 2)
       
  1056     return None, url
       
  1057 
       
  1058 _userprog = None
       
  1059 def splituser(host):
       
  1060     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
       
  1061     global _userprog
       
  1062     if _userprog is None:
       
  1063         import re
       
  1064         _userprog = re.compile('^(.*)@(.*)$')
       
  1065 
       
  1066     match = _userprog.match(host)
       
  1067     if match: return map(unquote, match.group(1, 2))
       
  1068     return None, host
       
  1069 
       
  1070 _passwdprog = None
       
  1071 def splitpasswd(user):
       
  1072     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
       
  1073     global _passwdprog
       
  1074     if _passwdprog is None:
       
  1075         import re
       
  1076         _passwdprog = re.compile('^([^:]*):(.*)$')
       
  1077 
       
  1078     match = _passwdprog.match(user)
       
  1079     if match: return match.group(1, 2)
       
  1080     return user, None
       
  1081 
       
  1082 # splitport('host:port') --> 'host', 'port'
       
  1083 _portprog = None
       
  1084 def splitport(host):
       
  1085     """splitport('host:port') --> 'host', 'port'."""
       
  1086     global _portprog
       
  1087     if _portprog is None:
       
  1088         import re
       
  1089         _portprog = re.compile('^(.*):([0-9]+)$')
       
  1090 
       
  1091     match = _portprog.match(host)
       
  1092     if match: return match.group(1, 2)
       
  1093     return host, None
       
  1094 
       
  1095 _nportprog = None
       
  1096 def splitnport(host, defport=-1):
       
  1097     """Split host and port, returning numeric port.
       
  1098     Return given default port if no ':' found; defaults to -1.
       
  1099     Return numerical port if a valid number are found after ':'.
       
  1100     Return None if ':' but not a valid number."""
       
  1101     global _nportprog
       
  1102     if _nportprog is None:
       
  1103         import re
       
  1104         _nportprog = re.compile('^(.*):(.*)$')
       
  1105 
       
  1106     match = _nportprog.match(host)
       
  1107     if match:
       
  1108         host, port = match.group(1, 2)
       
  1109         try:
       
  1110             if not port: raise ValueError, "no digits"
       
  1111             nport = int(port)
       
  1112         except ValueError:
       
  1113             nport = None
       
  1114         return host, nport
       
  1115     return host, defport
       
  1116 
       
  1117 _queryprog = None
       
  1118 def splitquery(url):
       
  1119     """splitquery('/path?query') --> '/path', 'query'."""
       
  1120     global _queryprog
       
  1121     if _queryprog is None:
       
  1122         import re
       
  1123         _queryprog = re.compile('^(.*)\?([^?]*)$')
       
  1124 
       
  1125     match = _queryprog.match(url)
       
  1126     if match: return match.group(1, 2)
       
  1127     return url, None
       
  1128 
       
  1129 _tagprog = None
       
  1130 def splittag(url):
       
  1131     """splittag('/path#tag') --> '/path', 'tag'."""
       
  1132     global _tagprog
       
  1133     if _tagprog is None:
       
  1134         import re
       
  1135         _tagprog = re.compile('^(.*)#([^#]*)$')
       
  1136 
       
  1137     match = _tagprog.match(url)
       
  1138     if match: return match.group(1, 2)
       
  1139     return url, None
       
  1140 
       
  1141 def splitattr(url):
       
  1142     """splitattr('/path;attr1=value1;attr2=value2;...') ->
       
  1143         '/path', ['attr1=value1', 'attr2=value2', ...]."""
       
  1144     words = url.split(';')
       
  1145     return words[0], words[1:]
       
  1146 
       
  1147 _valueprog = None
       
  1148 def splitvalue(attr):
       
  1149     """splitvalue('attr=value') --> 'attr', 'value'."""
       
  1150     global _valueprog
       
  1151     if _valueprog is None:
       
  1152         import re
       
  1153         _valueprog = re.compile('^([^=]*)=(.*)$')
       
  1154 
       
  1155     match = _valueprog.match(attr)
       
  1156     if match: return match.group(1, 2)
       
  1157     return attr, None
       
  1158 
       
  1159 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
       
  1160 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
       
  1161 
       
  1162 def unquote(s):
       
  1163     """unquote('abc%20def') -> 'abc def'."""
       
  1164     res = s.split('%')
       
  1165     for i in xrange(1, len(res)):
       
  1166         item = res[i]
       
  1167         try:
       
  1168             res[i] = _hextochr[item[:2]] + item[2:]
       
  1169         except KeyError:
       
  1170             res[i] = '%' + item
       
  1171         except UnicodeDecodeError:
       
  1172             res[i] = unichr(int(item[:2], 16)) + item[2:]
       
  1173     return "".join(res)
       
  1174 
       
  1175 def unquote_plus(s):
       
  1176     """unquote('%7e/abc+def') -> '~/abc def'"""
       
  1177     s = s.replace('+', ' ')
       
  1178     return unquote(s)
       
  1179 
       
  1180 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
       
  1181                'abcdefghijklmnopqrstuvwxyz'
       
  1182                '0123456789' '_.-')
       
  1183 _safemaps = {}
       
  1184 
       
  1185 def quote(s, safe = '/'):
       
  1186     """quote('abc def') -> 'abc%20def'
       
  1187 
       
  1188     Each part of a URL, e.g. the path info, the query, etc., has a
       
  1189     different set of reserved characters that must be quoted.
       
  1190 
       
  1191     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
       
  1192     the following reserved characters.
       
  1193 
       
  1194     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
       
  1195                   "$" | ","
       
  1196 
       
  1197     Each of these characters is reserved in some component of a URL,
       
  1198     but not necessarily in all of them.
       
  1199 
       
  1200     By default, the quote function is intended for quoting the path
       
  1201     section of a URL.  Thus, it will not encode '/'.  This character
       
  1202     is reserved, but in typical usage the quote function is being
       
  1203     called on a path where the existing slash characters are used as
       
  1204     reserved characters.
       
  1205     """
       
  1206     cachekey = (safe, always_safe)
       
  1207     try:
       
  1208         safe_map = _safemaps[cachekey]
       
  1209     except KeyError:
       
  1210         safe += always_safe
       
  1211         safe_map = {}
       
  1212         for i in range(256):
       
  1213             c = chr(i)
       
  1214             safe_map[c] = (c in safe) and c or ('%%%02X' % i)
       
  1215         _safemaps[cachekey] = safe_map
       
  1216     res = map(safe_map.__getitem__, s)
       
  1217     return ''.join(res)
       
  1218 
       
  1219 def quote_plus(s, safe = ''):
       
  1220     """Quote the query fragment of a URL; replacing ' ' with '+'"""
       
  1221     if ' ' in s:
       
  1222         s = quote(s, safe + ' ')
       
  1223         return s.replace(' ', '+')
       
  1224     return quote(s, safe)
       
  1225 
       
  1226 def urlencode(query,doseq=0):
       
  1227     """Encode a sequence of two-element tuples or dictionary into a URL query string.
       
  1228 
       
  1229     If any values in the query arg are sequences and doseq is true, each
       
  1230     sequence element is converted to a separate parameter.
       
  1231 
       
  1232     If the query arg is a sequence of two-element tuples, the order of the
       
  1233     parameters in the output will match the order of parameters in the
       
  1234     input.
       
  1235     """
       
  1236 
       
  1237     if hasattr(query,"items"):
       
  1238         # mapping objects
       
  1239         query = query.items()
       
  1240     else:
       
  1241         # it's a bother at times that strings and string-like objects are
       
  1242         # sequences...
       
  1243         try:
       
  1244             # non-sequence items should not work with len()
       
  1245             # non-empty strings will fail this
       
  1246             if len(query) and not isinstance(query[0], tuple):
       
  1247                 raise TypeError
       
  1248             # zero-length sequences of all types will get here and succeed,
       
  1249             # but that's a minor nit - since the original implementation
       
  1250             # allowed empty dicts that type of behavior probably should be
       
  1251             # preserved for consistency
       
  1252         except TypeError:
       
  1253             ty,va,tb = sys.exc_info()
       
  1254             raise TypeError, "not a valid non-string sequence or mapping object", tb
       
  1255 
       
  1256     l = []
       
  1257     if not doseq:
       
  1258         # preserve old behavior
       
  1259         for k, v in query:
       
  1260             k = quote_plus(str(k))
       
  1261             v = quote_plus(str(v))
       
  1262             l.append(k + '=' + v)
       
  1263     else:
       
  1264         for k, v in query:
       
  1265             k = quote_plus(str(k))
       
  1266             if isinstance(v, str):
       
  1267                 v = quote_plus(v)
       
  1268                 l.append(k + '=' + v)
       
  1269             elif _is_unicode(v):
       
  1270                 # is there a reasonable way to convert to ASCII?
       
  1271                 # encode generates a string, but "replace" or "ignore"
       
  1272                 # lose information and "strict" can raise UnicodeError
       
  1273                 v = quote_plus(v.encode("ASCII","replace"))
       
  1274                 l.append(k + '=' + v)
       
  1275             else:
       
  1276                 try:
       
  1277                     # is this a sufficient test for sequence-ness?
       
  1278                     x = len(v)
       
  1279                 except TypeError:
       
  1280                     # not a sequence
       
  1281                     v = quote_plus(str(v))
       
  1282                     l.append(k + '=' + v)
       
  1283                 else:
       
  1284                     # loop over the sequence
       
  1285                     for elt in v:
       
  1286                         l.append(k + '=' + quote_plus(str(elt)))
       
  1287     return '&'.join(l)
       
  1288 
       
  1289 # Proxy handling
       
  1290 def getproxies_environment():
       
  1291     """Return a dictionary of scheme -> proxy server URL mappings.
       
  1292 
       
  1293     Scan the environment for variables named <scheme>_proxy;
       
  1294     this seems to be the standard convention.  If you need a
       
  1295     different way, you can pass a proxies dictionary to the
       
  1296     [Fancy]URLopener constructor.
       
  1297 
       
  1298     """
       
  1299     proxies = {}
       
  1300     for name, value in os.environ.items():
       
  1301         name = name.lower()
       
  1302         if value and name[-6:] == '_proxy':
       
  1303             proxies[name[:-6]] = value
       
  1304     return proxies
       
  1305 
       
  1306 def proxy_bypass_environment(host):
       
  1307     """Test if proxies should not be used for a particular host.
       
  1308 
       
  1309     Checks the environment for a variable named no_proxy, which should
       
  1310     be a list of DNS suffixes separated by commas, or '*' for all hosts.
       
  1311     """
       
  1312     no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
       
  1313     # '*' is special case for always bypass
       
  1314     if no_proxy == '*':
       
  1315         return 1
       
  1316     # strip port off host
       
  1317     hostonly, port = splitport(host)
       
  1318     # check if the host ends with any of the DNS suffixes
       
  1319     for name in no_proxy.split(','):
       
  1320         if name and (hostonly.endswith(name) or host.endswith(name)):
       
  1321             return 1
       
  1322     # otherwise, don't bypass
       
  1323     return 0
       
  1324 
       
  1325 
       
  1326 if sys.platform == 'darwin':
       
  1327 
       
  1328     def _CFSetup(sc):
       
  1329         from ctypes import c_int32, c_void_p, c_char_p, c_int
       
  1330         sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
       
  1331         sc.CFStringCreateWithCString.restype = c_void_p
       
  1332         sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
       
  1333         sc.SCDynamicStoreCopyProxies.restype = c_void_p
       
  1334         sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
       
  1335         sc.CFDictionaryGetValue.restype = c_void_p
       
  1336         sc.CFStringGetLength.argtypes = [ c_void_p ]
       
  1337         sc.CFStringGetLength.restype = c_int32
       
  1338         sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
       
  1339         sc.CFStringGetCString.restype = c_int32
       
  1340         sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
       
  1341         sc.CFNumberGetValue.restype = c_int32
       
  1342         sc.CFRelease.argtypes = [ c_void_p ]
       
  1343         sc.CFRelease.restype = None
       
  1344 
       
  1345     def _CStringFromCFString(sc, value):
       
  1346         from ctypes import create_string_buffer
       
  1347         length = sc.CFStringGetLength(value) + 1
       
  1348         buff = create_string_buffer(length)
       
  1349         sc.CFStringGetCString(value, buff, length, 0)
       
  1350         return buff.value
       
  1351 
       
  1352     def _CFNumberToInt32(sc, cfnum):
       
  1353         from ctypes import byref, c_int
       
  1354         val = c_int()
       
  1355         kCFNumberSInt32Type = 3
       
  1356         sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
       
  1357         return val.value
       
  1358 
       
  1359 
       
  1360     def proxy_bypass_macosx_sysconf(host):
       
  1361         """
       
  1362         Return True iff this host shouldn't be accessed using a proxy
       
  1363 
       
  1364         This function uses the MacOSX framework SystemConfiguration
       
  1365         to fetch the proxy information.
       
  1366         """
       
  1367         from ctypes import cdll
       
  1368         from ctypes.util import find_library
       
  1369         import re
       
  1370         import socket
       
  1371         from fnmatch import fnmatch
       
  1372 
       
  1373         def ip2num(ipAddr):
       
  1374             parts = ipAddr.split('.')
       
  1375             parts = map(int, parts)
       
  1376             if len(parts) != 4:
       
  1377                 parts = (parts + [0, 0, 0, 0])[:4]
       
  1378             return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
       
  1379 
       
  1380         sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
       
  1381         _CFSetup(sc)
       
  1382 
       
  1383         hostIP = None
       
  1384 
       
  1385         if not sc:
       
  1386             return False
       
  1387 
       
  1388         kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
       
  1389         kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
       
  1390                 "ExcludeSimpleHostnames", 0)
       
  1391 
       
  1392 
       
  1393         proxyDict = sc.SCDynamicStoreCopyProxies(None)
       
  1394         if proxyDict is None:
       
  1395             return False
       
  1396 
       
  1397         try:
       
  1398             # Check for simple host names:
       
  1399             if '.' not in host:
       
  1400                 exclude_simple = sc.CFDictionaryGetValue(proxyDict,
       
  1401                         kSCPropNetProxiesExcludeSimpleHostnames)
       
  1402                 if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
       
  1403                     return True
       
  1404 
       
  1405 
       
  1406             # Check the exceptions list:
       
  1407             exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
       
  1408             if exceptions:
       
  1409                 # Items in the list are strings like these: *.local, 169.254/16
       
  1410                 for index in xrange(sc.CFArrayGetCount(exceptions)):
       
  1411                     value = sc.CFArrayGetValueAtIndex(exceptions, index)
       
  1412                     if not value: continue
       
  1413                     value = _CStringFromCFString(sc, value)
       
  1414 
       
  1415                     m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
       
  1416                     if m is not None:
       
  1417                         if hostIP is None:
       
  1418                             hostIP = socket.gethostbyname(host)
       
  1419                             hostIP = ip2num(hostIP)
       
  1420 
       
  1421                         base = ip2num(m.group(1))
       
  1422                         mask = int(m.group(2)[1:])
       
  1423                         mask = 32 - mask
       
  1424 
       
  1425                         if (hostIP >> mask) == (base >> mask):
       
  1426                             return True
       
  1427 
       
  1428                     elif fnmatch(host, value):
       
  1429                         return True
       
  1430 
       
  1431             return False
       
  1432 
       
  1433         finally:
       
  1434             sc.CFRelease(kSCPropNetProxiesExceptionsList)
       
  1435             sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
       
  1436 
       
  1437 
       
  1438 
       
  1439     def getproxies_macosx_sysconf():
       
  1440         """Return a dictionary of scheme -> proxy server URL mappings.
       
  1441 
       
  1442         This function uses the MacOSX framework SystemConfiguration
       
  1443         to fetch the proxy information.
       
  1444         """
       
  1445         from ctypes import cdll
       
  1446         from ctypes.util import find_library
       
  1447 
       
  1448         sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
       
  1449         _CFSetup(sc)
       
  1450 
       
  1451         if not sc:
       
  1452             return {}
       
  1453 
       
  1454         kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, "HTTPEnable", 0)
       
  1455         kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, "HTTPProxy", 0)
       
  1456         kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, "HTTPPort", 0)
       
  1457 
       
  1458         kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, "HTTPSEnable", 0)
       
  1459         kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, "HTTPSProxy", 0)
       
  1460         kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, "HTTPSPort", 0)
       
  1461 
       
  1462         kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, "FTPEnable", 0)
       
  1463         kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, "FTPPassive", 0)
       
  1464         kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, "FTPPort", 0)
       
  1465         kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, "FTPProxy", 0)
       
  1466 
       
  1467         kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, "GopherEnable", 0)
       
  1468         kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, "GopherPort", 0)
       
  1469         kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, "GopherProxy", 0)
       
  1470 
       
  1471         proxies = {}
       
  1472         proxyDict = sc.SCDynamicStoreCopyProxies(None)
       
  1473 
       
  1474         try:
       
  1475             # HTTP:
       
  1476             enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
       
  1477             if enabled and _CFNumberToInt32(sc, enabled):
       
  1478                 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
       
  1479                 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)
       
  1480 
       
  1481                 if proxy:
       
  1482                     proxy = _CStringFromCFString(sc, proxy)
       
  1483                     if port:
       
  1484                         port = _CFNumberToInt32(sc, port)
       
  1485                         proxies["http"] = "http://%s:%i" % (proxy, port)
       
  1486                     else:
       
  1487                         proxies["http"] = "http://%s" % (proxy, )
       
  1488 
       
  1489             # HTTPS:
       
  1490             enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
       
  1491             if enabled and _CFNumberToInt32(sc, enabled):
       
  1492                 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
       
  1493                 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)
       
  1494 
       
  1495                 if proxy:
       
  1496                     proxy = _CStringFromCFString(sc, proxy)
       
  1497                     if port:
       
  1498                         port = _CFNumberToInt32(sc, port)
       
  1499                         proxies["https"] = "http://%s:%i" % (proxy, port)
       
  1500                     else:
       
  1501                         proxies["https"] = "http://%s" % (proxy, )
       
  1502 
       
  1503             # FTP:
       
  1504             enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
       
  1505             if enabled and _CFNumberToInt32(sc, enabled):
       
  1506                 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
       
  1507                 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)
       
  1508 
       
  1509                 if proxy:
       
  1510                     proxy = _CStringFromCFString(sc, proxy)
       
  1511                     if port:
       
  1512                         port = _CFNumberToInt32(sc, port)
       
  1513                         proxies["ftp"] = "http://%s:%i" % (proxy, port)
       
  1514                     else:
       
  1515                         proxies["ftp"] = "http://%s" % (proxy, )
       
  1516 
       
  1517             # Gopher:
       
  1518             enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
       
  1519             if enabled and _CFNumberToInt32(sc, enabled):
       
  1520                 proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
       
  1521                 port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)
       
  1522 
       
  1523                 if proxy:
       
  1524                     proxy = _CStringFromCFString(sc, proxy)
       
  1525                     if port:
       
  1526                         port = _CFNumberToInt32(sc, port)
       
  1527                         proxies["gopher"] = "http://%s:%i" % (proxy, port)
       
  1528                     else:
       
  1529                         proxies["gopher"] = "http://%s" % (proxy, )
       
  1530         finally:
       
  1531             sc.CFRelease(proxyDict)
       
  1532 
       
  1533         sc.CFRelease(kSCPropNetProxiesHTTPEnable)
       
  1534         sc.CFRelease(kSCPropNetProxiesHTTPProxy)
       
  1535         sc.CFRelease(kSCPropNetProxiesHTTPPort)
       
  1536         sc.CFRelease(kSCPropNetProxiesFTPEnable)
       
  1537         sc.CFRelease(kSCPropNetProxiesFTPPassive)
       
  1538         sc.CFRelease(kSCPropNetProxiesFTPPort)
       
  1539         sc.CFRelease(kSCPropNetProxiesFTPProxy)
       
  1540         sc.CFRelease(kSCPropNetProxiesGopherEnable)
       
  1541         sc.CFRelease(kSCPropNetProxiesGopherPort)
       
  1542         sc.CFRelease(kSCPropNetProxiesGopherProxy)
       
  1543 
       
  1544         return proxies
       
  1545 
       
  1546 
       
  1547 
       
  1548     def proxy_bypass(host):
       
  1549         if getproxies_environment():
       
  1550             return proxy_bypass_environment(host)
       
  1551         else:
       
  1552             return proxy_bypass_macosx_sysconf(host)
       
  1553 
       
  1554     def getproxies():
       
  1555         return getproxies_environment() or getproxies_macosx_sysconf()
       
  1556 
       
  1557 elif os.name == 'nt':
       
  1558     def getproxies_registry():
       
  1559         """Return a dictionary of scheme -> proxy server URL mappings.
       
  1560 
       
  1561         Win32 uses the registry to store proxies.
       
  1562 
       
  1563         """
       
  1564         proxies = {}
       
  1565         try:
       
  1566             import _winreg
       
  1567         except ImportError:
       
  1568             # Std module, so should be around - but you never know!
       
  1569             return proxies
       
  1570         try:
       
  1571             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
       
  1572                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
       
  1573             proxyEnable = _winreg.QueryValueEx(internetSettings,
       
  1574                                                'ProxyEnable')[0]
       
  1575             if proxyEnable:
       
  1576                 # Returned as Unicode but problems if not converted to ASCII
       
  1577                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
       
  1578                                                        'ProxyServer')[0])
       
  1579                 if '=' in proxyServer:
       
  1580                     # Per-protocol settings
       
  1581                     for p in proxyServer.split(';'):
       
  1582                         protocol, address = p.split('=', 1)
       
  1583                         # See if address has a type:// prefix
       
  1584                         import re
       
  1585                         if not re.match('^([^/:]+)://', address):
       
  1586                             address = '%s://%s' % (protocol, address)
       
  1587                         proxies[protocol] = address
       
  1588                 else:
       
  1589                     # Use one setting for all protocols
       
  1590                     if proxyServer[:5] == 'http:':
       
  1591                         proxies['http'] = proxyServer
       
  1592                     else:
       
  1593                         proxies['http'] = 'http://%s' % proxyServer
       
  1594                         proxies['ftp'] = 'ftp://%s' % proxyServer
       
  1595             internetSettings.Close()
       
  1596         except (WindowsError, ValueError, TypeError):
       
  1597             # Either registry key not found etc, or the value in an
       
  1598             # unexpected format.
       
  1599             # proxies already set up to be empty so nothing to do
       
  1600             pass
       
  1601         return proxies
       
  1602 
       
  1603     def getproxies():
       
  1604         """Return a dictionary of scheme -> proxy server URL mappings.
       
  1605 
       
  1606         Returns settings gathered from the environment, if specified,
       
  1607         or the registry.
       
  1608 
       
  1609         """
       
  1610         return getproxies_environment() or getproxies_registry()
       
  1611 
       
  1612     def proxy_bypass_registry(host):
       
  1613         try:
       
  1614             import _winreg
       
  1615             import re
       
  1616         except ImportError:
       
  1617             # Std modules, so should be around - but you never know!
       
  1618             return 0
       
  1619         try:
       
  1620             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
       
  1621                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
       
  1622             proxyEnable = _winreg.QueryValueEx(internetSettings,
       
  1623                                                'ProxyEnable')[0]
       
  1624             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
       
  1625                                                      'ProxyOverride')[0])
       
  1626             # ^^^^ Returned as Unicode but problems if not converted to ASCII
       
  1627         except WindowsError:
       
  1628             return 0
       
  1629         if not proxyEnable or not proxyOverride:
       
  1630             return 0
       
  1631         # try to make a host list from name and IP address.
       
  1632         rawHost, port = splitport(host)
       
  1633         host = [rawHost]
       
  1634         try:
       
  1635             addr = socket.gethostbyname(rawHost)
       
  1636             if addr != rawHost:
       
  1637                 host.append(addr)
       
  1638         except socket.error:
       
  1639             pass
       
  1640         try:
       
  1641             fqdn = socket.getfqdn(rawHost)
       
  1642             if fqdn != rawHost:
       
  1643                 host.append(fqdn)
       
  1644         except socket.error:
       
  1645             pass
       
  1646         # make a check value list from the registry entry: replace the
       
  1647         # '<local>' string by the localhost entry and the corresponding
       
  1648         # canonical entry.
       
  1649         proxyOverride = proxyOverride.split(';')
       
  1650         i = 0
       
  1651         while i < len(proxyOverride):
       
  1652             if proxyOverride[i] == '<local>':
       
  1653                 proxyOverride[i:i+1] = ['localhost',
       
  1654                                         '127.0.0.1',
       
  1655                                         socket.gethostname(),
       
  1656                                         socket.gethostbyname(
       
  1657                                             socket.gethostname())]
       
  1658             i += 1
       
  1659         # print proxyOverride
       
  1660         # now check if we match one of the registry values.
       
  1661         for test in proxyOverride:
       
  1662             test = test.replace(".", r"\.")     # mask dots
       
  1663             test = test.replace("*", r".*")     # change glob sequence
       
  1664             test = test.replace("?", r".")      # change glob char
       
  1665             for val in host:
       
  1666                 # print "%s <--> %s" %( test, val )
       
  1667                 if re.match(test, val, re.I):
       
  1668                     return 1
       
  1669         return 0
       
  1670 
       
  1671     def proxy_bypass(host):
       
  1672         """Return a dictionary of scheme -> proxy server URL mappings.
       
  1673 
       
  1674         Returns settings gathered from the environment, if specified,
       
  1675         or the registry.
       
  1676 
       
  1677         """
       
  1678         if getproxies_environment():
       
  1679             return proxy_bypass_environment(host)
       
  1680         else:
       
  1681             return proxy_bypass_registry(host)
       
  1682 
       
  1683 else:
       
    # On every other platform the environment is the only source of
    # proxy settings: <scheme>_proxy variables select the proxies and
    # no_proxy lists the hosts that bypass them.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
       
  1687 
       
  1688 # Test and time quote() and unquote()
       
  1689 def test1():
       
  1690     s = ''
       
  1691     for i in range(256): s = s + chr(i)
       
  1692     s = s*4
       
  1693     t0 = time.time()
       
  1694     qs = quote(s)
       
  1695     uqs = unquote(qs)
       
  1696     t1 = time.time()
       
  1697     if uqs != s:
       
  1698         print 'Wrong!'
       
  1699     print repr(s)
       
  1700     print repr(qs)
       
  1701     print repr(uqs)
       
  1702     print round(t1 - t0, 3), 'sec'
       
  1703 
       
  1704 
       
  1705 def reporthook(blocknum, blocksize, totalsize):
       
  1706     # Report during remote transfers
       
  1707     print "Block number: %d, Block size: %d, Total size: %d" % (
       
  1708         blocknum, blocksize, totalsize)
       
  1709 
       
  1710 # Test program
       
  1711 def test(args=[]):
       
  1712     if not args:
       
  1713         args = [
       
  1714             '/etc/passwd',
       
  1715             'file:/etc/passwd',
       
  1716             'file://localhost/etc/passwd',
       
  1717             'ftp://ftp.gnu.org/pub/README',
       
  1718             'http://www.python.org/index.html',
       
  1719             ]
       
  1720         if hasattr(URLopener, "open_https"):
       
  1721             args.append('https://synergy.as.cmu.edu/~geek/')
       
  1722     try:
       
  1723         for url in args:
       
  1724             print '-'*10, url, '-'*10
       
  1725             fn, h = urlretrieve(url, None, reporthook)
       
  1726             print fn
       
  1727             if h:
       
  1728                 print '======'
       
  1729                 for k in h.keys(): print k + ':', h[k]
       
  1730                 print '======'
       
  1731             fp = open(fn, 'rb')
       
  1732             data = fp.read()
       
  1733             del fp
       
  1734             if '\r' in data:
       
  1735                 table = string.maketrans("", "")
       
  1736                 data = data.translate(table, "\r")
       
  1737             print data
       
  1738             fn, h = None, None
       
  1739         print '-'*40
       
  1740     finally:
       
  1741         urlcleanup()
       
  1742 
       
  1743 def main():
       
  1744     import getopt, sys
       
  1745     try:
       
  1746         opts, args = getopt.getopt(sys.argv[1:], "th")
       
  1747     except getopt.error, msg:
       
  1748         print msg
       
  1749         print "Use -h for help"
       
  1750         return
       
  1751     t = 0
       
  1752     for o, a in opts:
       
  1753         if o == '-t':
       
  1754             t = t + 1
       
  1755         if o == '-h':
       
  1756             print "Usage: python urllib.py [-t] [url ...]"
       
  1757             print "-t runs self-test;",
       
  1758             print "otherwise, contents of urls are printed"
       
  1759             return
       
  1760     if t:
       
  1761         if t > 1:
       
  1762             test1()
       
  1763         test(args)
       
  1764     else:
       
  1765         if not args:
       
  1766             print "Use -h for help"
       
  1767         for url in args:
       
  1768             print urlopen(url).read(),
       
  1769 
       
# Run the command-line driver (see main()) when this file is executed
# as a script rather than imported as a module.
if __name__ == '__main__':
    main()