|
1 """Parse (absolute and relative) URLs. |
|
2 |
|
3 See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, |
|
4 UC Irvine, June 1995. |
|
5 """ |
|
6 |
|
# Public API of this module.
__all__ = [
    "urlparse", "urlunparse",
    "urljoin", "urldefrag",
    "urlsplit", "urlunsplit",
    "parse_qs", "parse_qsl",
]
|
9 |
|
# A classification of schemes ('' means apply by default).
# Schemes that may appear in relative URL references.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file',
                 'https', 'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '',
                 'sftp']
# Schemes whose URLs carry a //<netloc> component.
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais',
               'file', 'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp',
               'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp']
# Schemes with no hierarchical path structure.
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais',
                    'imap', 'snews', 'sip', 'sips']
# Schemes that support ;parameters after the path.
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp',
               'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp']
# Schemes that support ?query strings.
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher',
              'rtsp', 'rtspu', 'sip', 'sips', '']
# Schemes that support #fragment identifiers.
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
                 'https', 'shttp', 'snews', 'file', 'prospero', '']
|
28 |
|
# Characters valid in scheme names: ASCII letters, digits, and "+-."
# (per RFC 3986 section 3.1).
scheme_chars = ('abcdefghijklmnopqrstuvwxyz' +
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
                '0123456789' +
                '+-.')
|
34 |
|
# Bound on the urlsplit() memoization cache; when reached, urlsplit()
# empties the whole cache (via clear_cache) rather than evicting entries.
MAX_CACHE_SIZE = 20
# Maps (url, scheme, allow_fragments, type(url), type(scheme)) -> SplitResult.
_parse_cache = {}
|
37 |
|
def clear_cache():
    """Clear the parse cache."""
    # _parse_cache memoizes urlsplit() results; urlsplit() calls this
    # when the cache grows past MAX_CACHE_SIZE.
    _parse_cache.clear()
|
41 |
|
42 |
|
class ResultMixin(object):
    """Shared accessors for the parsed result objects.

    Each property decomposes self.netloc, which has the general form
    [<username>[:<password>]@]<host>[:<port>].
    """

    @property
    def username(self):
        """User name from the netloc's userinfo, or None if absent."""
        netloc = self.netloc
        if "@" in netloc:
            # rsplit: the userinfo may itself contain '@'; the host cannot.
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                userinfo = userinfo.split(":", 1)[0]
            return userinfo
        return None

    @property
    def password(self):
        """Password from the netloc's userinfo, or None if absent."""
        netloc = self.netloc
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)[1]
        return None

    @property
    def hostname(self):
        """Host part of the netloc, lowercased, or None if empty."""
        netloc = self.netloc
        if "@" in netloc:
            netloc = netloc.rsplit("@", 1)[1]
        if ":" in netloc:
            netloc = netloc.split(":", 1)[0]
        return netloc.lower() or None

    @property
    def port(self):
        """Port number as an int, or None if absent or empty.

        Raises ValueError if a port is present but not numeric.
        """
        netloc = self.netloc
        if "@" in netloc:
            netloc = netloc.rsplit("@", 1)[1]
        if ":" in netloc:
            port = netloc.split(":", 1)[1]
            # Bug fix: a bare trailing colon ("host:") used to raise
            # ValueError from int(''); treat it as no port instead.
            if port:
                return int(port, 10)
        return None
|
83 |
|
84 from collections import namedtuple |
|
85 |
|
class SplitResult(namedtuple('SplitResult',
                             ['scheme', 'netloc', 'path', 'query', 'fragment']),
                  ResultMixin):
    """5-tuple result of urlsplit(), with named fields plus the
    username/password/hostname/port accessors from ResultMixin."""

    __slots__ = ()

    def geturl(self):
        """Reassemble this split result into a single URL string."""
        return urlunsplit(self)
|
92 |
|
93 |
|
class ParseResult(namedtuple('ParseResult',
                             ['scheme', 'netloc', 'path', 'params', 'query',
                              'fragment']),
                  ResultMixin):
    """6-tuple result of urlparse(), with named fields plus the
    username/password/hostname/port accessors from ResultMixin."""

    __slots__ = ()

    def geturl(self):
        """Reassemble this parse result into a single URL string."""
        return urlunparse(self)
|
100 |
|
101 |
|
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    # Renamed the local result (previously 'tuple', which shadowed the
    # builtin of the same name).
    split = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = split
    # Only split ;params off the path for schemes that actually use them.
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return ParseResult(scheme, netloc, url, params, query, fragment)
|
115 |
|
116 def _splitparams(url): |
|
117 if '/' in url: |
|
118 i = url.find(';', url.rfind('/')) |
|
119 if i < 0: |
|
120 return url, '' |
|
121 else: |
|
122 i = url.find(';') |
|
123 return url[:i], url[i+1:] |
|
124 |
|
125 def _splitnetloc(url, start=0): |
|
126 delim = len(url) # position of end of domain part of url, default is end |
|
127 for c in '/?#': # look for delimiters; the order is NOT important |
|
128 wdelim = url.find(c, start) # find first of this delim |
|
129 if wdelim >= 0: # if found |
|
130 delim = min(delim, wdelim) # use earliest delim position |
|
131 return url[start:delim], url[delim:] # return (domain, rest) |
|
132 |
|
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    allow_fragments = bool(allow_fragments)
    # Cache key includes the argument types so str/unicode inputs don't
    # collide on equal-comparing keys.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # Bug fix (matches later CPython): make sure "url" is not
            # actually a port number, as in "www.python.org:80" -- there
            # the scheme candidate is really part of the path.
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest
    if scheme in uses_netloc and url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v
|
175 |
|
def urlunparse(data):
    """Put a parsed URL back together again. This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment = data
    if params:
        # Re-attach the ;params to the path before reassembly.
        url = url + ';' + params
    return urlunsplit((scheme, netloc, url, query, fragment))
|
185 |
|
def urlunsplit(data):
    """Combine a 5-tuple as returned by urlsplit() back into a URL."""
    scheme, netloc, url, query, fragment = data
    # Emit '//<netloc>' when there is one, or when the scheme expects a
    # netloc and the path doesn't already begin with '//'.
    if netloc or (scheme and scheme in uses_netloc and not url.startswith('//')):
        if url and not url.startswith('/'):
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url += '?' + query
    if fragment:
        url += '#' + fragment
    return url
|
198 |
|
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    # Trivial cases: an empty base or an empty url yields the other as-is.
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A different scheme, or one that never uses relative references,
    # means url stands on its own.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        # An explicit netloc already makes url absolute.
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    # An absolute path replaces the base path entirely.
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path:
        # No path in url: inherit the base path, and fall back to the
        # base params/query only when url supplies none of its own.
        path = bpath
        if not params:
            params = bparams
        else:
            # NOTE(review): drops the last character of the inherited base
            # path when url carries its own params -- looks odd, but this
            # is the long-standing behavior; confirm before changing.
            path = path[:-1]
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Relative path: resolve against the directory of the base path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Collapse one "<segment>/.." pair per pass, restarting the scan after
    # each removal, until no collapsible pair remains.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # A ".." that could not be collapsed is turned into an empty segment
    # (producing a trailing '/'), rather than being resolved above root.
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
|
255 |
|
def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    # Fast path: nothing to strip.
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, frag = urlparse(url)
    defrag = urlunparse((scheme, netloc, path, params, query, ''))
    return defrag, frag
|
269 |
|
270 # unquote method for parse_qs and parse_qsl |
|
271 # Cannot use directly from urllib as it would create circular reference. |
|
272 # urllib uses urlparse methods ( urljoin) |
|
273 |
|
274 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) |
|
275 _hextochr.update(('%02X' % i, chr(i)) for i in range(256)) |
|
276 |
|
def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    pieces = s.split('%')
    # The first piece precedes any '%'; each later piece starts with the
    # two hex digits of an escape (when well-formed).
    out = [pieces[0]]
    for piece in pieces[1:]:
        try:
            out.append(_hextochr[piece[:2]] + piece[2:])
        except KeyError:
            # Malformed escape: keep the '%' literally.
            out.append('%' + piece)
        except UnicodeDecodeError:
            # Unicode input whose decoded byte isn't representable: build
            # the character directly from the hex value.
            out.append(unichr(int(piece[:2], 16)) + piece[2:])
    return "".join(out)
|
289 |
|
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dict mapping each field name to a list of its values.
    """
    # Renamed the accumulator (previously 'dict', which shadowed the
    # builtin of the same name).
    result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        result.setdefault(name, []).append(value)
    return result
|
315 |
|
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.

    Returns a list, as G-d intended.
    """
    # Pairs may be separated by either '&' or ';'.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Call form of raise (valid on Python 2 and 3), replacing
                # the legacy "raise ValueError, msg" statement form.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            # '+' encodes a space in query strings; decode it before
            # expanding %XX escapes.
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))

    return r
|
355 |
|
356 |
|
# Self-test data: the first non-blank line is the base URL; every later
# non-blank line is "<relative-url> = <expected-absolute-url>" (the RFC 1808
# relative-resolution examples), consumed by test() below.
test_input = """
http://a/b/c/d

g:h = <URL:g:h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
g = <URL:http://a/b/c/g>
./g = <URL:http://a/b/c/g>
g/ = <URL:http://a/b/c/g/>
/g = <URL:http://a/g>
//g = <URL:http://g>
?y = <URL:http://a/b/c/d?y>
g?y = <URL:http://a/b/c/g?y>
g?y/./x = <URL:http://a/b/c/g?y/./x>
. = <URL:http://a/b/c/>
./ = <URL:http://a/b/c/>
.. = <URL:http://a/b/>
../ = <URL:http://a/b/>
../g = <URL:http://a/b/g>
../.. = <URL:http://a/>
../../g = <URL:http://a/g>
../../../g = <URL:http://a/../g>
./../g = <URL:http://a/b/g>
./g/. = <URL:http://a/b/c/g/>
/./g = <URL:http://a/./g>
g/./h = <URL:http://a/b/c/g/h>
g/../h = <URL:http://a/b/c/h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
http:?y = <URL:http://a/b/c/d?y>
http:g?y = <URL:http://a/b/c/g?y>
http:g?y/./x = <URL:http://a/b/c/g?y/./x>
"""
|
390 |
|
def test():
    """Run the module self-test: parse each input URL and check urljoin()
    against the expected result.

    Reads test lines from the file named in sys.argv[1] ('-' for stdin)
    when given, otherwise from the built-in test_input data.
    """
    import sys
    base = ''
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = open(fn)
    else:
        # cStringIO is a faster drop-in for StringIO when available.
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        fp = StringIO(test_input)
    for line in fp:
        words = line.split()
        if not words:
            continue
        url = words[0]
        parts = urlparse(url)
        print '%-10s : %s' % (url, parts)
        abs = urljoin(base, url)
        if not base:
            # The first URL in the data becomes the base for the rest.
            base = abs
        wrapped = '<URL:%s>' % abs
        print '%-10s = %s' % (url, wrapped)
        # Lines of the form "<url> = <expected>" are checked explicitly.
        if len(words) == 3 and words[1] == '=':
            if wrapped != words[2]:
                print 'EXPECTED', words[2], '!!!!!!!!!!'
|
421 |
|
# Run the self-test when executed as a script.
if __name__ == '__main__':
    test()