FCL/sftools/dev/hostenv/pythontoolsplat: comparison python-2.5.2/win32/Lib/urlparse.py

equal deleted inserted replaced

--1:000000000000
+:ae805ac0140d
+"""Parse (absolute and relative) URLs.
+See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
+UC Irvine, June 1995.
+"""
+__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
+"urlsplit", "urlunsplit"]
# Scheme capability tables.  Each list names the URL schemes to which one
# syntax rule applies; the empty string '' stands for "no scheme given",
# so listing it makes the rule apply by default.

# Schemes for which urljoin() resolves a relative reference against a base.
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https',
    'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '', 'sftp',
]

# Schemes whose URLs may carry a '//netloc' part.
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file',
    'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu',
    'rsync', '', 'svn', 'svn+ssh', 'sftp',
]

# Schemes classified as non-hierarchical.
non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews',
    'sip', 'sips',
]

# Schemes whose path may be followed by ';params'.
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp',
    'rtspu', 'sip', 'sips', 'mms', '', 'sftp',
]

# Schemes whose URLs may carry a '?query' part.
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp',
    'rtspu', 'sip', 'sips', '',
]

# Schemes whose URLs may carry a '#fragment' part.
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https',
    'shttp', 'snews', 'file', 'prospero', '',
]

# Every character that may appear in a scheme name.
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789' '+-.'
)

# urlsplit() memoises its results; once the cache holds this many entries
# it is emptied rather than allowed to grow without bound.
MAX_CACHE_SIZE = 20
_parse_cache = dict()
def clear_cache():
    """Forget all memoised parse results by installing a fresh, empty cache."""
    global _parse_cache
    _parse_cache = dict()
class BaseResult(tuple):
    """Base class for the parsed result objects.

    This provides the attributes shared by the two derived result
    objects as read-only properties.  The derived classes are
    responsible for checking the right number of arguments were
    supplied to the constructor.

    The netloc-derived attributes (username, password, hostname, port)
    are computed from the netloc string each time they are read.
    """

    __slots__ = ()

    # Attributes that access the basic components of the URL:

    @property
    def scheme(self):
        return self[0]

    @property
    def netloc(self):
        return self[1]

    @property
    def path(self):
        return self[2]

    # query and fragment are indexed from the end so the same property is
    # correct whatever the length of the underlying tuple.

    @property
    def query(self):
        return self[-2]

    @property
    def fragment(self):
        return self[-1]

    # Helpers that cut the netloc into its parts:

    def _userinfo(self):
        """Return the "user[:password]" text of the netloc, or None."""
        netloc = self.netloc
        if "@" not in netloc:
            return None
        # Split on the LAST "@": a host cannot contain one, whereas an
        # unescaped "@" does turn up inside passwords.
        return netloc.rsplit("@", 1)[0]

    def _hostport(self):
        """Return (host, port) as strings; port is None when not given."""
        hostport = self.netloc.rsplit("@", 1)[-1]
        if hostport[:1] == "[" and "]" in hostport:
            # Bracketed IP literal such as "[::1]:8080": the colons inside
            # the brackets belong to the address, not to the port.
            host, rest = hostport[1:].split("]", 1)
            if rest[:1] == ":":
                return host, rest[1:]
            return host, None
        if ":" in hostport:
            host, port = hostport.split(":", 1)
            return host, port
        return hostport, None

    # Additional attributes that provide access to parsed-out portions
    # of the netloc:

    @property
    def username(self):
        """User name from the netloc, or None when there is no userinfo."""
        userinfo = self._userinfo()
        if userinfo is None:
            return None
        return userinfo.split(":", 1)[0]

    @property
    def password(self):
        """Password from the netloc, or None when none was supplied."""
        userinfo = self._userinfo()
        if userinfo is not None and ":" in userinfo:
            return userinfo.split(":", 1)[1]
        return None

    @property
    def hostname(self):
        """Lower-cased host (IP-literal brackets removed), or None if empty."""
        return self._hostport()[0].lower() or None

    @property
    def port(self):
        """Port as an int, or None when absent or empty.

        Raises ValueError when the port text is not a decimal number.
        """
        port = self._hostport()[1]
        if port:
            return int(port, 10)
        # "host" and "host:" both mean that no port was specified.
        return None
class SplitResult(BaseResult):
    """Five-field result: (scheme, netloc, path, query, fragment)."""

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, query, fragment):
        # Spelling out each field in the signature makes a call with the
        # wrong number of components fail here.
        components = (scheme, netloc, path, query, fragment)
        return BaseResult.__new__(cls, components)

    def geturl(self):
        """Reassemble the components into a URL string with urlunsplit()."""
        return urlunsplit(self)
class ParseResult(BaseResult):
    """Six-field result: (scheme, netloc, path, params, query, fragment)."""

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, params, query, fragment):
        # Spelling out each field in the signature makes a call with the
        # wrong number of components fail here.
        components = (scheme, netloc, path, params, query, fragment)
        return BaseResult.__new__(cls, components)

    @property
    def params(self):
        # The only field that sits between path and query.
        return self[3]

    def geturl(self):
        """Reassemble the components into a URL string with urlunparse()."""
        return urlunparse(self)
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Return a ParseResult, a 6-tuple of
    (scheme, netloc, path, params, query, fragment).
    The components are not broken up any further (e.g. netloc stays a
    single string) and % escapes are not expanded.

    All arguments are handed on to urlsplit(); params are then separated
    from the path only for schemes listed in uses_params.
    """
    split = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, path, query, fragment = split
    params = ''
    if ';' in path and scheme in uses_params:
        path, params = _splitparams(path)
    return ParseResult(scheme, netloc, path, params, query, fragment)
def _splitparams(url):
    """Split a path into (path, params) at the first ';' of its last segment.

    Only a ';' located at or after the last '/' is treated as the
    delimiter.  When there is no such ';' the path is returned unchanged
    together with an empty params string.
    """
    # rfind() yields -1 when there is no '/'; clamp it to 0, because a
    # negative start would make find() search from the end of the string.
    search_from = max(url.rfind('/'), 0)
    i = url.find(';', search_from)
    if i < 0:
        return url, ''
    return url[:i], url[i+1:]
def _splitnetloc(url, start=0):
    """Return (netloc, rest) for the netloc beginning at index *start*.

    The netloc runs up to the first '/', '?' or '#' found at or after
    *start*, or to the end of *url* when none of them occurs; *rest*
    begins with that delimiter.
    """
    found = [url.find(ch, start) for ch in '/?#']
    # Keep the real hits, with the end of the string as the fallback.
    candidates = [pos for pos in found if pos >= 0] + [len(url)]
    end = min(candidates)
    return url[start:end], url[end:]
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Return a SplitResult, a 5-tuple of
    (scheme, netloc, path, query, fragment).
    The components are not broken up any further (e.g. netloc stays a
    single string) and % escapes are not expanded.

    *scheme* is the default used when the URL names none.  Results are
    memoised in a module-level cache keyed on the arguments and their
    types.
    """
    allow_fragments = bool(allow_fragments)
    cache_key = (url, scheme, allow_fragments, type(url), type(scheme))
    hit = _parse_cache.get(cache_key, None)
    if hit:
        return hit
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        # Avoid runaway growth: start over with an empty cache.
        clear_cache()

    netloc = ''
    query = ''
    fragment = ''

    # 'http' is the common case: it is accepted without validating its
    # characters or consulting the scheme tables.
    is_http = False
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        is_http = (prefix == 'http')
        # The text before the first ':' is a scheme only when every one of
        # its characters is a legal scheme character.
        if is_http or not [ch for ch in prefix if ch not in scheme_chars]:
            scheme = prefix.lower()
            url = url[colon + 1:]

    if (is_http or scheme in uses_netloc) and url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
    if allow_fragments and (is_http or scheme in uses_fragment) and '#' in url:
        url, fragment = url.split('#', 1)
    if (is_http or scheme in uses_query) and '?' in url:
        url, query = url.split('?', 1)

    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[cache_key] = result
    return result
def urlunparse(data):
    """Put a parsed URL back together again.

    *data* is any iterable of exactly six items:
    (scheme, netloc, url, params, query, fragment).

    This may result in a slightly different, but equivalent URL, if the
    URL that was parsed originally had redundant delimiters, e.g. a ?
    with an empty query (the draft states that these are equivalent).
    """
    # Unpack in the body rather than in the signature: tuple parameters
    # are not accepted by Python 3 (PEP 3113), and callers still pass a
    # single positional sequence either way.
    scheme, netloc, url, params, query, fragment = data
    if params:
        url = "%s;%s" % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))
def urlunsplit(data):
    """Combine a 5-item sequence into a URL string.

    *data* is any iterable of exactly five items:
    (scheme, netloc, url, query, fragment).  Empty query and fragment
    components are omitted together with their delimiters.
    """
    # Unpack in the body rather than in the signature: tuple parameters
    # are not accepted by Python 3 (PEP 3113), and callers still pass a
    # single positional sequence either way.
    scheme, netloc, url, query, fragment = data
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        # A path placed after a netloc has to begin with '/'.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    If either argument is empty the other one is returned unchanged.
    *url* is also returned unchanged when its scheme differs from the
    base's or is not listed in uses_relative.
    """
    # With one operand missing there is nothing to combine.
    if not base:
        return url
    if not url:
        return base

    base_parts = urlparse(base, '', allow_fragments)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = base_parts
    # The reference inherits the base's scheme when it names none itself.
    ref_parts = urlparse(url, bscheme, allow_fragments)
    scheme, netloc, path, params, query, fragment = ref_parts

    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            # The reference carries its own netloc: nothing is inherited.
            return urlunparse(
                (scheme, netloc, path, params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        # An absolute path replaces the base path outright.
        return urlunparse(
            (scheme, netloc, path, params, query, fragment))
    if not path and not params and not query:
        # Only a fragment (or nothing) was given: keep the rest of the base.
        return urlunparse(
            (scheme, netloc, bpath, bparams, bquery, fragment))

    # Drop the last segment of the base path and append the reference.
    parts = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if parts[-1] == '.':
        parts[-1] = ''
    parts = [part for part in parts if part != '.']

    # Repeatedly cancel the first interior "<name>/.." pair until none
    # is left; the final element is never examined here.
    collapsed = True
    while collapsed:
        collapsed = False
        for idx in range(1, len(parts) - 1):
            if parts[idx] == '..' and parts[idx - 1] not in ('', '..'):
                del parts[idx - 1:idx + 1]
                collapsed = True
                break

    # Deal with a '..' left as the final element.
    if parts == ['', '..']:
        parts[-1] = ''
    elif len(parts) >= 2 and parts[-1] == '..':
        parts[-2:] = ['']
    return urlunparse(
        (scheme, netloc, '/'.join(parts), params, query, fragment))
def urldefrag(url):
    """Remove any fragment from *url*.

    Return a 2-tuple (url_without_fragment, fragment).  When *url*
    contains no '#' it is returned untouched and the fragment is the
    empty string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, fragment = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return stripped, fragment
# Input for test().  The first non-blank line is the base URL; each later
# line reads "<reference> = <URL:expected>", where the expected value is
# what joining the reference to the base should produce.
test_input = """
http://a/b/c/d
g:h        = <URL:g:h>
http:g     = <URL:http://a/b/c/g>
http:      = <URL:http://a/b/c/d>
g          = <URL:http://a/b/c/g>
./g        = <URL:http://a/b/c/g>
g/         = <URL:http://a/b/c/g/>
/g         = <URL:http://a/g>
//g        = <URL:http://g>
?y         = <URL:http://a/b/c/d?y>
g?y        = <URL:http://a/b/c/g?y>
g?y/./x    = <URL:http://a/b/c/g?y/./x>
.          = <URL:http://a/b/c/>
./         = <URL:http://a/b/c/>
..         = <URL:http://a/b/>
../        = <URL:http://a/b/>
../g       = <URL:http://a/b/g>
../..      = <URL:http://a/>
../../g    = <URL:http://a/g>
../../../g = <URL:http://a/../g>
./../g     = <URL:http://a/b/g>
./g/.      = <URL:http://a/b/c/g/>
/./g       = <URL:http://a/./g>
g/./h      = <URL:http://a/b/c/g/h>
g/../h     = <URL:http://a/b/c/h>
http:g     = <URL:http://a/b/c/g>
http:      = <URL:http://a/b/c/d>
http:?y         = <URL:http://a/b/c/d?y>
http:g?y        = <URL:http://a/b/c/g?y>
http:g?y/./x    = <URL:http://a/b/c/g?y/./x>
"""
def test():
    """Run the urljoin() self-test and print the results to stdout.

    Test lines come from the file named by the first command-line
    argument ('-' means standard input) or, when no argument is given,
    from the built-in test_input.  The first URL read becomes the base
    that every later one is joined to.  For each line the parsed form and
    the joined URL are printed; a line of the form
    "<url> = <URL:expected>" additionally prints an EXPECTED notice when
    the joined result differs.
    """
    import sys

    opened_here = False     # True only for a file this function opened
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = open(fn)
            opened_here = True
    else:
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                # Neither Python 2 module exists on Python 3.
                from io import StringIO
        fp = StringIO(test_input)

    # sys.stdout.write emits the same text as the print statement did and
    # is valid syntax on both Python 2 and Python 3.
    out = sys.stdout.write
    base = ''
    try:
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            if not words:
                continue
            url = words[0]
            parts = urlparse(url)
            out('%-10s : %s\n' % (url, parts))
            joined = urljoin(base, url)
            if not base:
                base = joined
            wrapped = '<URL:%s>' % joined
            out('%-10s = %s\n' % (url, wrapped))
            if len(words) == 3 and words[1] == '=':
                if wrapped != words[2]:
                    out('EXPECTED %s !!!!!!!!!!\n' % words[2])
    finally:
        # Close only what was opened here; stdin stays open.
        if opened_here:
            fp.close()


if __name__ == '__main__':
    test()