symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/cookielib.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """HTTP cookie handling for web clients.
       
     2 
       
     3 This module has (now fairly distant) origins in Gisle Aas' Perl module
       
     4 HTTP::Cookies, from the libwww-perl library.
       
     5 
       
     6 Docstrings, comments and debug strings in this code refer to the
       
     7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
       
     8 them clearly from Python attributes.
       
     9 
       
    10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
       
    11 distributed with the Python standard library, but are available from
       
    12 http://wwwsearch.sf.net/):
       
    13 
       
    14                         CookieJar____
       
    15                         /     \      \
       
    16             FileCookieJar      \      \
       
    17              /    |   \         \      \
       
    18  MozillaCookieJar | LWPCookieJar \      \
       
    19                   |               |      \
       
    20                   |   ---MSIEBase |       \
       
    21                   |  /      |     |        \
       
    22                   | /   MSIEDBCookieJar BSDDBCookieJar
       
    23                   |/
       
    24                MSIECookieJar
       
    25 
       
    26 """
       
    27 
       
    28 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
       
    29            'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
       
    30            'MozillaCookieJar']
       
    31 
       
    32 import re, urlparse, copy, time, urllib
       
    33 try:
       
    34     import threading as _threading
       
    35 except ImportError:
       
    36     import dummy_threading as _threading
       
    37 import httplib  # only for the default HTTP port
       
    38 from calendar import timegm
       
    39 
       
    40 debug = False   # set to True to enable debugging via the logging module
       
    41 logger = None
       
    42 
       
    43 def _debug(*args):
       
    44     if not debug:
       
    45         return
       
    46     global logger
       
    47     if not logger:
       
    48         import logging
       
    49         logger = logging.getLogger("cookielib")
       
    50     return logger.debug(*args)
       
    51 
       
    52 
       
    53 DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
       
    54 MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
       
    55                          "instance initialised with one)")
       
    56 
       
    57 def _warn_unhandled_exception():
       
    58     # There are a few catch-all except: statements in this module, for
       
    59     # catching input that's bad in unexpected ways.  Warn if any
       
    60     # exceptions are caught there.
       
    61     import warnings, traceback, StringIO
       
    62     f = StringIO.StringIO()
       
    63     traceback.print_exc(None, f)
       
    64     msg = f.getvalue()
       
    65     warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
       
    66 
       
    67 
       
    68 # Date/time conversion
       
    69 # -----------------------------------------------------------------------------
       
    70 
       
    71 EPOCH_YEAR = 1970
       
    72 def _timegm(tt):
       
    73     year, month, mday, hour, min, sec = tt[:6]
       
    74     if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
       
    75         (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
       
    76         return timegm(tt)
       
    77     else:
       
    78         return None
       
    79 
       
    80 DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
       
    81 MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
       
    82           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
       
    83 MONTHS_LOWER = []
       
    84 for month in MONTHS: MONTHS_LOWER.append(month.lower())
       
    85 
       
    86 def time2isoz(t=None):
       
    87     """Return a string representing time in seconds since epoch, t.
       
    88 
       
    89     If the function is called without an argument, it will use the current
       
    90     time.
       
    91 
       
    92     The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
       
    93     representing Universal Time (UTC, aka GMT).  An example of this format is:
       
    94 
       
    95     1994-11-24 08:49:37Z
       
    96 
       
    97     """
       
    98     if t is None: t = time.time()
       
    99     year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
       
   100     return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
       
   101         year, mon, mday, hour, min, sec)
       
   102 
       
   103 def time2netscape(t=None):
       
   104     """Return a string representing time in seconds since epoch, t.
       
   105 
       
   106     If the function is called without an argument, it will use the current
       
   107     time.
       
   108 
       
   109     The format of the returned string is like this:
       
   110 
       
   111     Wed, DD-Mon-YYYY HH:MM:SS GMT
       
   112 
       
   113     """
       
   114     if t is None: t = time.time()
       
   115     year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
       
   116     return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
       
   117         DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
       
   118 
       
   119 
       
   120 UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
       
   121 
       
   122 TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
       
   123 def offset_from_tz_string(tz):
       
   124     offset = None
       
   125     if tz in UTC_ZONES:
       
   126         offset = 0
       
   127     else:
       
   128         m = TIMEZONE_RE.search(tz)
       
   129         if m:
       
   130             offset = 3600 * int(m.group(2))
       
   131             if m.group(3):
       
   132                 offset = offset + 60 * int(m.group(3))
       
   133             if m.group(1) == '-':
       
   134                 offset = -offset
       
   135     return offset
       
   136 
       
   137 def _str2time(day, mon, yr, hr, min, sec, tz):
       
   138     # translate month name to number
       
   139     # month numbers start with 1 (January)
       
   140     try:
       
   141         mon = MONTHS_LOWER.index(mon.lower())+1
       
   142     except ValueError:
       
   143         # maybe it's already a number
       
   144         try:
       
   145             imon = int(mon)
       
   146         except ValueError:
       
   147             return None
       
   148         if 1 <= imon <= 12:
       
   149             mon = imon
       
   150         else:
       
   151             return None
       
   152 
       
   153     # make sure clock elements are defined
       
   154     if hr is None: hr = 0
       
   155     if min is None: min = 0
       
   156     if sec is None: sec = 0
       
   157 
       
   158     yr = int(yr)
       
   159     day = int(day)
       
   160     hr = int(hr)
       
   161     min = int(min)
       
   162     sec = int(sec)
       
   163 
       
   164     if yr < 1000:
       
   165         # find "obvious" year
       
   166         cur_yr = time.localtime(time.time())[0]
       
   167         m = cur_yr % 100
       
   168         tmp = yr
       
   169         yr = yr + cur_yr - m
       
   170         m = m - tmp
       
   171         if abs(m) > 50:
       
   172             if m > 0: yr = yr + 100
       
   173             else: yr = yr - 100
       
   174 
       
   175     # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
       
   176     t = _timegm((yr, mon, day, hr, min, sec, tz))
       
   177 
       
   178     if t is not None:
       
   179         # adjust time using timezone string, to get absolute time since epoch
       
   180         if tz is None:
       
   181             tz = "UTC"
       
   182         tz = tz.upper()
       
   183         offset = offset_from_tz_string(tz)
       
   184         if offset is None:
       
   185             return None
       
   186         t = t - offset
       
   187 
       
   188     return t
       
   189 
       
   190 STRICT_DATE_RE = re.compile(
       
   191     r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
       
   192     "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
       
   193 WEEKDAY_RE = re.compile(
       
   194     r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
       
   195 LOOSE_HTTP_DATE_RE = re.compile(
       
   196     r"""^
       
   197     (\d\d?)            # day
       
   198        (?:\s+|[-\/])
       
   199     (\w+)              # month
       
   200         (?:\s+|[-\/])
       
   201     (\d+)              # year
       
   202     (?:
       
   203           (?:\s+|:)    # separator before clock
       
   204        (\d\d?):(\d\d)  # hour:min
       
   205        (?::(\d\d))?    # optional seconds
       
   206     )?                 # optional clock
       
   207        \s*
       
   208     ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       
   209        \s*
       
   210     (?:\(\w+\))?       # ASCII representation of timezone in parens.
       
   211        \s*$""", re.X)
       
   212 def http2time(text):
       
   213     """Returns time in seconds since epoch of time represented by a string.
       
   214 
       
   215     Return value is an integer.
       
   216 
       
   217     None is returned if the format of str is unrecognized, the time is outside
       
   218     the representable range, or the timezone string is not recognized.  If the
       
   219     string contains no timezone, UTC is assumed.
       
   220 
       
   221     The timezone in the string may be numerical (like "-0800" or "+0100") or a
       
   222     string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
       
   223     timezone strings equivalent to UTC (zero offset) are known to the function.
       
   224 
       
   225     The function loosely parses the following formats:
       
   226 
       
   227     Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
       
   228     Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
       
   229     Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
       
   230     09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
       
   231     08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
       
   232     08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)
       
   233 
       
   234     The parser ignores leading and trailing whitespace.  The time may be
       
   235     absent.
       
   236 
       
   237     If the year is given with only 2 digits, the function will select the
       
   238     century that makes the year closest to the current date.
       
   239 
       
   240     """
       
   241     # fast exit for strictly conforming string
       
   242     m = STRICT_DATE_RE.search(text)
       
   243     if m:
       
   244         g = m.groups()
       
   245         mon = MONTHS_LOWER.index(g[1].lower()) + 1
       
   246         tt = (int(g[2]), mon, int(g[0]),
       
   247               int(g[3]), int(g[4]), float(g[5]))
       
   248         return _timegm(tt)
       
   249 
       
   250     # No, we need some messy parsing...
       
   251 
       
   252     # clean up
       
   253     text = text.lstrip()
       
   254     text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday
       
   255 
       
   256     # tz is time zone specifier string
       
   257     day, mon, yr, hr, min, sec, tz = [None]*7
       
   258 
       
   259     # loose regexp parse
       
   260     m = LOOSE_HTTP_DATE_RE.search(text)
       
   261     if m is not None:
       
   262         day, mon, yr, hr, min, sec, tz = m.groups()
       
   263     else:
       
   264         return None  # bad format
       
   265 
       
   266     return _str2time(day, mon, yr, hr, min, sec, tz)
       
   267 
       
   268 ISO_DATE_RE = re.compile(
       
   269     """^
       
   270     (\d{4})              # year
       
   271        [-\/]?
       
   272     (\d\d?)              # numerical month
       
   273        [-\/]?
       
   274     (\d\d?)              # day
       
   275    (?:
       
   276          (?:\s+|[-:Tt])  # separator before clock
       
   277       (\d\d?):?(\d\d)    # hour:min
       
   278       (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
       
   279    )?                    # optional clock
       
   280       \s*
       
   281    ([-+]?\d\d?:?(:?\d\d)?
       
   282     |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
       
   283       \s*$""", re.X)
       
   284 def iso2time(text):
       
   285     """
       
   286     As for http2time, but parses the ISO 8601 formats:
       
   287 
       
   288     1994-02-03 14:15:29 -0100    -- ISO 8601 format
       
   289     1994-02-03 14:15:29          -- zone is optional
       
   290     1994-02-03                   -- only date
       
   291     1994-02-03T14:15:29          -- Use T as separator
       
   292     19940203T141529Z             -- ISO 8601 compact format
       
   293     19940203                     -- only date
       
   294 
       
   295     """
       
   296     # clean up
       
   297     text = text.lstrip()
       
   298 
       
   299     # tz is time zone specifier string
       
   300     day, mon, yr, hr, min, sec, tz = [None]*7
       
   301 
       
   302     # loose regexp parse
       
   303     m = ISO_DATE_RE.search(text)
       
   304     if m is not None:
       
   305         # XXX there's an extra bit of the timezone I'm ignoring here: is
       
   306         #   this the right thing to do?
       
   307         yr, mon, day, hr, min, sec, tz, _ = m.groups()
       
   308     else:
       
   309         return None  # bad format
       
   310 
       
   311     return _str2time(day, mon, yr, hr, min, sec, tz)
       
   312 
       
   313 
       
   314 # Header parsing
       
   315 # -----------------------------------------------------------------------------
       
   316 
       
   317 def unmatched(match):
       
   318     """Return unmatched part of re.Match object."""
       
   319     start, end = match.span(0)
       
   320     return match.string[:start]+match.string[end:]
       
   321 
       
   322 HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
       
   323 HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
       
   324 HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
       
   325 HEADER_ESCAPE_RE = re.compile(r"\\(.)")
       
   326 def split_header_words(header_values):
       
   327     r"""Parse header values into a list of lists containing key,value pairs.
       
   328 
       
   329     The function knows how to deal with ",", ";" and "=" as well as quoted
       
   330     values after "=".  A list of space separated tokens are parsed as if they
       
   331     were separated by ";".
       
   332 
       
   333     If the header_values passed as argument contains multiple values, then they
       
   334     are treated as if they were a single value separated by comma ",".
       
   335 
       
   336     This means that this function is useful for parsing header fields that
       
   337     follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
       
   338     the requirement for tokens).
       
   339 
       
   340       headers           = #header
       
   341       header            = (token | parameter) *( [";"] (token | parameter))
       
   342 
       
   343       token             = 1*<any CHAR except CTLs or separators>
       
   344       separators        = "(" | ")" | "<" | ">" | "@"
       
   345                         | "," | ";" | ":" | "\" | <">
       
   346                         | "/" | "[" | "]" | "?" | "="
       
   347                         | "{" | "}" | SP | HT
       
   348 
       
   349       quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
       
   350       qdtext            = <any TEXT except <">>
       
   351       quoted-pair       = "\" CHAR
       
   352 
       
   353       parameter         = attribute "=" value
       
   354       attribute         = token
       
   355       value             = token | quoted-string
       
   356 
       
   357     Each header is represented by a list of key/value pairs.  The value for a
       
   358     simple token (not part of a parameter) is None.  Syntactically incorrect
       
   359     headers will not necessarily be parsed as you would want.
       
   360 
       
   361     This is easier to describe with some examples:
       
   362 
       
   363     >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
       
   364     [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
       
   365     >>> split_header_words(['text/html; charset="iso-8859-1"'])
       
   366     [[('text/html', None), ('charset', 'iso-8859-1')]]
       
   367     >>> split_header_words([r'Basic realm="\"foo\bar\""'])
       
   368     [[('Basic', None), ('realm', '"foobar"')]]
       
   369 
       
   370     """
       
   371     assert not isinstance(header_values, basestring)
       
   372     result = []
       
   373     for text in header_values:
       
   374         orig_text = text
       
   375         pairs = []
       
   376         while text:
       
   377             m = HEADER_TOKEN_RE.search(text)
       
   378             if m:
       
   379                 text = unmatched(m)
       
   380                 name = m.group(1)
       
   381                 m = HEADER_QUOTED_VALUE_RE.search(text)
       
   382                 if m:  # quoted value
       
   383                     text = unmatched(m)
       
   384                     value = m.group(1)
       
   385                     value = HEADER_ESCAPE_RE.sub(r"\1", value)
       
   386                 else:
       
   387                     m = HEADER_VALUE_RE.search(text)
       
   388                     if m:  # unquoted value
       
   389                         text = unmatched(m)
       
   390                         value = m.group(1)
       
   391                         value = value.rstrip()
       
   392                     else:
       
   393                         # no value, a lone token
       
   394                         value = None
       
   395                 pairs.append((name, value))
       
   396             elif text.lstrip().startswith(","):
       
   397                 # concatenated headers, as per RFC 2616 section 4.2
       
   398                 text = text.lstrip()[1:]
       
   399                 if pairs: result.append(pairs)
       
   400                 pairs = []
       
   401             else:
       
   402                 # skip junk
       
   403                 non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
       
   404                 assert nr_junk_chars > 0, (
       
   405                     "split_header_words bug: '%s', '%s', %s" %
       
   406                     (orig_text, text, pairs))
       
   407                 text = non_junk
       
   408         if pairs: result.append(pairs)
       
   409     return result
       
   410 
       
   411 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
       
   412 def join_header_words(lists):
       
   413     """Do the inverse (almost) of the conversion done by split_header_words.
       
   414 
       
   415     Takes a list of lists of (key, value) pairs and produces a single header
       
   416     value.  Attribute values are quoted if needed.
       
   417 
       
   418     >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
       
   419     'text/plain; charset="iso-8859/1"'
       
   420     >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
       
   421     'text/plain, charset="iso-8859/1"'
       
   422 
       
   423     """
       
   424     headers = []
       
   425     for pairs in lists:
       
   426         attr = []
       
   427         for k, v in pairs:
       
   428             if v is not None:
       
   429                 if not re.search(r"^\w+$", v):
       
   430                     v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
       
   431                     v = '"%s"' % v
       
   432                 k = "%s=%s" % (k, v)
       
   433             attr.append(k)
       
   434         if attr: headers.append("; ".join(attr))
       
   435     return ", ".join(headers)
       
   436 
       
   437 def parse_ns_headers(ns_headers):
       
   438     """Ad-hoc parser for Netscape protocol cookie-attributes.
       
   439 
       
   440     The old Netscape cookie format for Set-Cookie can for instance contain
       
   441     an unquoted "," in the expires field, so we have to use this ad-hoc
       
   442     parser instead of split_header_words.
       
   443 
       
   444     XXX This may not make the best possible effort to parse all the crap
       
   445     that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
       
   446     parser is probably better, so could do worse than following that if
       
   447     this ever gives any trouble.
       
   448 
       
   449     Currently, this is also used for parsing RFC 2109 cookies.
       
   450 
       
   451     """
       
   452     known_attrs = ("expires", "domain", "path", "secure",
       
   453                    # RFC 2109 attrs (may turn up in Netscape cookies, too)
       
   454                    "port", "max-age")
       
   455 
       
   456     result = []
       
   457     for ns_header in ns_headers:
       
   458         pairs = []
       
   459         version_set = False
       
   460         for ii, param in enumerate(re.split(r";\s*", ns_header)):
       
   461             param = param.rstrip()
       
   462             if param == "": continue
       
   463             if "=" not in param:
       
   464                 k, v = param, None
       
   465             else:
       
   466                 k, v = re.split(r"\s*=\s*", param, 1)
       
   467                 k = k.lstrip()
       
   468             if ii != 0:
       
   469                 lc = k.lower()
       
   470                 if lc in known_attrs:
       
   471                     k = lc
       
   472                 if k == "version":
       
   473                     # This is an RFC 2109 cookie.
       
   474                     version_set = True
       
   475                 if k == "expires":
       
   476                     # convert expires date to seconds since epoch
       
   477                     if v.startswith('"'): v = v[1:]
       
   478                     if v.endswith('"'): v = v[:-1]
       
   479                     v = http2time(v)  # None if invalid
       
   480             pairs.append((k, v))
       
   481 
       
   482         if pairs:
       
   483             if not version_set:
       
   484                 pairs.append(("version", "0"))
       
   485             result.append(pairs)
       
   486 
       
   487     return result
       
   488 
       
   489 
       
   490 IPV4_RE = re.compile(r"\.\d+$")
       
   491 def is_HDN(text):
       
   492     """Return True if text is a host domain name."""
       
   493     # XXX
       
   494     # This may well be wrong.  Which RFC is HDN defined in, if any (for
       
   495     #  the purposes of RFC 2965)?
       
   496     # For the current implementation, what about IPv6?  Remember to look
       
   497     #  at other uses of IPV4_RE also, if change this.
       
   498     if IPV4_RE.search(text):
       
   499         return False
       
   500     if text == "":
       
   501         return False
       
   502     if text[0] == "." or text[-1] == ".":
       
   503         return False
       
   504     return True
       
   505 
       
   506 def domain_match(A, B):
       
   507     """Return True if domain A domain-matches domain B, according to RFC 2965.
       
   508 
       
   509     A and B may be host domain names or IP addresses.
       
   510 
       
   511     RFC 2965, section 1:
       
   512 
       
   513     Host names can be specified either as an IP address or a HDN string.
       
   514     Sometimes we compare one host name with another.  (Such comparisons SHALL
       
   515     be case-insensitive.)  Host A's name domain-matches host B's if
       
   516 
       
   517          *  their host name strings string-compare equal; or
       
   518 
       
   519          * A is a HDN string and has the form NB, where N is a non-empty
       
   520             name string, B has the form .B', and B' is a HDN string.  (So,
       
   521             x.y.com domain-matches .Y.com but not Y.com.)
       
   522 
       
   523     Note that domain-match is not a commutative operation: a.b.c.com
       
   524     domain-matches .c.com, but not the reverse.
       
   525 
       
   526     """
       
   527     # Note that, if A or B are IP addresses, the only relevant part of the
       
   528     # definition of the domain-match algorithm is the direct string-compare.
       
   529     A = A.lower()
       
   530     B = B.lower()
       
   531     if A == B:
       
   532         return True
       
   533     if not is_HDN(A):
       
   534         return False
       
   535     i = A.rfind(B)
       
   536     if i == -1 or i == 0:
       
   537         # A does not have form NB, or N is the empty string
       
   538         return False
       
   539     if not B.startswith("."):
       
   540         return False
       
   541     if not is_HDN(B[1:]):
       
   542         return False
       
   543     return True
       
   544 
       
   545 def liberal_is_HDN(text):
       
   546     """Return True if text is a sort-of-like a host domain name.
       
   547 
       
   548     For accepting/blocking domains.
       
   549 
       
   550     """
       
   551     if IPV4_RE.search(text):
       
   552         return False
       
   553     return True
       
   554 
       
   555 def user_domain_match(A, B):
       
   556     """For blocking/accepting domains.
       
   557 
       
   558     A and B may be host domain names or IP addresses.
       
   559 
       
   560     """
       
   561     A = A.lower()
       
   562     B = B.lower()
       
   563     if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
       
   564         if A == B:
       
   565             # equal IP addresses
       
   566             return True
       
   567         return False
       
   568     initial_dot = B.startswith(".")
       
   569     if initial_dot and A.endswith(B):
       
   570         return True
       
   571     if not initial_dot and A == B:
       
   572         return True
       
   573     return False
       
   574 
       
   575 cut_port_re = re.compile(r":\d+$")
       
   576 def request_host(request):
       
   577     """Return request-host, as defined by RFC 2965.
       
   578 
       
   579     Variation from RFC: returned value is lowercased, for convenient
       
   580     comparison.
       
   581 
       
   582     """
       
   583     url = request.get_full_url()
       
   584     host = urlparse.urlparse(url)[1]
       
   585     if host == "":
       
   586         host = request.get_header("Host", "")
       
   587 
       
   588     # remove port, if present
       
   589     host = cut_port_re.sub("", host, 1)
       
   590     return host.lower()
       
   591 
       
   592 def eff_request_host(request):
       
   593     """Return a tuple (request-host, effective request-host name).
       
   594 
       
   595     As defined by RFC 2965, except both are lowercased.
       
   596 
       
   597     """
       
   598     erhn = req_host = request_host(request)
       
   599     if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
       
   600         erhn = req_host + ".local"
       
   601     return req_host, erhn
       
   602 
       
   603 def request_path(request):
       
   604     """request-URI, as defined by RFC 2965."""
       
   605     url = request.get_full_url()
       
   606     #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
       
   607     #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
       
   608     path, parameters, query, frag = urlparse.urlparse(url)[2:]
       
   609     if parameters:
       
   610         path = "%s;%s" % (path, parameters)
       
   611     path = escape_path(path)
       
   612     req_path = urlparse.urlunparse(("", "", path, "", query, frag))
       
   613     if not req_path.startswith("/"):
       
   614         # fix bad RFC 2396 absoluteURI
       
   615         req_path = "/"+req_path
       
   616     return req_path
       
   617 
       
   618 def request_port(request):
       
   619     host = request.get_host()
       
   620     i = host.find(':')
       
   621     if i >= 0:
       
   622         port = host[i+1:]
       
   623         try:
       
   624             int(port)
       
   625         except ValueError:
       
   626             _debug("nonnumeric port: '%s'", port)
       
   627             return None
       
   628     else:
       
   629         port = DEFAULT_HTTP_PORT
       
   630     return port
       
   631 
       
   632 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
       
   633 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
       
   634 HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
       
   635 ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
       
   636 def uppercase_escaped_char(match):
       
   637     return "%%%s" % match.group(1).upper()
       
   638 def escape_path(path):
       
   639     """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
       
   640     # There's no knowing what character encoding was used to create URLs
       
   641     # containing %-escapes, but since we have to pick one to escape invalid
       
   642     # path characters, we pick UTF-8, as recommended in the HTML 4.0
       
   643     # specification:
       
   644     # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
       
   645     # And here, kind of: draft-fielding-uri-rfc2396bis-03
       
   646     # (And in draft IRI specification: draft-duerst-iri-05)
       
   647     # (And here, for new URI schemes: RFC 2718)
       
   648     if isinstance(path, unicode):
       
   649         path = path.encode("utf-8")
       
   650     path = urllib.quote(path, HTTP_PATH_SAFE)
       
   651     path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
       
   652     return path
       
   653 
       
   654 def reach(h):
       
   655     """Return reach of host h, as defined by RFC 2965, section 1.
       
   656 
       
   657     The reach R of a host name H is defined as follows:
       
   658 
       
   659        *  If
       
   660 
       
   661           -  H is the host domain name of a host; and,
       
   662 
       
   663           -  H has the form A.B; and
       
   664 
       
   665           -  A has no embedded (that is, interior) dots; and
       
   666 
       
   667           -  B has at least one embedded dot, or B is the string "local".
       
   668              then the reach of H is .B.
       
   669 
       
   670        *  Otherwise, the reach of H is H.
       
   671 
       
   672     >>> reach("www.acme.com")
       
   673     '.acme.com'
       
   674     >>> reach("acme.com")
       
   675     'acme.com'
       
   676     >>> reach("acme.local")
       
   677     '.local'
       
   678 
       
   679     """
       
   680     i = h.find(".")
       
   681     if i >= 0:
       
   682         #a = h[:i]  # this line is only here to show what a is
       
   683         b = h[i+1:]
       
   684         i = b.find(".")
       
   685         if is_HDN(h) and (i >= 0 or b == "local"):
       
   686             return "."+b
       
   687     return h
       
   688 
       
   689 def is_third_party(request):
       
   690     """
       
   691 
       
   692     RFC 2965, section 3.3.6:
       
   693 
       
   694         An unverifiable transaction is to a third-party host if its request-
       
   695         host U does not domain-match the reach R of the request-host O in the
       
   696         origin transaction.
       
   697 
       
   698     """
       
   699     req_host = request_host(request)
       
   700     if not domain_match(req_host, reach(request.get_origin_req_host())):
       
   701         return True
       
   702     else:
       
   703         return False
       
   704 
       
   705 
       
   706 class Cookie:
       
   707     """HTTP Cookie.
       
   708 
       
   709     This class represents both Netscape and RFC 2965 cookies.
       
   710 
       
   711     This is deliberately a very simple class.  It just holds attributes.  It's
       
   712     possible to construct Cookie instances that don't comply with the cookie
       
   713     standards.  CookieJar.make_cookies is the factory function for Cookie
       
   714     objects -- it deals with cookie parsing, supplying defaults, and
       
   715     normalising to the representation used in this class.  CookiePolicy is
       
   716     responsible for checking them to see whether they should be accepted from
       
   717     and returned to the server.
       
   718 
       
   719     Note that the port may be present in the headers, but unspecified ("Port"
       
   720     rather than"Port=80", for example); if this is the case, port is None.
       
   721 
       
   722     """
       
   723 
       
   724     def __init__(self, version, name, value,
       
   725                  port, port_specified,
       
   726                  domain, domain_specified, domain_initial_dot,
       
   727                  path, path_specified,
       
   728                  secure,
       
   729                  expires,
       
   730                  discard,
       
   731                  comment,
       
   732                  comment_url,
       
   733                  rest,
       
   734                  rfc2109=False,
       
   735                  ):
       
   736 
       
   737         if version is not None: version = int(version)
       
   738         if expires is not None: expires = int(expires)
       
   739         if port is None and port_specified is True:
       
   740             raise ValueError("if port is None, port_specified must be false")
       
   741 
       
   742         self.version = version
       
   743         self.name = name
       
   744         self.value = value
       
   745         self.port = port
       
   746         self.port_specified = port_specified
       
   747         # normalise case, as per RFC 2965 section 3.3.3
       
   748         self.domain = domain.lower()
       
   749         self.domain_specified = domain_specified
       
   750         # Sigh.  We need to know whether the domain given in the
       
   751         # cookie-attribute had an initial dot, in order to follow RFC 2965
       
   752         # (as clarified in draft errata).  Needed for the returned $Domain
       
   753         # value.
       
   754         self.domain_initial_dot = domain_initial_dot
       
   755         self.path = path
       
   756         self.path_specified = path_specified
       
   757         self.secure = secure
       
   758         self.expires = expires
       
   759         self.discard = discard
       
   760         self.comment = comment
       
   761         self.comment_url = comment_url
       
   762         self.rfc2109 = rfc2109
       
   763 
       
   764         self._rest = copy.copy(rest)
       
   765 
       
   766     def has_nonstandard_attr(self, name):
       
   767         return name in self._rest
       
   768     def get_nonstandard_attr(self, name, default=None):
       
   769         return self._rest.get(name, default)
       
   770     def set_nonstandard_attr(self, name, value):
       
   771         self._rest[name] = value
       
   772 
       
   773     def is_expired(self, now=None):
       
   774         if now is None: now = time.time()
       
   775         if (self.expires is not None) and (self.expires <= now):
       
   776             return True
       
   777         return False
       
   778 
       
   779     def __str__(self):
       
   780         if self.port is None: p = ""
       
   781         else: p = ":"+self.port
       
   782         limit = self.domain + p + self.path
       
   783         if self.value is not None:
       
   784             namevalue = "%s=%s" % (self.name, self.value)
       
   785         else:
       
   786             namevalue = self.name
       
   787         return "<Cookie %s for %s>" % (namevalue, limit)
       
   788 
       
   789     def __repr__(self):
       
   790         args = []
       
   791         for name in ("version", "name", "value",
       
   792                      "port", "port_specified",
       
   793                      "domain", "domain_specified", "domain_initial_dot",
       
   794                      "path", "path_specified",
       
   795                      "secure", "expires", "discard", "comment", "comment_url",
       
   796                      ):
       
   797             attr = getattr(self, name)
       
   798             args.append("%s=%s" % (name, repr(attr)))
       
   799         args.append("rest=%s" % repr(self._rest))
       
   800         args.append("rfc2109=%s" % repr(self.rfc2109))
       
   801         return "Cookie(%s)" % ", ".join(args)
       
   802 
       
   803 
       
   804 class CookiePolicy:
       
   805     """Defines which cookies get accepted from and returned to server.
       
   806 
       
   807     May also modify cookies, though this is probably a bad idea.
       
   808 
       
   809     The subclass DefaultCookiePolicy defines the standard rules for Netscape
       
   810     and RFC 2965 cookies -- override that if you want a customised policy.
       
   811 
       
   812     """
       
   813     def set_ok(self, cookie, request):
       
   814         """Return true if (and only if) cookie should be accepted from server.
       
   815 
       
   816         Currently, pre-expired cookies never get this far -- the CookieJar
       
   817         class deletes such cookies itself.
       
   818 
       
   819         """
       
   820         raise NotImplementedError()
       
   821 
       
   822     def return_ok(self, cookie, request):
       
   823         """Return true if (and only if) cookie should be returned to server."""
       
   824         raise NotImplementedError()
       
   825 
       
   826     def domain_return_ok(self, domain, request):
       
   827         """Return false if cookies should not be returned, given cookie domain.
       
   828         """
       
   829         return True
       
   830 
       
   831     def path_return_ok(self, path, request):
       
   832         """Return false if cookies should not be returned, given cookie path.
       
   833         """
       
   834         return True
       
   835 
       
   836 
       
   837 class DefaultCookiePolicy(CookiePolicy):
       
   838     """Implements the standard rules for accepting and returning cookies."""
       
   839 
       
   840     DomainStrictNoDots = 1
       
   841     DomainStrictNonDomain = 2
       
   842     DomainRFC2965Match = 4
       
   843 
       
   844     DomainLiberal = 0
       
   845     DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
       
   846 
       
   847     def __init__(self,
       
   848                  blocked_domains=None, allowed_domains=None,
       
   849                  netscape=True, rfc2965=False,
       
   850                  rfc2109_as_netscape=None,
       
   851                  hide_cookie2=False,
       
   852                  strict_domain=False,
       
   853                  strict_rfc2965_unverifiable=True,
       
   854                  strict_ns_unverifiable=False,
       
   855                  strict_ns_domain=DomainLiberal,
       
   856                  strict_ns_set_initial_dollar=False,
       
   857                  strict_ns_set_path=False,
       
   858                  ):
       
   859         """Constructor arguments should be passed as keyword arguments only."""
       
   860         self.netscape = netscape
       
   861         self.rfc2965 = rfc2965
       
   862         self.rfc2109_as_netscape = rfc2109_as_netscape
       
   863         self.hide_cookie2 = hide_cookie2
       
   864         self.strict_domain = strict_domain
       
   865         self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
       
   866         self.strict_ns_unverifiable = strict_ns_unverifiable
       
   867         self.strict_ns_domain = strict_ns_domain
       
   868         self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
       
   869         self.strict_ns_set_path = strict_ns_set_path
       
   870 
       
   871         if blocked_domains is not None:
       
   872             self._blocked_domains = tuple(blocked_domains)
       
   873         else:
       
   874             self._blocked_domains = ()
       
   875 
       
   876         if allowed_domains is not None:
       
   877             allowed_domains = tuple(allowed_domains)
       
   878         self._allowed_domains = allowed_domains
       
   879 
       
   880     def blocked_domains(self):
       
   881         """Return the sequence of blocked domains (as a tuple)."""
       
   882         return self._blocked_domains
       
   883     def set_blocked_domains(self, blocked_domains):
       
   884         """Set the sequence of blocked domains."""
       
   885         self._blocked_domains = tuple(blocked_domains)
       
   886 
       
   887     def is_blocked(self, domain):
       
   888         for blocked_domain in self._blocked_domains:
       
   889             if user_domain_match(domain, blocked_domain):
       
   890                 return True
       
   891         return False
       
   892 
       
   893     def allowed_domains(self):
       
   894         """Return None, or the sequence of allowed domains (as a tuple)."""
       
   895         return self._allowed_domains
       
   896     def set_allowed_domains(self, allowed_domains):
       
   897         """Set the sequence of allowed domains, or None."""
       
   898         if allowed_domains is not None:
       
   899             allowed_domains = tuple(allowed_domains)
       
   900         self._allowed_domains = allowed_domains
       
   901 
       
   902     def is_not_allowed(self, domain):
       
   903         if self._allowed_domains is None:
       
   904             return False
       
   905         for allowed_domain in self._allowed_domains:
       
   906             if user_domain_match(domain, allowed_domain):
       
   907                 return False
       
   908         return True
       
   909 
       
   910     def set_ok(self, cookie, request):
       
   911         """
       
   912         If you override .set_ok(), be sure to call this method.  If it returns
       
   913         false, so should your subclass (assuming your subclass wants to be more
       
   914         strict about which cookies to accept).
       
   915 
       
   916         """
       
   917         _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
       
   918 
       
   919         assert cookie.name is not None
       
   920 
       
   921         for n in "version", "verifiability", "name", "path", "domain", "port":
       
   922             fn_name = "set_ok_"+n
       
   923             fn = getattr(self, fn_name)
       
   924             if not fn(cookie, request):
       
   925                 return False
       
   926 
       
   927         return True
       
   928 
       
   929     def set_ok_version(self, cookie, request):
       
   930         if cookie.version is None:
       
   931             # Version is always set to 0 by parse_ns_headers if it's a Netscape
       
   932             # cookie, so this must be an invalid RFC 2965 cookie.
       
   933             _debug("   Set-Cookie2 without version attribute (%s=%s)",
       
   934                    cookie.name, cookie.value)
       
   935             return False
       
   936         if cookie.version > 0 and not self.rfc2965:
       
   937             _debug("   RFC 2965 cookies are switched off")
       
   938             return False
       
   939         elif cookie.version == 0 and not self.netscape:
       
   940             _debug("   Netscape cookies are switched off")
       
   941             return False
       
   942         return True
       
   943 
       
   944     def set_ok_verifiability(self, cookie, request):
       
   945         if request.is_unverifiable() and is_third_party(request):
       
   946             if cookie.version > 0 and self.strict_rfc2965_unverifiable:
       
   947                 _debug("   third-party RFC 2965 cookie during "
       
   948                              "unverifiable transaction")
       
   949                 return False
       
   950             elif cookie.version == 0 and self.strict_ns_unverifiable:
       
   951                 _debug("   third-party Netscape cookie during "
       
   952                              "unverifiable transaction")
       
   953                 return False
       
   954         return True
       
   955 
       
   956     def set_ok_name(self, cookie, request):
       
   957         # Try and stop servers setting V0 cookies designed to hack other
       
   958         # servers that know both V0 and V1 protocols.
       
   959         if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
       
   960             cookie.name.startswith("$")):
       
   961             _debug("   illegal name (starts with '$'): '%s'", cookie.name)
       
   962             return False
       
   963         return True
       
   964 
       
   965     def set_ok_path(self, cookie, request):
       
   966         if cookie.path_specified:
       
   967             req_path = request_path(request)
       
   968             if ((cookie.version > 0 or
       
   969                  (cookie.version == 0 and self.strict_ns_set_path)) and
       
   970                 not req_path.startswith(cookie.path)):
       
   971                 _debug("   path attribute %s is not a prefix of request "
       
   972                        "path %s", cookie.path, req_path)
       
   973                 return False
       
   974         return True
       
   975 
       
   976     def set_ok_domain(self, cookie, request):
       
   977         if self.is_blocked(cookie.domain):
       
   978             _debug("   domain %s is in user block-list", cookie.domain)
       
   979             return False
       
   980         if self.is_not_allowed(cookie.domain):
       
   981             _debug("   domain %s is not in user allow-list", cookie.domain)
       
   982             return False
       
   983         if cookie.domain_specified:
       
   984             req_host, erhn = eff_request_host(request)
       
   985             domain = cookie.domain
       
   986             if self.strict_domain and (domain.count(".") >= 2):
       
   987                 # XXX This should probably be compared with the Konqueror
       
   988                 # (kcookiejar.cpp) and Mozilla implementations, but it's a
       
   989                 # losing battle.
       
   990                 i = domain.rfind(".")
       
   991                 j = domain.rfind(".", 0, i)
       
   992                 if j == 0:  # domain like .foo.bar
       
   993                     tld = domain[i+1:]
       
   994                     sld = domain[j+1:i]
       
   995                     if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
       
   996                        "gov", "mil", "int", "aero", "biz", "cat", "coop",
       
   997                        "info", "jobs", "mobi", "museum", "name", "pro",
       
   998                        "travel", "eu") and len(tld) == 2:
       
   999                         # domain like .co.uk
       
  1000                         _debug("   country-code second level domain %s", domain)
       
  1001                         return False
       
  1002             if domain.startswith("."):
       
  1003                 undotted_domain = domain[1:]
       
  1004             else:
       
  1005                 undotted_domain = domain
       
  1006             embedded_dots = (undotted_domain.find(".") >= 0)
       
  1007             if not embedded_dots and domain != ".local":
       
  1008                 _debug("   non-local domain %s contains no embedded dot",
       
  1009                        domain)
       
  1010                 return False
       
  1011             if cookie.version == 0:
       
  1012                 if (not erhn.endswith(domain) and
       
  1013                     (not erhn.startswith(".") and
       
  1014                      not ("."+erhn).endswith(domain))):
       
  1015                     _debug("   effective request-host %s (even with added "
       
  1016                            "initial dot) does not end end with %s",
       
  1017                            erhn, domain)
       
  1018                     return False
       
  1019             if (cookie.version > 0 or
       
  1020                 (self.strict_ns_domain & self.DomainRFC2965Match)):
       
  1021                 if not domain_match(erhn, domain):
       
  1022                     _debug("   effective request-host %s does not domain-match "
       
  1023                            "%s", erhn, domain)
       
  1024                     return False
       
  1025             if (cookie.version > 0 or
       
  1026                 (self.strict_ns_domain & self.DomainStrictNoDots)):
       
  1027                 host_prefix = req_host[:-len(domain)]
       
  1028                 if (host_prefix.find(".") >= 0 and
       
  1029                     not IPV4_RE.search(req_host)):
       
  1030                     _debug("   host prefix %s for domain %s contains a dot",
       
  1031                            host_prefix, domain)
       
  1032                     return False
       
  1033         return True
       
  1034 
       
  1035     def set_ok_port(self, cookie, request):
       
  1036         if cookie.port_specified:
       
  1037             req_port = request_port(request)
       
  1038             if req_port is None:
       
  1039                 req_port = "80"
       
  1040             else:
       
  1041                 req_port = str(req_port)
       
  1042             for p in cookie.port.split(","):
       
  1043                 try:
       
  1044                     int(p)
       
  1045                 except ValueError:
       
  1046                     _debug("   bad port %s (not numeric)", p)
       
  1047                     return False
       
  1048                 if p == req_port:
       
  1049                     break
       
  1050             else:
       
  1051                 _debug("   request port (%s) not found in %s",
       
  1052                        req_port, cookie.port)
       
  1053                 return False
       
  1054         return True
       
  1055 
       
  1056     def return_ok(self, cookie, request):
       
  1057         """
       
  1058         If you override .return_ok(), be sure to call this method.  If it
       
  1059         returns false, so should your subclass (assuming your subclass wants to
       
  1060         be more strict about which cookies to return).
       
  1061 
       
  1062         """
       
  1063         # Path has already been checked by .path_return_ok(), and domain
       
  1064         # blocking done by .domain_return_ok().
       
  1065         _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
       
  1066 
       
  1067         for n in "version", "verifiability", "secure", "expires", "port", "domain":
       
  1068             fn_name = "return_ok_"+n
       
  1069             fn = getattr(self, fn_name)
       
  1070             if not fn(cookie, request):
       
  1071                 return False
       
  1072         return True
       
  1073 
       
  1074     def return_ok_version(self, cookie, request):
       
  1075         if cookie.version > 0 and not self.rfc2965:
       
  1076             _debug("   RFC 2965 cookies are switched off")
       
  1077             return False
       
  1078         elif cookie.version == 0 and not self.netscape:
       
  1079             _debug("   Netscape cookies are switched off")
       
  1080             return False
       
  1081         return True
       
  1082 
       
  1083     def return_ok_verifiability(self, cookie, request):
       
  1084         if request.is_unverifiable() and is_third_party(request):
       
  1085             if cookie.version > 0 and self.strict_rfc2965_unverifiable:
       
  1086                 _debug("   third-party RFC 2965 cookie during unverifiable "
       
  1087                        "transaction")
       
  1088                 return False
       
  1089             elif cookie.version == 0 and self.strict_ns_unverifiable:
       
  1090                 _debug("   third-party Netscape cookie during unverifiable "
       
  1091                        "transaction")
       
  1092                 return False
       
  1093         return True
       
  1094 
       
  1095     def return_ok_secure(self, cookie, request):
       
  1096         if cookie.secure and request.get_type() != "https":
       
  1097             _debug("   secure cookie with non-secure request")
       
  1098             return False
       
  1099         return True
       
  1100 
       
  1101     def return_ok_expires(self, cookie, request):
       
  1102         if cookie.is_expired(self._now):
       
  1103             _debug("   cookie expired")
       
  1104             return False
       
  1105         return True
       
  1106 
       
  1107     def return_ok_port(self, cookie, request):
       
  1108         if cookie.port:
       
  1109             req_port = request_port(request)
       
  1110             if req_port is None:
       
  1111                 req_port = "80"
       
  1112             for p in cookie.port.split(","):
       
  1113                 if p == req_port:
       
  1114                     break
       
  1115             else:
       
  1116                 _debug("   request port %s does not match cookie port %s",
       
  1117                        req_port, cookie.port)
       
  1118                 return False
       
  1119         return True
       
  1120 
       
  1121     def return_ok_domain(self, cookie, request):
       
  1122         req_host, erhn = eff_request_host(request)
       
  1123         domain = cookie.domain
       
  1124 
       
  1125         # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
       
  1126         if (cookie.version == 0 and
       
  1127             (self.strict_ns_domain & self.DomainStrictNonDomain) and
       
  1128             not cookie.domain_specified and domain != erhn):
       
  1129             _debug("   cookie with unspecified domain does not string-compare "
       
  1130                    "equal to request domain")
       
  1131             return False
       
  1132 
       
  1133         if cookie.version > 0 and not domain_match(erhn, domain):
       
  1134             _debug("   effective request-host name %s does not domain-match "
       
  1135                    "RFC 2965 cookie domain %s", erhn, domain)
       
  1136             return False
       
  1137         if cookie.version == 0 and not ("."+erhn).endswith(domain):
       
  1138             _debug("   request-host %s does not match Netscape cookie domain "
       
  1139                    "%s", req_host, domain)
       
  1140             return False
       
  1141         return True
       
  1142 
       
  1143     def domain_return_ok(self, domain, request):
       
  1144         # Liberal check of.  This is here as an optimization to avoid
       
  1145         # having to load lots of MSIE cookie files unless necessary.
       
  1146         req_host, erhn = eff_request_host(request)
       
  1147         if not req_host.startswith("."):
       
  1148             req_host = "."+req_host
       
  1149         if not erhn.startswith("."):
       
  1150             erhn = "."+erhn
       
  1151         if not (req_host.endswith(domain) or erhn.endswith(domain)):
       
  1152             #_debug("   request domain %s does not match cookie domain %s",
       
  1153             #       req_host, domain)
       
  1154             return False
       
  1155 
       
  1156         if self.is_blocked(domain):
       
  1157             _debug("   domain %s is in user block-list", domain)
       
  1158             return False
       
  1159         if self.is_not_allowed(domain):
       
  1160             _debug("   domain %s is not in user allow-list", domain)
       
  1161             return False
       
  1162 
       
  1163         return True
       
  1164 
       
  1165     def path_return_ok(self, path, request):
       
  1166         _debug("- checking cookie path=%s", path)
       
  1167         req_path = request_path(request)
       
  1168         if not req_path.startswith(path):
       
  1169             _debug("  %s does not path-match %s", req_path, path)
       
  1170             return False
       
  1171         return True
       
  1172 
       
  1173 
       
  1174 def vals_sorted_by_key(adict):
       
  1175     keys = adict.keys()
       
  1176     keys.sort()
       
  1177     return map(adict.get, keys)
       
  1178 
       
  1179 def deepvalues(mapping):
       
  1180     """Iterates over nested mapping, depth-first, in sorted order by key."""
       
  1181     values = vals_sorted_by_key(mapping)
       
  1182     for obj in values:
       
  1183         mapping = False
       
  1184         try:
       
  1185             obj.items
       
  1186         except AttributeError:
       
  1187             pass
       
  1188         else:
       
  1189             mapping = True
       
  1190             for subobj in deepvalues(obj):
       
  1191                 yield subobj
       
  1192         if not mapping:
       
  1193             yield obj
       
  1194 
       
  1195 
       
  1196 # Used as second parameter to dict.get() method, to distinguish absent
       
  1197 # dict key from one with a None value.
       
  1198 class Absent: pass
       
  1199 
       
  1200 class CookieJar:
       
  1201     """Collection of HTTP cookies.
       
  1202 
       
  1203     You may not need to know about this class: try
       
  1204     urllib2.build_opener(HTTPCookieProcessor).open(url).
       
  1205 
       
  1206     """
       
  1207 
       
  1208     non_word_re = re.compile(r"\W")
       
  1209     quote_re = re.compile(r"([\"\\])")
       
  1210     strict_domain_re = re.compile(r"\.?[^.]*")
       
  1211     domain_re = re.compile(r"[^.]*")
       
  1212     dots_re = re.compile(r"^\.+")
       
  1213 
       
  1214     magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
       
  1215 
       
  1216     def __init__(self, policy=None):
       
  1217         if policy is None:
       
  1218             policy = DefaultCookiePolicy()
       
  1219         self._policy = policy
       
  1220 
       
  1221         self._cookies_lock = _threading.RLock()
       
  1222         self._cookies = {}
       
  1223 
       
    def set_policy(self, policy):
        """Replace the cookie policy consulted by this jar with policy."""
        self._policy = policy
       
  1226 
       
  1227     def _cookies_for_domain(self, domain, request):
       
  1228         cookies = []
       
  1229         if not self._policy.domain_return_ok(domain, request):
       
  1230             return []
       
  1231         _debug("Checking %s for cookies to return", domain)
       
  1232         cookies_by_path = self._cookies[domain]
       
  1233         for path in cookies_by_path.keys():
       
  1234             if not self._policy.path_return_ok(path, request):
       
  1235                 continue
       
  1236             cookies_by_name = cookies_by_path[path]
       
  1237             for cookie in cookies_by_name.values():
       
  1238                 if not self._policy.return_ok(cookie, request):
       
  1239                     _debug("   not returning cookie")
       
  1240                     continue
       
  1241                 _debug("   it's a match")
       
  1242                 cookies.append(cookie)
       
  1243         return cookies
       
  1244 
       
  1245     def _cookies_for_request(self, request):
       
  1246         """Return a list of cookies to be returned to server."""
       
  1247         cookies = []
       
  1248         for domain in self._cookies.keys():
       
  1249             cookies.extend(self._cookies_for_domain(domain, request))
       
  1250         return cookies
       
  1251 
       
  1252     def _cookie_attrs(self, cookies):
       
  1253         """Return a list of cookie-attributes to be returned to server.
       
  1254 
       
  1255         like ['foo="bar"; $Path="/"', ...]
       
  1256 
       
  1257         The $Version attribute is also added when appropriate (currently only
       
  1258         once per request).
       
  1259 
       
  1260         """
       
  1261         # add cookies in order of most specific (ie. longest) path first
       
  1262         cookies.sort(key=lambda arg: len(arg.path), reverse=True)
       
  1263 
       
  1264         version_set = False
       
  1265 
       
  1266         attrs = []
       
  1267         for cookie in cookies:
       
  1268             # set version of Cookie header
       
  1269             # XXX
       
  1270             # What should it be if multiple matching Set-Cookie headers have
       
  1271             #  different versions themselves?
       
  1272             # Answer: there is no answer; was supposed to be settled by
       
  1273             #  RFC 2965 errata, but that may never appear...
       
  1274             version = cookie.version
       
  1275             if not version_set:
       
  1276                 version_set = True
       
  1277                 if version > 0:
       
  1278                     attrs.append("$Version=%s" % version)
       
  1279 
       
  1280             # quote cookie value if necessary
       
  1281             # (not for Netscape protocol, which already has any quotes
       
  1282             #  intact, due to the poorly-specified Netscape Cookie: syntax)
       
  1283             if ((cookie.value is not None) and
       
  1284                 self.non_word_re.search(cookie.value) and version > 0):
       
  1285                 value = self.quote_re.sub(r"\\\1", cookie.value)
       
  1286             else:
       
  1287                 value = cookie.value
       
  1288 
       
  1289             # add cookie-attributes to be returned in Cookie header
       
  1290             if cookie.value is None:
       
  1291                 attrs.append(cookie.name)
       
  1292             else:
       
  1293                 attrs.append("%s=%s" % (cookie.name, value))
       
  1294             if version > 0:
       
  1295                 if cookie.path_specified:
       
  1296                     attrs.append('$Path="%s"' % cookie.path)
       
  1297                 if cookie.domain.startswith("."):
       
  1298                     domain = cookie.domain
       
  1299                     if (not cookie.domain_initial_dot and
       
  1300                         domain.startswith(".")):
       
  1301                         domain = domain[1:]
       
  1302                     attrs.append('$Domain="%s"' % domain)
       
  1303                 if cookie.port is not None:
       
  1304                     p = "$Port"
       
  1305                     if cookie.port_specified:
       
  1306                         p = p + ('="%s"' % cookie.port)
       
  1307                     attrs.append(p)
       
  1308 
       
  1309         return attrs
       
  1310 
       
  1311     def add_cookie_header(self, request):
       
  1312         """Add correct Cookie: header to request (urllib2.Request object).
       
  1313 
       
  1314         The Cookie2 header is also added unless policy.hide_cookie2 is true.
       
  1315 
       
  1316         """
       
  1317         _debug("add_cookie_header")
       
  1318         self._cookies_lock.acquire()
       
  1319         try:
       
  1320 
       
  1321             self._policy._now = self._now = int(time.time())
       
  1322 
       
  1323             cookies = self._cookies_for_request(request)
       
  1324 
       
  1325             attrs = self._cookie_attrs(cookies)
       
  1326             if attrs:
       
  1327                 if not request.has_header("Cookie"):
       
  1328                     request.add_unredirected_header(
       
  1329                         "Cookie", "; ".join(attrs))
       
  1330 
       
  1331             # if necessary, advertise that we know RFC 2965
       
  1332             if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
       
  1333                 not request.has_header("Cookie2")):
       
  1334                 for cookie in cookies:
       
  1335                     if cookie.version != 1:
       
  1336                         request.add_unredirected_header("Cookie2", '$Version="1"')
       
  1337                         break
       
  1338 
       
  1339         finally:
       
  1340             self._cookies_lock.release()
       
  1341 
       
  1342         self.clear_expired_cookies()
       
  1343 
       
  1344     def _normalized_cookie_tuples(self, attrs_set):
       
  1345         """Return list of tuples containing normalised cookie information.
       
  1346 
       
  1347         attrs_set is the list of lists of key,value pairs extracted from
       
  1348         the Set-Cookie or Set-Cookie2 headers.
       
  1349 
       
  1350         Tuples are name, value, standard, rest, where name and value are the
       
  1351         cookie name and value, standard is a dictionary containing the standard
       
  1352         cookie-attributes (discard, secure, version, expires or max-age,
       
  1353         domain, path and port) and rest is a dictionary containing the rest of
       
  1354         the cookie-attributes.
       
  1355 
       
  1356         """
       
  1357         cookie_tuples = []
       
  1358 
       
  1359         boolean_attrs = "discard", "secure"
       
  1360         value_attrs = ("version",
       
  1361                        "expires", "max-age",
       
  1362                        "domain", "path", "port",
       
  1363                        "comment", "commenturl")
       
  1364 
       
  1365         for cookie_attrs in attrs_set:
       
  1366             name, value = cookie_attrs[0]
       
  1367 
       
  1368             # Build dictionary of standard cookie-attributes (standard) and
       
  1369             # dictionary of other cookie-attributes (rest).
       
  1370 
       
  1371             # Note: expiry time is normalised to seconds since epoch.  V0
       
  1372             # cookies should have the Expires cookie-attribute, and V1 cookies
       
  1373             # should have Max-Age, but since V1 includes RFC 2109 cookies (and
       
  1374             # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
       
  1375             # accept either (but prefer Max-Age).
       
  1376             max_age_set = False
       
  1377 
       
  1378             bad_cookie = False
       
  1379 
       
  1380             standard = {}
       
  1381             rest = {}
       
  1382             for k, v in cookie_attrs[1:]:
       
  1383                 lc = k.lower()
       
  1384                 # don't lose case distinction for unknown fields
       
  1385                 if lc in value_attrs or lc in boolean_attrs:
       
  1386                     k = lc
       
  1387                 if k in boolean_attrs and v is None:
       
  1388                     # boolean cookie-attribute is present, but has no value
       
  1389                     # (like "discard", rather than "port=80")
       
  1390                     v = True
       
  1391                 if k in standard:
       
  1392                     # only first value is significant
       
  1393                     continue
       
  1394                 if k == "domain":
       
  1395                     if v is None:
       
  1396                         _debug("   missing value for domain attribute")
       
  1397                         bad_cookie = True
       
  1398                         break
       
  1399                     # RFC 2965 section 3.3.3
       
  1400                     v = v.lower()
       
  1401                 if k == "expires":
       
  1402                     if max_age_set:
       
  1403                         # Prefer max-age to expires (like Mozilla)
       
  1404                         continue
       
  1405                     if v is None:
       
  1406                         _debug("   missing or invalid value for expires "
       
  1407                               "attribute: treating as session cookie")
       
  1408                         continue
       
  1409                 if k == "max-age":
       
  1410                     max_age_set = True
       
  1411                     try:
       
  1412                         v = int(v)
       
  1413                     except ValueError:
       
  1414                         _debug("   missing or invalid (non-numeric) value for "
       
  1415                               "max-age attribute")
       
  1416                         bad_cookie = True
       
  1417                         break
       
  1418                     # convert RFC 2965 Max-Age to seconds since epoch
       
  1419                     # XXX Strictly you're supposed to follow RFC 2616
       
  1420                     #   age-calculation rules.  Remember that zero Max-Age is a
       
  1421                     #   is a request to discard (old and new) cookie, though.
       
  1422                     k = "expires"
       
  1423                     v = self._now + v
       
  1424                 if (k in value_attrs) or (k in boolean_attrs):
       
  1425                     if (v is None and
       
  1426                         k not in ("port", "comment", "commenturl")):
       
  1427                         _debug("   missing value for %s attribute" % k)
       
  1428                         bad_cookie = True
       
  1429                         break
       
  1430                     standard[k] = v
       
  1431                 else:
       
  1432                     rest[k] = v
       
  1433 
       
  1434             if bad_cookie:
       
  1435                 continue
       
  1436 
       
  1437             cookie_tuples.append((name, value, standard, rest))
       
  1438 
       
  1439         return cookie_tuples
       
  1440 
       
  1441     def _cookie_from_cookie_tuple(self, tup, request):
       
  1442         # standard is dict of standard cookie-attributes, rest is dict of the
       
  1443         # rest of them
       
  1444         name, value, standard, rest = tup
       
  1445 
       
  1446         domain = standard.get("domain", Absent)
       
  1447         path = standard.get("path", Absent)
       
  1448         port = standard.get("port", Absent)
       
  1449         expires = standard.get("expires", Absent)
       
  1450 
       
  1451         # set the easy defaults
       
  1452         version = standard.get("version", None)
       
  1453         if version is not None: version = int(version)
       
  1454         secure = standard.get("secure", False)
       
  1455         # (discard is also set if expires is Absent)
       
  1456         discard = standard.get("discard", False)
       
  1457         comment = standard.get("comment", None)
       
  1458         comment_url = standard.get("commenturl", None)
       
  1459 
       
  1460         # set default path
       
  1461         if path is not Absent and path != "":
       
  1462             path_specified = True
       
  1463             path = escape_path(path)
       
  1464         else:
       
  1465             path_specified = False
       
  1466             path = request_path(request)
       
  1467             i = path.rfind("/")
       
  1468             if i != -1:
       
  1469                 if version == 0:
       
  1470                     # Netscape spec parts company from reality here
       
  1471                     path = path[:i]
       
  1472                 else:
       
  1473                     path = path[:i+1]
       
  1474             if len(path) == 0: path = "/"
       
  1475 
       
  1476         # set default domain
       
  1477         domain_specified = domain is not Absent
       
  1478         # but first we have to remember whether it starts with a dot
       
  1479         domain_initial_dot = False
       
  1480         if domain_specified:
       
  1481             domain_initial_dot = bool(domain.startswith("."))
       
  1482         if domain is Absent:
       
  1483             req_host, erhn = eff_request_host(request)
       
  1484             domain = erhn
       
  1485         elif not domain.startswith("."):
       
  1486             domain = "."+domain
       
  1487 
       
  1488         # set default port
       
  1489         port_specified = False
       
  1490         if port is not Absent:
       
  1491             if port is None:
       
  1492                 # Port attr present, but has no value: default to request port.
       
  1493                 # Cookie should then only be sent back on that port.
       
  1494                 port = request_port(request)
       
  1495             else:
       
  1496                 port_specified = True
       
  1497                 port = re.sub(r"\s+", "", port)
       
  1498         else:
       
  1499             # No port attr present.  Cookie can be sent back on any port.
       
  1500             port = None
       
  1501 
       
  1502         # set default expires and discard
       
  1503         if expires is Absent:
       
  1504             expires = None
       
  1505             discard = True
       
  1506         elif expires <= self._now:
       
  1507             # Expiry date in past is request to delete cookie.  This can't be
       
  1508             # in DefaultCookiePolicy, because can't delete cookies there.
       
  1509             try:
       
  1510                 self.clear(domain, path, name)
       
  1511             except KeyError:
       
  1512                 pass
       
  1513             _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
       
  1514                    domain, path, name)
       
  1515             return None
       
  1516 
       
  1517         return Cookie(version,
       
  1518                       name, value,
       
  1519                       port, port_specified,
       
  1520                       domain, domain_specified, domain_initial_dot,
       
  1521                       path, path_specified,
       
  1522                       secure,
       
  1523                       expires,
       
  1524                       discard,
       
  1525                       comment,
       
  1526                       comment_url,
       
  1527                       rest)
       
  1528 
       
  1529     def _cookies_from_attrs_set(self, attrs_set, request):
       
  1530         cookie_tuples = self._normalized_cookie_tuples(attrs_set)
       
  1531 
       
  1532         cookies = []
       
  1533         for tup in cookie_tuples:
       
  1534             cookie = self._cookie_from_cookie_tuple(tup, request)
       
  1535             if cookie: cookies.append(cookie)
       
  1536         return cookies
       
  1537 
       
  1538     def _process_rfc2109_cookies(self, cookies):
       
  1539         rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
       
  1540         if rfc2109_as_ns is None:
       
  1541             rfc2109_as_ns = not self._policy.rfc2965
       
  1542         for cookie in cookies:
       
  1543             if cookie.version == 1:
       
  1544                 cookie.rfc2109 = True
       
  1545                 if rfc2109_as_ns:
       
  1546                     # treat 2109 cookies as Netscape cookies rather than
       
  1547                     # as RFC2965 cookies
       
  1548                     cookie.version = 0
       
  1549 
       
  1550     def make_cookies(self, response, request):
       
  1551         """Return sequence of Cookie objects extracted from response object."""
       
  1552         # get cookie-attributes for RFC 2965 and Netscape protocols
       
  1553         headers = response.info()
       
  1554         rfc2965_hdrs = headers.getheaders("Set-Cookie2")
       
  1555         ns_hdrs = headers.getheaders("Set-Cookie")
       
  1556 
       
  1557         rfc2965 = self._policy.rfc2965
       
  1558         netscape = self._policy.netscape
       
  1559 
       
  1560         if ((not rfc2965_hdrs and not ns_hdrs) or
       
  1561             (not ns_hdrs and not rfc2965) or
       
  1562             (not rfc2965_hdrs and not netscape) or
       
  1563             (not netscape and not rfc2965)):
       
  1564             return []  # no relevant cookie headers: quick exit
       
  1565 
       
  1566         try:
       
  1567             cookies = self._cookies_from_attrs_set(
       
  1568                 split_header_words(rfc2965_hdrs), request)
       
  1569         except Exception:
       
  1570             _warn_unhandled_exception()
       
  1571             cookies = []
       
  1572 
       
  1573         if ns_hdrs and netscape:
       
  1574             try:
       
  1575                 # RFC 2109 and Netscape cookies
       
  1576                 ns_cookies = self._cookies_from_attrs_set(
       
  1577                     parse_ns_headers(ns_hdrs), request)
       
  1578             except Exception:
       
  1579                 _warn_unhandled_exception()
       
  1580                 ns_cookies = []
       
  1581             self._process_rfc2109_cookies(ns_cookies)
       
  1582 
       
  1583             # Look for Netscape cookies (from Set-Cookie headers) that match
       
  1584             # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
       
  1585             # For each match, keep the RFC 2965 cookie and ignore the Netscape
       
  1586             # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
       
  1587             # bundled in with the Netscape cookies for this purpose, which is
       
  1588             # reasonable behaviour.
       
  1589             if rfc2965:
       
  1590                 lookup = {}
       
  1591                 for cookie in cookies:
       
  1592                     lookup[(cookie.domain, cookie.path, cookie.name)] = None
       
  1593 
       
  1594                 def no_matching_rfc2965(ns_cookie, lookup=lookup):
       
  1595                     key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
       
  1596                     return key not in lookup
       
  1597                 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
       
  1598 
       
  1599             if ns_cookies:
       
  1600                 cookies.extend(ns_cookies)
       
  1601 
       
  1602         return cookies
       
  1603 
       
  1604     def set_cookie_if_ok(self, cookie, request):
       
  1605         """Set a cookie if policy says it's OK to do so."""
       
  1606         self._cookies_lock.acquire()
       
  1607         try:
       
  1608             self._policy._now = self._now = int(time.time())
       
  1609 
       
  1610             if self._policy.set_ok(cookie, request):
       
  1611                 self.set_cookie(cookie)
       
  1612 
       
  1613 
       
  1614         finally:
       
  1615             self._cookies_lock.release()
       
  1616 
       
  1617     def set_cookie(self, cookie):
       
  1618         """Set a cookie, without checking whether or not it should be set."""
       
  1619         c = self._cookies
       
  1620         self._cookies_lock.acquire()
       
  1621         try:
       
  1622             if cookie.domain not in c: c[cookie.domain] = {}
       
  1623             c2 = c[cookie.domain]
       
  1624             if cookie.path not in c2: c2[cookie.path] = {}
       
  1625             c3 = c2[cookie.path]
       
  1626             c3[cookie.name] = cookie
       
  1627         finally:
       
  1628             self._cookies_lock.release()
       
  1629 
       
  1630     def extract_cookies(self, response, request):
       
  1631         """Extract cookies from response, where allowable given the request."""
       
  1632         _debug("extract_cookies: %s", response.info())
       
  1633         self._cookies_lock.acquire()
       
  1634         try:
       
  1635             self._policy._now = self._now = int(time.time())
       
  1636 
       
  1637             for cookie in self.make_cookies(response, request):
       
  1638                 if self._policy.set_ok(cookie, request):
       
  1639                     _debug(" setting cookie: %s", cookie)
       
  1640                     self.set_cookie(cookie)
       
  1641         finally:
       
  1642             self._cookies_lock.release()
       
  1643 
       
  1644     def clear(self, domain=None, path=None, name=None):
       
  1645         """Clear some cookies.
       
  1646 
       
  1647         Invoking this method without arguments will clear all cookies.  If
       
  1648         given a single argument, only cookies belonging to that domain will be
       
  1649         removed.  If given two arguments, cookies belonging to the specified
       
  1650         path within that domain are removed.  If given three arguments, then
       
  1651         the cookie with the specified name, path and domain is removed.
       
  1652 
       
  1653         Raises KeyError if no matching cookie exists.
       
  1654 
       
  1655         """
       
  1656         if name is not None:
       
  1657             if (domain is None) or (path is None):
       
  1658                 raise ValueError(
       
  1659                     "domain and path must be given to remove a cookie by name")
       
  1660             del self._cookies[domain][path][name]
       
  1661         elif path is not None:
       
  1662             if domain is None:
       
  1663                 raise ValueError(
       
  1664                     "domain must be given to remove cookies by path")
       
  1665             del self._cookies[domain][path]
       
  1666         elif domain is not None:
       
  1667             del self._cookies[domain]
       
  1668         else:
       
  1669             self._cookies = {}
       
  1670 
       
  1671     def clear_session_cookies(self):
       
  1672         """Discard all session cookies.
       
  1673 
       
  1674         Note that the .save() method won't save session cookies anyway, unless
       
  1675         you ask otherwise by passing a true ignore_discard argument.
       
  1676 
       
  1677         """
       
  1678         self._cookies_lock.acquire()
       
  1679         try:
       
  1680             for cookie in self:
       
  1681                 if cookie.discard:
       
  1682                     self.clear(cookie.domain, cookie.path, cookie.name)
       
  1683         finally:
       
  1684             self._cookies_lock.release()
       
  1685 
       
  1686     def clear_expired_cookies(self):
       
  1687         """Discard all expired cookies.
       
  1688 
       
  1689         You probably don't need to call this method: expired cookies are never
       
  1690         sent back to the server (provided you're using DefaultCookiePolicy),
       
  1691         this method is called by CookieJar itself every so often, and the
       
  1692         .save() method won't save expired cookies anyway (unless you ask
       
  1693         otherwise by passing a true ignore_expires argument).
       
  1694 
       
  1695         """
       
  1696         self._cookies_lock.acquire()
       
  1697         try:
       
  1698             now = time.time()
       
  1699             for cookie in self:
       
  1700                 if cookie.is_expired(now):
       
  1701                     self.clear(cookie.domain, cookie.path, cookie.name)
       
  1702         finally:
       
  1703             self._cookies_lock.release()
       
  1704 
       
  1705     def __iter__(self):
       
  1706         return deepvalues(self._cookies)
       
  1707 
       
  1708     def __len__(self):
       
  1709         """Return number of contained cookies."""
       
  1710         i = 0
       
  1711         for cookie in self: i = i + 1
       
  1712         return i
       
  1713 
       
  1714     def __repr__(self):
       
  1715         r = []
       
  1716         for cookie in self: r.append(repr(cookie))
       
  1717         return "<%s[%s]>" % (self.__class__, ", ".join(r))
       
  1718 
       
  1719     def __str__(self):
       
  1720         r = []
       
  1721         for cookie in self: r.append(str(cookie))
       
  1722         return "<%s[%s]>" % (self.__class__, ", ".join(r))
       
  1723 
       
  1724 
       
  1725 # derives from IOError for backwards-compatibility with Python 2.4.0
       
  1726 class LoadError(IOError): pass
       
  1727 
       
  1728 class FileCookieJar(CookieJar):
       
  1729     """CookieJar that can be loaded from and saved to a file."""
       
  1730 
       
  1731     def __init__(self, filename=None, delayload=False, policy=None):
       
  1732         """
       
  1733         Cookies are NOT loaded from the named file until either the .load() or
       
  1734         .revert() method is called.
       
  1735 
       
  1736         """
       
  1737         CookieJar.__init__(self, policy)
       
  1738         if filename is not None:
       
  1739             try:
       
  1740                 filename+""
       
  1741             except:
       
  1742                 raise ValueError("filename must be string-like")
       
  1743         self.filename = filename
       
  1744         self.delayload = bool(delayload)
       
  1745 
       
  1746     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
       
  1747         """Save cookies to a file."""
       
  1748         raise NotImplementedError()
       
  1749 
       
  1750     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
       
  1751         """Load cookies from a file."""
       
  1752         if filename is None:
       
  1753             if self.filename is not None: filename = self.filename
       
  1754             else: raise ValueError(MISSING_FILENAME_TEXT)
       
  1755 
       
  1756         f = open(filename)
       
  1757         try:
       
  1758             self._really_load(f, filename, ignore_discard, ignore_expires)
       
  1759         finally:
       
  1760             f.close()
       
  1761 
       
  1762     def revert(self, filename=None,
       
  1763                ignore_discard=False, ignore_expires=False):
       
  1764         """Clear all cookies and reload cookies from a saved file.
       
  1765 
       
  1766         Raises LoadError (or IOError) if reversion is not successful; the
       
  1767         object's state will not be altered if this happens.
       
  1768 
       
  1769         """
       
  1770         if filename is None:
       
  1771             if self.filename is not None: filename = self.filename
       
  1772             else: raise ValueError(MISSING_FILENAME_TEXT)
       
  1773 
       
  1774         self._cookies_lock.acquire()
       
  1775         try:
       
  1776 
       
  1777             old_state = copy.deepcopy(self._cookies)
       
  1778             self._cookies = {}
       
  1779             try:
       
  1780                 self.load(filename, ignore_discard, ignore_expires)
       
  1781             except (LoadError, IOError):
       
  1782                 self._cookies = old_state
       
  1783                 raise
       
  1784 
       
  1785         finally:
       
  1786             self._cookies_lock.release()
       
  1787 
       
  1788 from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
       
  1789 from _MozillaCookieJar import MozillaCookieJar