|
1 """HTTP cookie handling for web clients. |
|
2 |
|
3 This module has (now fairly distant) origins in Gisle Aas' Perl module |
|
4 HTTP::Cookies, from the libwww-perl library. |
|
5 |
|
6 Docstrings, comments and debug strings in this code refer to the |
|
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish |
|
8 them clearly from Python attributes. |
|
9 |
|
10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not |
|
11 distributed with the Python standard library, but are available from |
|
12 http://wwwsearch.sf.net/): |
|
13 |
|
14 CookieJar____ |
|
15 / \ \ |
|
16 FileCookieJar \ \ |
|
17 / | \ \ \ |
|
18 MozillaCookieJar | LWPCookieJar \ \ |
|
19 | | \ |
|
20 | ---MSIEBase | \ |
|
21 | / | | \ |
|
22 | / MSIEDBCookieJar BSDDBCookieJar |
|
23 |/ |
|
24 MSIECookieJar |
|
25 |
|
26 """ |
|
27 |
|
28 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', |
|
29 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError', |
|
30 'MozillaCookieJar'] |
|
31 |
|
32 import re, urlparse, copy, time, urllib |
|
33 try: |
|
34 import threading as _threading |
|
35 except ImportError: |
|
36 import dummy_threading as _threading |
|
37 import httplib # only for the default HTTP port |
|
38 from calendar import timegm |
|
39 |
|
40 debug = False # set to True to enable debugging via the logging module |
|
41 logger = None |
|
42 |
|
43 def _debug(*args): |
|
44 if not debug: |
|
45 return |
|
46 global logger |
|
47 if not logger: |
|
48 import logging |
|
49 logger = logging.getLogger("cookielib") |
|
50 return logger.debug(*args) |
|
51 |
|
52 |
|
53 DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT) |
|
54 MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " |
|
55 "instance initialised with one)") |
|
56 |
|
57 def _warn_unhandled_exception(): |
|
58 # There are a few catch-all except: statements in this module, for |
|
59 # catching input that's bad in unexpected ways. Warn if any |
|
60 # exceptions are caught there. |
|
61 import warnings, traceback, StringIO |
|
62 f = StringIO.StringIO() |
|
63 traceback.print_exc(None, f) |
|
64 msg = f.getvalue() |
|
65 warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2) |
|
66 |
|
67 |
|
68 # Date/time conversion |
|
69 # ----------------------------------------------------------------------------- |
|
70 |
|
71 EPOCH_YEAR = 1970 |
|
72 def _timegm(tt): |
|
73 year, month, mday, hour, min, sec = tt[:6] |
|
74 if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and |
|
75 (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): |
|
76 return timegm(tt) |
|
77 else: |
|
78 return None |
|
79 |
|
80 DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] |
|
81 MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", |
|
82 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] |
|
83 MONTHS_LOWER = [] |
|
84 for month in MONTHS: MONTHS_LOWER.append(month.lower()) |
|
85 |
|
86 def time2isoz(t=None): |
|
87 """Return a string representing time in seconds since epoch, t. |
|
88 |
|
89 If the function is called without an argument, it will use the current |
|
90 time. |
|
91 |
|
92 The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", |
|
93 representing Universal Time (UTC, aka GMT). An example of this format is: |
|
94 |
|
95 1994-11-24 08:49:37Z |
|
96 |
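    A doctest-style example (added for illustration; epoch second 0 is used
    so that the output is deterministic):

    >>> time2isoz(0)
    '1970-01-01 00:00:00Z'
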
|
97 """ |
|
98 if t is None: t = time.time() |
|
99 year, mon, mday, hour, min, sec = time.gmtime(t)[:6] |
|
100 return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( |
|
101 year, mon, mday, hour, min, sec) |
|
102 |
|
103 def time2netscape(t=None): |
|
104 """Return a string representing time in seconds since epoch, t. |
|
105 |
|
106 If the function is called without an argument, it will use the current |
|
107 time. |
|
108 |
|
109 The format of the returned string is like this: |
|
110 |
|
111 Wed, DD-Mon-YYYY HH:MM:SS GMT |
|
112 |
|
113 """ |
|
114 if t is None: t = time.time() |
|
115 year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] |
|
116     return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( |
|
117 DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) |
|
118 |
|
119 |
|
120 UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} |
|
121 |
|
122 TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") |
|
123 def offset_from_tz_string(tz): |
|
124 offset = None |
|
125 if tz in UTC_ZONES: |
|
126 offset = 0 |
|
127 else: |
|
128 m = TIMEZONE_RE.search(tz) |
|
129 if m: |
|
130 offset = 3600 * int(m.group(2)) |
|
131 if m.group(3): |
|
132 offset = offset + 60 * int(m.group(3)) |
|
133 if m.group(1) == '-': |
|
134 offset = -offset |
|
135 return offset |
|
136 |
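# Illustrative examples (not part of the original module); the result is the
# offset in seconds east of UTC, or None if the string is not understood:
#
#     offset_from_tz_string("GMT")    -> 0
#     offset_from_tz_string("-0500")  -> -18000
#     offset_from_tz_string("junk")   -> None
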
|
137 def _str2time(day, mon, yr, hr, min, sec, tz): |
|
138 # translate month name to number |
|
139 # month numbers start with 1 (January) |
|
140 try: |
|
141 mon = MONTHS_LOWER.index(mon.lower())+1 |
|
142 except ValueError: |
|
143 # maybe it's already a number |
|
144 try: |
|
145 imon = int(mon) |
|
146 except ValueError: |
|
147 return None |
|
148 if 1 <= imon <= 12: |
|
149 mon = imon |
|
150 else: |
|
151 return None |
|
152 |
|
153 # make sure clock elements are defined |
|
154 if hr is None: hr = 0 |
|
155 if min is None: min = 0 |
|
156 if sec is None: sec = 0 |
|
157 |
|
158 yr = int(yr) |
|
159 day = int(day) |
|
160 hr = int(hr) |
|
161 min = int(min) |
|
162 sec = int(sec) |
|
163 |
|
164 if yr < 1000: |
|
165 # find "obvious" year |
|
166 cur_yr = time.localtime(time.time())[0] |
|
167 m = cur_yr % 100 |
|
168 tmp = yr |
|
169 yr = yr + cur_yr - m |
|
170 m = m - tmp |
|
171 if abs(m) > 50: |
|
172 if m > 0: yr = yr + 100 |
|
173 else: yr = yr - 100 |
|
174 |
|
175 # convert UTC time tuple to seconds since epoch (not timezone-adjusted) |
|
176 t = _timegm((yr, mon, day, hr, min, sec, tz)) |
|
177 |
|
178 if t is not None: |
|
179 # adjust time using timezone string, to get absolute time since epoch |
|
180 if tz is None: |
|
181 tz = "UTC" |
|
182 tz = tz.upper() |
|
183 offset = offset_from_tz_string(tz) |
|
184 if offset is None: |
|
185 return None |
|
186 t = t - offset |
|
187 |
|
188 return t |
|
189 |
|
190 STRICT_DATE_RE = re.compile( |
|
191 r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " |
|
192     r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") |
|
193 WEEKDAY_RE = re.compile( |
|
194 r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) |
|
195 LOOSE_HTTP_DATE_RE = re.compile( |
|
196 r"""^ |
|
197 (\d\d?) # day |
|
198 (?:\s+|[-\/]) |
|
199 (\w+) # month |
|
200 (?:\s+|[-\/]) |
|
201 (\d+) # year |
|
202 (?: |
|
203 (?:\s+|:) # separator before clock |
|
204 (\d\d?):(\d\d) # hour:min |
|
205 (?::(\d\d))? # optional seconds |
|
206 )? # optional clock |
|
207 \s* |
|
208 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone |
|
209 \s* |
|
210 (?:\(\w+\))? # ASCII representation of timezone in parens. |
|
211 \s*$""", re.X) |
|
212 def http2time(text): |
|
213     """Return the time represented by a string, as seconds since the epoch. |
|
214 |
|
215 Return value is an integer. |
|
216 |
|
217     None is returned if the format of text is unrecognized, the time is outside |
|
218 the representable range, or the timezone string is not recognized. If the |
|
219 string contains no timezone, UTC is assumed. |
|
220 |
|
221 The timezone in the string may be numerical (like "-0800" or "+0100") or a |
|
222 string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the |
|
223 timezone strings equivalent to UTC (zero offset) are known to the function. |
|
224 |
|
225 The function loosely parses the following formats: |
|
226 |
|
227 Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format |
|
228 Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format |
|
229 Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format |
|
230 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) |
|
231 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) |
|
232 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) |
|
233 |
|
234 The parser ignores leading and trailing whitespace. The time may be |
|
235 absent. |
|
236 |
|
237 If the year is given with only 2 digits, the function will select the |
|
238 century that makes the year closest to the current date. |
|
239 |
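    A doctest-style example (added for illustration; the result is wrapped in
    int() so that the displayed value does not depend on which of the parsing
    paths below is taken):

    >>> int(http2time("Wed, 09 Feb 1994 22:23:32 GMT"))
    760832612
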
|
240 """ |
|
241 # fast exit for strictly conforming string |
|
242 m = STRICT_DATE_RE.search(text) |
|
243 if m: |
|
244 g = m.groups() |
|
245 mon = MONTHS_LOWER.index(g[1].lower()) + 1 |
|
246 tt = (int(g[2]), mon, int(g[0]), |
|
247 int(g[3]), int(g[4]), float(g[5])) |
|
248 return _timegm(tt) |
|
249 |
|
250 # No, we need some messy parsing... |
|
251 |
|
252 # clean up |
|
253 text = text.lstrip() |
|
254 text = WEEKDAY_RE.sub("", text, 1) # Useless weekday |
|
255 |
|
256 # tz is time zone specifier string |
|
257 day, mon, yr, hr, min, sec, tz = [None]*7 |
|
258 |
|
259 # loose regexp parse |
|
260 m = LOOSE_HTTP_DATE_RE.search(text) |
|
261 if m is not None: |
|
262 day, mon, yr, hr, min, sec, tz = m.groups() |
|
263 else: |
|
264 return None # bad format |
|
265 |
|
266 return _str2time(day, mon, yr, hr, min, sec, tz) |
|
267 |
|
268 ISO_DATE_RE = re.compile( |
|
269 """^ |
|
270 (\d{4}) # year |
|
271 [-\/]? |
|
272 (\d\d?) # numerical month |
|
273 [-\/]? |
|
274 (\d\d?) # day |
|
275 (?: |
|
276 (?:\s+|[-:Tt]) # separator before clock |
|
277 (\d\d?):?(\d\d) # hour:min |
|
278 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) |
|
279 )? # optional clock |
|
280 \s* |
|
281 ([-+]?\d\d?:?(:?\d\d)? |
|
282 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) |
|
283 \s*$""", re.X) |
|
284 def iso2time(text): |
|
285 """ |
|
286 As for http2time, but parses the ISO 8601 formats: |
|
287 |
|
288 1994-02-03 14:15:29 -0100 -- ISO 8601 format |
|
289 1994-02-03 14:15:29 -- zone is optional |
|
290 1994-02-03 -- only date |
|
291 1994-02-03T14:15:29 -- Use T as separator |
|
292 19940203T141529Z -- ISO 8601 compact format |
|
293 19940203 -- only date |
|
294 |
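    A doctest-style example (added for illustration; a date-only string
    parses as midnight UTC on that date):

    >>> iso2time("1994-02-03")
    760233600
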
|
295 """ |
|
296 # clean up |
|
297 text = text.lstrip() |
|
298 |
|
299 # tz is time zone specifier string |
|
300 day, mon, yr, hr, min, sec, tz = [None]*7 |
|
301 |
|
302 # loose regexp parse |
|
303 m = ISO_DATE_RE.search(text) |
|
304 if m is not None: |
|
305 # XXX there's an extra bit of the timezone I'm ignoring here: is |
|
306 # this the right thing to do? |
|
307 yr, mon, day, hr, min, sec, tz, _ = m.groups() |
|
308 else: |
|
309 return None # bad format |
|
310 |
|
311 return _str2time(day, mon, yr, hr, min, sec, tz) |
|
312 |
|
313 |
|
314 # Header parsing |
|
315 # ----------------------------------------------------------------------------- |
|
316 |
|
317 def unmatched(match): |
|
318 """Return unmatched part of re.Match object.""" |
|
319 start, end = match.span(0) |
|
320 return match.string[:start]+match.string[end:] |
|
321 |
|
322 HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") |
|
323 HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") |
|
324 HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") |
|
325 HEADER_ESCAPE_RE = re.compile(r"\\(.)") |
|
326 def split_header_words(header_values): |
|
327 r"""Parse header values into a list of lists containing key,value pairs. |
|
328 |
|
329 The function knows how to deal with ",", ";" and "=" as well as quoted |
|
330 values after "=". A list of space separated tokens are parsed as if they |
|
331 were separated by ";". |
|
332 |
|
333 If the header_values passed as argument contains multiple values, then they |
|
334 are treated as if they were a single value separated by comma ",". |
|
335 |
|
336 This means that this function is useful for parsing header fields that |
|
337 follow this syntax (BNF as from the HTTP/1.1 specification, but we relax |
|
338 the requirement for tokens). |
|
339 |
|
340 headers = #header |
|
341 header = (token | parameter) *( [";"] (token | parameter)) |
|
342 |
|
343 token = 1*<any CHAR except CTLs or separators> |
|
344 separators = "(" | ")" | "<" | ">" | "@" |
|
345 | "," | ";" | ":" | "\" | <"> |
|
346 | "/" | "[" | "]" | "?" | "=" |
|
347 | "{" | "}" | SP | HT |
|
348 |
|
349 quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
|
350 qdtext = <any TEXT except <">> |
|
351 quoted-pair = "\" CHAR |
|
352 |
|
353 parameter = attribute "=" value |
|
354 attribute = token |
|
355 value = token | quoted-string |
|
356 |
|
357 Each header is represented by a list of key/value pairs. The value for a |
|
358 simple token (not part of a parameter) is None. Syntactically incorrect |
|
359 headers will not necessarily be parsed as you would want. |
|
360 |
|
361 This is easier to describe with some examples: |
|
362 |
|
363 >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) |
|
364 [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] |
|
365 >>> split_header_words(['text/html; charset="iso-8859-1"']) |
|
366 [[('text/html', None), ('charset', 'iso-8859-1')]] |
|
367 >>> split_header_words([r'Basic realm="\"foo\bar\""']) |
|
368 [[('Basic', None), ('realm', '"foobar"')]] |
|
369 |
|
370 """ |
|
371 assert not isinstance(header_values, basestring) |
|
372 result = [] |
|
373 for text in header_values: |
|
374 orig_text = text |
|
375 pairs = [] |
|
376 while text: |
|
377 m = HEADER_TOKEN_RE.search(text) |
|
378 if m: |
|
379 text = unmatched(m) |
|
380 name = m.group(1) |
|
381 m = HEADER_QUOTED_VALUE_RE.search(text) |
|
382 if m: # quoted value |
|
383 text = unmatched(m) |
|
384 value = m.group(1) |
|
385 value = HEADER_ESCAPE_RE.sub(r"\1", value) |
|
386 else: |
|
387 m = HEADER_VALUE_RE.search(text) |
|
388 if m: # unquoted value |
|
389 text = unmatched(m) |
|
390 value = m.group(1) |
|
391 value = value.rstrip() |
|
392 else: |
|
393 # no value, a lone token |
|
394 value = None |
|
395 pairs.append((name, value)) |
|
396 elif text.lstrip().startswith(","): |
|
397 # concatenated headers, as per RFC 2616 section 4.2 |
|
398 text = text.lstrip()[1:] |
|
399 if pairs: result.append(pairs) |
|
400 pairs = [] |
|
401 else: |
|
402 # skip junk |
|
403 non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) |
|
404 assert nr_junk_chars > 0, ( |
|
405 "split_header_words bug: '%s', '%s', %s" % |
|
406 (orig_text, text, pairs)) |
|
407 text = non_junk |
|
408 if pairs: result.append(pairs) |
|
409 return result |
|
410 |
|
411 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") |
|
412 def join_header_words(lists): |
|
413 """Do the inverse (almost) of the conversion done by split_header_words. |
|
414 |
|
415 Takes a list of lists of (key, value) pairs and produces a single header |
|
416 value. Attribute values are quoted if needed. |
|
417 |
|
418 >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) |
|
419 'text/plain; charset="iso-8859/1"' |
|
420 >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) |
|
421 'text/plain, charset="iso-8859/1"' |
|
422 |
|
423 """ |
|
424 headers = [] |
|
425 for pairs in lists: |
|
426 attr = [] |
|
427 for k, v in pairs: |
|
428 if v is not None: |
|
429 if not re.search(r"^\w+$", v): |
|
430 v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ |
|
431 v = '"%s"' % v |
|
432 k = "%s=%s" % (k, v) |
|
433 attr.append(k) |
|
434 if attr: headers.append("; ".join(attr)) |
|
435 return ", ".join(headers) |
|
436 |
|
437 def parse_ns_headers(ns_headers): |
|
438 """Ad-hoc parser for Netscape protocol cookie-attributes. |
|
439 |
|
440 The old Netscape cookie format for Set-Cookie can for instance contain |
|
441 an unquoted "," in the expires field, so we have to use this ad-hoc |
|
442 parser instead of split_header_words. |
|
443 |
|
444 XXX This may not make the best possible effort to parse all the crap |
|
445 that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient |
|
446     parser is probably better, so we could do worse than following it if |
|
447     this ever gives any trouble. |
|
448 |
|
449 Currently, this is also used for parsing RFC 2109 cookies. |
|
450 |
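    A doctest-style example (added for illustration; note that the unquoted
    comma inside the expires value is handled, and a default "version" pair
    is appended):

    >>> parse_ns_headers(['foo=bar; expires=Wed, 09-Feb-1994 22:23:32 GMT; path=/'])
    [[('foo', 'bar'), ('expires', 760832612), ('path', '/'), ('version', '0')]]
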
|
451 """ |
|
452 known_attrs = ("expires", "domain", "path", "secure", |
|
453 # RFC 2109 attrs (may turn up in Netscape cookies, too) |
|
454 "port", "max-age") |
|
455 |
|
456 result = [] |
|
457 for ns_header in ns_headers: |
|
458 pairs = [] |
|
459 version_set = False |
|
460 for ii, param in enumerate(re.split(r";\s*", ns_header)): |
|
461 param = param.rstrip() |
|
462 if param == "": continue |
|
463 if "=" not in param: |
|
464 k, v = param, None |
|
465 else: |
|
466 k, v = re.split(r"\s*=\s*", param, 1) |
|
467 k = k.lstrip() |
|
468 if ii != 0: |
|
469 lc = k.lower() |
|
470 if lc in known_attrs: |
|
471 k = lc |
|
472 if k == "version": |
|
473 # This is an RFC 2109 cookie. |
|
474 version_set = True |
|
475 if k == "expires": |
|
476 # convert expires date to seconds since epoch |
|
477 if v.startswith('"'): v = v[1:] |
|
478 if v.endswith('"'): v = v[:-1] |
|
479 v = http2time(v) # None if invalid |
|
480 pairs.append((k, v)) |
|
481 |
|
482 if pairs: |
|
483 if not version_set: |
|
484 pairs.append(("version", "0")) |
|
485 result.append(pairs) |
|
486 |
|
487 return result |
|
488 |
|
489 |
|
490 IPV4_RE = re.compile(r"\.\d+$") |
|
491 def is_HDN(text): |
|
492 """Return True if text is a host domain name.""" |
|
493 # XXX |
|
494 # This may well be wrong. Which RFC is HDN defined in, if any (for |
|
495 # the purposes of RFC 2965)? |
|
496 # For the current implementation, what about IPv6? Remember to look |
|
497     # at other uses of IPV4_RE also, if you change this. |
|
498 if IPV4_RE.search(text): |
|
499 return False |
|
500 if text == "": |
|
501 return False |
|
502 if text[0] == "." or text[-1] == ".": |
|
503 return False |
|
504 return True |
|
505 |
|
506 def domain_match(A, B): |
|
507 """Return True if domain A domain-matches domain B, according to RFC 2965. |
|
508 |
|
509 A and B may be host domain names or IP addresses. |
|
510 |
|
511 RFC 2965, section 1: |
|
512 |
|
513 Host names can be specified either as an IP address or a HDN string. |
|
514 Sometimes we compare one host name with another. (Such comparisons SHALL |
|
515 be case-insensitive.) Host A's name domain-matches host B's if |
|
516 |
|
517 * their host name strings string-compare equal; or |
|
518 |
|
519 * A is a HDN string and has the form NB, where N is a non-empty |
|
520 name string, B has the form .B', and B' is a HDN string. (So, |
|
521 x.y.com domain-matches .Y.com but not Y.com.) |
|
522 |
|
523 Note that domain-match is not a commutative operation: a.b.c.com |
|
524 domain-matches .c.com, but not the reverse. |
|
525 |
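    Two doctest-style examples (added for illustration; the second case is
    False because B does not start with a dot):

    >>> domain_match("x.y.com", ".Y.com")
    True
    >>> domain_match("x.y.com", "y.com")
    False
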
|
526 """ |
|
527 # Note that, if A or B are IP addresses, the only relevant part of the |
|
528 # definition of the domain-match algorithm is the direct string-compare. |
|
529 A = A.lower() |
|
530 B = B.lower() |
|
531 if A == B: |
|
532 return True |
|
533 if not is_HDN(A): |
|
534 return False |
|
535 i = A.rfind(B) |
|
536 if i == -1 or i == 0: |
|
537 # A does not have form NB, or N is the empty string |
|
538 return False |
|
539 if not B.startswith("."): |
|
540 return False |
|
541 if not is_HDN(B[1:]): |
|
542 return False |
|
543 return True |
|
544 |
|
545 def liberal_is_HDN(text): |
|
546     """Return True if text is sort of like a host domain name. |
|
547 |
|
548 For accepting/blocking domains. |
|
549 |
|
550 """ |
|
551 if IPV4_RE.search(text): |
|
552 return False |
|
553 return True |
|
554 |
|
555 def user_domain_match(A, B): |
|
556 """For blocking/accepting domains. |
|
557 |
|
558 A and B may be host domain names or IP addresses. |
|
559 |
|
560 """ |
|
561 A = A.lower() |
|
562 B = B.lower() |
|
563 if not (liberal_is_HDN(A) and liberal_is_HDN(B)): |
|
564 if A == B: |
|
565 # equal IP addresses |
|
566 return True |
|
567 return False |
|
568 initial_dot = B.startswith(".") |
|
569 if initial_dot and A.endswith(B): |
|
570 return True |
|
571 if not initial_dot and A == B: |
|
572 return True |
|
573 return False |
|
574 |
|
575 cut_port_re = re.compile(r":\d+$") |
|
576 def request_host(request): |
|
577 """Return request-host, as defined by RFC 2965. |
|
578 |
|
579 Variation from RFC: returned value is lowercased, for convenient |
|
580 comparison. |
|
581 |
|
582 """ |
|
583 url = request.get_full_url() |
|
584 host = urlparse.urlparse(url)[1] |
|
585 if host == "": |
|
586 host = request.get_header("Host", "") |
|
587 |
|
588 # remove port, if present |
|
589 host = cut_port_re.sub("", host, 1) |
|
590 return host.lower() |
|
591 |
|
592 def eff_request_host(request): |
|
593 """Return a tuple (request-host, effective request-host name). |
|
594 |
|
595 As defined by RFC 2965, except both are lowercased. |
|
596 |
|
597 """ |
|
598 erhn = req_host = request_host(request) |
|
599 if req_host.find(".") == -1 and not IPV4_RE.search(req_host): |
|
600 erhn = req_host + ".local" |
|
601 return req_host, erhn |
|
602 |
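# Illustrative examples (not part of the original module; urllib2.Request is
# the request class these helpers are normally called with):
#
#     import urllib2
#     eff_request_host(urllib2.Request("http://www.example.com/"))
#         -> ('www.example.com', 'www.example.com')
#     eff_request_host(urllib2.Request("http://localhost/"))
#         -> ('localhost', 'localhost.local')
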
|
603 def request_path(request): |
|
604 """request-URI, as defined by RFC 2965.""" |
|
605 url = request.get_full_url() |
|
606 #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url) |
|
607 #req_path = escape_path("".join(urlparse.urlparse(url)[2:])) |
|
608 path, parameters, query, frag = urlparse.urlparse(url)[2:] |
|
609 if parameters: |
|
610 path = "%s;%s" % (path, parameters) |
|
611 path = escape_path(path) |
|
612 req_path = urlparse.urlunparse(("", "", path, "", query, frag)) |
|
613 if not req_path.startswith("/"): |
|
614 # fix bad RFC 2396 absoluteURI |
|
615 req_path = "/"+req_path |
|
616 return req_path |
|
617 |
|
618 def request_port(request): |
|
619 host = request.get_host() |
|
620 i = host.find(':') |
|
621 if i >= 0: |
|
622 port = host[i+1:] |
|
623 try: |
|
624 int(port) |
|
625 except ValueError: |
|
626 _debug("nonnumeric port: '%s'", port) |
|
627 return None |
|
628 else: |
|
629 port = DEFAULT_HTTP_PORT |
|
630 return port |
|
631 |
|
632 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't |
|
633 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). |
|
634 HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" |
|
635 ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") |
|
636 def uppercase_escaped_char(match): |
|
637 return "%%%s" % match.group(1).upper() |
|
638 def escape_path(path): |
|
639 """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" |
|
640 # There's no knowing what character encoding was used to create URLs |
|
641 # containing %-escapes, but since we have to pick one to escape invalid |
|
642 # path characters, we pick UTF-8, as recommended in the HTML 4.0 |
|
643 # specification: |
|
644 # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 |
|
645 # And here, kind of: draft-fielding-uri-rfc2396bis-03 |
|
646 # (And in draft IRI specification: draft-duerst-iri-05) |
|
647 # (And here, for new URI schemes: RFC 2718) |
|
648 if isinstance(path, unicode): |
|
649 path = path.encode("utf-8") |
|
650 path = urllib.quote(path, HTTP_PATH_SAFE) |
|
651 path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) |
|
652 return path |
|
653 |
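# Illustrative example (not part of the original module):
#     escape_path("/acme/order owner/%aa")  ->  '/acme/order%20owner/%AA'
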
|
654 def reach(h): |
|
655 """Return reach of host h, as defined by RFC 2965, section 1. |
|
656 |
|
657 The reach R of a host name H is defined as follows: |
|
658 |
|
659 * If |
|
660 |
|
661 - H is the host domain name of a host; and, |
|
662 |
|
663 - H has the form A.B; and |
|
664 |
|
665 - A has no embedded (that is, interior) dots; and |
|
666 |
|
667 - B has at least one embedded dot, or B is the string "local". |
|
668 then the reach of H is .B. |
|
669 |
|
670 * Otherwise, the reach of H is H. |
|
671 |
|
672 >>> reach("www.acme.com") |
|
673 '.acme.com' |
|
674 >>> reach("acme.com") |
|
675 'acme.com' |
|
676 >>> reach("acme.local") |
|
677 '.local' |
|
678 |
|
679 """ |
|
680 i = h.find(".") |
|
681 if i >= 0: |
|
682 #a = h[:i] # this line is only here to show what a is |
|
683 b = h[i+1:] |
|
684 i = b.find(".") |
|
685 if is_HDN(h) and (i >= 0 or b == "local"): |
|
686 return "."+b |
|
687 return h |
|
688 |
|
689 def is_third_party(request): |
|
690     """Return True if the request is to a third-party host. |
|
691 |
|
692 RFC 2965, section 3.3.6: |
|
693 |
|
694 An unverifiable transaction is to a third-party host if its request- |
|
695 host U does not domain-match the reach R of the request-host O in the |
|
696 origin transaction. |
|
697 |
|
698 """ |
|
699 req_host = request_host(request) |
|
700 if not domain_match(req_host, reach(request.get_origin_req_host())): |
|
701 return True |
|
702 else: |
|
703 return False |
|
704 |
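# Illustrative example (not part of the original module; origin_req_host is a
# standard keyword argument of urllib2.Request):
#
#     import urllib2
#     r = urllib2.Request("http://ads.example.net/banner",
#                         origin_req_host="www.example.com")
#     is_third_party(r)   -> True  (ads.example.net is not within .example.com)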
|
705 |
|
706 class Cookie: |
|
707 """HTTP Cookie. |
|
708 |
|
709 This class represents both Netscape and RFC 2965 cookies. |
|
710 |
|
711 This is deliberately a very simple class. It just holds attributes. It's |
|
712 possible to construct Cookie instances that don't comply with the cookie |
|
713 standards. CookieJar.make_cookies is the factory function for Cookie |
|
714 objects -- it deals with cookie parsing, supplying defaults, and |
|
715 normalising to the representation used in this class. CookiePolicy is |
|
716 responsible for checking them to see whether they should be accepted from |
|
717 and returned to the server. |
|
718 |
|
719 Note that the port may be present in the headers, but unspecified ("Port" |
|
720     rather than "Port=80", for example); if this is the case, port is None. |
|
721 |
|
722 """ |
|
723 |
|
724 def __init__(self, version, name, value, |
|
725 port, port_specified, |
|
726 domain, domain_specified, domain_initial_dot, |
|
727 path, path_specified, |
|
728 secure, |
|
729 expires, |
|
730 discard, |
|
731 comment, |
|
732 comment_url, |
|
733 rest, |
|
734 rfc2109=False, |
|
735 ): |
|
736 |
|
737 if version is not None: version = int(version) |
|
738 if expires is not None: expires = int(expires) |
|
739 if port is None and port_specified is True: |
|
740 raise ValueError("if port is None, port_specified must be false") |
|
741 |
|
742 self.version = version |
|
743 self.name = name |
|
744 self.value = value |
|
745 self.port = port |
|
746 self.port_specified = port_specified |
|
747 # normalise case, as per RFC 2965 section 3.3.3 |
|
748 self.domain = domain.lower() |
|
749 self.domain_specified = domain_specified |
|
750 # Sigh. We need to know whether the domain given in the |
|
751 # cookie-attribute had an initial dot, in order to follow RFC 2965 |
|
752 # (as clarified in draft errata). Needed for the returned $Domain |
|
753 # value. |
|
754 self.domain_initial_dot = domain_initial_dot |
|
755 self.path = path |
|
756 self.path_specified = path_specified |
|
757 self.secure = secure |
|
758 self.expires = expires |
|
759 self.discard = discard |
|
760 self.comment = comment |
|
761 self.comment_url = comment_url |
|
762 self.rfc2109 = rfc2109 |
|
763 |
|
764 self._rest = copy.copy(rest) |
|
765 |
|
766 def has_nonstandard_attr(self, name): |
|
767 return name in self._rest |
|
768 def get_nonstandard_attr(self, name, default=None): |
|
769 return self._rest.get(name, default) |
|
770 def set_nonstandard_attr(self, name, value): |
|
771 self._rest[name] = value |
|
772 |
|
773 def is_expired(self, now=None): |
|
774 if now is None: now = time.time() |
|
775 if (self.expires is not None) and (self.expires <= now): |
|
776 return True |
|
777 return False |
|
778 |
|
779 def __str__(self): |
|
780 if self.port is None: p = "" |
|
781 else: p = ":"+self.port |
|
782 limit = self.domain + p + self.path |
|
783 if self.value is not None: |
|
784 namevalue = "%s=%s" % (self.name, self.value) |
|
785 else: |
|
786 namevalue = self.name |
|
787 return "<Cookie %s for %s>" % (namevalue, limit) |
|
788 |
|
789 def __repr__(self): |
|
790 args = [] |
|
791 for name in ("version", "name", "value", |
|
792 "port", "port_specified", |
|
793 "domain", "domain_specified", "domain_initial_dot", |
|
794 "path", "path_specified", |
|
795 "secure", "expires", "discard", "comment", "comment_url", |
|
796 ): |
|
797 attr = getattr(self, name) |
|
798 args.append("%s=%s" % (name, repr(attr))) |
|
799 args.append("rest=%s" % repr(self._rest)) |
|
800 args.append("rfc2109=%s" % repr(self.rfc2109)) |
|
801 return "Cookie(%s)" % ", ".join(args) |
|
802 |
|
803 |
|
804 class CookiePolicy: |
|
805 """Defines which cookies get accepted from and returned to server. |
|
806 |
|
807 May also modify cookies, though this is probably a bad idea. |
|
808 |
|
809 The subclass DefaultCookiePolicy defines the standard rules for Netscape |
|
810 and RFC 2965 cookies -- override that if you want a customised policy. |
|
811 |
|
812 """ |
|
813 def set_ok(self, cookie, request): |
|
814 """Return true if (and only if) cookie should be accepted from server. |
|
815 |
|
816 Currently, pre-expired cookies never get this far -- the CookieJar |
|
817 class deletes such cookies itself. |
|
818 |
|
819 """ |
|
820 raise NotImplementedError() |
|
821 |
|
822 def return_ok(self, cookie, request): |
|
823 """Return true if (and only if) cookie should be returned to server.""" |
|
824 raise NotImplementedError() |
|
825 |
|
826 def domain_return_ok(self, domain, request): |
|
827 """Return false if cookies should not be returned, given cookie domain. |
|
828 """ |
|
829 return True |
|
830 |
|
831 def path_return_ok(self, path, request): |
|
832 """Return false if cookies should not be returned, given cookie path. |
|
833 """ |
|
834 return True |
|
835 |
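# A minimal sketch of a customised policy (illustrative only; the class name
# and the blocked domain below are invented for the example):
#
#     class NoAdsCookiePolicy(DefaultCookiePolicy):
#         def set_ok(self, cookie, request):
#             if cookie.domain.endswith(".ads.example.com"):
#                 return False
#             return DefaultCookiePolicy.set_ok(self, cookie, request)
#
# An instance can then be passed to CookieJar(policy=NoAdsCookiePolicy()).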
|
836 |
|
837 class DefaultCookiePolicy(CookiePolicy): |
|
838 """Implements the standard rules for accepting and returning cookies.""" |
|
839 |
|
840 DomainStrictNoDots = 1 |
|
841 DomainStrictNonDomain = 2 |
|
842 DomainRFC2965Match = 4 |
|
843 |
|
844 DomainLiberal = 0 |
|
845 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain |
|
846 |
|
847 def __init__(self, |
|
848 blocked_domains=None, allowed_domains=None, |
|
849 netscape=True, rfc2965=False, |
|
850 rfc2109_as_netscape=None, |
|
851 hide_cookie2=False, |
|
852 strict_domain=False, |
|
853 strict_rfc2965_unverifiable=True, |
|
854 strict_ns_unverifiable=False, |
|
855 strict_ns_domain=DomainLiberal, |
|
856 strict_ns_set_initial_dollar=False, |
|
857 strict_ns_set_path=False, |
|
858 ): |
|
859 """Constructor arguments should be passed as keyword arguments only.""" |
|
860 self.netscape = netscape |
|
861 self.rfc2965 = rfc2965 |
|
862 self.rfc2109_as_netscape = rfc2109_as_netscape |
|
863 self.hide_cookie2 = hide_cookie2 |
|
864 self.strict_domain = strict_domain |
|
865 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable |
|
866 self.strict_ns_unverifiable = strict_ns_unverifiable |
|
867 self.strict_ns_domain = strict_ns_domain |
|
868 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar |
|
869 self.strict_ns_set_path = strict_ns_set_path |
|
870 |
|
871 if blocked_domains is not None: |
|
872 self._blocked_domains = tuple(blocked_domains) |
|
873 else: |
|
874 self._blocked_domains = () |
|
875 |
|
876 if allowed_domains is not None: |
|
877 allowed_domains = tuple(allowed_domains) |
|
878 self._allowed_domains = allowed_domains |
|
879 |
|
880 def blocked_domains(self): |
|
881 """Return the sequence of blocked domains (as a tuple).""" |
|
882 return self._blocked_domains |
|
883 def set_blocked_domains(self, blocked_domains): |
|
884 """Set the sequence of blocked domains.""" |
|
885 self._blocked_domains = tuple(blocked_domains) |
|
886 |
|
887 def is_blocked(self, domain): |
|
888 for blocked_domain in self._blocked_domains: |
|
889 if user_domain_match(domain, blocked_domain): |
|
890 return True |
|
891 return False |
|
892 |
|
893 def allowed_domains(self): |
|
894 """Return None, or the sequence of allowed domains (as a tuple).""" |
|
895 return self._allowed_domains |
|
896 def set_allowed_domains(self, allowed_domains): |
|
897 """Set the sequence of allowed domains, or None.""" |
|
898 if allowed_domains is not None: |
|
899 allowed_domains = tuple(allowed_domains) |
|
900 self._allowed_domains = allowed_domains |
|
901 |
|
902 def is_not_allowed(self, domain): |
|
903 if self._allowed_domains is None: |
|
904 return False |
|
905 for allowed_domain in self._allowed_domains: |
|
906 if user_domain_match(domain, allowed_domain): |
|
907 return False |
|
908 return True |
|
909 |
|
910 def set_ok(self, cookie, request): |
|
911 """ |
|
912 If you override .set_ok(), be sure to call this method. If it returns |
|
913 false, so should your subclass (assuming your subclass wants to be more |
|
914 strict about which cookies to accept). |
|
915 |
|
916 """ |
|
917 _debug(" - checking cookie %s=%s", cookie.name, cookie.value) |
|
918 |
|
919 assert cookie.name is not None |
|
920 |
|
921 for n in "version", "verifiability", "name", "path", "domain", "port": |
|
922 fn_name = "set_ok_"+n |
|
923 fn = getattr(self, fn_name) |
|
924 if not fn(cookie, request): |
|
925 return False |
|
926 |
|
927 return True |
|
928 |
|
929 def set_ok_version(self, cookie, request): |
|
930 if cookie.version is None: |
|
931 # Version is always set to 0 by parse_ns_headers if it's a Netscape |
|
932 # cookie, so this must be an invalid RFC 2965 cookie. |
|
933 _debug(" Set-Cookie2 without version attribute (%s=%s)", |
|
934 cookie.name, cookie.value) |
|
935 return False |
|
936 if cookie.version > 0 and not self.rfc2965: |
|
937 _debug(" RFC 2965 cookies are switched off") |
|
938 return False |
|
939 elif cookie.version == 0 and not self.netscape: |
|
940 _debug(" Netscape cookies are switched off") |
|
941 return False |
|
942 return True |
|
943 |
|
944 def set_ok_verifiability(self, cookie, request): |
|
945 if request.is_unverifiable() and is_third_party(request): |
|
946 if cookie.version > 0 and self.strict_rfc2965_unverifiable: |
|
947 _debug(" third-party RFC 2965 cookie during " |
|
948 "unverifiable transaction") |
|
949 return False |
|
950 elif cookie.version == 0 and self.strict_ns_unverifiable: |
|
951 _debug(" third-party Netscape cookie during " |
|
952 "unverifiable transaction") |
|
953 return False |
|
954 return True |
|
955 |
|
956 def set_ok_name(self, cookie, request): |
|
957 # Try and stop servers setting V0 cookies designed to hack other |
|
958 # servers that know both V0 and V1 protocols. |
|
959 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and |
|
960 cookie.name.startswith("$")): |
|
961 _debug(" illegal name (starts with '$'): '%s'", cookie.name) |
|
962 return False |
|
963 return True |
|
964 |
|
965 def set_ok_path(self, cookie, request): |
|
966 if cookie.path_specified: |
|
967 req_path = request_path(request) |
|
968 if ((cookie.version > 0 or |
|
969 (cookie.version == 0 and self.strict_ns_set_path)) and |
|
970 not req_path.startswith(cookie.path)): |
|
971 _debug(" path attribute %s is not a prefix of request " |
|
972 "path %s", cookie.path, req_path) |
|
973 return False |
|
974 return True |
|
975 |
|
976 def set_ok_domain(self, cookie, request): |
|
977 if self.is_blocked(cookie.domain): |
|
978 _debug(" domain %s is in user block-list", cookie.domain) |
|
979 return False |
|
980 if self.is_not_allowed(cookie.domain): |
|
981 _debug(" domain %s is not in user allow-list", cookie.domain) |
|
982 return False |
|
983 if cookie.domain_specified: |
|
984 req_host, erhn = eff_request_host(request) |
|
985 domain = cookie.domain |
|
986 if self.strict_domain and (domain.count(".") >= 2): |
|
987 # XXX This should probably be compared with the Konqueror |
|
988 # (kcookiejar.cpp) and Mozilla implementations, but it's a |
|
989 # losing battle. |
|
990 i = domain.rfind(".") |
|
991 j = domain.rfind(".", 0, i) |
|
992 if j == 0: # domain like .foo.bar |
|
993 tld = domain[i+1:] |
|
994 sld = domain[j+1:i] |
|
995 if sld.lower() in ("co", "ac", "com", "edu", "org", "net", |
|
996 "gov", "mil", "int", "aero", "biz", "cat", "coop", |
|
997 "info", "jobs", "mobi", "museum", "name", "pro", |
|
998 "travel", "eu") and len(tld) == 2: |
|
999 # domain like .co.uk |
|
1000 _debug(" country-code second level domain %s", domain) |
|
1001 return False |
|
1002 if domain.startswith("."): |
|
1003 undotted_domain = domain[1:] |
|
1004 else: |
|
1005 undotted_domain = domain |
|
1006 embedded_dots = (undotted_domain.find(".") >= 0) |
|
1007 if not embedded_dots and domain != ".local": |
|
1008 _debug(" non-local domain %s contains no embedded dot", |
|
1009 domain) |
|
1010 return False |
|
1011 if cookie.version == 0: |
|
1012 if (not erhn.endswith(domain) and |
|
1013 (not erhn.startswith(".") and |
|
1014 not ("."+erhn).endswith(domain))): |
|
1015 _debug(" effective request-host %s (even with added " |
|
1016                            "initial dot) does not end with %s", |
|
1017 erhn, domain) |
|
1018 return False |
|
1019 if (cookie.version > 0 or |
|
1020 (self.strict_ns_domain & self.DomainRFC2965Match)): |
|
1021 if not domain_match(erhn, domain): |
|
1022 _debug(" effective request-host %s does not domain-match " |
|
1023 "%s", erhn, domain) |
|
1024 return False |
|
1025 if (cookie.version > 0 or |
|
1026 (self.strict_ns_domain & self.DomainStrictNoDots)): |
|
1027 host_prefix = req_host[:-len(domain)] |
|
1028 if (host_prefix.find(".") >= 0 and |
|
1029 not IPV4_RE.search(req_host)): |
|
1030 _debug(" host prefix %s for domain %s contains a dot", |
|
1031 host_prefix, domain) |
|
1032 return False |
|
1033 return True |
|
1034 |
|
1035 def set_ok_port(self, cookie, request): |
|
1036 if cookie.port_specified: |
|
1037 req_port = request_port(request) |
|
1038 if req_port is None: |
|
1039 req_port = "80" |
|
1040 else: |
|
1041 req_port = str(req_port) |
|
1042 for p in cookie.port.split(","): |
|
1043 try: |
|
1044 int(p) |
|
1045 except ValueError: |
|
1046 _debug(" bad port %s (not numeric)", p) |
|
1047 return False |
|
1048 if p == req_port: |
|
1049 break |
|
1050 else: |
|
1051 _debug(" request port (%s) not found in %s", |
|
1052 req_port, cookie.port) |
|
1053 return False |
|
1054 return True |
|
1055 |
|
1056 def return_ok(self, cookie, request): |
|
1057 """ |
|
1058 If you override .return_ok(), be sure to call this method. If it |
|
1059 returns false, so should your subclass (assuming your subclass wants to |
|
1060 be more strict about which cookies to return). |
|
1061 |
|
1062 """ |
|
1063 # Path has already been checked by .path_return_ok(), and domain |
|
1064 # blocking done by .domain_return_ok(). |
|
1065 _debug(" - checking cookie %s=%s", cookie.name, cookie.value) |
|
1066 |
|
1067 for n in "version", "verifiability", "secure", "expires", "port", "domain": |
|
1068 fn_name = "return_ok_"+n |
|
1069 fn = getattr(self, fn_name) |
|
1070 if not fn(cookie, request): |
|
1071 return False |
|
1072 return True |
|
1073 |
|
1074 def return_ok_version(self, cookie, request): |
|
1075 if cookie.version > 0 and not self.rfc2965: |
|
1076 _debug(" RFC 2965 cookies are switched off") |
|
1077 return False |
|
1078 elif cookie.version == 0 and not self.netscape: |
|
1079 _debug(" Netscape cookies are switched off") |
|
1080 return False |
|
1081 return True |
|
1082 |
|
1083 def return_ok_verifiability(self, cookie, request): |
|
1084 if request.is_unverifiable() and is_third_party(request): |
|
1085 if cookie.version > 0 and self.strict_rfc2965_unverifiable: |
|
1086 _debug(" third-party RFC 2965 cookie during unverifiable " |
|
1087 "transaction") |
|
1088 return False |
|
1089 elif cookie.version == 0 and self.strict_ns_unverifiable: |
|
1090 _debug(" third-party Netscape cookie during unverifiable " |
|
1091 "transaction") |
|
1092 return False |
|
1093 return True |
|
1094 |
|
1095 def return_ok_secure(self, cookie, request): |
|
1096 if cookie.secure and request.get_type() != "https": |
|
1097 _debug(" secure cookie with non-secure request") |
|
1098 return False |
|
1099 return True |
|
1100 |
|
1101 def return_ok_expires(self, cookie, request): |
|
1102 if cookie.is_expired(self._now): |
|
1103 _debug(" cookie expired") |
|
1104 return False |
|
1105 return True |
|
1106 |
|
1107 def return_ok_port(self, cookie, request): |
|
1108 if cookie.port: |
|
1109 req_port = request_port(request) |
|
1110 if req_port is None: |
|
1111 req_port = "80" |
|
1112 for p in cookie.port.split(","): |
|
1113 if p == req_port: |
|
1114 break |
|
1115 else: |
|
1116 _debug(" request port %s does not match cookie port %s", |
|
1117 req_port, cookie.port) |
|
1118 return False |
|
1119 return True |
|
1120 |
|
1121 def return_ok_domain(self, cookie, request): |
|
1122 req_host, erhn = eff_request_host(request) |
|
1123 domain = cookie.domain |
|
1124 |
|
1125 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't |
|
1126 if (cookie.version == 0 and |
|
1127 (self.strict_ns_domain & self.DomainStrictNonDomain) and |
|
1128 not cookie.domain_specified and domain != erhn): |
|
1129 _debug(" cookie with unspecified domain does not string-compare " |
|
1130 "equal to request domain") |
|
1131 return False |
|
1132 |
|
1133 if cookie.version > 0 and not domain_match(erhn, domain): |
|
1134 _debug(" effective request-host name %s does not domain-match " |
|
1135 "RFC 2965 cookie domain %s", erhn, domain) |
|
1136 return False |
|
1137 if cookie.version == 0 and not ("."+erhn).endswith(domain): |
|
1138 _debug(" request-host %s does not match Netscape cookie domain " |
|
1139 "%s", req_host, domain) |
|
1140 return False |
|
1141 return True |
|
1142 |
|
1143 def domain_return_ok(self, domain, request): |
|
1144         # Liberal check of the cookie domain. This is here as an optimization |
|
1145         # to avoid having to load lots of MSIE cookie files unless necessary. |
|
1146 req_host, erhn = eff_request_host(request) |
|
1147 if not req_host.startswith("."): |
|
1148 req_host = "."+req_host |
|
1149 if not erhn.startswith("."): |
|
1150 erhn = "."+erhn |
|
1151 if not (req_host.endswith(domain) or erhn.endswith(domain)): |
|
1152 #_debug(" request domain %s does not match cookie domain %s", |
|
1153 # req_host, domain) |
|
1154 return False |
|
1155 |
|
1156 if self.is_blocked(domain): |
|
1157 _debug(" domain %s is in user block-list", domain) |
|
1158 return False |
|
1159 if self.is_not_allowed(domain): |
|
1160 _debug(" domain %s is not in user allow-list", domain) |
|
1161 return False |
|
1162 |
|
1163 return True |
|
1164 |
|
1165 def path_return_ok(self, path, request): |
|
1166 _debug("- checking cookie path=%s", path) |
|
1167 req_path = request_path(request) |
|
1168 if not req_path.startswith(path): |
|
1169 _debug(" %s does not path-match %s", req_path, path) |
|
1170 return False |
|
1171 return True |
|
1172 |
|
1173 |
|
1174 def vals_sorted_by_key(adict): |
|
1175 keys = adict.keys() |
|
1176 keys.sort() |
|
1177 return map(adict.get, keys) |
|
1178 |
|
1179 def deepvalues(mapping): |
|
1180 """Iterates over nested mapping, depth-first, in sorted order by key.""" |
|
1181 values = vals_sorted_by_key(mapping) |
|
1182 for obj in values: |
|
1183 mapping = False |
|
1184 try: |
|
1185 obj.items |
|
1186 except AttributeError: |
|
1187 pass |
|
1188 else: |
|
1189 mapping = True |
|
1190 for subobj in deepvalues(obj): |
|
1191 yield subobj |
|
1192 if not mapping: |
|
1193 yield obj |
|
1194 |
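# Illustrative example (not part of the original module):
#     list(deepvalues({"b": {"x": 1}, "a": 2}))  ->  [2, 1]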
|
1195 |
|
1196 # Used as second parameter to dict.get() method, to distinguish absent |
|
1197 # dict key from one with a None value. |
|
1198 class Absent: pass |
|
1199 |
|
1200 class CookieJar: |
|
1201 """Collection of HTTP cookies. |
|
1202 |
|
1203 You may not need to know about this class: try |
|
1204 urllib2.build_opener(HTTPCookieProcessor).open(url). |
|
1205 |
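    A minimal usage sketch (illustrative only; it assumes the standard
    urllib2 module and a reachable URL):

        import urllib2
        jar = CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
        response = opener.open("http://example.com/")
        for cookie in jar:
            print cookie.name, cookie.value
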
|
1206 """ |
|
1207 |
|
1208 non_word_re = re.compile(r"\W") |
|
1209 quote_re = re.compile(r"([\"\\])") |
|
1210 strict_domain_re = re.compile(r"\.?[^.]*") |
|
1211 domain_re = re.compile(r"[^.]*") |
|
1212 dots_re = re.compile(r"^\.+") |
|
1213 |
|
1214 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" |
|
1215 |
|
1216 def __init__(self, policy=None): |
|
1217 if policy is None: |
|
1218 policy = DefaultCookiePolicy() |
|
1219 self._policy = policy |
|
1220 |
|
1221 self._cookies_lock = _threading.RLock() |
|
1222 self._cookies = {} |
|
1223 |
|
1224 def set_policy(self, policy): |
|
1225 self._policy = policy |
|
1226 |
|
1227 def _cookies_for_domain(self, domain, request): |
|
1228 cookies = [] |
|
1229 if not self._policy.domain_return_ok(domain, request): |
|
1230 return [] |
|
1231 _debug("Checking %s for cookies to return", domain) |
|
1232 cookies_by_path = self._cookies[domain] |
|
1233 for path in cookies_by_path.keys(): |
|
1234 if not self._policy.path_return_ok(path, request): |
|
1235 continue |
|
1236 cookies_by_name = cookies_by_path[path] |
|
1237 for cookie in cookies_by_name.values(): |
|
1238 if not self._policy.return_ok(cookie, request): |
|
1239 _debug(" not returning cookie") |
|
1240 continue |
|
1241 _debug(" it's a match") |
|
1242 cookies.append(cookie) |
|
1243 return cookies |
|
1244 |
|
1245 def _cookies_for_request(self, request): |
|
1246 """Return a list of cookies to be returned to server.""" |
|
1247 cookies = [] |
|
1248 for domain in self._cookies.keys(): |
|
1249 cookies.extend(self._cookies_for_domain(domain, request)) |
|
1250 return cookies |
|
1251 |
|
1252 def _cookie_attrs(self, cookies): |
|
1253 """Return a list of cookie-attributes to be returned to server. |
|
1254 |
|
1255 like ['foo="bar"; $Path="/"', ...] |
|
1256 |
|
1257 The $Version attribute is also added when appropriate (currently only |
|
1258 once per request). |
|
1259 |
|
1260 """ |
|
1261 # add cookies in order of most specific (ie. longest) path first |
|
1262 cookies.sort(key=lambda arg: len(arg.path), reverse=True) |
|
1263 |
|
1264 version_set = False |
|
1265 |
|
1266 attrs = [] |
|
1267 for cookie in cookies: |
|
1268 # set version of Cookie header |
|
1269 # XXX |
|
1270 # What should it be if multiple matching Set-Cookie headers have |
|
1271 # different versions themselves? |
|
1272             # Answer: there is no answer; it was supposed to be settled by |
|
1273 # RFC 2965 errata, but that may never appear... |
|
1274 version = cookie.version |
|
1275 if not version_set: |
|
1276 version_set = True |
|
1277 if version > 0: |
|
1278 attrs.append("$Version=%s" % version) |
|
1279 |
|
1280 # quote cookie value if necessary |
|
1281 # (not for Netscape protocol, which already has any quotes |
|
1282 # intact, due to the poorly-specified Netscape Cookie: syntax) |
|
1283 if ((cookie.value is not None) and |
|
1284 self.non_word_re.search(cookie.value) and version > 0): |
|
1285 value = self.quote_re.sub(r"\\\1", cookie.value) |
|
1286 else: |
|
1287 value = cookie.value |
|
1288 |
|
1289 # add cookie-attributes to be returned in Cookie header |
|
1290 if cookie.value is None: |
|
1291 attrs.append(cookie.name) |
|
1292 else: |
|
1293 attrs.append("%s=%s" % (cookie.name, value)) |
|
1294 if version > 0: |
|
1295 if cookie.path_specified: |
|
1296 attrs.append('$Path="%s"' % cookie.path) |
|
1297 if cookie.domain.startswith("."): |
|
1298 domain = cookie.domain |
|
1299 if (not cookie.domain_initial_dot and |
|
1300 domain.startswith(".")): |
|
1301 domain = domain[1:] |
|
1302 attrs.append('$Domain="%s"' % domain) |
|
1303 if cookie.port is not None: |
|
1304 p = "$Port" |
|
1305 if cookie.port_specified: |
|
1306 p = p + ('="%s"' % cookie.port) |
|
1307 attrs.append(p) |
|
1308 |
|
1309 return attrs |
|
1310 |
|
1311 def add_cookie_header(self, request): |
|
1312 """Add correct Cookie: header to request (urllib2.Request object). |
|
1313 |
|
1314 The Cookie2 header is also added unless policy.hide_cookie2 is true. |
|
1315 |
|
1316 """ |
|
1317 _debug("add_cookie_header") |
|
1318 self._cookies_lock.acquire() |
|
1319 try: |
|
1320 |
|
1321 self._policy._now = self._now = int(time.time()) |
|
1322 |
|
1323 cookies = self._cookies_for_request(request) |
|
1324 |
|
1325 attrs = self._cookie_attrs(cookies) |
|
1326 if attrs: |
|
1327 if not request.has_header("Cookie"): |
|
1328 request.add_unredirected_header( |
|
1329 "Cookie", "; ".join(attrs)) |
|
1330 |
|
1331 # if necessary, advertise that we know RFC 2965 |
|
1332 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and |
|
1333 not request.has_header("Cookie2")): |
|
1334 for cookie in cookies: |
|
1335 if cookie.version != 1: |
|
1336 request.add_unredirected_header("Cookie2", '$Version="1"') |
|
1337 break |
|
1338 |
|
1339 finally: |
|
1340 self._cookies_lock.release() |
|
1341 |
|
1342 self.clear_expired_cookies() |
|
1343 |
|
1344 def _normalized_cookie_tuples(self, attrs_set): |
|
1345 """Return list of tuples containing normalised cookie information. |
|
1346 |
|
1347 attrs_set is the list of lists of key,value pairs extracted from |
|
1348 the Set-Cookie or Set-Cookie2 headers. |
|
1349 |
|
1350 Tuples are name, value, standard, rest, where name and value are the |
|
1351 cookie name and value, standard is a dictionary containing the standard |
|
1352 cookie-attributes (discard, secure, version, expires or max-age, |
|
1353 domain, path and port) and rest is a dictionary containing the rest of |
|
1354 the cookie-attributes. |
|
1355 |
|
1356 """ |
|
1357 cookie_tuples = [] |
|
1358 |
|
1359 boolean_attrs = "discard", "secure" |
|
1360 value_attrs = ("version", |
|
1361 "expires", "max-age", |
|
1362 "domain", "path", "port", |
|
1363 "comment", "commenturl") |
|
1364 |
|
1365 for cookie_attrs in attrs_set: |
|
1366 name, value = cookie_attrs[0] |
|
1367 |
|
1368 # Build dictionary of standard cookie-attributes (standard) and |
|
1369 # dictionary of other cookie-attributes (rest). |
|
1370 |
|
1371 # Note: expiry time is normalised to seconds since epoch. V0 |
|
1372 # cookies should have the Expires cookie-attribute, and V1 cookies |
|
1373 # should have Max-Age, but since V1 includes RFC 2109 cookies (and |
|
1374 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we |
|
1375 # accept either (but prefer Max-Age). |
|
1376 max_age_set = False |
|
1377 |
|
1378 bad_cookie = False |
|
1379 |
|
1380 standard = {} |
|
1381 rest = {} |
|
1382 for k, v in cookie_attrs[1:]: |
|
1383 lc = k.lower() |
|
1384 # don't lose case distinction for unknown fields |
|
1385 if lc in value_attrs or lc in boolean_attrs: |
|
1386 k = lc |
|
1387 if k in boolean_attrs and v is None: |
|
1388 # boolean cookie-attribute is present, but has no value |
|
1389 # (like "discard", rather than "port=80") |
|
1390 v = True |
|
1391 if k in standard: |
|
1392 # only first value is significant |
|
1393 continue |
|
1394 if k == "domain": |
|
1395 if v is None: |
|
1396 _debug(" missing value for domain attribute") |
|
1397 bad_cookie = True |
|
1398 break |
|
1399 # RFC 2965 section 3.3.3 |
|
1400 v = v.lower() |
|
1401 if k == "expires": |
|
1402 if max_age_set: |
|
1403 # Prefer max-age to expires (like Mozilla) |
|
1404 continue |
|
1405 if v is None: |
|
1406 _debug(" missing or invalid value for expires " |
|
1407 "attribute: treating as session cookie") |
|
1408 continue |
|
1409 if k == "max-age": |
|
1410 max_age_set = True |
|
1411 try: |
|
1412 v = int(v) |
|
1413 except ValueError: |
|
1414 _debug(" missing or invalid (non-numeric) value for " |
|
1415 "max-age attribute") |
|
1416 bad_cookie = True |
|
1417 break |
|
1418 # convert RFC 2965 Max-Age to seconds since epoch |
|
1419 # XXX Strictly you're supposed to follow RFC 2616 |
|
1420 # age-calculation rules. Remember that zero Max-Age is a |
|
1421                         # request to discard (old and new) cookie, though. |
|
1422 k = "expires" |
|
1423 v = self._now + v |
|
1424 if (k in value_attrs) or (k in boolean_attrs): |
|
1425 if (v is None and |
|
1426 k not in ("port", "comment", "commenturl")): |
|
1427 _debug(" missing value for %s attribute" % k) |
|
1428 bad_cookie = True |
|
1429 break |
|
1430 standard[k] = v |
|
1431 else: |
|
1432 rest[k] = v |
|
1433 |
|
1434 if bad_cookie: |
|
1435 continue |
|
1436 |
|
1437 cookie_tuples.append((name, value, standard, rest)) |
|
1438 |
|
1439 return cookie_tuples |
|
1440 |
|
1441 def _cookie_from_cookie_tuple(self, tup, request): |
|
1442 # standard is dict of standard cookie-attributes, rest is dict of the |
|
1443 # rest of them |
|
1444 name, value, standard, rest = tup |
|
1445 |
|
1446 domain = standard.get("domain", Absent) |
|
1447 path = standard.get("path", Absent) |
|
1448 port = standard.get("port", Absent) |
|
1449 expires = standard.get("expires", Absent) |
|
1450 |
|
1451 # set the easy defaults |
|
1452 version = standard.get("version", None) |
|
1453 if version is not None: version = int(version) |
|
1454 secure = standard.get("secure", False) |
|
1455 # (discard is also set if expires is Absent) |
|
1456 discard = standard.get("discard", False) |
|
1457 comment = standard.get("comment", None) |
|
1458 comment_url = standard.get("commenturl", None) |
|
1459 |
|
1460 # set default path |
|
1461 if path is not Absent and path != "": |
|
1462 path_specified = True |
|
1463 path = escape_path(path) |
|
1464 else: |
|
1465 path_specified = False |
|
1466 path = request_path(request) |
|
1467 i = path.rfind("/") |
|
1468 if i != -1: |
|
1469 if version == 0: |
|
1470 # Netscape spec parts company from reality here |
|
1471 path = path[:i] |
|
1472 else: |
|
1473 path = path[:i+1] |
|
1474 if len(path) == 0: path = "/" |
|
1475 |
|
1476 # set default domain |
|
1477 domain_specified = domain is not Absent |
|
1478 # but first we have to remember whether it starts with a dot |
|
1479 domain_initial_dot = False |
|
1480 if domain_specified: |
|
1481 domain_initial_dot = bool(domain.startswith(".")) |
|
1482 if domain is Absent: |
|
1483 req_host, erhn = eff_request_host(request) |
|
1484 domain = erhn |
|
1485 elif not domain.startswith("."): |
|
1486 domain = "."+domain |
|
1487 |
|
1488 # set default port |
|
1489 port_specified = False |
|
1490 if port is not Absent: |
|
1491 if port is None: |
|
1492 # Port attr present, but has no value: default to request port. |
|
1493 # Cookie should then only be sent back on that port. |
|
1494 port = request_port(request) |
|
1495 else: |
|
1496 port_specified = True |
|
1497 port = re.sub(r"\s+", "", port) |
|
1498 else: |
|
1499 # No port attr present. Cookie can be sent back on any port. |
|
1500 port = None |
|
1501 |
|
1502 # set default expires and discard |
|
1503 if expires is Absent: |
|
1504 expires = None |
|
1505 discard = True |
|
1506 elif expires <= self._now: |
|
1507 # Expiry date in past is request to delete cookie. This can't be |
|
1508             # in DefaultCookiePolicy, because we can't delete cookies there. |
|
1509 try: |
|
1510 self.clear(domain, path, name) |
|
1511 except KeyError: |
|
1512 pass |
|
1513 _debug("Expiring cookie, domain='%s', path='%s', name='%s'", |
|
1514 domain, path, name) |
|
1515 return None |
|
1516 |
|
1517 return Cookie(version, |
|
1518 name, value, |
|
1519 port, port_specified, |
|
1520 domain, domain_specified, domain_initial_dot, |
|
1521 path, path_specified, |
|
1522 secure, |
|
1523 expires, |
|
1524 discard, |
|
1525 comment, |
|
1526 comment_url, |
|
1527 rest) |
|
1528 |
|
1529 def _cookies_from_attrs_set(self, attrs_set, request): |
|
1530 cookie_tuples = self._normalized_cookie_tuples(attrs_set) |
|
1531 |
|
1532 cookies = [] |
|
1533 for tup in cookie_tuples: |
|
1534 cookie = self._cookie_from_cookie_tuple(tup, request) |
|
1535 if cookie: cookies.append(cookie) |
|
1536 return cookies |
|
1537 |
|
1538 def _process_rfc2109_cookies(self, cookies): |
|
1539 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) |
|
1540 if rfc2109_as_ns is None: |
|
1541 rfc2109_as_ns = not self._policy.rfc2965 |
|
1542 for cookie in cookies: |
|
1543 if cookie.version == 1: |
|
1544 cookie.rfc2109 = True |
|
1545 if rfc2109_as_ns: |
|
1546 # treat 2109 cookies as Netscape cookies rather than |
|
1547 # as RFC2965 cookies |
|
1548 cookie.version = 0 |
|
1549 |
|
    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                def no_matching_rfc2965(ns_cookie, lookup=lookup):
                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
                    return key not in lookup
                ns_cookies = filter(no_matching_rfc2965, ns_cookies)

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

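    # Usage sketch for make_cookies() above (illustrative, not part of the
    # original module); the URL is made up:
    #
    #     import urllib2
    #     request = urllib2.Request("http://www.example.com/")
    #     response = urllib2.urlopen(request)  # response.info() has the headers
    #     jar = CookieJar()
    #     for cookie in jar.make_cookies(response, request):
    #         print cookie.name, cookie.value, cookie.domain
    #
    # make_cookies() only parses the Set-Cookie/Set-Cookie2 headers; nothing
    # is stored in the jar until set_cookie() or extract_cookies() is called.
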
    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)

        finally:
            self._cookies_lock.release()

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        c = self._cookies
        self._cookies_lock.acquire()
        try:
            if cookie.domain not in c: c[cookie.domain] = {}
            c2 = c[cookie.domain]
            if cookie.path not in c2: c2[cookie.path] = {}
            c3 = c2[cookie.path]
            c3[cookie.name] = cookie
        finally:
            self._cookies_lock.release()

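    # Storage layout note for set_cookie() above (illustrative, not part of
    # the original module): cookies are kept in a three-level dict keyed by
    # domain, then path, then name.  Assuming a hypothetical Cookie instance
    # c with domain ".example.com", path "/" and name "id":
    #
    #     jar = CookieJar()
    #     jar.set_cookie(c)
    #     # jar._cookies == {".example.com": {"/": {"id": c}}}
    #
    # set_cookie() stores unconditionally; use set_cookie_if_ok() to apply
    # the policy's set_ok() check first.
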
    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

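    # Typical use of extract_cookies() above (illustrative sketch, not part
    # of the original module); the URL is made up:
    #
    #     import urllib2
    #     jar = CookieJar()
    #     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    #     response = opener.open("http://www.example.com/")
    #     # HTTPCookieProcessor has already called jar.extract_cookies() and
    #     # will call jar.add_cookie_header() on subsequent requests.
    #
    # extract_cookies() can also be called directly with any response/request
    # pair that provides the interfaces described in this module.
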
    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

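    # Illustrative calls to clear() above (not part of the original module;
    # the domain, path and name values are hypothetical):
    #
    #     jar.clear()                                   # remove everything
    #     jar.clear(".example.com")                     # one domain
    #     jar.clear(".example.com", "/account")         # one path in a domain
    #     jar.clear(".example.com", "/account", "id")   # one named cookie
    #
    # Passing a name without both domain and path raises ValueError, and a
    # non-existent domain/path/name raises KeyError.
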
    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        self._cookies_lock.acquire()
        try:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        self._cookies_lock.acquire()
        try:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

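    # Illustrative note on the two discard methods above (not part of the
    # original module): session cookies are those with cookie.discard set
    # (typically because no Expires/Max-Age was given), while expired cookies
    # have an expiry time in the past.  A browser-like client might do:
    #
    #     jar.clear_session_cookies()    # e.g. when the "browser" is closed
    #     jar.clear_expired_cookies()    # optional tidy-up
    #
    # Neither call is required for correct operation with DefaultCookiePolicy.
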
    def __iter__(self):
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i

    def __repr__(self):
        r = []
        for cookie in self: r.append(repr(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = []
        for cookie in self: r.append(str(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))


# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError): pass

class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                filename+""
            except:
                raise ValueError("filename must be string-like")
            self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:

            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise

        finally:
            self._cookies_lock.release()

from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
from _MozillaCookieJar import MozillaCookieJar
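
# Usage sketch for the FileCookieJar subclasses above (illustrative, not part
# of the original module); the filename is made up:
#
#     jar = LWPCookieJar("cookies.lwp")
#     jar.load(ignore_discard=True)   # raises IOError if the file is missing
#     ...                             # use the jar, e.g. with urllib2
#     jar.save(ignore_discard=True)   # session cookies are skipped otherwise
#     jar.revert()                    # throw away changes, reload the file
#
# MozillaCookieJar works the same way but reads and writes the Mozilla/Netscape
# cookies.txt format.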