|
1 # Copyright (C) 2001-2006 Python Software Foundation |
|
2 # Author: Barry Warsaw |
|
3 # Contact: email-sig@python.org |
|
4 |
|
5 """Miscellaneous utilities.""" |
|
6 |
|
# Names exported by ``from <this module> import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
|
21 |
|
22 import os |
|
23 import re |
|
24 import time |
|
25 import base64 |
|
26 import random |
|
27 import socket |
|
28 import urllib |
|
29 import warnings |
|
30 |
|
31 from email._parseaddr import quote |
|
32 from email._parseaddr import AddressList as _AddressList |
|
33 from email._parseaddr import mktime_tz |
|
34 |
|
# We need workarounds for bugs in these methods in older Pythons (see below)
|
36 from email._parseaddr import parsedate as _parsedate |
|
37 from email._parseaddr import parsedate_tz as _parsedate_tz |
|
38 |
|
39 from quopri import decodestring as _qdecode |
|
40 |
|
41 # Intrapackage imports |
|
42 from email.encoders import _bencode, _qencode |
|
43 |
|
# Separator used by getaddresses() to join several field values into one
# string before parsing.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
# Delimiter between the charset, language and value fields of an extended
# parameter value (see decode_rfc2231() and encode_rfc2231()).
TICK = "'"

# Characters whose presence in a real name makes formataddr() wrap the name
# in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in a real name.
escapesre = re.compile(r'[][\\()"]')
|
52 |
|
53 |
|
54 |
|
55 # Helpers |
|
56 |
|
def _identity(s):
    """Return s unchanged (a no-op transformation)."""
    return s
|
59 |
|
60 |
|
def _bdecode(s):
    """Decode the base64 data in s and return the decoded bytes.

    A false s (the empty string or None) is returned unchanged.

    The decoded data is returned exactly as produced by the base64 decoder.
    Earlier revisions dropped a trailing newline from the decoded result
    whenever the encoded input did not itself end in a newline, which
    silently corrupted payloads whose real content ended in '\\n'.
    """
    if not s:
        return s
    # decodestring() is the Python 2 spelling; it was renamed decodebytes()
    # and eventually removed in Python 3, so fall back when it is missing.
    decode = getattr(base64, 'decodestring', None) or base64.decodebytes
    return decode(s)
|
70 |
|
71 |
|
72 |
|
def fix_eols(s):
    """Return s with every bare \\n and bare \\r turned into \\r\\n."""
    # First pass: newlines that are not already preceded by a carriage
    # return.  Second pass: carriage returns not followed by a newline.
    lone_lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    return re.sub(r'\r(?!\n)', CRLF, lone_lf_fixed)
|
80 |
|
81 |
|
82 |
|
def formataddr(pair):
    """Build a header-ready address string from (realname, email_address).

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.

    When the real name is false, the address is returned unmodified.
    Otherwise the result is 'realname <address>'; the name is wrapped in
    double quotes if it contains any character matched by specialsre, and
    every character matched by escapesre is preceded by a backslash.
    """
    realname, addr = pair
    if not realname:
        return addr
    quotes = ''
    if specialsre.search(realname):
        quotes = '"'
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, addr)
|
99 |
|
100 |
|
101 |
|
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
|
107 |
|
108 |
|
109 |
|
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# the three fields as the named groups "charset", "encoding" and "atom".
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.IGNORECASE | re.VERBOSE)
|
119 |
|
120 |
|
121 |
|
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted the current time is used.

    localtime, when true, makes the result relative to the local timezone
    (taking daylight savings time into account) instead of UTC.

    usegmt, when true, writes the timezone as the string "GMT" rather than
    the numeric "-0000".  This is needed for HTTP, and only has an effect
    when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # UTC is always written as -0000 unless the caller asked for "GMT".
        zone = '-0000'
        if usegmt:
            zone = 'GMT'
    else:
        now = time.localtime(timeval)
        # Choose the DST offset only if the local zone has DST at all and
        # it is in effect for this particular time.
        offset = time.timezone
        if time.daylight and now[-1]:
            offset = time.altzone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds *west* of UTC while the header zone is
        # expressed *east* of UTC, hence the inverted sign.
        sign = '+'
        if offset > 0:
            sign = '-'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    day_name = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]]
    month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_name, now[2], month_name,
        now[0], now[3], now[4], now[5],
        zone)
|
172 |
|
173 |
|
174 |
|
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a
    random integer below 100000 and the fully qualified host name.  If
    idstring is given it is appended (after a dot) to the local part to
    strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, host)
|
194 |
|
195 |
|
196 |
|
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
|
def parsedate(data):
    """Parse a date string with _parsedate(); return None for false data."""
    if data:
        return _parsedate(data)
    return None
|
204 |
|
205 |
|
def parsedate_tz(data):
    """Parse a date string with _parsedate_tz(); return None for false data."""
    if data:
        return _parsedate_tz(data)
    return None
|
210 |
|
211 |
|
def parseaddr(addr):
    """Parse addr and return the first address found in it.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when nothing could be parsed.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
|
217 |
|
218 |
|
219 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. |
|
def unquote(str):
    """Remove quotes from a string.

    A string of at least two characters wrapped in double quotes loses the
    quotes and has its backslash-escaped backslashes and double quotes
    unescaped; one wrapped in angle brackets just loses the brackets.
    Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Order matters: collapse doubled backslashes before unescaping
        # the double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
|
228 |
|
229 |
|
230 |
|
231 # RFC2231-related functions - parameter encoding and decoding |
|
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes.  When that yields three
    fields, they are returned as the list [charset, language, value];
    otherwise the tuple (None, None, s) is returned.
    """
    fields = s.split(TICK, 2)
    if len(fields) > 2:
        return fields
    return None, None, s
|
238 |
|
239 |
|
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no characters treated as safe.  If neither
    charset nor language is given, the quoted string is returned on its
    own.  Otherwise the result has the form charset'language'quoted-s,
    using the empty string for whichever of charset or language was not
    given.
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    s = quote(s, safe='')
    if charset is None and language is None:
        return s
    # Either field may be left empty as long as both single quotes are
    # present; never let a missing field leak out as the text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
|
254 |
|
255 |
|
# Matches a parameter name of the form NAME*, NAME*NUM or NAME*NUM*.  The
# "name" group is the part before the first '*'; the "num" group is the
# decimal section number, or None when no number is present.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
|
257 |
|
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    The first pair is copied to the result untouched.  Each remaining pair
    whose name matches rfc2231_continuation is collected by base name and
    its segments are joined in numerical order; every other pair has its
    value unquoted and then re-quoted inside double quotes.

    Returns a new list and leaves params itself unmodified.  A collected
    parameter maps to a quoted string, or to a (charset, language, value)
    3-tuple when any of its segment names ended in '*'.  Empty params
    gives an empty list.
    """
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Walk a slice rather than popping from the front of a copy, which
    # shifted the whole list on every iteration.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
|
310 |
|
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    A non-tuple value is simply unquoted and returned.  A tuple is treated
    as (charset, language, text): the text is unquoted and decoded to
    unicode using the charset (or 'us-ascii' when the charset is false),
    with errors passed on as the decoder's error handling scheme.  If the
    charset is not known to Python, fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)