|
1 # Copyright (C) 2002-2007 Python Software Foundation |
|
2 # Contact: email-sig@python.org |
|
3 |
|
4 """Email address parsing code. |
|
5 |
|
6 Lifted directly from rfc822.py. This should eventually be rewritten. |
|
7 """ |
|
8 |
|
# Public names exported by a star-import of this module.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]
|
15 |
|
16 import time |
|
17 |
|
# Frequently used string literals, named for readability.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
|
21 |
|
# Parse a date field
# Abbreviated month names come first, full names second, so that
# (index % 12) + 1 is the month number for either spelling.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are offsets written as signed HHMM integers (e.g. -500 == -05:00).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The result is ``(year, month, day, hour, minute, second, 0, 1, -1,
    tzoffset)``.  The weekday, day-of-year and DST slots are always the
    placeholders 0, 1 and -1.  ``tzoffset`` is the zone offset in seconds
    east of UTC, or None when the string carries no recognizable zone.

    Zone names are looked up in ``_timezones``; of the military zones only
    ``Z`` is recognized.  Anything else is tried as a numeric ``HHMM``
    offset.

    Returns None when the string is empty or cannot be parsed.
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input: nothing to parse.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "19:12:08+0100" or
        # "19:12:08-0500".  Split it off; a leading sign (index 0) is not
        # treated as a separator.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            i = s.find('-')
            if i > 0:
                # Keep the minus sign so the offset stays negative.
                data[3:] = [s[:i], s[i:]]
            else:
                data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Nov 20" as well as "20 Nov".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # endswith()/slicing instead of indexing: any of these fields can be an
    # empty string for malformed input (e.g. "jan--2 12:00 GMT").
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are in the opposite order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are in the opposite order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
|
129 |
|
130 |
|
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Delegates to parsedate_tz() and drops the trailing timezone offset.
    A non-tuple result from parsedate_tz() (None for an unparseable
    string) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    return parsed[:9] if isinstance(parsed, tuple) else parsed
|
138 |
|
139 |
|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data' is a sequence whose first six items are year, month, day, hour,
    minute and second, and whose item 9 is the zone offset in seconds east
    of UTC, or None if the zone is unknown.

    Returns seconds since the epoch as a float.  With no zone information
    the fields are interpreted as local time.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))

    # Local import: this module only imports `time' at file level.
    import calendar

    # timegm() treats the fields as UTC directly.  The previous
    # mktime()/time.timezone round trip applied today's local UTC offset
    # to every date, which is wrong where the zone's offset has changed
    # historically, and was limited to the platform mktime() range.
    return float(calendar.timegm(tuple(data[:6])) - data[9])
|
148 |
|
149 |
|
def quote(str):
    """Escape a string for use inside a quoted-string.

    Each backslash is doubled first, then each double quote is prefixed
    with a backslash.  No surrounding quote characters are added.
    """
    backslashes_doubled = str.replace('\\', '\\\\')
    return backslashes_doubled.replace('"', '\\"')
|
153 |
|
154 |
|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser walks `self.field' with the cursor `self.pos'.  Comments met
    along the way are collected in `self.commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips white space and line breaks; comments are consumed and
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (name, address) pairs, one per parsed address.
        Where nothing could be parsed, a ('', '') placeholder is added.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs.  The list is empty when
        nothing was recognized, has one item for a single mailbox, and may
        have several items for a group ("name: addr, addr;").
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # NOTE: this is an alias, not a copy, so comments collected by
        # getphraselist() are still in the list after the "restore" below.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if (self.pos < len(self.field)
                        and self.field[self.pos] == ';'):
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of an "@domain" route element; value is discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns "local@domain", or just the local part when no `@' follows
        it.  Returns the empty string when an `@' is present but the domain
        is empty or invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so that failed parsing is visible to the caller.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found, because such a
        domain is bogus.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't parse domains with two `@', such as
                # "a@malicious.org@important.com": accepting the prefix
                # would let a spoofed address pass as a valid one.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escape_next = False
        self.pos += 1
        while self.pos < len(self.field):
            if escape_next:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                escape_next = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escape_next = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments are moved to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|
434 |
|
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses supporting set-like operators."""

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or '') is not parsed at all.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Union: everything in self, then the entries of other that self
        # does not already hold.
        combined = AddressList(None)
        combined.addresslist = self.addresslist[:]
        combined.addresslist.extend(
            [entry for entry in other.addresslist
             if entry not in self.addresslist])
        return combined

    def __iadd__(self, other):
        # In-place union.  Membership is tested against the growing list.
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Difference: the entries of self that other does not hold.
        remaining = AddressList(None)
        remaining.addresslist.extend(
            [entry for entry in self.addresslist
             if entry not in other.addresslist])
        return remaining

    def __isub__(self, other):
        # In-place difference: remove one occurrence per entry of other.
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Delegating to the list makes indexing, slices, and 'in' work.
        return self.addresslist[index]