symbian-qemu-0.9.1-12/python-2.6.1/Lib/rfc822.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """RFC 2822 message manipulation.
       
     2 
       
     3 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
       
     4 the tokenizing of addresses does not adhere to all the quoting rules.
       
     5 
       
     6 Note: RFC 2822 is a long awaited update to RFC 822.  This module should
       
     7 conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
       
     8 effort at RFC 2822 updates has been made, but a thorough audit has not been
       
     9 performed.  Consider any RFC 2822 non-conformance to be a bug.
       
    10 
       
    11     RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
       
    12     RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
       
    13 
       
    14 Directions for use:
       
    15 
       
    16 To create a Message object: first open a file, e.g.:
       
    17 
       
    18   fp = open(file, 'r')
       
    19 
       
    20 You can use any other legal way of getting an open file object, e.g. use
       
    21 sys.stdin or call os.popen().  Then pass the open file object to the Message()
       
    22 constructor:
       
    23 
       
    24   m = Message(fp)
       
    25 
       
    26 This class can work with any input object that supports a readline method.  If
       
    27 the input object has seek and tell capability, the rewindbody method will
       
    28 work; also illegal lines will be pushed back onto the input stream.  If the
       
    29 input object lacks seek but has an `unread' method that can push back a line
       
    30 of input, Message will use that to push back illegal lines.  Thus this class
       
    31 can be used to parse messages coming from a buffered stream.
       
    32 
       
    33 The optional `seekable' argument is provided as a workaround for certain stdio
       
    34 libraries in which tell() discards buffered data before discovering that the
       
    35 lseek() system call doesn't work.  For maximum portability, you should set the
       
    36 seekable argument to zero to prevent that initial tell() when passing in
       
    37 an unseekable object such as a file object created from a socket object.  If
       
    38 it is 1 on entry -- which it is by default -- the tell() method of the open
       
    39 file object is called once; if this raises an exception, seekable is reset to
       
    40 0.  For other nonzero values of seekable, this test is not made.
       
    41 
       
    42 To get the text of a particular header there are several methods:
       
    43 
       
    44   str = m.getheader(name)
       
    45   str = m.getrawheader(name)
       
    46 
       
    47 where name is the name of the header, e.g. 'Subject'.  The difference is that
       
    48 getheader() strips the leading and trailing whitespace, while getrawheader()
       
    49 doesn't.  Both functions retain embedded whitespace (including newlines)
       
    50 exactly as they are specified in the header, and leave the case of the text
       
    51 unchanged.
       
    52 
       
    53 For addresses and address lists there are functions
       
    54 
       
    55   realname, mailaddress = m.getaddr(name)
       
    56   list = m.getaddrlist(name)
       
    57 
       
    58 where the latter returns a list of (realname, mailaddr) tuples.
       
    59 
       
    60 There is also a method
       
    61 
       
    62   time = m.getdate(name)
       
    63 
       
    64 which parses a Date-like field and returns a time-compatible tuple,
       
    65 i.e. a tuple such as returned by time.localtime() or accepted by
       
    66 time.mktime().
       
    67 
       
    68 See the class definition for lower level access methods.
       
    69 
       
    70 There are also some utility functions here.
       
    71 """
       
    72 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
       
    73 
       
    74 import time
       
    75 
       
    76 from warnings import warnpy3k
       
    77 warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
       
    78          stacklevel=2)
       
    79 
       
    80 __all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
       
    81 
       
    82 _blanklines = ('\r\n', '\n')            # Optimization for islast()
       
    83 
       
    84 
       
    85 class Message:
       
    86     """Represents a single RFC 2822-compliant message."""
       
    87 
       
    88     def __init__(self, fp, seekable = 1):
       
    89         """Initialize the class instance and read the headers."""
       
    90         if seekable == 1:
       
    91             # Exercise tell() to make sure it works
       
    92             # (and then assume seek() works, too)
       
    93             try:
       
    94                 fp.tell()
       
    95             except (AttributeError, IOError):
       
    96                 seekable = 0
       
    97         self.fp = fp
       
    98         self.seekable = seekable
       
    99         self.startofheaders = None
       
   100         self.startofbody = None
       
   101         #
       
   102         if self.seekable:
       
   103             try:
       
   104                 self.startofheaders = self.fp.tell()
       
   105             except IOError:
       
   106                 self.seekable = 0
       
   107         #
       
   108         self.readheaders()
       
   109         #
       
   110         if self.seekable:
       
   111             try:
       
   112                 self.startofbody = self.fp.tell()
       
   113             except IOError:
       
   114                 self.seekable = 0
       
   115 
       
   116     def rewindbody(self):
       
   117         """Rewind the file to the start of the body (if seekable)."""
       
   118         if not self.seekable:
       
   119             raise IOError, "unseekable file"
       
   120         self.fp.seek(self.startofbody)
       
   121 
       
   122     def readheaders(self):
       
   123         """Read header lines.
       
   124 
       
   125         Read header lines up to the entirely blank line that terminates them.
       
   126         The (normally blank) line that ends the headers is skipped, but not
       
   127         included in the returned list.  If a non-header line ends the headers,
       
   128         (which is an error), an attempt is made to backspace over it; it is
       
   129         never included in the returned list.
       
   130 
       
   131         The variable self.status is set to the empty string if all went well,
       
   132         otherwise it is an error message.  The variable self.headers is a
       
   133         completely uninterpreted list of lines contained in the header (so
       
   134         printing them will reproduce the header exactly as it appears in the
       
   135         file).
       
   136         """
       
   137         self.dict = {}
       
   138         self.unixfrom = ''
       
   139         self.headers = lst = []
       
   140         self.status = ''
       
   141         headerseen = ""
       
   142         firstline = 1
       
   143         startofline = unread = tell = None
       
   144         if hasattr(self.fp, 'unread'):
       
   145             unread = self.fp.unread
       
   146         elif self.seekable:
       
   147             tell = self.fp.tell
       
   148         while 1:
       
   149             if tell:
       
   150                 try:
       
   151                     startofline = tell()
       
   152                 except IOError:
       
   153                     startofline = tell = None
       
   154                     self.seekable = 0
       
   155             line = self.fp.readline()
       
   156             if not line:
       
   157                 self.status = 'EOF in headers'
       
   158                 break
       
   159             # Skip unix From name time lines
       
   160             if firstline and line.startswith('From '):
       
   161                 self.unixfrom = self.unixfrom + line
       
   162                 continue
       
   163             firstline = 0
       
   164             if headerseen and line[0] in ' \t':
       
   165                 # It's a continuation line.
       
   166                 lst.append(line)
       
   167                 x = (self.dict[headerseen] + "\n " + line.strip())
       
   168                 self.dict[headerseen] = x.strip()
       
   169                 continue
       
   170             elif self.iscomment(line):
       
   171                 # It's a comment.  Ignore it.
       
   172                 continue
       
   173             elif self.islast(line):
       
   174                 # Note! No pushback here!  The delimiter line gets eaten.
       
   175                 break
       
   176             headerseen = self.isheader(line)
       
   177             if headerseen:
       
   178                 # It's a legal header line, save it.
       
   179                 lst.append(line)
       
   180                 self.dict[headerseen] = line[len(headerseen)+1:].strip()
       
   181                 continue
       
   182             else:
       
   183                 # It's not a header line; throw it back and stop here.
       
   184                 if not self.dict:
       
   185                     self.status = 'No headers'
       
   186                 else:
       
   187                     self.status = 'Non-header line where header expected'
       
   188                 # Try to undo the read.
       
   189                 if unread:
       
   190                     unread(line)
       
   191                 elif tell:
       
   192                     self.fp.seek(startofline)
       
   193                 else:
       
   194                     self.status = self.status + '; bad seek'
       
   195                 break
       
   196 
       
   197     def isheader(self, line):
       
   198         """Determine whether a given line is a legal header.
       
   199 
       
   200         This method should return the header name, suitably canonicalized.
       
   201         You may override this method in order to use Message parsing on tagged
       
   202         data in RFC 2822-like formats with special header formats.
       
   203         """
       
   204         i = line.find(':')
       
   205         if i > 0:
       
   206             return line[:i].lower()
       
   207         return None
       
   208 
       
   209     def islast(self, line):
       
   210         """Determine whether a line is a legal end of RFC 2822 headers.
       
   211 
       
   212         You may override this method if your application wants to bend the
       
   213         rules, e.g. to strip trailing whitespace, or to recognize MH template
       
   214         separators ('--------').  For convenience (e.g. for code reading from
       
   215         sockets) a line consisting of \r\n also matches.
       
   216         """
       
   217         return line in _blanklines
       
   218 
       
   219     def iscomment(self, line):
       
   220         """Determine whether a line should be skipped entirely.
       
   221 
       
   222         You may override this method in order to use Message parsing on tagged
       
   223         data in RFC 2822-like formats that support embedded comments or
       
   224         free-text data.
       
   225         """
       
   226         return False
       
   227 
       
   228     def getallmatchingheaders(self, name):
       
   229         """Find all header lines matching a given header name.
       
   230 
       
   231         Look through the list of headers and find all lines matching a given
       
   232         header name (and their continuation lines).  A list of the lines is
       
   233         returned, without interpretation.  If the header does not occur, an
       
   234         empty list is returned.  If the header occurs multiple times, all
       
   235         occurrences are returned.  Case is not important in the header name.
       
   236         """
       
   237         name = name.lower() + ':'
       
   238         n = len(name)
       
   239         lst = []
       
   240         hit = 0
       
   241         for line in self.headers:
       
   242             if line[:n].lower() == name:
       
   243                 hit = 1
       
   244             elif not line[:1].isspace():
       
   245                 hit = 0
       
   246             if hit:
       
   247                 lst.append(line)
       
   248         return lst
       
   249 
       
   250     def getfirstmatchingheader(self, name):
       
   251         """Get the first header line matching name.
       
   252 
       
   253         This is similar to getallmatchingheaders, but it returns only the
       
   254         first matching header (and its continuation lines).
       
   255         """
       
   256         name = name.lower() + ':'
       
   257         n = len(name)
       
   258         lst = []
       
   259         hit = 0
       
   260         for line in self.headers:
       
   261             if hit:
       
   262                 if not line[:1].isspace():
       
   263                     break
       
   264             elif line[:n].lower() == name:
       
   265                 hit = 1
       
   266             if hit:
       
   267                 lst.append(line)
       
   268         return lst
       
   269 
       
   270     def getrawheader(self, name):
       
   271         """A higher-level interface to getfirstmatchingheader().
       
   272 
       
   273         Return a string containing the literal text of the header but with the
       
   274         keyword stripped.  All leading, trailing and embedded whitespace is
       
   275         kept in the string, however.  Return None if the header does not
       
   276         occur.
       
   277         """
       
   278 
       
   279         lst = self.getfirstmatchingheader(name)
       
   280         if not lst:
       
   281             return None
       
   282         lst[0] = lst[0][len(name) + 1:]
       
   283         return ''.join(lst)
       
   284 
       
   285     def getheader(self, name, default=None):
       
   286         """Get the header value for a name.
       
   287 
       
   288         This is the normal interface: it returns a stripped version of the
       
   289         header value for a given header name, or None if it doesn't exist.
       
   290         This uses the dictionary version which finds the *last* such header.
       
   291         """
       
   292         return self.dict.get(name.lower(), default)
       
   293     get = getheader
       
   294 
       
   295     def getheaders(self, name):
       
   296         """Get all values for a header.
       
   297 
       
   298         This returns a list of values for headers given more than once; each
       
   299         value in the result list is stripped in the same way as the result of
       
   300         getheader().  If the header is not given, return an empty list.
       
   301         """
       
   302         result = []
       
   303         current = ''
       
   304         have_header = 0
       
   305         for s in self.getallmatchingheaders(name):
       
   306             if s[0].isspace():
       
   307                 if current:
       
   308                     current = "%s\n %s" % (current, s.strip())
       
   309                 else:
       
   310                     current = s.strip()
       
   311             else:
       
   312                 if have_header:
       
   313                     result.append(current)
       
   314                 current = s[s.find(":") + 1:].strip()
       
   315                 have_header = 1
       
   316         if have_header:
       
   317             result.append(current)
       
   318         return result
       
   319 
       
   320     def getaddr(self, name):
       
   321         """Get a single address from a header, as a tuple.
       
   322 
       
   323         An example return value:
       
   324         ('Guido van Rossum', 'guido@cwi.nl')
       
   325         """
       
   326         # New, by Ben Escoto
       
   327         alist = self.getaddrlist(name)
       
   328         if alist:
       
   329             return alist[0]
       
   330         else:
       
   331             return (None, None)
       
   332 
       
   333     def getaddrlist(self, name):
       
   334         """Get a list of addresses from a header.
       
   335 
       
   336         Retrieves a list of addresses from a header, where each address is a
       
   337         tuple as returned by getaddr().  Scans all named headers, so it works
       
   338         properly with multiple To: or Cc: headers for example.
       
   339         """
       
   340         raw = []
       
   341         for h in self.getallmatchingheaders(name):
       
   342             if h[0] in ' \t':
       
   343                 raw.append(h)
       
   344             else:
       
   345                 if raw:
       
   346                     raw.append(', ')
       
   347                 i = h.find(':')
       
   348                 if i > 0:
       
   349                     addr = h[i+1:]
       
   350                 raw.append(addr)
       
   351         alladdrs = ''.join(raw)
       
   352         a = AddressList(alladdrs)
       
   353         return a.addresslist
       
   354 
       
   355     def getdate(self, name):
       
   356         """Retrieve a date field from a header.
       
   357 
       
   358         Retrieves a date field from the named header, returning a tuple
       
   359         compatible with time.mktime().
       
   360         """
       
   361         try:
       
   362             data = self[name]
       
   363         except KeyError:
       
   364             return None
       
   365         return parsedate(data)
       
   366 
       
   367     def getdate_tz(self, name):
       
   368         """Retrieve a date field from a header as a 10-tuple.
       
   369 
       
   370         The first 9 elements make up a tuple compatible with time.mktime(),
       
   371         and the 10th is the offset of the poster's time zone from GMT/UTC.
       
   372         """
       
   373         try:
       
   374             data = self[name]
       
   375         except KeyError:
       
   376             return None
       
   377         return parsedate_tz(data)
       
   378 
       
   379 
       
   380     # Access as a dictionary (only finds *last* header of each type):
       
   381 
       
   382     def __len__(self):
       
   383         """Get the number of headers in a message."""
       
   384         return len(self.dict)
       
   385 
       
   386     def __getitem__(self, name):
       
   387         """Get a specific header, as from a dictionary."""
       
   388         return self.dict[name.lower()]
       
   389 
       
   390     def __setitem__(self, name, value):
       
   391         """Set the value of a header.
       
   392 
       
   393         Note: This is not a perfect inversion of __getitem__, because any
       
   394         changed headers get stuck at the end of the raw-headers list rather
       
   395         than where the altered header was.
       
   396         """
       
   397         del self[name] # Won't fail if it doesn't exist
       
   398         self.dict[name.lower()] = value
       
   399         text = name + ": " + value
       
   400         for line in text.split("\n"):
       
   401             self.headers.append(line + "\n")
       
   402 
       
   403     def __delitem__(self, name):
       
   404         """Delete all occurrences of a specific header, if it is present."""
       
   405         name = name.lower()
       
   406         if not name in self.dict:
       
   407             return
       
   408         del self.dict[name]
       
   409         name = name + ':'
       
   410         n = len(name)
       
   411         lst = []
       
   412         hit = 0
       
   413         for i in range(len(self.headers)):
       
   414             line = self.headers[i]
       
   415             if line[:n].lower() == name:
       
   416                 hit = 1
       
   417             elif not line[:1].isspace():
       
   418                 hit = 0
       
   419             if hit:
       
   420                 lst.append(i)
       
   421         for i in reversed(lst):
       
   422             del self.headers[i]
       
   423 
       
   424     def setdefault(self, name, default=""):
       
   425         lowername = name.lower()
       
   426         if lowername in self.dict:
       
   427             return self.dict[lowername]
       
   428         else:
       
   429             text = name + ": " + default
       
   430             for line in text.split("\n"):
       
   431                 self.headers.append(line + "\n")
       
   432             self.dict[lowername] = default
       
   433             return default
       
   434 
       
   435     def has_key(self, name):
       
   436         """Determine whether a message contains the named header."""
       
   437         return name.lower() in self.dict
       
   438 
       
   439     def __contains__(self, name):
       
   440         """Determine whether a message contains the named header."""
       
   441         return name.lower() in self.dict
       
   442 
       
   443     def __iter__(self):
       
   444         return iter(self.dict)
       
   445 
       
   446     def keys(self):
       
   447         """Get all of a message's header field names."""
       
   448         return self.dict.keys()
       
   449 
       
   450     def values(self):
       
   451         """Get all of a message's header field values."""
       
   452         return self.dict.values()
       
   453 
       
   454     def items(self):
       
   455         """Get all of a message's headers.
       
   456 
       
   457         Returns a list of name, value tuples.
       
   458         """
       
   459         return self.dict.items()
       
   460 
       
   461     def __str__(self):
       
   462         return ''.join(self.headers)
       
   463 
       
   464 
       
   465 # Utility functions
       
   466 # -----------------
       
   467 
       
   468 # XXX Should fix unquote() and quote() to be really conformant.
       
   469 # XXX The inverses of the parse functions may also be useful.
       
   470 
       
   471 
       
   472 def unquote(s):
       
   473     """Remove quotes from a string."""
       
   474     if len(s) > 1:
       
   475         if s.startswith('"') and s.endswith('"'):
       
   476             return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
       
   477         if s.startswith('<') and s.endswith('>'):
       
   478             return s[1:-1]
       
   479     return s
       
   480 
       
   481 
       
   482 def quote(s):
       
   483     """Add quotes around a string."""
       
   484     return s.replace('\\', '\\\\').replace('"', '\\"')
       
   485 
       
   486 
       
   487 def parseaddr(address):
       
   488     """Parse an address into a (realname, mailaddr) tuple."""
       
   489     a = AddressList(address)
       
   490     lst = a.addresslist
       
   491     if not lst:
       
   492         return (None, None)
       
   493     return lst[0]
       
   494 
       
   495 
       
   496 class AddrlistClass:
       
   497     """Address parser class by Ben Escoto.
       
   498 
       
   499     To understand what this class does, it helps to have a copy of
       
   500     RFC 2822 in front of you.
       
   501 
       
   502     http://www.faqs.org/rfcs/rfc2822.html
       
   503 
       
   504     Note: this class interface is deprecated and may be removed in the future.
       
   505     Use rfc822.AddressList instead.
       
   506     """
       
   507 
       
   508     def __init__(self, field):
       
   509         """Initialize a new instance.
       
   510 
       
   511         `field' is an unparsed address header field, containing one or more
       
   512         addresses.
       
   513         """
       
   514         self.specials = '()<>@,:;.\"[]'
       
   515         self.pos = 0
       
   516         self.LWS = ' \t'
       
   517         self.CR = '\r\n'
       
   518         self.atomends = self.specials + self.LWS + self.CR
       
   519         # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
       
   520         # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
       
   521         # syntax, so allow dots in phrases.
       
   522         self.phraseends = self.atomends.replace('.', '')
       
   523         self.field = field
       
   524         self.commentlist = []
       
   525 
       
   526     def gotonext(self):
       
   527         """Parse up to the start of the next address."""
       
   528         while self.pos < len(self.field):
       
   529             if self.field[self.pos] in self.LWS + '\n\r':
       
   530                 self.pos = self.pos + 1
       
   531             elif self.field[self.pos] == '(':
       
   532                 self.commentlist.append(self.getcomment())
       
   533             else: break
       
   534 
       
   535     def getaddrlist(self):
       
   536         """Parse all addresses.
       
   537 
       
   538         Returns a list containing all of the addresses.
       
   539         """
       
   540         result = []
       
   541         ad = self.getaddress()
       
   542         while ad:
       
   543             result += ad
       
   544             ad = self.getaddress()
       
   545         return result
       
   546 
       
    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) tuples: normally one entry, several
        for a group, and an empty list when nothing could be parsed at the
        current position.  A trailing comma after the address is consumed.
        """
        self.commentlist = []
        self.gotonext()

        # Remember where we started so the addr-spec branch can re-parse.
        oldpos = self.pos
        # NOTE(review): this binds the same list object rather than a copy,
        # so restoring it below does not discard comments appended in the
        # meantime (unless getphraselist rebinds self.commentlist) -- confirm
        # that this is intended.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            # Collect member addresses until the closing ';' or end of field.
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            # Any comments are appended to the phrase in parentheses.
            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special character so parsing can advance.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist
       
   604 
       
   605     def getrouteaddr(self):
       
   606         """Parse a route address (Return-path value).
       
   607 
       
   608         This method just skips all the route stuff and returns the addrspec.
       
   609         """
       
   610         if self.field[self.pos] != '<':
       
   611             return
       
   612 
       
   613         expectroute = 0
       
   614         self.pos += 1
       
   615         self.gotonext()
       
   616         adlist = ""
       
   617         while self.pos < len(self.field):
       
   618             if expectroute:
       
   619                 self.getdomain()
       
   620                 expectroute = 0
       
   621             elif self.field[self.pos] == '>':
       
   622                 self.pos += 1
       
   623                 break
       
   624             elif self.field[self.pos] == '@':
       
   625                 self.pos += 1
       
   626                 expectroute = 1
       
   627             elif self.field[self.pos] == ':':
       
   628                 self.pos += 1
       
   629             else:
       
   630                 adlist = self.getaddrspec()
       
   631                 self.pos += 1
       
   632                 break
       
   633             self.gotonext()
       
   634 
       
   635         return adlist
       
   636 
       
   637     def getaddrspec(self):
       
   638         """Parse an RFC 2822 addr-spec."""
       
   639         aslist = []
       
   640 
       
   641         self.gotonext()
       
   642         while self.pos < len(self.field):
       
   643             if self.field[self.pos] == '.':
       
   644                 aslist.append('.')
       
   645                 self.pos += 1
       
   646             elif self.field[self.pos] == '"':
       
   647                 aslist.append('"%s"' % self.getquote())
       
   648             elif self.field[self.pos] in self.atomends:
       
   649                 break
       
   650             else: aslist.append(self.getatom())
       
   651             self.gotonext()
       
   652 
       
   653         if self.pos >= len(self.field) or self.field[self.pos] != '@':
       
   654             return ''.join(aslist)
       
   655 
       
   656         aslist.append('@')
       
   657         self.pos += 1
       
   658         self.gotonext()
       
   659         return ''.join(aslist) + self.getdomain()
       
   660 
       
   661     def getdomain(self):
       
   662         """Get the complete domain name from an address."""
       
   663         sdlist = []
       
   664         while self.pos < len(self.field):
       
   665             if self.field[self.pos] in self.LWS:
       
   666                 self.pos += 1
       
   667             elif self.field[self.pos] == '(':
       
   668                 self.commentlist.append(self.getcomment())
       
   669             elif self.field[self.pos] == '[':
       
   670                 sdlist.append(self.getdomainliteral())
       
   671             elif self.field[self.pos] == '.':
       
   672                 self.pos += 1
       
   673                 sdlist.append('.')
       
   674             elif self.field[self.pos] in self.atomends:
       
   675                 break
       
   676             else: sdlist.append(self.getatom())
       
   677         return ''.join(sdlist)
       
   678 
       
   679     def getdelimited(self, beginchar, endchars, allowcomments = 1):
       
   680         """Parse a header fragment delimited by special characters.
       
   681 
       
   682         `beginchar' is the start character for the fragment.  If self is not
       
   683         looking at an instance of `beginchar' then getdelimited returns the
       
   684         empty string.
       
   685 
       
   686         `endchars' is a sequence of allowable end-delimiting characters.
       
   687         Parsing stops when one of these is encountered.
       
   688 
       
   689         If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
       
   690         within the parsed fragment.
       
   691         """
       
   692         if self.field[self.pos] != beginchar:
       
   693             return ''
       
   694 
       
   695         slist = ['']
       
   696         quote = 0
       
   697         self.pos += 1
       
   698         while self.pos < len(self.field):
       
   699             if quote == 1:
       
   700                 slist.append(self.field[self.pos])
       
   701                 quote = 0
       
   702             elif self.field[self.pos] in endchars:
       
   703                 self.pos += 1
       
   704                 break
       
   705             elif allowcomments and self.field[self.pos] == '(':
       
   706                 slist.append(self.getcomment())
       
   707                 continue        # have already advanced pos from getcomment
       
   708             elif self.field[self.pos] == '\\':
       
   709                 quote = 1
       
   710             else:
       
   711                 slist.append(self.field[self.pos])
       
   712             self.pos += 1
       
   713 
       
   714         return ''.join(slist)
       
   715 
       
   716     def getquote(self):
       
   717         """Get a quote-delimited fragment from self's field."""
       
   718         return self.getdelimited('"', '"\r', 0)
       
   719 
       
   720     def getcomment(self):
       
   721         """Get a parenthesis-delimited fragment from self's field."""
       
   722         return self.getdelimited('(', ')\r', 1)
       
   723 
       
   724     def getdomainliteral(self):
       
   725         """Parse an RFC 2822 domain-literal."""
       
   726         return '[%s]' % self.getdelimited('[', ']\r', 0)
       
   727 
       
   728     def getatom(self, atomends=None):
       
   729         """Parse an RFC 2822 atom.
       
   730 
       
   731         Optional atomends specifies a different set of end token delimiters
       
   732         (the default is to use self.atomends).  This is used e.g. in
       
   733         getphraselist() since phrase endings must not include the `.' (which
       
   734         is legal in phrases)."""
       
   735         atomlist = ['']
       
   736         if atomends is None:
       
   737             atomends = self.atomends
       
   738 
       
   739         while self.pos < len(self.field):
       
   740             if self.field[self.pos] in atomends:
       
   741                 break
       
   742             else: atomlist.append(self.field[self.pos])
       
   743             self.pos += 1
       
   744 
       
   745         return ''.join(atomlist)
       
   746 
       
   747     def getphraselist(self):
       
   748         """Parse a sequence of RFC 2822 phrases.
       
   749 
       
   750         A phrase is a sequence of words, which are in turn either RFC 2822
       
   751         atoms or quoted-strings.  Phrases are canonicalized by squeezing all
       
   752         runs of continuous whitespace into one space.
       
   753         """
       
   754         plist = []
       
   755 
       
   756         while self.pos < len(self.field):
       
   757             if self.field[self.pos] in self.LWS:
       
   758                 self.pos += 1
       
   759             elif self.field[self.pos] == '"':
       
   760                 plist.append(self.getquote())
       
   761             elif self.field[self.pos] == '(':
       
   762                 self.commentlist.append(self.getcomment())
       
   763             elif self.field[self.pos] in self.phraseends:
       
   764                 break
       
   765             else:
       
   766                 plist.append(self.getatom(self.phraseends))
       
   767 
       
   768         return plist
       
   769 
       
   770 class AddressList(AddrlistClass):
       
   771     """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
       
   772     def __init__(self, field):
       
   773         AddrlistClass.__init__(self, field)
       
   774         if field:
       
   775             self.addresslist = self.getaddrlist()
       
   776         else:
       
   777             self.addresslist = []
       
   778 
       
   779     def __len__(self):
       
   780         return len(self.addresslist)
       
   781 
       
   782     def __str__(self):
       
   783         return ", ".join(map(dump_address_pair, self.addresslist))
       
   784 
       
   785     def __add__(self, other):
       
   786         # Set union
       
   787         newaddr = AddressList(None)
       
   788         newaddr.addresslist = self.addresslist[:]
       
   789         for x in other.addresslist:
       
   790             if not x in self.addresslist:
       
   791                 newaddr.addresslist.append(x)
       
   792         return newaddr
       
   793 
       
   794     def __iadd__(self, other):
       
   795         # Set union, in-place
       
   796         for x in other.addresslist:
       
   797             if not x in self.addresslist:
       
   798                 self.addresslist.append(x)
       
   799         return self
       
   800 
       
   801     def __sub__(self, other):
       
   802         # Set difference
       
   803         newaddr = AddressList(None)
       
   804         for x in self.addresslist:
       
   805             if not x in other.addresslist:
       
   806                 newaddr.addresslist.append(x)
       
   807         return newaddr
       
   808 
       
   809     def __isub__(self, other):
       
   810         # Set difference, in-place
       
   811         for x in other.addresslist:
       
   812             if x in self.addresslist:
       
   813                 self.addresslist.remove(x)
       
   814         return self
       
   815 
       
   816     def __getitem__(self, index):
       
   817         # Make indexing, slices, and 'in' work
       
   818         return self.addresslist[index]
       
   819 
       
   820 def dump_address_pair(pair):
       
   821     """Dump a (name, address) pair in a canonicalized form."""
       
   822     if pair[0]:
       
   823         return '"' + pair[0] + '" <' + pair[1] + '>'
       
   824     else:
       
   825         return pair[1]
       
   826 
       
   827 # Parse a date field
       
   828 
       
   829 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
       
   830                'aug', 'sep', 'oct', 'nov', 'dec',
       
   831                'january', 'february', 'march', 'april', 'may', 'june', 'july',
       
   832                'august', 'september', 'october', 'november', 'december']
       
   833 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
       
   834 
       
   835 # The timezone table does not include the military time zones defined
       
   836 # in RFC822, other than Z.  According to RFC1123, the description in
       
   837 # RFC822 gets the signs wrong, so we can't rely on any such time
       
   838 # zones.  RFC1123 recommends that numeric timezone indicators be used
       
   839 # instead of timezone names.
       
   840 
       
   841 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
       
   842               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
       
   843               'EST': -500, 'EDT': -400,  # Eastern
       
   844               'CST': -600, 'CDT': -500,  # Central
       
   845               'MST': -700, 'MDT': -600,  # Mountain
       
   846               'PST': -800, 'PDT': -700   # Pacific
       
   847               }
       
   848 
       
   849 
       
   850 def parsedate_tz(data):
       
   851     """Convert a date string to a time tuple.
       
   852 
       
   853     Accounts for military timezones.
       
   854     """
       
   855     if not data:
       
   856         return None
       
   857     data = data.split()
       
   858     if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
       
   859         # There's a dayname here. Skip it
       
   860         del data[0]
       
   861     else:
       
   862         # no space after the "weekday,"?
       
   863         i = data[0].rfind(',')
       
   864         if i >= 0:
       
   865             data[0] = data[0][i+1:]
       
   866     if len(data) == 3: # RFC 850 date, deprecated
       
   867         stuff = data[0].split('-')
       
   868         if len(stuff) == 3:
       
   869             data = stuff + data[1:]
       
   870     if len(data) == 4:
       
   871         s = data[3]
       
   872         i = s.find('+')
       
   873         if i > 0:
       
   874             data[3:] = [s[:i], s[i+1:]]
       
   875         else:
       
   876             data.append('') # Dummy tz
       
   877     if len(data) < 5:
       
   878         return None
       
   879     data = data[:5]
       
   880     [dd, mm, yy, tm, tz] = data
       
   881     mm = mm.lower()
       
   882     if not mm in _monthnames:
       
   883         dd, mm = mm, dd.lower()
       
   884         if not mm in _monthnames:
       
   885             return None
       
   886     mm = _monthnames.index(mm)+1
       
   887     if mm > 12: mm = mm - 12
       
   888     if dd[-1] == ',':
       
   889         dd = dd[:-1]
       
   890     i = yy.find(':')
       
   891     if i > 0:
       
   892         yy, tm = tm, yy
       
   893     if yy[-1] == ',':
       
   894         yy = yy[:-1]
       
   895     if not yy[0].isdigit():
       
   896         yy, tz = tz, yy
       
   897     if tm[-1] == ',':
       
   898         tm = tm[:-1]
       
   899     tm = tm.split(':')
       
   900     if len(tm) == 2:
       
   901         [thh, tmm] = tm
       
   902         tss = '0'
       
   903     elif len(tm) == 3:
       
   904         [thh, tmm, tss] = tm
       
   905     else:
       
   906         return None
       
   907     try:
       
   908         yy = int(yy)
       
   909         dd = int(dd)
       
   910         thh = int(thh)
       
   911         tmm = int(tmm)
       
   912         tss = int(tss)
       
   913     except ValueError:
       
   914         return None
       
   915     tzoffset = None
       
   916     tz = tz.upper()
       
   917     if tz in _timezones:
       
   918         tzoffset = _timezones[tz]
       
   919     else:
       
   920         try:
       
   921             tzoffset = int(tz)
       
   922         except ValueError:
       
   923             pass
       
   924     # Convert a timezone offset into seconds ; -0500 -> -18000
       
   925     if tzoffset:
       
   926         if tzoffset < 0:
       
   927             tzsign = -1
       
   928             tzoffset = -tzoffset
       
   929         else:
       
   930             tzsign = 1
       
   931         tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
       
   932     return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
       
   933 
       
   934 
       
   935 def parsedate(data):
       
   936     """Convert a time string to a time tuple."""
       
   937     t = parsedate_tz(data)
       
   938     if t is None:
       
   939         return t
       
   940     return t[:9]
       
   941 
       
   942 
       
   943 def mktime_tz(data):
       
   944     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
       
   945     if data[9] is None:
       
   946         # No zone info, so localtime is better assumption than GMT
       
   947         return time.mktime(data[:8] + (-1,))
       
   948     else:
       
   949         t = time.mktime(data[:8] + (0,))
       
   950         return t - data[9] - time.timezone
       
   951 
       
   952 def formatdate(timeval=None):
       
   953     """Returns time format preferred for Internet standards.
       
   954 
       
   955     Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
       
   956 
       
   957     According to RFC 1123, day and month names must always be in
       
   958     English.  If not for that, this code could use strftime().  It
       
   959     can't because strftime() honors the locale and could generated
       
   960     non-English names.
       
   961     """
       
   962     if timeval is None:
       
   963         timeval = time.time()
       
   964     timeval = time.gmtime(timeval)
       
   965     return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
       
   966             ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
       
   967             timeval[2],
       
   968             ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
       
   969              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
       
   970                                 timeval[0], timeval[3], timeval[4], timeval[5])
       
   971 
       
   972 
       
   973 # When used as script, run a small test program.
       
   974 # The first command line argument must be a filename containing one
       
   975 # message in RFC-822 format.
       
   976 
       
   977 if __name__ == '__main__':
       
   978     import sys, os
       
   979     file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
       
   980     if sys.argv[1:]: file = sys.argv[1]
       
   981     f = open(file, 'r')
       
   982     m = Message(f)
       
   983     print 'From:', m.getaddr('from')
       
   984     print 'To:', m.getaddrlist('to')
       
   985     print 'Subject:', m.getheader('subject')
       
   986     print 'Date:', m.getheader('date')
       
   987     date = m.getdate_tz('date')
       
   988     tz = date[-1]
       
   989     date = time.localtime(mktime_tz(date))
       
   990     if date:
       
   991         print 'ParsedDate:', time.asctime(date),
       
   992         hhmmss = tz
       
   993         hhmm, ss = divmod(hhmmss, 60)
       
   994         hh, mm = divmod(hhmm, 60)
       
   995         print "%+03d%02d" % (hh, mm),
       
   996         if ss: print ".%02d" % ss,
       
   997         print
       
   998     else:
       
   999         print 'ParsedDate:', None
       
  1000     m.rewindbody()
       
  1001     n = 0
       
  1002     while f.readline():
       
  1003         n += 1
       
  1004     print 'Lines:', n
       
  1005     print '-'*70
       
  1006     print 'len =', len(m)
       
  1007     if 'Date' in m: print 'Date =', m['Date']
       
  1008     if 'X-Nonsense' in m: pass
       
  1009     print 'keys =', m.keys()
       
  1010     print 'values =', m.values()
       
  1011     print 'items =', m.items()