symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/email/_parseaddr.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # Copyright (C) 2002-2007 Python Software Foundation
       
     2 # Contact: email-sig@python.org
       
     3 
       
     4 """Email address parsing code.
       
     5 
       
     6 Lifted directly from rfc822.py.  This should eventually be rewritten.
       
     7 """
       
     8 
       
     9 __all__ = [
       
    10     'mktime_tz',
       
    11     'parsedate',
       
    12     'parsedate_tz',
       
    13     'quote',
       
    14     ]
       
    15 
       
    16 import time
       
    17 
       
    18 SPACE = ' '
       
    19 EMPTYSTRING = ''
       
    20 COMMASPACE = ', '
       
    21 
       
    22 # Parse a date field
       
    23 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
       
    24                'aug', 'sep', 'oct', 'nov', 'dec',
       
    25                'january', 'february', 'march', 'april', 'may', 'june', 'july',
       
    26                'august', 'september', 'october', 'november', 'december']
       
    27 
       
    28 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
       
    29 
       
    30 # The timezone table does not include the military time zones defined
       
    31 # in RFC822, other than Z.  According to RFC1123, the description in
       
    32 # RFC822 gets the signs wrong, so we can't rely on any such time
       
    33 # zones.  RFC1123 recommends that numeric timezone indicators be used
       
    34 # instead of timezone names.
       
    35 
       
    36 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
       
    37               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
       
    38               'EST': -500, 'EDT': -400,  # Eastern
       
    39               'CST': -600, 'CDT': -500,  # Central
       
    40               'MST': -700, 'MDT': -600,  # Mountain
       
    41               'PST': -800, 'PDT': -700   # Pacific
       
    42               }
       
    43 
       
    44 
       
    45 def parsedate_tz(data):
       
    46     """Convert a date string to a time tuple.
       
    47 
       
    48     Accounts for military timezones.
       
    49     """
       
    50     data = data.split()
       
    51     # The FWS after the comma after the day-of-week is optional, so search and
       
    52     # adjust for this.
       
    53     if data[0].endswith(',') or data[0].lower() in _daynames:
       
    54         # There's a dayname here. Skip it
       
    55         del data[0]
       
    56     else:
       
    57         i = data[0].rfind(',')
       
    58         if i >= 0:
       
    59             data[0] = data[0][i+1:]
       
    60     if len(data) == 3: # RFC 850 date, deprecated
       
    61         stuff = data[0].split('-')
       
    62         if len(stuff) == 3:
       
    63             data = stuff + data[1:]
       
    64     if len(data) == 4:
       
    65         s = data[3]
       
    66         i = s.find('+')
       
    67         if i > 0:
       
    68             data[3:] = [s[:i], s[i+1:]]
       
    69         else:
       
    70             data.append('') # Dummy tz
       
    71     if len(data) < 5:
       
    72         return None
       
    73     data = data[:5]
       
    74     [dd, mm, yy, tm, tz] = data
       
    75     mm = mm.lower()
       
    76     if mm not in _monthnames:
       
    77         dd, mm = mm, dd.lower()
       
    78         if mm not in _monthnames:
       
    79             return None
       
    80     mm = _monthnames.index(mm) + 1
       
    81     if mm > 12:
       
    82         mm -= 12
       
    83     if dd[-1] == ',':
       
    84         dd = dd[:-1]
       
    85     i = yy.find(':')
       
    86     if i > 0:
       
    87         yy, tm = tm, yy
       
    88     if yy[-1] == ',':
       
    89         yy = yy[:-1]
       
    90     if not yy[0].isdigit():
       
    91         yy, tz = tz, yy
       
    92     if tm[-1] == ',':
       
    93         tm = tm[:-1]
       
    94     tm = tm.split(':')
       
    95     if len(tm) == 2:
       
    96         [thh, tmm] = tm
       
    97         tss = '0'
       
    98     elif len(tm) == 3:
       
    99         [thh, tmm, tss] = tm
       
   100     else:
       
   101         return None
       
   102     try:
       
   103         yy = int(yy)
       
   104         dd = int(dd)
       
   105         thh = int(thh)
       
   106         tmm = int(tmm)
       
   107         tss = int(tss)
       
   108     except ValueError:
       
   109         return None
       
   110     tzoffset = None
       
   111     tz = tz.upper()
       
   112     if tz in _timezones:
       
   113         tzoffset = _timezones[tz]
       
   114     else:
       
   115         try:
       
   116             tzoffset = int(tz)
       
   117         except ValueError:
       
   118             pass
       
   119     # Convert a timezone offset into seconds ; -0500 -> -18000
       
   120     if tzoffset:
       
   121         if tzoffset < 0:
       
   122             tzsign = -1
       
   123             tzoffset = -tzoffset
       
   124         else:
       
   125             tzsign = 1
       
   126         tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
       
   127     # Daylight Saving Time flag is set to -1, since DST is unknown.
       
   128     return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
       
   129 
       
   130 
       
   131 def parsedate(data):
       
   132     """Convert a time string to a time tuple."""
       
   133     t = parsedate_tz(data)
       
   134     if isinstance(t, tuple):
       
   135         return t[:9]
       
   136     else:
       
   137         return t
       
   138 
       
   139 
       
   140 def mktime_tz(data):
       
   141     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
       
   142     if data[9] is None:
       
   143         # No zone info, so localtime is better assumption than GMT
       
   144         return time.mktime(data[:8] + (-1,))
       
   145     else:
       
   146         t = time.mktime(data[:8] + (0,))
       
   147         return t - data[9] - time.timezone
       
   148 
       
   149 
       
   150 def quote(str):
       
   151     """Add quotes around a string."""
       
   152     return str.replace('\\', '\\\\').replace('"', '\\"')
       
   153 
       
   154 
       
   155 class AddrlistClass:
       
   156     """Address parser class by Ben Escoto.
       
   157 
       
   158     To understand what this class does, it helps to have a copy of RFC 2822 in
       
   159     front of you.
       
   160 
       
   161     Note: this class interface is deprecated and may be removed in the future.
       
   162     Use rfc822.AddressList instead.
       
   163     """
       
   164 
       
   165     def __init__(self, field):
       
   166         """Initialize a new instance.
       
   167 
       
   168         `field' is an unparsed address header field, containing
       
   169         one or more addresses.
       
   170         """
       
   171         self.specials = '()<>@,:;.\"[]'
       
   172         self.pos = 0
       
   173         self.LWS = ' \t'
       
   174         self.CR = '\r\n'
       
   175         self.FWS = self.LWS + self.CR
       
   176         self.atomends = self.specials + self.LWS + self.CR
       
   177         # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
       
   178         # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
       
   179         # syntax, so allow dots in phrases.
       
   180         self.phraseends = self.atomends.replace('.', '')
       
   181         self.field = field
       
   182         self.commentlist = []
       
   183 
       
   184     def gotonext(self):
       
   185         """Parse up to the start of the next address."""
       
   186         while self.pos < len(self.field):
       
   187             if self.field[self.pos] in self.LWS + '\n\r':
       
   188                 self.pos += 1
       
   189             elif self.field[self.pos] == '(':
       
   190                 self.commentlist.append(self.getcomment())
       
   191             else:
       
   192                 break
       
   193 
       
   194     def getaddrlist(self):
       
   195         """Parse all addresses.
       
   196 
       
   197         Returns a list containing all of the addresses.
       
   198         """
       
   199         result = []
       
   200         while self.pos < len(self.field):
       
   201             ad = self.getaddress()
       
   202             if ad:
       
   203                 result += ad
       
   204             else:
       
   205                 result.append(('', ''))
       
   206         return result
       
   207 
       
   208     def getaddress(self):
       
   209         """Parse the next address."""
       
   210         self.commentlist = []
       
   211         self.gotonext()
       
   212 
       
   213         oldpos = self.pos
       
   214         oldcl = self.commentlist
       
   215         plist = self.getphraselist()
       
   216 
       
   217         self.gotonext()
       
   218         returnlist = []
       
   219 
       
   220         if self.pos >= len(self.field):
       
   221             # Bad email address technically, no domain.
       
   222             if plist:
       
   223                 returnlist = [(SPACE.join(self.commentlist), plist[0])]
       
   224 
       
   225         elif self.field[self.pos] in '.@':
       
   226             # email address is just an addrspec
       
   227             # this isn't very efficient since we start over
       
   228             self.pos = oldpos
       
   229             self.commentlist = oldcl
       
   230             addrspec = self.getaddrspec()
       
   231             returnlist = [(SPACE.join(self.commentlist), addrspec)]
       
   232 
       
   233         elif self.field[self.pos] == ':':
       
   234             # address is a group
       
   235             returnlist = []
       
   236 
       
   237             fieldlen = len(self.field)
       
   238             self.pos += 1
       
   239             while self.pos < len(self.field):
       
   240                 self.gotonext()
       
   241                 if self.pos < fieldlen and self.field[self.pos] == ';':
       
   242                     self.pos += 1
       
   243                     break
       
   244                 returnlist = returnlist + self.getaddress()
       
   245 
       
   246         elif self.field[self.pos] == '<':
       
   247             # Address is a phrase then a route addr
       
   248             routeaddr = self.getrouteaddr()
       
   249 
       
   250             if self.commentlist:
       
   251                 returnlist = [(SPACE.join(plist) + ' (' +
       
   252                                ' '.join(self.commentlist) + ')', routeaddr)]
       
   253             else:
       
   254                 returnlist = [(SPACE.join(plist), routeaddr)]
       
   255 
       
   256         else:
       
   257             if plist:
       
   258                 returnlist = [(SPACE.join(self.commentlist), plist[0])]
       
   259             elif self.field[self.pos] in self.specials:
       
   260                 self.pos += 1
       
   261 
       
   262         self.gotonext()
       
   263         if self.pos < len(self.field) and self.field[self.pos] == ',':
       
   264             self.pos += 1
       
   265         return returnlist
       
   266 
       
   267     def getrouteaddr(self):
       
   268         """Parse a route address (Return-path value).
       
   269 
       
   270         This method just skips all the route stuff and returns the addrspec.
       
   271         """
       
   272         if self.field[self.pos] != '<':
       
   273             return
       
   274 
       
   275         expectroute = False
       
   276         self.pos += 1
       
   277         self.gotonext()
       
   278         adlist = ''
       
   279         while self.pos < len(self.field):
       
   280             if expectroute:
       
   281                 self.getdomain()
       
   282                 expectroute = False
       
   283             elif self.field[self.pos] == '>':
       
   284                 self.pos += 1
       
   285                 break
       
   286             elif self.field[self.pos] == '@':
       
   287                 self.pos += 1
       
   288                 expectroute = True
       
   289             elif self.field[self.pos] == ':':
       
   290                 self.pos += 1
       
   291             else:
       
   292                 adlist = self.getaddrspec()
       
   293                 self.pos += 1
       
   294                 break
       
   295             self.gotonext()
       
   296 
       
   297         return adlist
       
   298 
       
   299     def getaddrspec(self):
       
   300         """Parse an RFC 2822 addr-spec."""
       
   301         aslist = []
       
   302 
       
   303         self.gotonext()
       
   304         while self.pos < len(self.field):
       
   305             if self.field[self.pos] == '.':
       
   306                 aslist.append('.')
       
   307                 self.pos += 1
       
   308             elif self.field[self.pos] == '"':
       
   309                 aslist.append('"%s"' % self.getquote())
       
   310             elif self.field[self.pos] in self.atomends:
       
   311                 break
       
   312             else:
       
   313                 aslist.append(self.getatom())
       
   314             self.gotonext()
       
   315 
       
   316         if self.pos >= len(self.field) or self.field[self.pos] != '@':
       
   317             return EMPTYSTRING.join(aslist)
       
   318 
       
   319         aslist.append('@')
       
   320         self.pos += 1
       
   321         self.gotonext()
       
   322         return EMPTYSTRING.join(aslist) + self.getdomain()
       
   323 
       
   324     def getdomain(self):
       
   325         """Get the complete domain name from an address."""
       
   326         sdlist = []
       
   327         while self.pos < len(self.field):
       
   328             if self.field[self.pos] in self.LWS:
       
   329                 self.pos += 1
       
   330             elif self.field[self.pos] == '(':
       
   331                 self.commentlist.append(self.getcomment())
       
   332             elif self.field[self.pos] == '[':
       
   333                 sdlist.append(self.getdomainliteral())
       
   334             elif self.field[self.pos] == '.':
       
   335                 self.pos += 1
       
   336                 sdlist.append('.')
       
   337             elif self.field[self.pos] in self.atomends:
       
   338                 break
       
   339             else:
       
   340                 sdlist.append(self.getatom())
       
   341         return EMPTYSTRING.join(sdlist)
       
   342 
       
   343     def getdelimited(self, beginchar, endchars, allowcomments=True):
       
   344         """Parse a header fragment delimited by special characters.
       
   345 
       
   346         `beginchar' is the start character for the fragment.
       
   347         If self is not looking at an instance of `beginchar' then
       
   348         getdelimited returns the empty string.
       
   349 
       
   350         `endchars' is a sequence of allowable end-delimiting characters.
       
   351         Parsing stops when one of these is encountered.
       
   352 
       
   353         If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
       
   354         within the parsed fragment.
       
   355         """
       
   356         if self.field[self.pos] != beginchar:
       
   357             return ''
       
   358 
       
   359         slist = ['']
       
   360         quote = False
       
   361         self.pos += 1
       
   362         while self.pos < len(self.field):
       
   363             if quote:
       
   364                 slist.append(self.field[self.pos])
       
   365                 quote = False
       
   366             elif self.field[self.pos] in endchars:
       
   367                 self.pos += 1
       
   368                 break
       
   369             elif allowcomments and self.field[self.pos] == '(':
       
   370                 slist.append(self.getcomment())
       
   371                 continue        # have already advanced pos from getcomment
       
   372             elif self.field[self.pos] == '\\':
       
   373                 quote = True
       
   374             else:
       
   375                 slist.append(self.field[self.pos])
       
   376             self.pos += 1
       
   377 
       
   378         return EMPTYSTRING.join(slist)
       
   379 
       
   380     def getquote(self):
       
   381         """Get a quote-delimited fragment from self's field."""
       
   382         return self.getdelimited('"', '"\r', False)
       
   383 
       
   384     def getcomment(self):
       
   385         """Get a parenthesis-delimited fragment from self's field."""
       
   386         return self.getdelimited('(', ')\r', True)
       
   387 
       
   388     def getdomainliteral(self):
       
   389         """Parse an RFC 2822 domain-literal."""
       
   390         return '[%s]' % self.getdelimited('[', ']\r', False)
       
   391 
       
   392     def getatom(self, atomends=None):
       
   393         """Parse an RFC 2822 atom.
       
   394 
       
   395         Optional atomends specifies a different set of end token delimiters
       
   396         (the default is to use self.atomends).  This is used e.g. in
       
   397         getphraselist() since phrase endings must not include the `.' (which
       
   398         is legal in phrases)."""
       
   399         atomlist = ['']
       
   400         if atomends is None:
       
   401             atomends = self.atomends
       
   402 
       
   403         while self.pos < len(self.field):
       
   404             if self.field[self.pos] in atomends:
       
   405                 break
       
   406             else:
       
   407                 atomlist.append(self.field[self.pos])
       
   408             self.pos += 1
       
   409 
       
   410         return EMPTYSTRING.join(atomlist)
       
   411 
       
   412     def getphraselist(self):
       
   413         """Parse a sequence of RFC 2822 phrases.
       
   414 
       
   415         A phrase is a sequence of words, which are in turn either RFC 2822
       
   416         atoms or quoted-strings.  Phrases are canonicalized by squeezing all
       
   417         runs of continuous whitespace into one space.
       
   418         """
       
   419         plist = []
       
   420 
       
   421         while self.pos < len(self.field):
       
   422             if self.field[self.pos] in self.FWS:
       
   423                 self.pos += 1
       
   424             elif self.field[self.pos] == '"':
       
   425                 plist.append(self.getquote())
       
   426             elif self.field[self.pos] == '(':
       
   427                 self.commentlist.append(self.getcomment())
       
   428             elif self.field[self.pos] in self.phraseends:
       
   429                 break
       
   430             else:
       
   431                 plist.append(self.getatom(self.phraseends))
       
   432 
       
   433         return plist
       
   434 
       
   435 class AddressList(AddrlistClass):
       
   436     """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
       
   437     def __init__(self, field):
       
   438         AddrlistClass.__init__(self, field)
       
   439         if field:
       
   440             self.addresslist = self.getaddrlist()
       
   441         else:
       
   442             self.addresslist = []
       
   443 
       
   444     def __len__(self):
       
   445         return len(self.addresslist)
       
   446 
       
   447     def __add__(self, other):
       
   448         # Set union
       
   449         newaddr = AddressList(None)
       
   450         newaddr.addresslist = self.addresslist[:]
       
   451         for x in other.addresslist:
       
   452             if not x in self.addresslist:
       
   453                 newaddr.addresslist.append(x)
       
   454         return newaddr
       
   455 
       
   456     def __iadd__(self, other):
       
   457         # Set union, in-place
       
   458         for x in other.addresslist:
       
   459             if not x in self.addresslist:
       
   460                 self.addresslist.append(x)
       
   461         return self
       
   462 
       
   463     def __sub__(self, other):
       
   464         # Set difference
       
   465         newaddr = AddressList(None)
       
   466         for x in self.addresslist:
       
   467             if not x in other.addresslist:
       
   468                 newaddr.addresslist.append(x)
       
   469         return newaddr
       
   470 
       
   471     def __isub__(self, other):
       
   472         # Set difference, in-place
       
   473         for x in other.addresslist:
       
   474             if x in self.addresslist:
       
   475                 self.addresslist.remove(x)
       
   476         return self
       
   477 
       
   478     def __getitem__(self, index):
       
   479         # Make indexing, slices, and 'in' work
       
   480         return self.addresslist[index]