symbian-qemu-0.9.1-12/python-2.6.1/Lib/email/utils.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # Copyright (C) 2001-2006 Python Software Foundation
       
     2 # Author: Barry Warsaw
       
     3 # Contact: email-sig@python.org
       
     4 
       
     5 """Miscellaneous utilities."""
       
     6 
       
# Public names exported by "from email.utils import *".
# NOTE(review): fix_eols() is defined in this module but is not listed
# here -- confirm that the omission is intentional.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
       
    21 
       
    22 import os
       
    23 import re
       
    24 import time
       
    25 import base64
       
    26 import random
       
    27 import socket
       
    28 import urllib
       
    29 import warnings
       
    30 
       
    31 from email._parseaddr import quote
       
    32 from email._parseaddr import AddressList as _AddressList
       
    33 from email._parseaddr import mktime_tz
       
    34 
       
     35 # We need workarounds for bugs in these methods in older Pythons (see below)
       
    36 from email._parseaddr import parsedate as _parsedate
       
    37 from email._parseaddr import parsedate_tz as _parsedate_tz
       
    38 
       
    39 from quopri import decodestring as _qdecode
       
    40 
       
    41 # Intrapackage imports
       
    42 from email.encoders import _bencode, _qencode
       
    43 
       
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters whose presence in a real name makes formataddr() wrap the name
# in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in a real name.
escapesre = re.compile(r'[][\\()"]')
       
    52 
       
    53 
       
    54 
       
    55 # Helpers
       
    56 
       
    57 def _identity(s):
       
    58     return s
       
    59 
       
    60 
       
    61 def _bdecode(s):
       
    62     # We can't quite use base64.encodestring() since it tacks on a "courtesy
       
    63     # newline".  Blech!
       
    64     if not s:
       
    65         return s
       
    66     value = base64.decodestring(s)
       
    67     if not s.endswith('\n') and value.endswith('\n'):
       
    68         return value[:-1]
       
    69     return value
       
    70 
       
    71 
       
    72 
       
    73 def fix_eols(s):
       
    74     """Replace all line-ending characters with \r\n."""
       
    75     # Fix newlines with no preceding carriage return
       
    76     s = re.sub(r'(?<!\r)\n', CRLF, s)
       
    77     # Fix carriage returns with no following newline
       
    78     s = re.sub(r'\r(?!\n)', CRLF, s)
       
    79     return s
       
    80 
       
    81 
       
    82 
       
    83 def formataddr(pair):
       
    84     """The inverse of parseaddr(), this takes a 2-tuple of the form
       
    85     (realname, email_address) and returns the string value suitable
       
    86     for an RFC 2822 From, To or Cc header.
       
    87 
       
    88     If the first element of pair is false, then the second element is
       
    89     returned unmodified.
       
    90     """
       
    91     name, address = pair
       
    92     if name:
       
    93         quotes = ''
       
    94         if specialsre.search(name):
       
    95             quotes = '"'
       
    96         name = escapesre.sub(r'\\\g<0>', name)
       
    97         return '%s%s%s <%s>' % (quotes, name, quotes, address)
       
    98     return address
       
    99 
       
   100 
       
   101 
       
   102 def getaddresses(fieldvalues):
       
   103     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
       
   104     all = COMMASPACE.join(fieldvalues)
       
   105     a = _AddressList(all)
       
   106     return a.addresslist
       
   107 
       
   108 
       
   109 
       
# Pattern for tokens of the form =?charset?encoding?atom?= where encoding
# is "q" or "b" in either case.  Presumably these are RFC 2047 encoded
# words -- confirm.  Nothing else in the visible part of this module
# references this pattern.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
       
   119 
       
   120 
       
   121 
       
   122 def formatdate(timeval=None, localtime=False, usegmt=False):
       
   123     """Returns a date string as specified by RFC 2822, e.g.:
       
   124 
       
   125     Fri, 09 Nov 2001 01:08:47 -0000
       
   126 
       
   127     Optional timeval if given is a floating point time value as accepted by
       
   128     gmtime() and localtime(), otherwise the current time is used.
       
   129 
       
   130     Optional localtime is a flag that when True, interprets timeval, and
       
   131     returns a date relative to the local timezone instead of UTC, properly
       
   132     taking daylight savings time into account.
       
   133 
       
   134     Optional argument usegmt means that the timezone is written out as
       
   135     an ascii string, not numeric one (so "GMT" instead of "+0000"). This
       
   136     is needed for HTTP, and is only used when localtime==False.
       
   137     """
       
   138     # Note: we cannot use strftime() because that honors the locale and RFC
       
   139     # 2822 requires that day and month names be the English abbreviations.
       
   140     if timeval is None:
       
   141         timeval = time.time()
       
   142     if localtime:
       
   143         now = time.localtime(timeval)
       
   144         # Calculate timezone offset, based on whether the local zone has
       
   145         # daylight savings time, and whether DST is in effect.
       
   146         if time.daylight and now[-1]:
       
   147             offset = time.altzone
       
   148         else:
       
   149             offset = time.timezone
       
   150         hours, minutes = divmod(abs(offset), 3600)
       
   151         # Remember offset is in seconds west of UTC, but the timezone is in
       
   152         # minutes east of UTC, so the signs differ.
       
   153         if offset > 0:
       
   154             sign = '-'
       
   155         else:
       
   156             sign = '+'
       
   157         zone = '%s%02d%02d' % (sign, hours, minutes // 60)
       
   158     else:
       
   159         now = time.gmtime(timeval)
       
   160         # Timezone offset is always -0000
       
   161         if usegmt:
       
   162             zone = 'GMT'
       
   163         else:
       
   164             zone = '-0000'
       
   165     return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
       
   166         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
       
   167         now[2],
       
   168         ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       
   169          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
       
   170         now[0], now[3], now[4], now[5],
       
   171         zone)
       
   172 
       
   173 
       
   174 
       
   175 def make_msgid(idstring=None):
       
   176     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
       
   177 
       
   178     <20020201195627.33539.96671@nightshade.la.mastaler.com>
       
   179 
       
   180     Optional idstring if given is a string used to strengthen the
       
   181     uniqueness of the message id.
       
   182     """
       
   183     timeval = time.time()
       
   184     utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
       
   185     pid = os.getpid()
       
   186     randint = random.randrange(100000)
       
   187     if idstring is None:
       
   188         idstring = ''
       
   189     else:
       
   190         idstring = '.' + idstring
       
   191     idhost = socket.getfqdn()
       
   192     msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
       
   193     return msgid
       
   194 
       
   195 
       
   196 
       
   197 # These functions are in the standalone mimelib version only because they've
       
    198 # subsequently been fixed in the latest Python versions.  We use this to work
       
   199 # around broken older Pythons.
       
   200 def parsedate(data):
       
   201     if not data:
       
   202         return None
       
   203     return _parsedate(data)
       
   204 
       
   205 
       
   206 def parsedate_tz(data):
       
   207     if not data:
       
   208         return None
       
   209     return _parsedate_tz(data)
       
   210 
       
   211 
       
   212 def parseaddr(addr):
       
   213     addrs = _AddressList(addr).addresslist
       
   214     if not addrs:
       
   215         return '', ''
       
   216     return addrs[0]
       
   217 
       
   218 
       
   219 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
       
   220 def unquote(str):
       
   221     """Remove quotes from a string."""
       
   222     if len(str) > 1:
       
   223         if str.startswith('"') and str.endswith('"'):
       
   224             return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
       
   225         if str.startswith('<') and str.endswith('>'):
       
   226             return str[1:-1]
       
   227     return str
       
   228 
       
   229 
       
   230 
       
   231 # RFC2231-related functions - parameter encoding and decoding
       
   232 def decode_rfc2231(s):
       
   233     """Decode string according to RFC 2231"""
       
   234     parts = s.split(TICK, 2)
       
   235     if len(parts) <= 2:
       
   236         return None, None, s
       
   237     return parts
       
   238 
       
   239 
       
   240 def encode_rfc2231(s, charset=None, language=None):
       
   241     """Encode string according to RFC 2231.
       
   242 
       
   243     If neither charset nor language is given, then s is returned as-is.  If
       
   244     charset is given but not language, the string is encoded using the empty
       
   245     string for language.
       
   246     """
       
   247     import urllib
       
   248     s = urllib.quote(s, safe='')
       
   249     if charset is None and language is None:
       
   250         return s
       
   251     if language is None:
       
   252         language = ''
       
   253     return "%s'%s'%s" % (charset, language, s)
       
   254 
       
   255 
       
# Matches parameter names that end in RFC 2231 markers: "name*", "name*N"
# and "name*N*".  Group "name" is the base parameter name and group "num"
# is the decimal section number N (None when absent).
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
       
   257 
       
   258 def decode_params(params):
       
   259     """Decode parameters list according to RFC 2231.
       
   260 
       
   261     params is a sequence of 2-tuples containing (param name, string value).
       
   262     """
       
   263     # Copy params so we don't mess with the original
       
   264     params = params[:]
       
   265     new_params = []
       
   266     # Map parameter's name to a list of continuations.  The values are a
       
   267     # 3-tuple of the continuation number, the string value, and a flag
       
   268     # specifying whether a particular segment is %-encoded.
       
   269     rfc2231_params = {}
       
   270     name, value = params.pop(0)
       
   271     new_params.append((name, value))
       
   272     while params:
       
   273         name, value = params.pop(0)
       
   274         if name.endswith('*'):
       
   275             encoded = True
       
   276         else:
       
   277             encoded = False
       
   278         value = unquote(value)
       
   279         mo = rfc2231_continuation.match(name)
       
   280         if mo:
       
   281             name, num = mo.group('name', 'num')
       
   282             if num is not None:
       
   283                 num = int(num)
       
   284             rfc2231_params.setdefault(name, []).append((num, value, encoded))
       
   285         else:
       
   286             new_params.append((name, '"%s"' % quote(value)))
       
   287     if rfc2231_params:
       
   288         for name, continuations in rfc2231_params.items():
       
   289             value = []
       
   290             extended = False
       
   291             # Sort by number
       
   292             continuations.sort()
       
   293             # And now append all values in numerical order, converting
       
   294             # %-encodings for the encoded segments.  If any of the
       
   295             # continuation names ends in a *, then the entire string, after
       
   296             # decoding segments and concatenating, must have the charset and
       
   297             # language specifiers at the beginning of the string.
       
   298             for num, s, encoded in continuations:
       
   299                 if encoded:
       
   300                     s = urllib.unquote(s)
       
   301                     extended = True
       
   302                 value.append(s)
       
   303             value = quote(EMPTYSTRING.join(value))
       
   304             if extended:
       
   305                 charset, language, value = decode_rfc2231(value)
       
   306                 new_params.append((name, (charset, language, '"%s"' % value)))
       
   307             else:
       
   308                 new_params.append((name, '"%s"' % value))
       
   309     return new_params
       
   310 
       
   311 def collapse_rfc2231_value(value, errors='replace',
       
   312                            fallback_charset='us-ascii'):
       
   313     if isinstance(value, tuple):
       
   314         rawval = unquote(value[2])
       
   315         charset = value[0] or 'us-ascii'
       
   316         try:
       
   317             return unicode(rawval, charset, errors)
       
   318         except LookupError:
       
   319             # XXX charset is unknown to Python.
       
   320             return unicode(rawval, fallback_charset, errors)
       
   321     else:
       
   322         return unquote(value)