symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/encodings/punycode.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # -*- coding: iso-8859-1 -*-
       
     2 """ Codec for the Punycode encoding, as specified in RFC 3492
       
     3 
       
     4 Written by Martin v. Löwis.
       
     5 """
       
     6 
       
     7 import codecs
       
     8 
       
     9 ##################### Encoding #####################################
       
    10 
       
    def segregate(str):
        """3.1 Basic code point segregation.

        Split *str* into its basic (ordinal below 128) and extended code points.

        Returns a pair ``(base, extended)``: ``base`` is the basic characters
        in their original order, joined and encoded as ASCII; ``extended`` is
        a sorted list of the distinct characters with ordinal 128 or above.
        """
        ascii_chars = []
        non_ascii = set()
        for ch in str:
            if ord(ch) >= 128:
                # Duplicates collapse: each extended character is listed once.
                non_ascii.add(ch)
            else:
                ascii_chars.append(ch)
        return "".join(ascii_chars).encode("ascii"), sorted(non_ascii)
       
    23 
       
    def selective_len(str, max):
        """Return the number of characters in str whose ordinal is below max."""
        return sum(1 for ch in str if ord(ch) < max)
       
    31 
       
    def selective_find(str, char, index, pos):
        """Locate the next occurrence of char in str after position pos.

        The scan starts at ``pos + 1``.  ``index`` is a running count that is
        advanced by one for every scanned character that compares less than
        char, so it tracks the position of the match when only characters up
        to and including char are considered.

        Returns ``(index + 1, position)`` for the match found, or ``(-1, -1)``
        when the end of str is reached without finding char.
        """
        end = len(str)
        cursor = pos + 1
        while cursor != end:
            current = str[cursor]
            if current == char:
                return index + 1, cursor
            if current < char:
                # Smaller characters occupy a slot in the restricted view.
                index += 1
            cursor += 1
        return (-1, -1)
       
    49 
       
    def insertion_unsort(str, extended):
        """3.2 Insertion unsort coding.

        For every occurrence in str of each character in extended (taken in
        the order given), compute the integer delta between that insertion and
        the previous one.  Returns the deltas as a list of integers.
        """
        deltas = []
        prev_code = 0x80
        prev_index = -1
        for ch in extended:
            code = ord(ch)
            # Moving from the previous code point to this one skips a full pass
            # over all (count of smaller characters + 1) slots per code point.
            pending = (selective_len(str, code) + 1) * (code - prev_code)
            index, pos = selective_find(str, ch, -1, -1)
            while index != -1:
                pending += index - prev_index
                deltas.append(pending - 1)
                prev_index = index
                # Further occurrences of the same character only pay the
                # positional distance.
                pending = 0
                index, pos = selective_find(str, ch, index, pos)
            prev_code = code

        return deltas
       
    71 
       
    def T(j, bias):
        """Return the threshold for digit position j under the given bias.

        The value ``36 * (j + 1) - bias`` is clamped to the range 1..26.
        """
        # Punycode parameters: tmin = 1, tmax = 26, base = 36
        threshold = 36 * (j + 1) - bias
        return 1 if threshold < 1 else min(threshold, 26)
       
    78 
       
    # Digit alphabet: values 0-25 map to a-z, values 26-35 map to 0-9.
    digits = "abcdefghijklmnopqrstuvwxyz0123456789"


    def generate_generalized_integer(N, bias):
        """3.3 Generalized variable-length integers.

        Encode the integer N as a list of characters drawn from ``digits``,
        using the per-position thresholds given by ``T(j, bias)``.  The final
        character is the first digit whose value is below its threshold.
        """
        encoded = []
        position = 0
        threshold = T(position, bias)
        while N >= threshold:
            # Emit one non-terminal digit and carry the quotient forward.
            N, remainder = divmod(N - threshold, 36 - threshold)
            encoded.append(digits[threshold + remainder])
            position += 1
            threshold = T(position, bias)
        encoded.append(digits[N])
        return encoded
       
    92 
       
    def adapt(delta, first, numchars):
        """Return the new bias after encoding or decoding delta.

        delta is damped (divided by 700 for the first delta, by 2 afterwards),
        increased by ``delta // numchars``, then repeatedly divided by 35 while
        it exceeds 455; each such division contributes 36 to the bias.
        """
        delta //= 700 if first else 2
        delta += delta // numchars
        # ((base - tmin) * tmax) // 2 == 455
        rounds = 0
        while delta > 455:
            delta //= 35  # base - tmin
            rounds += 1
        return 36 * rounds + (36 * delta // (delta + 38))
       
   106 
       
   107 
       
   def generate_integers(baselen, deltas):
       """3.4 Bias adaptation.

       Encode each delta as a generalized variable-length integer, adapting
       the bias after every delta, and return the concatenated digits as one
       string.  baselen is the number of basic code points already present.
       """
       # Punycode parameters: initial bias = 72, damp = 700, skew = 38
       pieces = []
       bias = 72
       handled = 0
       for delta in deltas:
           pieces.extend(generate_generalized_integer(delta, bias))
           handled += 1
           # Only the very first delta uses the heavier damping.
           bias = adapt(delta, handled == 1, baselen + handled)
       return "".join(pieces)
       
   118 
       
   def punycode_encode(text):
       """Encode text as Punycode and return the encoded string.

       The result is the basic (ASCII) characters of text, followed by a "-"
       delimiter when there are any, followed by the encoded insertion deltas
       for the extended characters.
       """
       # segregate() already returns the basic code points ASCII-encoded, so
       # they are used as-is; encoding them a second time would round-trip
       # through the interpreter's default encoding for no benefit.
       base, extended = segregate(text)
       deltas = insertion_unsort(text, extended)
       extended = generate_integers(len(base), deltas)
       if base:
           return base + "-" + extended
       return extended
       
   127 
       
   128 ##################### Decoding #####################################
       
   129 
       
   def decode_generalized_number(extended, extpos, bias, errors):
       """3.3 Generalized variable-length integers.

       Decode one integer from extended, starting at index extpos.  Only the
       characters A-Z (values 0-25) and 0-9 (values 26-35) are accepted as
       digits; lower-case letters are not recognised here.

       Returns ``(next_extpos, value)``.  When the input ends prematurely or
       contains an invalid character, raises UnicodeError if errors is
       "strict"; otherwise returns ``(position, None)``.
       """
       result = 0
       w = 1
       j = 0
       while 1:
           try:
               char = ord(extended[extpos])
           except IndexError:
               if errors == "strict":
                   raise UnicodeError("incomplete punicode string")
               return extpos + 1, None
           extpos += 1
           if 0x41 <= char <= 0x5A:  # A-Z
               digit = char - 0x41
           elif 0x30 <= char <= 0x39:  # 0-9
               digit = char - 22  # 0x30-26
           elif errors == "strict":
               # extpos was already advanced past the offending character, so
               # it sits one position back.  Indexing extended[extpos] would
               # name the wrong character, or raise IndexError at the end.
               raise UnicodeError("Invalid extended code point '%s'"
                                  % extended[extpos - 1])
           else:
               return extpos, None
           t = T(j, bias)
           result += digit * w
           if digit < t:
               # A digit below the threshold terminates the number.
               return extpos, result
           w = w * (36 - t)
           j += 1
       
   158 
       
   159 
       
   def insertion_sort(base, extended, errors):
       """3.2 Insertion unsort coding (decoding direction).

       Decode the deltas in extended one at a time and insert the resulting
       characters into base.  Returns the string built so far when decoding
       stops, either at the end of extended or at the first undecodable delta
       in a non-strict error mode.  Raises UnicodeError in "strict" mode when
       a decoded code point exceeds U+10FFFF.
       """
       code = 0x80
       slot = -1
       bias = 72
       cursor = 0
       total = len(extended)
       while cursor < total:
           next_cursor, delta = decode_generalized_number(extended, cursor,
                                                          bias, errors)
           if delta is None:
               # There was an error in decoding. We can't continue because
               # synchronization is lost.
               return base
           # The delta advances through (len(base) + 1) insertion slots per
           # code point: the quotient bumps the code point, the remainder is
           # the insertion position.
           wraps, slot = divmod(slot + delta + 1, len(base) + 1)
           code += wraps
           if code > 0x10FFFF:
               if errors == "strict":
                   raise UnicodeError("Invalid character U+%x" % code)
               code = ord('?')
           base = base[:slot] + unichr(code) + base[slot:]
           bias = adapt(delta, (cursor == 0), len(base))
           cursor = next_cursor
       return base
       
   184 
       
   def punycode_decode(text, errors):
       """Decode the Punycode string text and return the decoded string.

       Everything before the last "-" is the basic part and everything after
       it is the encoded extended part; without a "-" the whole of text is the
       extended part.  The extended part is upper-cased before decoding.
       """
       # rpartition yields an empty head and the full text as tail when the
       # delimiter is absent, which is exactly the "no basic part" case.
       base, _, extended = text.rpartition("-")
       return insertion_sort(unicode(base, "ascii", errors),
                             extended.upper(), errors)
       
   196 
       
   197 ### Codec APIs
       
   198 
       
   class Codec(codecs.Codec):
       """Stateless Punycode encoder/decoder."""

       def encode(self, input, errors='strict'):
           """Return ``(encoded, length consumed)`` for input.

           errors is accepted for interface compatibility; it is not passed
           on to the encoder.
           """
           return punycode_encode(input), len(input)

       def decode(self, input, errors='strict'):
           """Return ``(decoded, length consumed)`` for input.

           Raises UnicodeError when errors is not one of 'strict', 'replace'
           or 'ignore'.
           """
           if errors not in ('strict', 'replace', 'ignore'):
               raise UnicodeError("Unsupported error handling " + errors)
           return punycode_decode(input, errors), len(input)
       
   210 
       
   class IncrementalEncoder(codecs.IncrementalEncoder):
       """Incremental encoder that encodes each input chunk in one step."""

       def encode(self, input, final=False):
           """Return the Punycode encoding of input; final is not consulted."""
           encoded = punycode_encode(input)
           return encoded
       
   214 
       
   class IncrementalDecoder(codecs.IncrementalDecoder):
       """Incremental decoder that decodes each input chunk in one step."""

       def decode(self, input, final=False):
           """Return input decoded from Punycode; final is not consulted.

           Raises UnicodeError when the decoder's error policy is not one of
           'strict', 'replace' or 'ignore'.
           """
           policy = self.errors
           if policy not in ('strict', 'replace', 'ignore'):
               raise UnicodeError("Unsupported error handling " + policy)
           return punycode_decode(input, policy)
       
   220 
       
   class StreamWriter(Codec, codecs.StreamWriter):
       """Stream writer that takes its encode/decode methods from Codec."""
       
   223 
       
   class StreamReader(Codec, codecs.StreamReader):
       """Stream reader that takes its encode/decode methods from Codec."""
       
   226 
       
   227 ### encodings module API
       
   228 
       
   def getregentry():
       """Return the codecs.CodecInfo entry describing the 'punycode' codec."""
       # As in the original layout, the stateless encode and decode callables
       # are bound to two separate Codec instances.
       stateless_encode = Codec().encode
       stateless_decode = Codec().decode
       return codecs.CodecInfo(
           name='punycode',
           encode=stateless_encode,
           decode=stateless_decode,
           incrementalencoder=IncrementalEncoder,
           incrementaldecoder=IncrementalDecoder,
           streamwriter=StreamWriter,
           streamreader=StreamReader,
       )
       
   238     )