symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/encodings/idna.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
       
     2 
       
     3 import stringprep, re, codecs
       
     4 from unicodedata import ucd_3_2_0 as unicodedata
       
     5 
       
# IDNA section 3.1
# Pattern matching any one of the four code points listed here as label
# separators: U+002E, U+3002, U+FF0E and U+FF61.
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5
# ACE prefix as a byte string; it is tested with startswith() and prepended
# to Punycode output elsewhere in this module.
ace_prefix = "xn--"
# The same prefix decoded from ASCII, for comparison against Unicode labels.
uace_prefix = unicode(ace_prefix, "ascii")
       
    12 
       
    13 # This assumes query strings, so AllowUnassigned is true
       
    14 def nameprep(label):
       
    15     # Map
       
    16     newlabel = []
       
    17     for c in label:
       
    18         if stringprep.in_table_b1(c):
       
    19             # Map to nothing
       
    20             continue
       
    21         newlabel.append(stringprep.map_table_b2(c))
       
    22     label = u"".join(newlabel)
       
    23 
       
    24     # Normalize
       
    25     label = unicodedata.normalize("NFKC", label)
       
    26 
       
    27     # Prohibit
       
    28     for c in label:
       
    29         if stringprep.in_table_c12(c) or \
       
    30            stringprep.in_table_c22(c) or \
       
    31            stringprep.in_table_c3(c) or \
       
    32            stringprep.in_table_c4(c) or \
       
    33            stringprep.in_table_c5(c) or \
       
    34            stringprep.in_table_c6(c) or \
       
    35            stringprep.in_table_c7(c) or \
       
    36            stringprep.in_table_c8(c) or \
       
    37            stringprep.in_table_c9(c):
       
    38             raise UnicodeError("Invalid character %r" % c)
       
    39 
       
    40     # Check bidi
       
    41     RandAL = map(stringprep.in_table_d1, label)
       
    42     for c in RandAL:
       
    43         if c:
       
    44             # There is a RandAL char in the string. Must perform further
       
    45             # tests:
       
    46             # 1) The characters in section 5.8 MUST be prohibited.
       
    47             # This is table C.8, which was already checked
       
    48             # 2) If a string contains any RandALCat character, the string
       
    49             # MUST NOT contain any LCat character.
       
    50             if filter(stringprep.in_table_d2, label):
       
    51                 raise UnicodeError("Violation of BIDI requirement 2")
       
    52 
       
    53             # 3) If a string contains any RandALCat character, a
       
    54             # RandALCat character MUST be the first character of the
       
    55             # string, and a RandALCat character MUST be the last
       
    56             # character of the string.
       
    57             if not RandAL[0] or not RandAL[-1]:
       
    58                 raise UnicodeError("Violation of BIDI requirement 3")
       
    59 
       
    60     return label
       
    61 
       
    62 def ToASCII(label):
       
    63     try:
       
    64         # Step 1: try ASCII
       
    65         label = label.encode("ascii")
       
    66     except UnicodeError:
       
    67         pass
       
    68     else:
       
    69         # Skip to step 3: UseSTD3ASCIIRules is false, so
       
    70         # Skip to step 8.
       
    71         if 0 < len(label) < 64:
       
    72             return label
       
    73         raise UnicodeError("label empty or too long")
       
    74 
       
    75     # Step 2: nameprep
       
    76     label = nameprep(label)
       
    77 
       
    78     # Step 3: UseSTD3ASCIIRules is false
       
    79     # Step 4: try ASCII
       
    80     try:
       
    81         label = label.encode("ascii")
       
    82     except UnicodeError:
       
    83         pass
       
    84     else:
       
    85         # Skip to step 8.
       
    86         if 0 < len(label) < 64:
       
    87             return label
       
    88         raise UnicodeError("label empty or too long")
       
    89 
       
    90     # Step 5: Check ACE prefix
       
    91     if label.startswith(uace_prefix):
       
    92         raise UnicodeError("Label starts with ACE prefix")
       
    93 
       
    94     # Step 6: Encode with PUNYCODE
       
    95     label = label.encode("punycode")
       
    96 
       
    97     # Step 7: Prepend ACE prefix
       
    98     label = ace_prefix + label
       
    99 
       
   100     # Step 8: Check size
       
   101     if 0 < len(label) < 64:
       
   102         return label
       
   103     raise UnicodeError("label empty or too long")
       
   104 
       
   105 def ToUnicode(label):
       
   106     # Step 1: Check for ASCII
       
   107     if isinstance(label, str):
       
   108         pure_ascii = True
       
   109     else:
       
   110         try:
       
   111             label = label.encode("ascii")
       
   112             pure_ascii = True
       
   113         except UnicodeError:
       
   114             pure_ascii = False
       
   115     if not pure_ascii:
       
   116         # Step 2: Perform nameprep
       
   117         label = nameprep(label)
       
   118         # It doesn't say this, but apparently, it should be ASCII now
       
   119         try:
       
   120             label = label.encode("ascii")
       
   121         except UnicodeError:
       
   122             raise UnicodeError("Invalid character in IDN label")
       
   123     # Step 3: Check for ACE prefix
       
   124     if not label.startswith(ace_prefix):
       
   125         return unicode(label, "ascii")
       
   126 
       
   127     # Step 4: Remove ACE prefix
       
   128     label1 = label[len(ace_prefix):]
       
   129 
       
   130     # Step 5: Decode using PUNYCODE
       
   131     result = label1.decode("punycode")
       
   132 
       
   133     # Step 6: Apply ToASCII
       
   134     label2 = ToASCII(result)
       
   135 
       
   136     # Step 7: Compare the result of step 6 with the one of step 3
       
   137     # label2 will already be in lower case.
       
   138     if label.lower() != label2:
       
   139         raise UnicodeError("IDNA does not round-trip", label, label2)
       
   140 
       
   141     # Step 8: return the result of step 5
       
   142     return result
       
   143 
       
   144 ### Codec APIs
       
   145 
       
   146 class Codec(codecs.Codec):
       
   147     def encode(self,input,errors='strict'):
       
   148 
       
   149         if errors != 'strict':
       
   150             # IDNA is quite clear that implementations must be strict
       
   151             raise UnicodeError("unsupported error handling "+errors)
       
   152 
       
   153         if not input:
       
   154             return "", 0
       
   155 
       
   156         result = []
       
   157         labels = dots.split(input)
       
   158         if labels and len(labels[-1])==0:
       
   159             trailing_dot = '.'
       
   160             del labels[-1]
       
   161         else:
       
   162             trailing_dot = ''
       
   163         for label in labels:
       
   164             result.append(ToASCII(label))
       
   165         # Join with U+002E
       
   166         return ".".join(result)+trailing_dot, len(input)
       
   167 
       
   168     def decode(self,input,errors='strict'):
       
   169 
       
   170         if errors != 'strict':
       
   171             raise UnicodeError("Unsupported error handling "+errors)
       
   172 
       
   173         if not input:
       
   174             return u"", 0
       
   175 
       
   176         # IDNA allows decoding to operate on Unicode strings, too.
       
   177         if isinstance(input, unicode):
       
   178             labels = dots.split(input)
       
   179         else:
       
   180             # Must be ASCII string
       
   181             input = str(input)
       
   182             unicode(input, "ascii")
       
   183             labels = input.split(".")
       
   184 
       
   185         if labels and len(labels[-1]) == 0:
       
   186             trailing_dot = u'.'
       
   187             del labels[-1]
       
   188         else:
       
   189             trailing_dot = u''
       
   190 
       
   191         result = []
       
   192         for label in labels:
       
   193             result.append(ToUnicode(label))
       
   194 
       
   195         return u".".join(result)+trailing_dot, len(input)
       
   196 
       
   197 class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
       
   198     def _buffer_encode(self, input, errors, final):
       
   199         if errors != 'strict':
       
   200             # IDNA is quite clear that implementations must be strict
       
   201             raise UnicodeError("unsupported error handling "+errors)
       
   202 
       
   203         if not input:
       
   204             return ("", 0)
       
   205 
       
   206         labels = dots.split(input)
       
   207         trailing_dot = u''
       
   208         if labels:
       
   209             if not labels[-1]:
       
   210                 trailing_dot = '.'
       
   211                 del labels[-1]
       
   212             elif not final:
       
   213                 # Keep potentially unfinished label until the next call
       
   214                 del labels[-1]
       
   215                 if labels:
       
   216                     trailing_dot = '.'
       
   217 
       
   218         result = []
       
   219         size = 0
       
   220         for label in labels:
       
   221             result.append(ToASCII(label))
       
   222             if size:
       
   223                 size += 1
       
   224             size += len(label)
       
   225 
       
   226         # Join with U+002E
       
   227         result = ".".join(result) + trailing_dot
       
   228         size += len(trailing_dot)
       
   229         return (result, size)
       
   230 
       
   231 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
       
   232     def _buffer_decode(self, input, errors, final):
       
   233         if errors != 'strict':
       
   234             raise UnicodeError("Unsupported error handling "+errors)
       
   235 
       
   236         if not input:
       
   237             return (u"", 0)
       
   238 
       
   239         # IDNA allows decoding to operate on Unicode strings, too.
       
   240         if isinstance(input, unicode):
       
   241             labels = dots.split(input)
       
   242         else:
       
   243             # Must be ASCII string
       
   244             input = str(input)
       
   245             unicode(input, "ascii")
       
   246             labels = input.split(".")
       
   247 
       
   248         trailing_dot = u''
       
   249         if labels:
       
   250             if not labels[-1]:
       
   251                 trailing_dot = u'.'
       
   252                 del labels[-1]
       
   253             elif not final:
       
   254                 # Keep potentially unfinished label until the next call
       
   255                 del labels[-1]
       
   256                 if labels:
       
   257                     trailing_dot = u'.'
       
   258 
       
   259         result = []
       
   260         size = 0
       
   261         for label in labels:
       
   262             result.append(ToUnicode(label))
       
   263             if size:
       
   264                 size += 1
       
   265             size += len(label)
       
   266 
       
   267         result = u".".join(result) + trailing_dot
       
   268         size += len(trailing_dot)
       
   269         return (result, size)
       
   270 
       
class StreamWriter(Codec,codecs.StreamWriter):
    # Stream writer for the IDNA codec: encode()/decode() come from Codec,
    # the stream interface from codecs.StreamWriter; nothing is overridden.
    pass
       
   273 
       
class StreamReader(Codec,codecs.StreamReader):
    # Stream reader for the IDNA codec: encode()/decode() come from Codec,
    # the stream interface from codecs.StreamReader; nothing is overridden.
    pass
       
   276 
       
   277 ### encodings module API
       
   278 
       
   279 def getregentry():
       
   280     return codecs.CodecInfo(
       
   281         name='idna',
       
   282         encode=Codec().encode,
       
   283         decode=Codec().decode,
       
   284         incrementalencoder=IncrementalEncoder,
       
   285         incrementaldecoder=IncrementalDecoder,
       
   286         streamwriter=StreamWriter,
       
   287         streamreader=StreamReader,
       
   288     )