symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_codeccallbacks.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 import test.test_support, unittest
       
     2 import sys, codecs, htmlentitydefs, unicodedata
       
     3 
       
     4 class PosReturn:
       
     5     # this can be used for configurable callbacks
       
     6 
       
     7     def __init__(self):
       
     8         self.pos = 0
       
     9 
       
    10     def handle(self, exc):
       
    11         oldpos = self.pos
       
    12         realpos = oldpos
       
    13         if realpos<0:
       
    14             realpos = len(exc.object) + realpos
       
    15         # if we don't advance this time, terminate on the next call
       
    16         # otherwise we'd get an endless loop
       
    17         if realpos <= exc.start:
       
    18             self.pos = len(exc.object)
       
    19         return (u"<?>", oldpos)
       
    20 
       
    21 # A UnicodeEncodeError object with a bad start attribute
       
    22 class BadStartUnicodeEncodeError(UnicodeEncodeError):
       
    23     def __init__(self):
       
    24         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
       
    25         self.start = []
       
    26 
       
    27 # A UnicodeEncodeError object with a bad object attribute
       
    28 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
       
    29     def __init__(self):
       
    30         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
       
    31         self.object = []
       
    32 
       
    33 # A UnicodeDecodeError object without an end attribute
       
    34 class NoEndUnicodeDecodeError(UnicodeDecodeError):
       
    35     def __init__(self):
       
    36         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
       
    37         del self.end
       
    38 
       
    39 # A UnicodeDecodeError object with a bad object attribute
       
    40 class BadObjectUnicodeDecodeError(UnicodeDecodeError):
       
    41     def __init__(self):
       
    42         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
       
    43         self.object = []
       
    44 
       
    45 # A UnicodeTranslateError object without a start attribute
       
    46 class NoStartUnicodeTranslateError(UnicodeTranslateError):
       
    47     def __init__(self):
       
    48         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
       
    49         del self.start
       
    50 
       
    51 # A UnicodeTranslateError object without an end attribute
       
    52 class NoEndUnicodeTranslateError(UnicodeTranslateError):
       
    53     def __init__(self):
       
    54         UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
       
    55         del self.end
       
    56 
       
    57 # A UnicodeTranslateError object without an object attribute
       
    58 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
       
    59     def __init__(self):
       
    60         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
       
    61         del self.object
       
    62 
       
    63 class CodecCallbackTest(unittest.TestCase):
       
    64 
       
    def test_xmlcharrefreplace(self):
        # Replace unencodable characters with numeric character entities.
        # For ascii, latin-1 and charmaps this is completely implemented
        # in C and should be reasonably fast.
        text = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
        expectations = (
            ("ascii", "&#12473;&#12497;&#12514; &#228;nd eggs"),
            ("latin-1", "&#12473;&#12497;&#12514; \xe4nd eggs"),
        )
        for encoding, expected in expectations:
            self.assertEqual(
                text.encode(encoding, "xmlcharrefreplace"), expected)
       
    78 
       
    def test_xmlcharnamereplace(self):
        # Use a named character entity for each unencodable character
        # when one is available, and a numeric reference otherwise.

        def xmlcharnamereplace(exc):
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            names = htmlentitydefs.codepoint2name
            pieces = []
            for char in exc.object[exc.start:exc.end]:
                codepoint = ord(char)
                if codepoint in names:
                    pieces.append(u"&%s;" % names[codepoint])
                else:
                    pieces.append(u"&#%d;" % codepoint)
            return (u"".join(pieces), exc.end)

        codecs.register_error(
            "test.xmlcharnamereplace", xmlcharnamereplace)

        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
        # The more characters the target encoding covers, the fewer
        # entities appear in the output.
        expectations = (
            ("ascii", "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"),
            ("latin-1", "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"),
            ("iso-8859-15", "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"),
        )
        for encoding, sout in expectations:
            self.assertEqual(
                sin.encode(encoding, "test.xmlcharnamereplace"), sout)
       
   104 
       
   105     def test_uninamereplace(self):
       
   106         # We're using the names from the unicode database this time,
       
   107         # and we're doing "syntax highlighting" here, i.e. we include
       
   108         # the replaced text in ANSI escape sequences. For this it is
       
   109         # useful that the error handler is not called for every single
       
   110         # unencodable character, but for a complete sequence of
       
   111         # unencodable characters, otherwise we would output many
       
   112         # unneccessary escape sequences.
       
   113 
       
   114         def uninamereplace(exc):
       
   115             if not isinstance(exc, UnicodeEncodeError):
       
   116                 raise TypeError("don't know how to handle %r" % exc)
       
   117             l = []
       
   118             for c in exc.object[exc.start:exc.end]:
       
   119                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
       
   120             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
       
   121 
       
   122         codecs.register_error(
       
   123             "test.uninamereplace", uninamereplace)
       
   124 
       
   125         sin = u"\xac\u1234\u20ac\u8000"
       
   126         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   127         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
       
   128 
       
   129         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   130         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
       
   131 
       
   132         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   133         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
       
   134 
       
   135     def test_backslashescape(self):
       
   136         # Does the same as the "unicode-escape" encoding, but with different
       
   137         # base encodings.
       
   138         sin = u"a\xac\u1234\u20ac\u8000"
       
   139         if sys.maxunicode > 0xffff:
       
   140             sin += unichr(sys.maxunicode)
       
   141         sout = "a\\xac\\u1234\\u20ac\\u8000"
       
   142         if sys.maxunicode > 0xffff:
       
   143             sout += "\\U%08x" % sys.maxunicode
       
   144         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
       
   145 
       
   146         sout = "a\xac\\u1234\\u20ac\\u8000"
       
   147         if sys.maxunicode > 0xffff:
       
   148             sout += "\\U%08x" % sys.maxunicode
       
   149         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
       
   150 
       
   151         sout = "a\xac\\u1234\xa4\\u8000"
       
   152         if sys.maxunicode > 0xffff:
       
   153             sout += "\\U%08x" % sys.maxunicode
       
   154         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
       
   155 
       
   156     def test_decoderelaxedutf8(self):
       
   157         # This is the test for a decoding callback handler,
       
   158         # that relaxes the UTF-8 minimal encoding restriction.
       
   159         # A null byte that is encoded as "\xc0\x80" will be
       
   160         # decoded as a null byte. All other illegal sequences
       
   161         # will be handled strictly.
       
   162         def relaxedutf8(exc):
       
   163             if not isinstance(exc, UnicodeDecodeError):
       
   164                 raise TypeError("don't know how to handle %r" % exc)
       
   165             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
       
   166                 return (u"\x00", exc.start+2) # retry after two bytes
       
   167             else:
       
   168                 raise exc
       
   169 
       
   170         codecs.register_error(
       
   171             "test.relaxedutf8", relaxedutf8)
       
   172 
       
   173         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
       
   174         sout = u"a\x00b\x00c\xfc\x00\x00"
       
   175         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
       
   176         sin = "\xc0\x80\xc0\x81"
       
   177         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
       
   178 
       
   179     def test_charmapencode(self):
       
   180         # For charmap encodings the replacement string will be
       
   181         # mapped through the encoding again. This means, that
       
   182         # to be able to use e.g. the "replace" handler, the
       
   183         # charmap has to have a mapping for "?".
       
   184         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
       
   185         sin = u"abc"
       
   186         sout = "AABBCC"
       
   187         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
       
   188 
       
   189         sin = u"abcA"
       
   190         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
       
   191 
       
   192         charmap[ord("?")] = "XYZ"
       
   193         sin = u"abcDEF"
       
   194         sout = "AABBCCXYZXYZXYZ"
       
   195         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
       
   196 
       
   197         charmap[ord("?")] = u"XYZ"
       
   198         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
       
   199 
       
   200         charmap[ord("?")] = u"XYZ"
       
   201         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
       
   202 
       
   203     def test_decodeunicodeinternal(self):
       
   204         self.assertRaises(
       
   205             UnicodeDecodeError,
       
   206             "\x00\x00\x00\x00\x00".decode,
       
   207             "unicode-internal",
       
   208         )
       
   209         if sys.maxunicode > 0xffff:
       
   210             def handler_unicodeinternal(exc):
       
   211                 if not isinstance(exc, UnicodeDecodeError):
       
   212                     raise TypeError("don't know how to handle %r" % exc)
       
   213                 return (u"\x01", 1)
       
   214 
       
   215             self.assertEqual(
       
   216                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
       
   217                 u"\u0000"
       
   218             )
       
   219 
       
   220             self.assertEqual(
       
   221                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
       
   222                 u"\u0000\ufffd"
       
   223             )
       
   224 
       
   225             codecs.register_error("test.hui", handler_unicodeinternal)
       
   226 
       
   227             self.assertEqual(
       
   228                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
       
   229                 u"\u0000\u0001\u0000"
       
   230             )
       
   231 
       
   232     def test_callbacks(self):
       
   233         def handler1(exc):
       
   234             if not isinstance(exc, UnicodeEncodeError) \
       
   235                and not isinstance(exc, UnicodeDecodeError):
       
   236                 raise TypeError("don't know how to handle %r" % exc)
       
   237             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
       
   238             return (u"[%s]" % u"".join(l), exc.end)
       
   239 
       
   240         codecs.register_error("test.handler1", handler1)
       
   241 
       
   242         def handler2(exc):
       
   243             if not isinstance(exc, UnicodeDecodeError):
       
   244                 raise TypeError("don't know how to handle %r" % exc)
       
   245             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
       
   246             return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
       
   247 
       
   248         codecs.register_error("test.handler2", handler2)
       
   249 
       
   250         s = "\x00\x81\x7f\x80\xff"
       
   251 
       
   252         self.assertEqual(
       
   253             s.decode("ascii", "test.handler1"),
       
   254             u"\x00[<129>]\x7f[<128>][<255>]"
       
   255         )
       
   256         self.assertEqual(
       
   257             s.decode("ascii", "test.handler2"),
       
   258             u"\x00[<129>][<128>]"
       
   259         )
       
   260 
       
   261         self.assertEqual(
       
   262             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
       
   263             u"\u3042[<92><117><51><120>]xx"
       
   264         )
       
   265 
       
   266         self.assertEqual(
       
   267             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
       
   268             u"\u3042[<92><117><51><120><120>]"
       
   269         )
       
   270 
       
   271         self.assertEqual(
       
   272             codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
       
   273             u"z[<98>][<99>]"
       
   274         )
       
   275 
       
   276         self.assertEqual(
       
   277             u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
       
   278             u"g[<252><223>]rk"
       
   279         )
       
   280 
       
   281         self.assertEqual(
       
   282             u"g\xfc\xdf".encode("ascii", "test.handler1"),
       
   283             u"g[<252><223>]"
       
   284         )
       
   285 
       
   286     def test_longstrings(self):
       
   287         # test long strings to check for memory overflow problems
       
   288         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
       
   289                    "backslashreplace"]
       
   290         # register the handlers under different names,
       
   291         # to prevent the codec from recognizing the name
       
   292         for err in errors:
       
   293             codecs.register_error("test." + err, codecs.lookup_error(err))
       
   294         l = 1000
       
   295         errors += [ "test." + err for err in errors ]
       
   296         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
       
   297             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
       
   298                         "utf-8", "utf-7", "utf-16", "utf-32"):
       
   299                 for err in errors:
       
   300                     try:
       
   301                         uni.encode(enc, err)
       
   302                     except UnicodeError:
       
   303                         pass
       
   304 
       
   305     def check_exceptionobjectargs(self, exctype, args, msg):
       
   306         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
       
   307         # check with one missing argument
       
   308         self.assertRaises(TypeError, exctype, *args[:-1])
       
   309         # check with one argument too much
       
   310         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
       
   311         # check with one argument of the wrong type
       
   312         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
       
   313         for i in xrange(len(args)):
       
   314             for wrongarg in wrongargs:
       
   315                 if type(wrongarg) is type(args[i]):
       
   316                     continue
       
   317                 # build argument array
       
   318                 callargs = []
       
   319                 for j in xrange(len(args)):
       
   320                     if i==j:
       
   321                         callargs.append(wrongarg)
       
   322                     else:
       
   323                         callargs.append(args[i])
       
   324                 self.assertRaises(TypeError, exctype, *callargs)
       
   325 
       
   326         # check with the correct number and type of arguments
       
   327         exc = exctype(*args)
       
   328         self.assertEquals(str(exc), msg)
       
   329 
       
   330     def test_unicodeencodeerror(self):
       
   331         self.check_exceptionobjectargs(
       
   332             UnicodeEncodeError,
       
   333             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
       
   334             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
       
   335         )
       
   336         self.check_exceptionobjectargs(
       
   337             UnicodeEncodeError,
       
   338             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
       
   339             "'ascii' codec can't encode characters in position 1-3: ouch"
       
   340         )
       
   341         self.check_exceptionobjectargs(
       
   342             UnicodeEncodeError,
       
   343             ["ascii", u"\xfcx", 0, 1, "ouch"],
       
   344             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
       
   345         )
       
   346         self.check_exceptionobjectargs(
       
   347             UnicodeEncodeError,
       
   348             ["ascii", u"\u0100x", 0, 1, "ouch"],
       
   349             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
       
   350         )
       
   351         self.check_exceptionobjectargs(
       
   352             UnicodeEncodeError,
       
   353             ["ascii", u"\uffffx", 0, 1, "ouch"],
       
   354             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
       
   355         )
       
   356         if sys.maxunicode > 0xffff:
       
   357             self.check_exceptionobjectargs(
       
   358                 UnicodeEncodeError,
       
   359                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
       
   360                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
       
   361             )
       
   362 
       
   363     def test_unicodedecodeerror(self):
       
   364         self.check_exceptionobjectargs(
       
   365             UnicodeDecodeError,
       
   366             ["ascii", "g\xfcrk", 1, 2, "ouch"],
       
   367             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
       
   368         )
       
   369         self.check_exceptionobjectargs(
       
   370             UnicodeDecodeError,
       
   371             ["ascii", "g\xfcrk", 1, 3, "ouch"],
       
   372             "'ascii' codec can't decode bytes in position 1-2: ouch"
       
   373         )
       
   374 
       
   375     def test_unicodetranslateerror(self):
       
   376         self.check_exceptionobjectargs(
       
   377             UnicodeTranslateError,
       
   378             [u"g\xfcrk", 1, 2, "ouch"],
       
   379             "can't translate character u'\\xfc' in position 1: ouch"
       
   380         )
       
   381         self.check_exceptionobjectargs(
       
   382             UnicodeTranslateError,
       
   383             [u"g\u0100rk", 1, 2, "ouch"],
       
   384             "can't translate character u'\\u0100' in position 1: ouch"
       
   385         )
       
   386         self.check_exceptionobjectargs(
       
   387             UnicodeTranslateError,
       
   388             [u"g\uffffrk", 1, 2, "ouch"],
       
   389             "can't translate character u'\\uffff' in position 1: ouch"
       
   390         )
       
   391         if sys.maxunicode > 0xffff:
       
   392             self.check_exceptionobjectargs(
       
   393                 UnicodeTranslateError,
       
   394                 [u"g\U00010000rk", 1, 2, "ouch"],
       
   395                 "can't translate character u'\\U00010000' in position 1: ouch"
       
   396             )
       
   397         self.check_exceptionobjectargs(
       
   398             UnicodeTranslateError,
       
   399             [u"g\xfcrk", 1, 3, "ouch"],
       
   400             "can't translate characters in position 1-2: ouch"
       
   401         )
       
   402 
       
   403     def test_badandgoodstrictexceptions(self):
       
   404         # "strict" complains about a non-exception passed in
       
   405         self.assertRaises(
       
   406             TypeError,
       
   407             codecs.strict_errors,
       
   408             42
       
   409         )
       
   410         # "strict" complains about the wrong exception type
       
   411         self.assertRaises(
       
   412             Exception,
       
   413             codecs.strict_errors,
       
   414             Exception("ouch")
       
   415         )
       
   416 
       
   417         # If the correct exception is passed in, "strict" raises it
       
   418         self.assertRaises(
       
   419             UnicodeEncodeError,
       
   420             codecs.strict_errors,
       
   421             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
       
   422         )
       
   423 
       
   424     def test_badandgoodignoreexceptions(self):
       
   425         # "ignore" complains about a non-exception passed in
       
   426         self.assertRaises(
       
   427            TypeError,
       
   428            codecs.ignore_errors,
       
   429            42
       
   430         )
       
   431         # "ignore" complains about the wrong exception type
       
   432         self.assertRaises(
       
   433            TypeError,
       
   434            codecs.ignore_errors,
       
   435            UnicodeError("ouch")
       
   436         )
       
   437         # If the correct exception is passed in, "ignore" returns an empty replacement
       
   438         self.assertEquals(
       
   439             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   440             (u"", 1)
       
   441         )
       
   442         self.assertEquals(
       
   443             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
       
   444             (u"", 1)
       
   445         )
       
   446         self.assertEquals(
       
   447             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
       
   448             (u"", 1)
       
   449         )
       
   450 
       
   451     def test_badandgoodreplaceexceptions(self):
       
   452         # "replace" complains about a non-exception passed in
       
   453         self.assertRaises(
       
   454            TypeError,
       
   455            codecs.replace_errors,
       
   456            42
       
   457         )
       
   458         # "replace" complains about the wrong exception type
       
   459         self.assertRaises(
       
   460            TypeError,
       
   461            codecs.replace_errors,
       
   462            UnicodeError("ouch")
       
   463         )
       
   464         self.assertRaises(
       
   465             TypeError,
       
   466             codecs.replace_errors,
       
   467             BadObjectUnicodeEncodeError()
       
   468         )
       
   469         self.assertRaises(
       
   470             TypeError,
       
   471             codecs.replace_errors,
       
   472             BadObjectUnicodeDecodeError()
       
   473         )
       
   474         # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
       
   475         self.assertEquals(
       
   476             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   477             (u"?", 1)
       
   478         )
       
   479         self.assertEquals(
       
   480             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
       
   481             (u"\ufffd", 1)
       
   482         )
       
   483         self.assertEquals(
       
   484             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
       
   485             (u"\ufffd", 1)
       
   486         )
       
   487 
       
   488     def test_badandgoodxmlcharrefreplaceexceptions(self):
       
   489         # "xmlcharrefreplace" complains about a non-exception passed in
       
   490         self.assertRaises(
       
   491            TypeError,
       
   492            codecs.xmlcharrefreplace_errors,
       
   493            42
       
   494         )
       
   495         # "xmlcharrefreplace" complains about the wrong exception types
       
   496         self.assertRaises(
       
   497            TypeError,
       
   498            codecs.xmlcharrefreplace_errors,
       
   499            UnicodeError("ouch")
       
   500         )
       
   501         # "xmlcharrefreplace" can only be used for encoding
       
   502         self.assertRaises(
       
   503             TypeError,
       
   504             codecs.xmlcharrefreplace_errors,
       
   505             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
       
   506         )
       
   507         self.assertRaises(
       
   508             TypeError,
       
   509             codecs.xmlcharrefreplace_errors,
       
   510             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
       
   511         )
       
   512         # Use the correct exception
       
   513         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
       
   514         s = "".join(unichr(c) for c in cs)
       
   515         self.assertEquals(
       
   516             codecs.xmlcharrefreplace_errors(
       
   517                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
       
   518             ),
       
   519             (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
       
   520         )
       
   521 
       
   522     def test_badandgoodbackslashreplaceexceptions(self):
       
   523         # "backslashreplace" complains about a non-exception passed in
       
   524         self.assertRaises(
       
   525            TypeError,
       
   526            codecs.backslashreplace_errors,
       
   527            42
       
   528         )
       
   529         # "backslashreplace" complains about the wrong exception types
       
   530         self.assertRaises(
       
   531            TypeError,
       
   532            codecs.backslashreplace_errors,
       
   533            UnicodeError("ouch")
       
   534         )
       
   535         # "backslashreplace" can only be used for encoding
       
   536         self.assertRaises(
       
   537             TypeError,
       
   538             codecs.backslashreplace_errors,
       
   539             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
       
   540         )
       
   541         self.assertRaises(
       
   542             TypeError,
       
   543             codecs.backslashreplace_errors,
       
   544             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
       
   545         )
       
   546         # Use the correct exception
       
   547         self.assertEquals(
       
   548             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   549             (u"\\u3042", 1)
       
   550         )
       
   551         self.assertEquals(
       
   552             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
       
   553             (u"\\x00", 1)
       
   554         )
       
   555         self.assertEquals(
       
   556             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
       
   557             (u"\\xff", 1)
       
   558         )
       
   559         self.assertEquals(
       
   560             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
       
   561             (u"\\u0100", 1)
       
   562         )
       
   563         self.assertEquals(
       
   564             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
       
   565             (u"\\uffff", 1)
       
   566         )
       
   567         if sys.maxunicode>0xffff:
       
   568             self.assertEquals(
       
   569                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
       
   570                 (u"\\U00010000", 1)
       
   571             )
       
   572             self.assertEquals(
       
   573                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
       
   574                 (u"\\U0010ffff", 1)
       
   575             )
       
   576 
       
   577     def test_badhandlerresults(self):
       
   578         results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
       
   579         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
       
   580 
       
   581         for res in results:
       
   582             codecs.register_error("test.badhandler", lambda: res)
       
   583             for enc in encs:
       
   584                 self.assertRaises(
       
   585                     TypeError,
       
   586                     u"\u3042".encode,
       
   587                     enc,
       
   588                     "test.badhandler"
       
   589                 )
       
   590             for (enc, bytes) in (
       
   591                 ("ascii", "\xff"),
       
   592                 ("utf-8", "\xff"),
       
   593                 ("utf-7", "+x-"),
       
   594                 ("unicode-internal", "\x00"),
       
   595             ):
       
   596                 self.assertRaises(
       
   597                     TypeError,
       
   598                     bytes.decode,
       
   599                     enc,
       
   600                     "test.badhandler"
       
   601                 )
       
   602 
       
   603     def test_lookup(self):
       
   604         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
       
   605         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
       
   606         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
       
   607         self.assertEquals(
       
   608             codecs.xmlcharrefreplace_errors,
       
   609             codecs.lookup_error("xmlcharrefreplace")
       
   610         )
       
   611         self.assertEquals(
       
   612             codecs.backslashreplace_errors,
       
   613             codecs.lookup_error("backslashreplace")
       
   614         )
       
   615 
       
   616     def test_unencodablereplacement(self):
       
   617         def unencrepl(exc):
       
   618             if isinstance(exc, UnicodeEncodeError):
       
   619                 return (u"\u4242", exc.end)
       
   620             else:
       
   621                 raise TypeError("don't know how to handle %r" % exc)
       
   622         codecs.register_error("test.unencreplhandler", unencrepl)
       
   623         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
       
   624             self.assertRaises(
       
   625                 UnicodeEncodeError,
       
   626                 u"\u4242".encode,
       
   627                 enc,
       
   628                 "test.unencreplhandler"
       
   629             )
       
   630 
       
   631     def test_badregistercall(self):
       
   632         # enhance coverage of:
       
   633         # Modules/_codecsmodule.c::register_error()
       
   634         # Python/codecs.c::PyCodec_RegisterError()
       
   635         self.assertRaises(TypeError, codecs.register_error, 42)
       
   636         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
       
   637 
       
   638     def test_badlookupcall(self):
       
   639         # enhance coverage of:
       
   640         # Modules/_codecsmodule.c::lookup_error()
       
   641         self.assertRaises(TypeError, codecs.lookup_error)
       
   642 
       
   643     def test_unknownhandler(self):
       
   644         # enhance coverage of:
       
   645         # Modules/_codecsmodule.c::lookup_error()
       
   646         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
       
   647 
       
   648     def test_xmlcharrefvalues(self):
       
   649         # enhance coverage of:
       
   650         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
       
   651         # and inline implementations
       
   652         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
       
   653         if sys.maxunicode>=100000:
       
   654             v += (100000, 500000, 1000000)
       
   655         s = u"".join([unichr(x) for x in v])
       
   656         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
       
   657         for enc in ("ascii", "iso-8859-15"):
       
   658             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
       
   659                 s.encode(enc, err)
       
   660 
       
   661     def test_decodehelper(self):
       
   662         # enhance coverage of:
       
   663         # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
       
   664         # and callers
       
   665         self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
       
   666 
       
   667         def baddecodereturn1(exc):
       
   668             return 42
       
   669         codecs.register_error("test.baddecodereturn1", baddecodereturn1)
       
   670         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
       
   671         self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
       
   672         self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
       
   673         self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
       
   674         self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
       
   675         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
       
   676 
       
   677         def baddecodereturn2(exc):
       
   678             return (u"?", None)
       
   679         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
       
   680         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
       
   681 
       
   682         handler = PosReturn()
       
   683         codecs.register_error("test.posreturn", handler.handle)
       
   684 
       
   685         # Valid negative position
       
   686         handler.pos = -1
       
   687         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
       
   688 
       
   689         # Valid negative position
       
   690         handler.pos = -2
       
   691         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
       
   692 
       
   693         # Negative position out of bounds
       
   694         handler.pos = -3
       
   695         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
       
   696 
       
   697         # Valid positive position
       
   698         handler.pos = 1
       
   699         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
       
   700 
       
   701         # Largest valid positive position (one beyond end of input)
       
   702         handler.pos = 2
       
   703         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
       
   704 
       
   705         # Invalid positive position
       
   706         handler.pos = 3
       
   707         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
       
   708 
       
   709         # Restart at the "0"
       
   710         handler.pos = 6
       
   711         self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
       
   712 
       
   713         class D(dict):
       
   714             def __getitem__(self, key):
       
   715                 raise ValueError
       
   716         self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
       
   717         self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
       
   718         self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
       
   719 
       
   720     def test_encodehelper(self):
       
   721         # enhance coverage of:
       
   722         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
       
   723         # and callers
       
   724         self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
       
   725 
       
   726         def badencodereturn1(exc):
       
   727             return 42
       
   728         codecs.register_error("test.badencodereturn1", badencodereturn1)
       
   729         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
       
   730 
       
   731         def badencodereturn2(exc):
       
   732             return (u"?", None)
       
   733         codecs.register_error("test.badencodereturn2", badencodereturn2)
       
   734         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
       
   735 
       
   736         handler = PosReturn()
       
   737         codecs.register_error("test.posreturn", handler.handle)
       
   738 
       
   739         # Valid negative position
       
   740         handler.pos = -1
       
   741         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
       
   742 
       
   743         # Valid negative position
       
   744         handler.pos = -2
       
   745         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
       
   746 
       
   747         # Negative position out of bounds
       
   748         handler.pos = -3
       
   749         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
       
   750 
       
   751         # Valid positive position
       
   752         handler.pos = 1
       
   753         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
       
   754 
       
   755         # Largest valid positive position (one beyond end of input
       
   756         handler.pos = 2
       
   757         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
       
   758 
       
   759         # Invalid positive position
       
   760         handler.pos = 3
       
   761         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
       
   762 
       
   763         handler.pos = 0
       
   764 
       
   765         class D(dict):
       
   766             def __getitem__(self, key):
       
   767                 raise ValueError
       
   768         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
       
   769             self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
       
   770             self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
       
   771             self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
       
   772 
       
   773     def test_translatehelper(self):
       
   774         # enhance coverage of:
       
   775         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
       
   776         # and callers
       
   777         # (Unfortunately the errors argument is not directly accessible
       
   778         # from Python, so we can't test that much)
       
   779         class D(dict):
       
   780             def __getitem__(self, key):
       
   781                 raise ValueError
       
   782         self.assertRaises(ValueError, u"\xff".translate, D())
       
   783         self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
       
   784         self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
       
   785 
       
   786     def test_bug828737(self):
       
   787         charmap = {
       
   788             ord("&"): u"&amp;",
       
   789             ord("<"): u"&lt;",
       
   790             ord(">"): u"&gt;",
       
   791             ord('"'): u"&quot;",
       
   792         }
       
   793 
       
   794         for n in (1, 10, 100, 1000):
       
   795             text = u'abc<def>ghi'*n
       
   796             text.translate(charmap)
       
   797 
       
   798 def test_main():
       
   799     test.test_support.run_unittest(CodecCallbackTest)
       
   800 
       
   801 if __name__ == "__main__":
       
   802     test_main()