symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_unicode.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # -*- coding: iso-8859-1 -*-
       
     2 """ Test script for the Unicode implementation.
       
     3 
       
     4 Written by Marc-Andre Lemburg (mal@lemburg.com).
       
     5 
       
     6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
       
     7 
       
     8 """#"
       
     9 import sys, struct, codecs
       
    10 from test import test_support, string_tests
       
    11 
       
    12 # Error handling (bad decoder return)
       
    13 def search_function(encoding):
       
    14     def decode1(input, errors="strict"):
       
    15         return 42 # not a tuple
       
    16     def encode1(input, errors="strict"):
       
    17         return 42 # not a tuple
       
    18     def encode2(input, errors="strict"):
       
    19         return (42, 42) # no unicode
       
    20     def decode2(input, errors="strict"):
       
    21         return (42, 42) # no unicode
       
    22     if encoding=="test.unicode1":
       
    23         return (encode1, decode1, None, None)
       
    24     elif encoding=="test.unicode2":
       
    25         return (encode2, decode2, None, None)
       
    26     else:
       
    27         return None
       
    28 codecs.register(search_function)
       
    29 
       
    30 class UnicodeTest(
       
    31     string_tests.CommonTest,
       
    32     string_tests.MixinStrUnicodeUserStringTest,
       
    33     string_tests.MixinStrUnicodeTest,
       
    34     ):
       
    35     type2test = unicode
       
    36 
       
    37     def checkequalnofix(self, result, object, methodname, *args):
       
    38         method = getattr(object, methodname)
       
    39         realresult = method(*args)
       
    40         self.assertEqual(realresult, result)
       
    41         self.assert_(type(realresult) is type(result))
       
    42 
       
    43         # if the original is returned make sure that
       
    44         # this doesn't happen with subclasses
       
    45         if realresult is object:
       
    46             class usub(unicode):
       
    47                 def __repr__(self):
       
    48                     return 'usub(%r)' % unicode.__repr__(self)
       
    49             object = usub(object)
       
    50             method = getattr(object, methodname)
       
    51             realresult = method(*args)
       
    52             self.assertEqual(realresult, result)
       
    53             self.assert_(object is not realresult)
       
    54 
       
    55     def test_literals(self):
       
    56         self.assertEqual(u'\xff', u'\u00ff')
       
    57         self.assertEqual(u'\uffff', u'\U0000ffff')
       
    58         self.assertRaises(SyntaxError, eval, 'u\'\\Ufffffffe\'')
       
    59         self.assertRaises(SyntaxError, eval, 'u\'\\Uffffffff\'')
       
    60         self.assertRaises(SyntaxError, eval, 'u\'\\U%08x\'' % 0x110000)
       
    61 
       
    62     def test_repr(self):
       
    63         if not sys.platform.startswith('java'):
       
    64             # Test basic sanity of repr()
       
    65             self.assertEqual(repr(u'abc'), "u'abc'")
       
    66             self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
       
    67             self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
       
    68             self.assertEqual(repr(u'\\c'), "u'\\\\c'")
       
    69             self.assertEqual(repr(u'\\'), "u'\\\\'")
       
    70             self.assertEqual(repr(u'\n'), "u'\\n'")
       
    71             self.assertEqual(repr(u'\r'), "u'\\r'")
       
    72             self.assertEqual(repr(u'\t'), "u'\\t'")
       
    73             self.assertEqual(repr(u'\b'), "u'\\x08'")
       
    74             self.assertEqual(repr(u"'\""), """u'\\'"'""")
       
    75             self.assertEqual(repr(u"'\""), """u'\\'"'""")
       
    76             self.assertEqual(repr(u"'"), '''u"'"''')
       
    77             self.assertEqual(repr(u'"'), """u'"'""")
       
    78             latin1repr = (
       
    79                 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
       
    80                 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
       
    81                 "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
       
    82                 "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
       
    83                 "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
       
    84                 "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
       
    85                 "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
       
    86                 "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
       
    87                 "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
       
    88                 "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
       
    89                 "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
       
    90                 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
       
    91                 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
       
    92                 "\\xfe\\xff'")
       
    93             testrepr = repr(u''.join(map(unichr, xrange(256))))
       
    94             self.assertEqual(testrepr, latin1repr)
       
    95             # Test repr works on wide unicode escapes without overflow.
       
    96             self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
       
    97                              repr(u"\U00010000" * 39 + u"\uffff" * 4096))
       
    98 
       
    99 
       
   100     def test_count(self):
       
   101         string_tests.CommonTest.test_count(self)
       
   102         # check mixed argument types
       
   103         self.checkequalnofix(3,  'aaa', 'count', u'a')
       
   104         self.checkequalnofix(0,  'aaa', 'count', u'b')
       
   105         self.checkequalnofix(3, u'aaa', 'count',  'a')
       
   106         self.checkequalnofix(0, u'aaa', 'count',  'b')
       
   107         self.checkequalnofix(0, u'aaa', 'count',  'b')
       
   108         self.checkequalnofix(1, u'aaa', 'count',  'a', -1)
       
   109         self.checkequalnofix(3, u'aaa', 'count',  'a', -10)
       
   110         self.checkequalnofix(2, u'aaa', 'count',  'a', 0, -1)
       
   111         self.checkequalnofix(0, u'aaa', 'count',  'a', 0, -10)
       
   112 
       
   113     def test_find(self):
       
   114         self.checkequalnofix(0,  u'abcdefghiabc', 'find', u'abc')
       
   115         self.checkequalnofix(9,  u'abcdefghiabc', 'find', u'abc', 1)
       
   116         self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)
       
   117 
       
   118         self.assertRaises(TypeError, u'hello'.find)
       
   119         self.assertRaises(TypeError, u'hello'.find, 42)
       
   120 
       
   121     def test_rfind(self):
       
   122         string_tests.CommonTest.test_rfind(self)
       
   123         # check mixed argument types
       
   124         self.checkequalnofix(9,   'abcdefghiabc', 'rfind', u'abc')
       
   125         self.checkequalnofix(12,  'abcdefghiabc', 'rfind', u'')
       
   126         self.checkequalnofix(12, u'abcdefghiabc', 'rfind',  '')
       
   127 
       
   128     def test_index(self):
       
   129         string_tests.CommonTest.test_index(self)
       
   130         # check mixed argument types
       
   131         for (t1, t2) in ((str, unicode), (unicode, str)):
       
   132             self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2(''))
       
   133             self.checkequalnofix(3, t1('abcdefghiabc'), 'index',  t2('def'))
       
   134             self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2('abc'))
       
   135             self.checkequalnofix(9, t1('abcdefghiabc'), 'index',  t2('abc'), 1)
       
   136             self.assertRaises(ValueError, t1('abcdefghiabc').index, t2('hib'))
       
   137             self.assertRaises(ValueError, t1('abcdefghiab').index,  t2('abc'), 1)
       
   138             self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), 8)
       
   139             self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), -1)
       
   140 
       
   141     def test_rindex(self):
       
   142         string_tests.CommonTest.test_rindex(self)
       
   143         # check mixed argument types
       
   144         for (t1, t2) in ((str, unicode), (unicode, str)):
       
   145             self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex',  t2(''))
       
   146             self.checkequalnofix(3,  t1('abcdefghiabc'), 'rindex',  t2('def'))
       
   147             self.checkequalnofix(9,  t1('abcdefghiabc'), 'rindex',  t2('abc'))
       
   148             self.checkequalnofix(0,  t1('abcdefghiabc'), 'rindex',  t2('abc'), 0, -1)
       
   149 
       
   150             self.assertRaises(ValueError, t1('abcdefghiabc').rindex,  t2('hib'))
       
   151             self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('def'), 1)
       
   152             self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('abc'), 0, -1)
       
   153             self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, 8)
       
   154             self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, -1)
       
   155 
       
   156     def test_translate(self):
       
   157         self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None})
       
   158         self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
       
   159         self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
       
   160         self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'})
       
   161         self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''})
       
   162         self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'})
       
   163 
       
   164         self.assertRaises(TypeError, u'hello'.translate)
       
   165         self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
       
   166 
       
   167     def test_split(self):
       
   168         string_tests.CommonTest.test_split(self)
       
   169 
       
   170         # Mixed arguments
       
   171         self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//')
       
   172         self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//')
       
   173         self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
       
   174 
       
   175     def test_join(self):
       
   176         string_tests.MixinStrUnicodeUserStringTest.test_join(self)
       
   177 
       
   178         # mixed arguments
       
   179         self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 'b', u'c', u'd'])
       
   180         self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd'))
       
   181         self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz'))
       
   182         self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd'])
       
   183         self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd'])
       
   184         self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd'))
       
   185         self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz'))
       
   186 
       
   187     def test_strip(self):
       
   188         string_tests.CommonTest.test_strip(self)
       
   189         self.assertRaises(UnicodeError, u"hello".strip, "\xff")
       
   190 
       
   191     def test_replace(self):
       
   192         string_tests.CommonTest.test_replace(self)
       
   193 
       
   194         # method call forwarded from str implementation because of unicode argument
       
   195         self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1)
       
   196         self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
       
   197 
       
   198     def test_comparison(self):
       
   199         # Comparisons:
       
   200         self.assertEqual(u'abc', 'abc')
       
   201         self.assertEqual('abc', u'abc')
       
   202         self.assertEqual(u'abc', u'abc')
       
   203         self.assert_(u'abcd' > 'abc')
       
   204         self.assert_('abcd' > u'abc')
       
   205         self.assert_(u'abcd' > u'abc')
       
   206         self.assert_(u'abc' < 'abcd')
       
   207         self.assert_('abc' < u'abcd')
       
   208         self.assert_(u'abc' < u'abcd')
       
   209 
       
   210         if 0:
       
   211             # Move these tests to a Unicode collation module test...
       
   212             # Testing UTF-16 code point order comparisons...
       
   213 
       
   214             # No surrogates, no fixup required.
       
   215             self.assert_(u'\u0061' < u'\u20ac')
       
   216             # Non surrogate below surrogate value, no fixup required
       
   217             self.assert_(u'\u0061' < u'\ud800\udc02')
       
   218 
       
   219             # Non surrogate above surrogate value, fixup required
       
   220             def test_lecmp(s, s2):
       
   221                 self.assert_(s < s2)
       
   222 
       
   223             def test_fixup(s):
       
   224                 s2 = u'\ud800\udc01'
       
   225                 test_lecmp(s, s2)
       
   226                 s2 = u'\ud900\udc01'
       
   227                 test_lecmp(s, s2)
       
   228                 s2 = u'\uda00\udc01'
       
   229                 test_lecmp(s, s2)
       
   230                 s2 = u'\udb00\udc01'
       
   231                 test_lecmp(s, s2)
       
   232                 s2 = u'\ud800\udd01'
       
   233                 test_lecmp(s, s2)
       
   234                 s2 = u'\ud900\udd01'
       
   235                 test_lecmp(s, s2)
       
   236                 s2 = u'\uda00\udd01'
       
   237                 test_lecmp(s, s2)
       
   238                 s2 = u'\udb00\udd01'
       
   239                 test_lecmp(s, s2)
       
   240                 s2 = u'\ud800\ude01'
       
   241                 test_lecmp(s, s2)
       
   242                 s2 = u'\ud900\ude01'
       
   243                 test_lecmp(s, s2)
       
   244                 s2 = u'\uda00\ude01'
       
   245                 test_lecmp(s, s2)
       
   246                 s2 = u'\udb00\ude01'
       
   247                 test_lecmp(s, s2)
       
   248                 s2 = u'\ud800\udfff'
       
   249                 test_lecmp(s, s2)
       
   250                 s2 = u'\ud900\udfff'
       
   251                 test_lecmp(s, s2)
       
   252                 s2 = u'\uda00\udfff'
       
   253                 test_lecmp(s, s2)
       
   254                 s2 = u'\udb00\udfff'
       
   255                 test_lecmp(s, s2)
       
   256 
       
   257                 test_fixup(u'\ue000')
       
   258                 test_fixup(u'\uff61')
       
   259 
       
   260         # Surrogates on both sides, no fixup required
       
   261         self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
       
   262 
       
   263     def test_islower(self):
       
   264         string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
       
   265         self.checkequalnofix(False, u'\u1FFc', 'islower')
       
   266 
       
   267     def test_isupper(self):
       
   268         string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
       
   269         if not sys.platform.startswith('java'):
       
   270             self.checkequalnofix(False, u'\u1FFc', 'isupper')
       
   271 
       
   272     def test_istitle(self):
       
   273         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
       
   274         self.checkequalnofix(True, u'\u1FFc', 'istitle')
       
   275         self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
       
   276 
       
   277     def test_isspace(self):
       
   278         string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
       
   279         self.checkequalnofix(True, u'\u2000', 'isspace')
       
   280         self.checkequalnofix(True, u'\u200a', 'isspace')
       
   281         self.checkequalnofix(False, u'\u2014', 'isspace')
       
   282 
       
   283     def test_isalpha(self):
       
   284         string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
       
   285         self.checkequalnofix(True, u'\u1FFc', 'isalpha')
       
   286 
       
   287     def test_isdecimal(self):
       
   288         self.checkequalnofix(False, u'', 'isdecimal')
       
   289         self.checkequalnofix(False, u'a', 'isdecimal')
       
   290         self.checkequalnofix(True, u'0', 'isdecimal')
       
   291         self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE
       
   292         self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
       
   293         self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
       
   294         self.checkequalnofix(True, u'0123456789', 'isdecimal')
       
   295         self.checkequalnofix(False, u'0123456789a', 'isdecimal')
       
   296 
       
   297         self.checkraises(TypeError, 'abc', 'isdecimal', 42)
       
   298 
       
   299     def test_isdigit(self):
       
   300         string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
       
   301         self.checkequalnofix(True, u'\u2460', 'isdigit')
       
   302         self.checkequalnofix(False, u'\xbc', 'isdigit')
       
   303         self.checkequalnofix(True, u'\u0660', 'isdigit')
       
   304 
       
   305     def test_isnumeric(self):
       
   306         self.checkequalnofix(False, u'', 'isnumeric')
       
   307         self.checkequalnofix(False, u'a', 'isnumeric')
       
   308         self.checkequalnofix(True, u'0', 'isnumeric')
       
   309         self.checkequalnofix(True, u'\u2460', 'isnumeric')
       
   310         self.checkequalnofix(True, u'\xbc', 'isnumeric')
       
   311         self.checkequalnofix(True, u'\u0660', 'isnumeric')
       
   312         self.checkequalnofix(True, u'0123456789', 'isnumeric')
       
   313         self.checkequalnofix(False, u'0123456789a', 'isnumeric')
       
   314 
       
   315         self.assertRaises(TypeError, u"abc".isnumeric, 42)
       
   316 
       
   317     def test_contains(self):
       
   318         # Testing Unicode contains method
       
   319         self.assert_('a' in u'abdb')
       
   320         self.assert_('a' in u'bdab')
       
   321         self.assert_('a' in u'bdaba')
       
   322         self.assert_('a' in u'bdba')
       
   323         self.assert_('a' in u'bdba')
       
   324         self.assert_(u'a' in u'bdba')
       
   325         self.assert_(u'a' not in u'bdb')
       
   326         self.assert_(u'a' not in 'bdb')
       
   327         self.assert_(u'a' in 'bdba')
       
   328         self.assert_(u'a' in ('a',1,None))
       
   329         self.assert_(u'a' in (1,None,'a'))
       
   330         self.assert_(u'a' in (1,None,u'a'))
       
   331         self.assert_('a' in ('a',1,None))
       
   332         self.assert_('a' in (1,None,'a'))
       
   333         self.assert_('a' in (1,None,u'a'))
       
   334         self.assert_('a' not in ('x',1,u'y'))
       
   335         self.assert_('a' not in ('x',1,None))
       
   336         self.assert_(u'abcd' not in u'abcxxxx')
       
   337         self.assert_(u'ab' in u'abcd')
       
   338         self.assert_('ab' in u'abc')
       
   339         self.assert_(u'ab' in 'abc')
       
   340         self.assert_(u'ab' in (1,None,u'ab'))
       
   341         self.assert_(u'' in u'abc')
       
   342         self.assert_('' in u'abc')
       
   343 
       
   344         # If the following fails either
       
   345         # the contains operator does not propagate UnicodeErrors or
       
   346         # someone has changed the default encoding
       
   347         self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')
       
   348 
       
   349         self.assert_(u'' in '')
       
   350         self.assert_('' in u'')
       
   351         self.assert_(u'' in u'')
       
   352         self.assert_(u'' in 'abc')
       
   353         self.assert_('' in u'abc')
       
   354         self.assert_(u'' in u'abc')
       
   355         self.assert_(u'\0' not in 'abc')
       
   356         self.assert_('\0' not in u'abc')
       
   357         self.assert_(u'\0' not in u'abc')
       
   358         self.assert_(u'\0' in '\0abc')
       
   359         self.assert_('\0' in u'\0abc')
       
   360         self.assert_(u'\0' in u'\0abc')
       
   361         self.assert_(u'\0' in 'abc\0')
       
   362         self.assert_('\0' in u'abc\0')
       
   363         self.assert_(u'\0' in u'abc\0')
       
   364         self.assert_(u'a' in '\0abc')
       
   365         self.assert_('a' in u'\0abc')
       
   366         self.assert_(u'a' in u'\0abc')
       
   367         self.assert_(u'asdf' in 'asdf')
       
   368         self.assert_('asdf' in u'asdf')
       
   369         self.assert_(u'asdf' in u'asdf')
       
   370         self.assert_(u'asdf' not in 'asd')
       
   371         self.assert_('asdf' not in u'asd')
       
   372         self.assert_(u'asdf' not in u'asd')
       
   373         self.assert_(u'asdf' not in '')
       
   374         self.assert_('asdf' not in u'')
       
   375         self.assert_(u'asdf' not in u'')
       
   376 
       
   377         self.assertRaises(TypeError, u"abc".__contains__)
       
   378 
       
   379     def test_formatting(self):
       
   380         string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
       
   381         # Testing Unicode formatting strings...
       
   382         self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
       
   383         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
       
   384         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
       
   385         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
       
   386         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
       
   387         self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
       
   388         if not sys.platform.startswith('java'):
       
   389             self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
       
   390         self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
       
   391         self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
       
   392 
       
   393         self.assertEqual(u'%c' % 0x1234, u'\u1234')
       
   394         self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
       
   395 
       
   396         # formatting jobs delegated from the string implementation:
       
   397         self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
       
   398         self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
       
   399         self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
       
   400         self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
       
   401         self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123},  u'...abc...')
       
   402         self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
       
   403         self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
       
   404         self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
       
   405         self.assertEqual('...%s...' % u"abc", u'...abc...')
       
   406         self.assertEqual('%*s' % (5,u'abc',), u'  abc')
       
   407         self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
       
   408         self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
       
   409         self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
       
   410         self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
       
   411         self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
       
   412         self.assertEqual('%c' % u'a', u'a')
       
   413         class Wrapper:
       
   414             def __str__(self):
       
   415                 return u'\u1234'
       
   416         self.assertEqual('%s' % Wrapper(), u'\u1234')
       
   417 
       
   418     @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
       
   419     def test_format_float(self):
       
   420         # should not format with a comma, but always with C locale
       
   421         self.assertEqual(u'1.0', u'%.1f' % 1.0)
       
   422 
       
   423     def test_constructor(self):
       
   424         # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
       
   425 
       
   426         self.assertEqual(
       
   427             unicode(u'unicode remains unicode'),
       
   428             u'unicode remains unicode'
       
   429         )
       
   430 
       
   431         class UnicodeSubclass(unicode):
       
   432             pass
       
   433 
       
   434         self.assertEqual(
       
   435             unicode(UnicodeSubclass('unicode subclass becomes unicode')),
       
   436             u'unicode subclass becomes unicode'
       
   437         )
       
   438 
       
   439         self.assertEqual(
       
   440             unicode('strings are converted to unicode'),
       
   441             u'strings are converted to unicode'
       
   442         )
       
   443 
       
   444         class UnicodeCompat:
       
   445             def __init__(self, x):
       
   446                 self.x = x
       
   447             def __unicode__(self):
       
   448                 return self.x
       
   449 
       
   450         self.assertEqual(
       
   451             unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
       
   452             u'__unicode__ compatible objects are recognized')
       
   453 
       
   454         class StringCompat:
       
   455             def __init__(self, x):
       
   456                 self.x = x
       
   457             def __str__(self):
       
   458                 return self.x
       
   459 
       
   460         self.assertEqual(
       
   461             unicode(StringCompat('__str__ compatible objects are recognized')),
       
   462             u'__str__ compatible objects are recognized'
       
   463         )
       
   464 
       
   465         # unicode(obj) is compatible to str():
       
   466 
       
   467         o = StringCompat('unicode(obj) is compatible to str()')
       
   468         self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
       
   469         self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
       
   470 
       
   471         # %-formatting and .__unicode__()
       
   472         self.assertEqual(u'%s' %
       
   473                          UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
       
   474                          u"u'%s' % obj uses obj.__unicode__()")
       
   475         self.assertEqual(u'%s' %
       
   476                          UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
       
   477                          u"u'%s' % obj falls back to obj.__str__()")
       
   478 
       
   479         for obj in (123, 123.45, 123L):
       
   480             self.assertEqual(unicode(obj), unicode(str(obj)))
       
   481 
       
   482         # unicode(obj, encoding, error) tests (this maps to
       
   483         # PyUnicode_FromEncodedObject() at C level)
       
   484 
       
   485         if not sys.platform.startswith('java'):
       
   486             self.assertRaises(
       
   487                 TypeError,
       
   488                 unicode,
       
   489                 u'decoding unicode is not supported',
       
   490                 'utf-8',
       
   491                 'strict'
       
   492             )
       
   493 
       
   494         self.assertEqual(
       
   495             unicode('strings are decoded to unicode', 'utf-8', 'strict'),
       
   496             u'strings are decoded to unicode'
       
   497         )
       
   498 
       
   499         if not sys.platform.startswith('java'):
       
   500             self.assertEqual(
       
   501                 unicode(
       
   502                     buffer('character buffers are decoded to unicode'),
       
   503                     'utf-8',
       
   504                     'strict'
       
   505                 ),
       
   506                 u'character buffers are decoded to unicode'
       
   507             )
       
   508 
       
   509         self.assertRaises(TypeError, unicode, 42, 42, 42)
       
   510 
       
   511     def test_codecs_utf7(self):
       
   512         utfTests = [
       
   513             (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
       
   514             (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
       
   515             (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
       
   516             (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
       
   517             (u'+', '+-'),
       
   518             (u'+-', '+--'),
       
   519             (u'+?', '+-?'),
       
   520             (u'\?', '+AFw?'),
       
   521             (u'+?', '+-?'),
       
   522             (ur'\\?', '+AFwAXA?'),
       
   523             (ur'\\\?', '+AFwAXABc?'),
       
   524             (ur'++--', '+-+---')
       
   525         ]
       
   526 
       
   527         for (x, y) in utfTests:
       
   528             self.assertEqual(x.encode('utf-7'), y)
       
   529 
       
   530         # surrogates not supported
       
   531         self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
       
   532 
       
   533         self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
       
   534 
       
   535         # Issue #2242: crash on some Windows/MSVC versions
       
   536         self.assertRaises(UnicodeDecodeError, '+\xc1'.decode, 'utf-7')
       
   537 
       
   538     def test_codecs_utf8(self):
       
   539         self.assertEqual(u''.encode('utf-8'), '')
       
   540         self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
       
   541         self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
       
   542         self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
       
   543         self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
       
   544         self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
       
   545         self.assertEqual(
       
   546             (u'\ud800\udc02'*1000).encode('utf-8'),
       
   547             '\xf0\x90\x80\x82'*1000
       
   548         )
       
   549         self.assertEqual(
       
   550             u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
       
   551             u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
       
   552             u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
       
   553             u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
       
   554             u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
       
   555             u' Nunstuck git und'.encode('utf-8'),
       
   556             '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
       
   557             '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
       
   558             '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
       
   559             '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
       
   560             '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
       
   561             '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
       
   562             '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
       
   563             '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
       
   564             '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
       
   565             '\xe3\x80\x8cWenn ist das Nunstuck git und'
       
   566         )
       
   567 
       
   568         # UTF-8 specific decoding tests
       
   569         self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
       
   570         self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
       
   571         self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )
       
   572 
       
   573         # Other possible utf-8 test cases:
       
   574         # * strict decoding testing for all of the
       
   575         #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
       
   576 
       
   577     def test_codecs_idna(self):
       
   578         # Test whether trailing dot is preserved
       
   579         self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
       
   580 
       
   581     def test_codecs_errors(self):
       
   582         # Error handling (encoding)
       
   583         self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
       
   584         self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
       
   585         self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
       
   586         self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
       
   587 
       
   588         # Error handling (decoding)
       
   589         self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
       
   590         self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
       
   591         self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
       
   592         self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
       
   593 
       
   594         # Error handling (unknown character names)
       
   595         self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
       
   596 
       
   597         # Error handling (truncated escape sequence)
       
   598         self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")
       
   599 
       
   600         self.assertRaises(TypeError, "hello".decode, "test.unicode1")
       
   601         self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
       
   602         self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
       
   603         self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
       
   604         # executes PyUnicode_Encode()
       
   605         import imp
       
   606         self.assertRaises(
       
   607             ImportError,
       
   608             imp.find_module,
       
   609             "non-existing module",
       
   610             [u"non-existing dir"]
       
   611         )
       
   612 
       
   613         # Error handling (wrong arguments)
       
   614         self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)
       
   615 
       
   616         # Error handling (PyUnicode_EncodeDecimal())
       
   617         self.assertRaises(UnicodeError, int, u"\u0200")
       
   618 
       
   619     def test_codecs(self):
       
   620         # Encoding
       
   621         self.assertEqual(u'hello'.encode('ascii'), 'hello')
       
   622         self.assertEqual(u'hello'.encode('utf-7'), 'hello')
       
   623         self.assertEqual(u'hello'.encode('utf-8'), 'hello')
       
   624         self.assertEqual(u'hello'.encode('utf8'), 'hello')
       
   625         self.assertEqual(u'hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
       
   626         self.assertEqual(u'hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
       
   627         self.assertEqual(u'hello'.encode('latin-1'), 'hello')
       
   628 
       
   629         # Roundtrip safety for BMP (just the first 1024 chars)
       
   630         for c in xrange(1024):
       
   631             u = unichr(c)
       
   632             for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
       
   633                              'utf-16-be', 'raw_unicode_escape',
       
   634                              'unicode_escape', 'unicode_internal'):
       
   635                 self.assertEqual(unicode(u.encode(encoding),encoding), u)
       
   636 
       
   637         # Roundtrip safety for BMP (just the first 256 chars)
       
   638         for c in xrange(256):
       
   639             u = unichr(c)
       
   640             for encoding in ('latin-1',):
       
   641                 self.assertEqual(unicode(u.encode(encoding),encoding), u)
       
   642 
       
   643         # Roundtrip safety for BMP (just the first 128 chars)
       
   644         for c in xrange(128):
       
   645             u = unichr(c)
       
   646             for encoding in ('ascii',):
       
   647                 self.assertEqual(unicode(u.encode(encoding),encoding), u)
       
   648 
       
   649         # Roundtrip safety for non-BMP (just a few chars)
       
   650         u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
       
   651         for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
       
   652                          #'raw_unicode_escape',
       
   653                          'unicode_escape', 'unicode_internal'):
       
   654             self.assertEqual(unicode(u.encode(encoding),encoding), u)
       
   655 
       
   656         # UTF-8 must be roundtrip safe for all UCS-2 code points
       
   657         # This excludes surrogates: in the full range, there would be
       
   658         # a surrogate pair (\udbff\udc00), which gets converted back
       
   659         # to a non-BMP character (\U0010fc00)
       
   660         u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
       
   661         for encoding in ('utf-8',):
       
   662             self.assertEqual(unicode(u.encode(encoding),encoding), u)
       
   663 
       
   664     def test_codecs_charmap(self):
       
   665         # 0-127
       
   666         s = ''.join(map(chr, xrange(128)))
       
   667         for encoding in (
       
   668             'cp037', 'cp1026',
       
   669             'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
       
   670             'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
       
   671             'cp863', 'cp865', 'cp866',
       
   672             'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
       
   673             'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
       
   674             'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
       
   675             'mac_cyrillic', 'mac_latin2',
       
   676 
       
   677             'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
       
   678             'cp1256', 'cp1257', 'cp1258',
       
   679             'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
       
   680 
       
   681             'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
       
   682             'cp1006', 'iso8859_8',
       
   683 
       
   684             ### These have undefined mappings:
       
   685             #'cp424',
       
   686 
       
   687             ### These fail the round-trip:
       
   688             #'cp875'
       
   689 
       
   690             ):
       
   691             self.assertEqual(unicode(s, encoding).encode(encoding), s)
       
   692 
       
   693         # 128-255
       
   694         s = ''.join(map(chr, xrange(128, 256)))
       
   695         for encoding in (
       
   696             'cp037', 'cp1026',
       
   697             'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
       
   698             'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
       
   699             'cp863', 'cp865', 'cp866',
       
   700             'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
       
   701             'iso8859_2', 'iso8859_4', 'iso8859_5',
       
   702             'iso8859_9', 'koi8_r', 'latin_1',
       
   703             'mac_cyrillic', 'mac_latin2',
       
   704 
       
   705             ### These have undefined mappings:
       
   706             #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
       
   707             #'cp1256', 'cp1257', 'cp1258',
       
   708             #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
       
   709             #'iso8859_3', 'iso8859_6', 'iso8859_7',
       
   710             #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
       
   711 
       
   712             ### These fail the round-trip:
       
   713             #'cp1006', 'cp875', 'iso8859_8',
       
   714 
       
   715             ):
       
   716             self.assertEqual(unicode(s, encoding).encode(encoding), s)
       
   717 
       
   718     def test_concatenation(self):
       
   719         self.assertEqual((u"abc" u"def"), u"abcdef")
       
   720         self.assertEqual(("abc" u"def"), u"abcdef")
       
   721         self.assertEqual((u"abc" "def"), u"abcdef")
       
   722         self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
       
   723         self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
       
   724 
       
    def test_printing(self):
        """Print unicode and str values to a file-like object.

        There are no assertions: the test passes as long as none of the
        print statements raises.
        """
        # Minimal file-like object whose write() discards its argument.
        class BitBucket:
            def write(self, text):
                pass

        out = BitBucket()
        # Single values and unicode/str mixtures.
        print >>out, u'abc'
        print >>out, u'abc', u'def'
        print >>out, u'abc', 'def'
        print >>out, 'abc', u'def'
        # Values ending in a newline, with and without a trailing comma
        # on the print statement; the statement order is deliberate.
        print >>out, u'abc\n'
        print >>out, u'abc\n',
        print >>out, u'abc\n',
        print >>out, u'def\n'
        print >>out, u'def\n'
       
   740 
       
   741     def test_ucs4(self):
       
   742         x = u'\U00100000'
       
   743         y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
       
   744         self.assertEqual(x, y)
       
   745 
       
   746         y = r'\U00100000'
       
   747         x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
       
   748         self.assertEqual(x, y)
       
   749         y = r'\U00010000'
       
   750         x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
       
   751         self.assertEqual(x, y)
       
   752 
       
   753         try:
       
   754             '\U11111111'.decode("raw-unicode-escape")
       
   755         except UnicodeDecodeError as e:
       
   756             self.assertEqual(e.start, 0)
       
   757             self.assertEqual(e.end, 10)
       
   758         else:
       
   759             self.fail("Should have raised UnicodeDecodeError")
       
   760 
       
   761     def test_conversion(self):
       
   762         # Make sure __unicode__() works properly
       
   763         class Foo0:
       
   764             def __str__(self):
       
   765                 return "foo"
       
   766 
       
   767         class Foo1:
       
   768             def __unicode__(self):
       
   769                 return u"foo"
       
   770 
       
   771         class Foo2(object):
       
   772             def __unicode__(self):
       
   773                 return u"foo"
       
   774 
       
   775         class Foo3(object):
       
   776             def __unicode__(self):
       
   777                 return "foo"
       
   778 
       
   779         class Foo4(str):
       
   780             def __unicode__(self):
       
   781                 return "foo"
       
   782 
       
   783         class Foo5(unicode):
       
   784             def __unicode__(self):
       
   785                 return "foo"
       
   786 
       
   787         class Foo6(str):
       
   788             def __str__(self):
       
   789                 return "foos"
       
   790 
       
   791             def __unicode__(self):
       
   792                 return u"foou"
       
   793 
       
   794         class Foo7(unicode):
       
   795             def __str__(self):
       
   796                 return "foos"
       
   797             def __unicode__(self):
       
   798                 return u"foou"
       
   799 
       
   800         class Foo8(unicode):
       
   801             def __new__(cls, content=""):
       
   802                 return unicode.__new__(cls, 2*content)
       
   803             def __unicode__(self):
       
   804                 return self
       
   805 
       
   806         class Foo9(unicode):
       
   807             def __str__(self):
       
   808                 return "string"
       
   809             def __unicode__(self):
       
   810                 return "not unicode"
       
   811 
       
   812         self.assertEqual(unicode(Foo0()), u"foo")
       
   813         self.assertEqual(unicode(Foo1()), u"foo")
       
   814         self.assertEqual(unicode(Foo2()), u"foo")
       
   815         self.assertEqual(unicode(Foo3()), u"foo")
       
   816         self.assertEqual(unicode(Foo4("bar")), u"foo")
       
   817         self.assertEqual(unicode(Foo5("bar")), u"foo")
       
   818         self.assertEqual(unicode(Foo6("bar")), u"foou")
       
   819         self.assertEqual(unicode(Foo7("bar")), u"foou")
       
   820         self.assertEqual(unicode(Foo8("foo")), u"foofoo")
       
   821         self.assertEqual(str(Foo9("foo")), "string")
       
   822         self.assertEqual(unicode(Foo9("foo")), u"not unicode")
       
   823 
       
   824     def test_unicode_repr(self):
       
   825         class s1:
       
   826             def __repr__(self):
       
   827                 return '\\n'
       
   828 
       
   829         class s2:
       
   830             def __repr__(self):
       
   831                 return u'\\n'
       
   832 
       
   833         self.assertEqual(repr(s1()), '\\n')
       
   834         self.assertEqual(repr(s2()), '\\n')
       
   835 
       
   836     def test_expandtabs_overflows_gracefully(self):
       
   837         # This test only affects 32-bit platforms because expandtabs can only take
       
   838         # an int as the max value, not a 64-bit C long.  If expandtabs is changed
       
   839         # to take a 64-bit long, this test should apply to all platforms.
       
   840         if sys.maxint > (1 << 32) or struct.calcsize('P') != 4:
       
   841             return
       
   842         self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
       
   843 
       
   844     def test__format__(self):
       
   845         def test(value, format, expected):
       
   846             # test both with and without the trailing 's'
       
   847             self.assertEqual(value.__format__(format), expected)
       
   848             self.assertEqual(value.__format__(format + u's'), expected)
       
   849 
       
   850         test(u'', u'', u'')
       
   851         test(u'abc', u'', u'abc')
       
   852         test(u'abc', u'.3', u'abc')
       
   853         test(u'ab', u'.3', u'ab')
       
   854         test(u'abcdef', u'.3', u'abc')
       
   855         test(u'abcdef', u'.0', u'')
       
   856         test(u'abc', u'3.3', u'abc')
       
   857         test(u'abc', u'2.3', u'abc')
       
   858         test(u'abc', u'2.2', u'ab')
       
   859         test(u'abc', u'3.2', u'ab ')
       
   860         test(u'result', u'x<0', u'result')
       
   861         test(u'result', u'x<5', u'result')
       
   862         test(u'result', u'x<6', u'result')
       
   863         test(u'result', u'x<7', u'resultx')
       
   864         test(u'result', u'x<8', u'resultxx')
       
   865         test(u'result', u' <7', u'result ')
       
   866         test(u'result', u'<7', u'result ')
       
   867         test(u'result', u'>7', u' result')
       
   868         test(u'result', u'>8', u'  result')
       
   869         test(u'result', u'^8', u' result ')
       
   870         test(u'result', u'^9', u' result  ')
       
   871         test(u'result', u'^10', u'  result  ')
       
   872         test(u'a', u'10000', u'a' + u' ' * 9999)
       
   873         test(u'', u'10000', u' ' * 10000)
       
   874         test(u'', u'10000000', u' ' * 10000000)
       
   875 
       
   876         # test mixing unicode and str
       
   877         self.assertEqual(u'abc'.__format__('s'), u'abc')
       
   878         self.assertEqual(u'abc'.__format__('->10s'), u'-------abc')
       
   879 
       
    def test_format(self):
        """Exercise the format() method of unicode (and partly str) strings.

        Covers brace escaping, positional/keyword/attribute/index field
        names, string format specs, !r/!s conversions, user-defined
        __format__ methods, nested (computed) format specs, malformed
        format strings and mixing of str and unicode.
        """
        # Literal text and escaped braces
        self.assertEqual(u''.format(), u'')
        self.assertEqual(u'a'.format(), u'a')
        self.assertEqual(u'ab'.format(), u'ab')
        self.assertEqual(u'a{{'.format(), u'a{')
        self.assertEqual(u'a}}'.format(), u'a}')
        self.assertEqual(u'{{b'.format(), u'{b')
        self.assertEqual(u'}}b'.format(), u'}b')
        self.assertEqual(u'a{{b'.format(), u'a{b')

        # examples from the PEP:
        import datetime
        self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred")
        self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')),
                         u"My name is Fred")
        self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'),
                         u"My name is Fred :-{}")

        # datetime.__format__ doesn't work with unicode
        #d = datetime.date(2007, 8, 18)
        #self.assertEqual("The year is {0.year}".format(d),
        #                 "The year is 2007")

        # classes we'll use for testing
        # C returns the format spec itself, so the formatted result shows
        # exactly which spec reached __format__.
        class C:
            def __init__(self, x=100):
                self._x = x
            def __format__(self, spec):
                return spec

        # D ignores the format spec and formats as str(self.x).
        class D:
            def __init__(self, x):
                self.x = x
            def __format__(self, spec):
                return str(self.x)

        # class with __str__, but no __format__
        class E:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return u'E(' + self.x + u')'

        # class with __repr__, but no __format__ or __str__
        class F:
            def __init__(self, x):
                self.x = x
            def __repr__(self):
                return u'F(' + self.x + u')'

        # class with __format__ that forwards to string, for some format_spec's
        class G:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return u"string is " + self.x
            def __format__(self, format_spec):
                if format_spec == 'd':
                    return u'G(' + self.x + u')'
                return object.__format__(self, format_spec)

        # class that returns a bad type from __format__
        class H:
            def __format__(self, format_spec):
                return 1.0

        # date subclass that passes the format spec straight to strftime()
        class I(datetime.date):
            def __format__(self, format_spec):
                return self.strftime(format_spec)

        # int subclass that formats twice its own value
        class J(int):
            def __format__(self, format_spec):
                return int.__format__(self * 2, format_spec)


        # Positional fields mixed with literal text and escaped braces
        self.assertEqual(u''.format(), u'')
        self.assertEqual(u'abc'.format(), u'abc')
        self.assertEqual(u'{0}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:}'.format(u'abc'), u'abc')
        self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc')
        self.assertEqual(u'{0}X'.format(u'abc'), u'abcX')
        self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY')
        self.assertEqual(u'{1}'.format(1, u'abc'), u'abc')
        self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc')
        self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX')
        self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY')
        self.assertEqual(u'{0}'.format(-15), u'-15')
        self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc')
        self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc')
        self.assertEqual(u'{{'.format(), u'{')
        self.assertEqual(u'}}'.format(), u'}')
        self.assertEqual(u'{{}}'.format(), u'{}')
        self.assertEqual(u'{{x}}'.format(), u'{x}')
        self.assertEqual(u'{{{0}}}'.format(123), u'{123}')
        self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}')
        self.assertEqual(u'}}{{'.format(), u'}{')
        self.assertEqual(u'}}x{{'.format(), u'}x{')

        # weird field names
        self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz')
        self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz')
        self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3')

        # Attribute and index lookups inside field names
        self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20')
        self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010')
        self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc')
        self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc')
        self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def')
        self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def')
        self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def')

        # strings
        self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab')
        self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc')
        self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'')
        self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab')
        self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ')
        self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx')
        self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx')
        self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ')
        self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ')
        self.assertEqual(u'{0:>7s}'.format(u'result'), u' result')
        self.assertEqual(u'{0:>8s}'.format(u'result'), u'  result')
        self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ')
        self.assertEqual(u'{0:^9s}'.format(u'result'), u' result  ')
        self.assertEqual(u'{0:^10s}'.format(u'result'), u'  result  ')
        self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999)
        self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000)
        self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000)

        # format specifiers for user defined type
        self.assertEqual(u'{0:abc}'.format(C()), u'abc')

        # !r and !s coercions
        self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
        self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
        self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello          ')
        self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello          ')
        self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'")
        self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'")
        self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)')

        # test fallback to object.__format__
        self.assertEqual(u'{0}'.format({}), u'{}')
        self.assertEqual(u'{0}'.format([]), u'[]')
        self.assertEqual(u'{0}'.format([1]), u'[1]')
        self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
        self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
        self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
        self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
        self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
        self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')

        # From here to the "combining string and unicode" section the
        # format strings are str, not unicode.
        self.assertEqual("{0:date: %Y-%m-%d}".format(I(year=2007,
                                                       month=8,
                                                       day=27)),
                         "date: 2007-08-27")

        # test deriving from a builtin type and overriding __format__
        self.assertEqual("{0}".format(J(10)), "20")


        # string format specifiers
        self.assertEqual('{0:}'.format('a'), 'a')

        # computed format specifiers
        self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
        self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
        self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello     ')
        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello     ')

        # test various errors
        self.assertRaises(ValueError, '{'.format)
        self.assertRaises(ValueError, '}'.format)
        self.assertRaises(ValueError, 'a{'.format)
        self.assertRaises(ValueError, 'a}'.format)
        self.assertRaises(ValueError, '{a'.format)
        self.assertRaises(ValueError, '}a'.format)
        self.assertRaises(IndexError, '{0}'.format)
        self.assertRaises(IndexError, '{1}'.format, 'abc')
        self.assertRaises(KeyError,   '{x}'.format)
        self.assertRaises(ValueError, "}{".format)
        self.assertRaises(ValueError, "{".format)
        self.assertRaises(ValueError, "}".format)
        self.assertRaises(ValueError, "abc{0:{}".format)
        self.assertRaises(ValueError, "{0".format)
        self.assertRaises(IndexError, "{0.}".format)
        self.assertRaises(ValueError, "{0.}".format, 0)
        self.assertRaises(IndexError, "{0[}".format)
        self.assertRaises(ValueError, "{0[}".format, [])
        self.assertRaises(KeyError,   "{0]}".format)
        self.assertRaises(ValueError, "{0.[]}".format, 0)
        self.assertRaises(ValueError, "{0..foo}".format, 0)
        self.assertRaises(ValueError, "{0[0}".format, 0)
        self.assertRaises(ValueError, "{0[0:foo}".format, 0)
        self.assertRaises(KeyError,   "{c]}".format)
        self.assertRaises(ValueError, "{{ {{{0}}".format, 0)
        self.assertRaises(ValueError, "{0}}".format, 0)
        self.assertRaises(KeyError,   "{foo}".format, bar=3)
        self.assertRaises(ValueError, "{0!x}".format, 3)
        self.assertRaises(ValueError, "{0!}".format, 0)
        self.assertRaises(ValueError, "{0!rs}".format, 0)
        # Fields with an empty name are expected to be rejected here.
        self.assertRaises(ValueError, "{!}".format)
        self.assertRaises(ValueError, "{:}".format)
        self.assertRaises(ValueError, "{:s}".format)
        self.assertRaises(ValueError, "{}".format)

        # can't have a replacement on the field name portion
        self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)

        # exceed maximum recursion depth
        self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
        self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
                          0, 1, 2, 3, 4, 5, 6, 7)

        # string format spec errors
        self.assertRaises(ValueError, "{0:-s}".format, '')
        self.assertRaises(ValueError, format, "", "-")
        self.assertRaises(ValueError, "{0:=s}".format, '')

        # test combining string and unicode
        self.assertEqual(u"foo{0}".format('bar'), u'foobar')
        # This will try to convert the argument from unicode to str, which
        #  will succeed
        self.assertEqual("foo{0}".format(u'bar'), 'foobar')
        # This will try to convert the argument from unicode to str, which
        #  will fail
        self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
       
  1115 
       
  1116     def test_raiseMemError(self):
       
  1117         # Ensure that the freelist contains a consistent object, even
       
  1118         # when a string allocation fails with a MemoryError.
       
  1119         # This used to crash the interpreter,
       
  1120         # or leak references when the number was smaller.
       
  1121         charwidth = 4 if sys.maxunicode >= 0x10000 else 2
       
  1122         # Note: sys.maxsize is half of the actual max allocation because of
       
  1123         # the signedness of Py_ssize_t.
       
  1124         alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
       
  1125         self.assertRaises(MemoryError, alloc)
       
  1126         self.assertRaises(MemoryError, alloc)
       
  1127 
       
  1128 def test_main():
       
  1129     test_support.run_unittest(__name__)
       
  1130 
       
  1131 if __name__ == "__main__":
       
  1132     test_main()