symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_unicodedata.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """ Test script for the unicodedata module.
       
     2 
       
     3     Written by Marc-Andre Lemburg (mal@lemburg.com).
       
     4 
       
     5     (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
       
     6 
       
     7 """
       
     8 
       
     9 import sys
       
    10 import unittest
       
    11 import hashlib
       
    12 import subprocess
       
    13 import test.test_support
       
    14 
       
# Codec used by UnicodeMethodsTest.test_method_checksum to turn each
# character's joined unicode data into bytes before hashing it.
encoding = 'utf-8'
       
    16 
       
    17 
       
    18 ### Run tests
       
    19 
       
    20 class UnicodeMethodsTest(unittest.TestCase):
       
    21 
       
    22     # update this, if the database changes
       
    23     expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
       
    24 
       
    25     def test_method_checksum(self):
       
    26         h = hashlib.sha1()
       
    27         for i in range(65536):
       
    28             char = unichr(i)
       
    29             data = [
       
    30                 # Predicates (single char)
       
    31                 u"01"[char.isalnum()],
       
    32                 u"01"[char.isalpha()],
       
    33                 u"01"[char.isdecimal()],
       
    34                 u"01"[char.isdigit()],
       
    35                 u"01"[char.islower()],
       
    36                 u"01"[char.isnumeric()],
       
    37                 u"01"[char.isspace()],
       
    38                 u"01"[char.istitle()],
       
    39                 u"01"[char.isupper()],
       
    40 
       
    41                 # Predicates (multiple chars)
       
    42                 u"01"[(char + u'abc').isalnum()],
       
    43                 u"01"[(char + u'abc').isalpha()],
       
    44                 u"01"[(char + u'123').isdecimal()],
       
    45                 u"01"[(char + u'123').isdigit()],
       
    46                 u"01"[(char + u'abc').islower()],
       
    47                 u"01"[(char + u'123').isnumeric()],
       
    48                 u"01"[(char + u' \t').isspace()],
       
    49                 u"01"[(char + u'abc').istitle()],
       
    50                 u"01"[(char + u'ABC').isupper()],
       
    51 
       
    52                 # Mappings (single char)
       
    53                 char.lower(),
       
    54                 char.upper(),
       
    55                 char.title(),
       
    56 
       
    57                 # Mappings (multiple chars)
       
    58                 (char + u'abc').lower(),
       
    59                 (char + u'ABC').upper(),
       
    60                 (char + u'abc').title(),
       
    61                 (char + u'ABC').title(),
       
    62 
       
    63                 ]
       
    64             h.update(u''.join(data).encode(encoding))
       
    65         result = h.hexdigest()
       
    66         self.assertEqual(result, self.expectedchecksum)
       
    67 
       
    68 class UnicodeDatabaseTest(unittest.TestCase):
       
    69 
       
    70     def setUp(self):
       
    71         # In case unicodedata is not available, this will raise an ImportError,
       
    72         # but the other test cases will still be run
       
    73         import unicodedata
       
    74         self.db = unicodedata
       
    75 
       
    76     def tearDown(self):
       
    77         del self.db
       
    78 
       
    79 class UnicodeFunctionsTest(UnicodeDatabaseTest):
       
    80 
       
    81     # update this, if the database changes
       
    82     expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'
       
    83 
       
    84     def test_function_checksum(self):
       
    85         data = []
       
    86         h = hashlib.sha1()
       
    87 
       
    88         for i in range(0x10000):
       
    89             char = unichr(i)
       
    90             data = [
       
    91                 # Properties
       
    92                 str(self.db.digit(char, -1)),
       
    93                 str(self.db.numeric(char, -1)),
       
    94                 str(self.db.decimal(char, -1)),
       
    95                 self.db.category(char),
       
    96                 self.db.bidirectional(char),
       
    97                 self.db.decomposition(char),
       
    98                 str(self.db.mirrored(char)),
       
    99                 str(self.db.combining(char)),
       
   100             ]
       
   101             h.update(''.join(data))
       
   102         result = h.hexdigest()
       
   103         self.assertEqual(result, self.expectedchecksum)
       
   104 
       
   105     def test_digit(self):
       
   106         self.assertEqual(self.db.digit(u'A', None), None)
       
   107         self.assertEqual(self.db.digit(u'9'), 9)
       
   108         self.assertEqual(self.db.digit(u'\u215b', None), None)
       
   109         self.assertEqual(self.db.digit(u'\u2468'), 9)
       
   110         self.assertEqual(self.db.digit(u'\U00020000', None), None)
       
   111 
       
   112         self.assertRaises(TypeError, self.db.digit)
       
   113         self.assertRaises(TypeError, self.db.digit, u'xx')
       
   114         self.assertRaises(ValueError, self.db.digit, u'x')
       
   115 
       
   116     def test_numeric(self):
       
   117         self.assertEqual(self.db.numeric(u'A',None), None)
       
   118         self.assertEqual(self.db.numeric(u'9'), 9)
       
   119         self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
       
   120         self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
       
   121         self.assertEqual(self.db.numeric(u'\U00020000', None), None)
       
   122 
       
   123         self.assertRaises(TypeError, self.db.numeric)
       
   124         self.assertRaises(TypeError, self.db.numeric, u'xx')
       
   125         self.assertRaises(ValueError, self.db.numeric, u'x')
       
   126 
       
   127     def test_decimal(self):
       
   128         self.assertEqual(self.db.decimal(u'A',None), None)
       
   129         self.assertEqual(self.db.decimal(u'9'), 9)
       
   130         self.assertEqual(self.db.decimal(u'\u215b', None), None)
       
   131         self.assertEqual(self.db.decimal(u'\u2468', None), None)
       
   132         self.assertEqual(self.db.decimal(u'\U00020000', None), None)
       
   133 
       
   134         self.assertRaises(TypeError, self.db.decimal)
       
   135         self.assertRaises(TypeError, self.db.decimal, u'xx')
       
   136         self.assertRaises(ValueError, self.db.decimal, u'x')
       
   137 
       
   138     def test_category(self):
       
   139         self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
       
   140         self.assertEqual(self.db.category(u'a'), 'Ll')
       
   141         self.assertEqual(self.db.category(u'A'), 'Lu')
       
   142         self.assertEqual(self.db.category(u'\U00020000'), 'Lo')
       
   143 
       
   144         self.assertRaises(TypeError, self.db.category)
       
   145         self.assertRaises(TypeError, self.db.category, u'xx')
       
   146 
       
   147     def test_bidirectional(self):
       
   148         self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
       
   149         self.assertEqual(self.db.bidirectional(u' '), 'WS')
       
   150         self.assertEqual(self.db.bidirectional(u'A'), 'L')
       
   151         self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')
       
   152 
       
   153         self.assertRaises(TypeError, self.db.bidirectional)
       
   154         self.assertRaises(TypeError, self.db.bidirectional, u'xx')
       
   155 
       
   156     def test_decomposition(self):
       
   157         self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
       
   158         self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')
       
   159 
       
   160         self.assertRaises(TypeError, self.db.decomposition)
       
   161         self.assertRaises(TypeError, self.db.decomposition, u'xx')
       
   162 
       
   163     def test_mirrored(self):
       
   164         self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
       
   165         self.assertEqual(self.db.mirrored(u'a'), 0)
       
   166         self.assertEqual(self.db.mirrored(u'\u2201'), 1)
       
   167         self.assertEqual(self.db.mirrored(u'\U00020000'), 0)
       
   168 
       
   169         self.assertRaises(TypeError, self.db.mirrored)
       
   170         self.assertRaises(TypeError, self.db.mirrored, u'xx')
       
   171 
       
   172     def test_combining(self):
       
   173         self.assertEqual(self.db.combining(u'\uFFFE'), 0)
       
   174         self.assertEqual(self.db.combining(u'a'), 0)
       
   175         self.assertEqual(self.db.combining(u'\u20e1'), 230)
       
   176         self.assertEqual(self.db.combining(u'\U00020000'), 0)
       
   177 
       
   178         self.assertRaises(TypeError, self.db.combining)
       
   179         self.assertRaises(TypeError, self.db.combining, u'xx')
       
   180 
       
   181     def test_normalize(self):
       
   182         self.assertRaises(TypeError, self.db.normalize)
       
   183         self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
       
   184         self.assertEqual(self.db.normalize('NFKC', u''), u'')
       
   185         # The rest can be found in test_normalization.py
       
   186         # which requires an external file.
       
   187 
       
   188     def test_east_asian_width(self):
       
   189         eaw = self.db.east_asian_width
       
   190         self.assertRaises(TypeError, eaw, 'a')
       
   191         self.assertRaises(TypeError, eaw, u'')
       
   192         self.assertRaises(TypeError, eaw, u'ra')
       
   193         self.assertEqual(eaw(u'\x1e'), 'N')
       
   194         self.assertEqual(eaw(u'\x20'), 'Na')
       
   195         self.assertEqual(eaw(u'\uC894'), 'W')
       
   196         self.assertEqual(eaw(u'\uFF66'), 'H')
       
   197         self.assertEqual(eaw(u'\uFF1F'), 'F')
       
   198         self.assertEqual(eaw(u'\u2010'), 'A')
       
   199         self.assertEqual(eaw(u'\U00020000'), 'W')
       
   200 
       
   201 class UnicodeMiscTest(UnicodeDatabaseTest):
       
   202 
       
   203     def test_failed_import_during_compiling(self):
       
   204         # Issue 4367
       
   205         # Decoding \N escapes requires the unicodedata module. If it can't be
       
   206         # imported, we shouldn't segfault.
       
   207 
       
   208         # This program should raise a SyntaxError in the eval.
       
   209         code = "import sys;" \
       
   210             "sys.modules['unicodedata'] = None;" \
       
   211             """eval("u'\N{SOFT HYPHEN}'")"""
       
   212         args = [sys.executable, "-c", code]
       
   213         # We use a subprocess because the unicodedata module may already have
       
   214         # been loaded in this process.
       
   215         popen = subprocess.Popen(args, stderr=subprocess.PIPE)
       
   216         popen.wait()
       
   217         self.assertEqual(popen.returncode, 1)
       
   218         error = "SyntaxError: (unicode error) \N escapes not supported " \
       
   219             "(can't load unicodedata module)"
       
   220         self.assertTrue(error in popen.stderr.read())
       
   221 
       
   222     def test_decimal_numeric_consistent(self):
       
   223         # Test that decimal and numeric are consistent,
       
   224         # i.e. if a character has a decimal value,
       
   225         # its numeric value should be the same.
       
   226         count = 0
       
   227         for i in xrange(0x10000):
       
   228             c = unichr(i)
       
   229             dec = self.db.decimal(c, -1)
       
   230             if dec != -1:
       
   231                 self.assertEqual(dec, self.db.numeric(c))
       
   232                 count += 1
       
   233         self.assert_(count >= 10) # should have tested at least the ASCII digits
       
   234 
       
   235     def test_digit_numeric_consistent(self):
       
   236         # Test that digit and numeric are consistent,
       
   237         # i.e. if a character has a digit value,
       
   238         # its numeric value should be the same.
       
   239         count = 0
       
   240         for i in xrange(0x10000):
       
   241             c = unichr(i)
       
   242             dec = self.db.digit(c, -1)
       
   243             if dec != -1:
       
   244                 self.assertEqual(dec, self.db.numeric(c))
       
   245                 count += 1
       
   246         self.assert_(count >= 10) # should have tested at least the ASCII digits
       
   247 
       
   248     def test_bug_1704793(self):
       
   249         self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
       
   250 
       
   251     def test_ucd_510(self):
       
   252         import unicodedata
       
   253         # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
       
   254         self.assert_(unicodedata.mirrored(u"\u0f3a"))
       
   255         self.assert_(not unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
       
   256         # Also, we now have two ways of representing
       
   257         # the upper-case mapping: as delta, or as absolute value
       
   258         self.assert_(u"a".upper()==u'A')
       
   259         self.assert_(u"\u1d79".upper()==u'\ua77d')
       
   260 
       
   261 def test_main():
       
   262     test.test_support.run_unittest(
       
   263         UnicodeMiscTest,
       
   264         UnicodeMethodsTest,
       
   265         UnicodeFunctionsTest
       
   266     )
       
   267 
       
   268 if __name__ == "__main__":
       
   269     test_main()