python-2.5.2/win32/Lib/test/test_unicodedata.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """ Test script for the unicodedata module.
       
     2 
       
     3     Written by Marc-Andre Lemburg (mal@lemburg.com).
       
     4 
       
     5     (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
       
     6 
       
     7 """#"
       
     8 import unittest, test.test_support
       
     9 import hashlib, sys
       
    10 
       
# Codec name passed to unicode.encode() in
# UnicodeMethodsTest.test_method_checksum before the data is hashed.
encoding = 'utf-8'
       
    12 
       
    13 
       
    14 ### Run tests
       
    15 
       
    16 class UnicodeMethodsTest(unittest.TestCase):
       
    17 
       
    18     # update this, if the database changes
       
    19     expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'
       
    20 
       
    21     def test_method_checksum(self):
       
    22         h = hashlib.sha1()
       
    23         for i in range(65536):
       
    24             char = unichr(i)
       
    25             data = [
       
    26                 # Predicates (single char)
       
    27                 u"01"[char.isalnum()],
       
    28                 u"01"[char.isalpha()],
       
    29                 u"01"[char.isdecimal()],
       
    30                 u"01"[char.isdigit()],
       
    31                 u"01"[char.islower()],
       
    32                 u"01"[char.isnumeric()],
       
    33                 u"01"[char.isspace()],
       
    34                 u"01"[char.istitle()],
       
    35                 u"01"[char.isupper()],
       
    36 
       
    37                 # Predicates (multiple chars)
       
    38                 u"01"[(char + u'abc').isalnum()],
       
    39                 u"01"[(char + u'abc').isalpha()],
       
    40                 u"01"[(char + u'123').isdecimal()],
       
    41                 u"01"[(char + u'123').isdigit()],
       
    42                 u"01"[(char + u'abc').islower()],
       
    43                 u"01"[(char + u'123').isnumeric()],
       
    44                 u"01"[(char + u' \t').isspace()],
       
    45                 u"01"[(char + u'abc').istitle()],
       
    46                 u"01"[(char + u'ABC').isupper()],
       
    47 
       
    48                 # Mappings (single char)
       
    49                 char.lower(),
       
    50                 char.upper(),
       
    51                 char.title(),
       
    52 
       
    53                 # Mappings (multiple chars)
       
    54                 (char + u'abc').lower(),
       
    55                 (char + u'ABC').upper(),
       
    56                 (char + u'abc').title(),
       
    57                 (char + u'ABC').title(),
       
    58 
       
    59                 ]
       
    60             h.update(u''.join(data).encode(encoding))
       
    61         result = h.hexdigest()
       
    62         self.assertEqual(result, self.expectedchecksum)
       
    63 
       
    64 class UnicodeDatabaseTest(unittest.TestCase):
       
    65 
       
    66     def setUp(self):
       
    67         # In case unicodedata is not available, this will raise an ImportError,
       
    68         # but the other test cases will still be run
       
    69         import unicodedata
       
    70         self.db = unicodedata
       
    71 
       
    72     def tearDown(self):
       
    73         del self.db
       
    74 
       
    75 class UnicodeFunctionsTest(UnicodeDatabaseTest):
       
    76 
       
    77     # update this, if the database changes
       
    78     expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'
       
    79 
       
    80     def test_function_checksum(self):
       
    81         data = []
       
    82         h = hashlib.sha1()
       
    83 
       
    84         for i in range(0x10000):
       
    85             char = unichr(i)
       
    86             data = [
       
    87                 # Properties
       
    88                 str(self.db.digit(char, -1)),
       
    89                 str(self.db.numeric(char, -1)),
       
    90                 str(self.db.decimal(char, -1)),
       
    91                 self.db.category(char),
       
    92                 self.db.bidirectional(char),
       
    93                 self.db.decomposition(char),
       
    94                 str(self.db.mirrored(char)),
       
    95                 str(self.db.combining(char)),
       
    96             ]
       
    97             h.update(''.join(data))
       
    98         result = h.hexdigest()
       
    99         self.assertEqual(result, self.expectedchecksum)
       
   100 
       
   101     def test_digit(self):
       
   102         self.assertEqual(self.db.digit(u'A', None), None)
       
   103         self.assertEqual(self.db.digit(u'9'), 9)
       
   104         self.assertEqual(self.db.digit(u'\u215b', None), None)
       
   105         self.assertEqual(self.db.digit(u'\u2468'), 9)
       
   106 
       
   107         self.assertRaises(TypeError, self.db.digit)
       
   108         self.assertRaises(TypeError, self.db.digit, u'xx')
       
   109         self.assertRaises(ValueError, self.db.digit, u'x')
       
   110 
       
   111     def test_numeric(self):
       
   112         self.assertEqual(self.db.numeric(u'A',None), None)
       
   113         self.assertEqual(self.db.numeric(u'9'), 9)
       
   114         self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
       
   115         self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
       
   116 
       
   117         self.assertRaises(TypeError, self.db.numeric)
       
   118         self.assertRaises(TypeError, self.db.numeric, u'xx')
       
   119         self.assertRaises(ValueError, self.db.numeric, u'x')
       
   120 
       
   121     def test_decimal(self):
       
   122         self.assertEqual(self.db.decimal(u'A',None), None)
       
   123         self.assertEqual(self.db.decimal(u'9'), 9)
       
   124         self.assertEqual(self.db.decimal(u'\u215b', None), None)
       
   125         self.assertEqual(self.db.decimal(u'\u2468', None), None)
       
   126 
       
   127         self.assertRaises(TypeError, self.db.decimal)
       
   128         self.assertRaises(TypeError, self.db.decimal, u'xx')
       
   129         self.assertRaises(ValueError, self.db.decimal, u'x')
       
   130 
       
   131     def test_category(self):
       
   132         self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
       
   133         self.assertEqual(self.db.category(u'a'), 'Ll')
       
   134         self.assertEqual(self.db.category(u'A'), 'Lu')
       
   135 
       
   136         self.assertRaises(TypeError, self.db.category)
       
   137         self.assertRaises(TypeError, self.db.category, u'xx')
       
   138 
       
   139     def test_bidirectional(self):
       
   140         self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
       
   141         self.assertEqual(self.db.bidirectional(u' '), 'WS')
       
   142         self.assertEqual(self.db.bidirectional(u'A'), 'L')
       
   143 
       
   144         self.assertRaises(TypeError, self.db.bidirectional)
       
   145         self.assertRaises(TypeError, self.db.bidirectional, u'xx')
       
   146 
       
   147     def test_decomposition(self):
       
   148         self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
       
   149         self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')
       
   150 
       
   151         self.assertRaises(TypeError, self.db.decomposition)
       
   152         self.assertRaises(TypeError, self.db.decomposition, u'xx')
       
   153 
       
   154     def test_mirrored(self):
       
   155         self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
       
   156         self.assertEqual(self.db.mirrored(u'a'), 0)
       
   157         self.assertEqual(self.db.mirrored(u'\u2201'), 1)
       
   158 
       
   159         self.assertRaises(TypeError, self.db.mirrored)
       
   160         self.assertRaises(TypeError, self.db.mirrored, u'xx')
       
   161 
       
   162     def test_combining(self):
       
   163         self.assertEqual(self.db.combining(u'\uFFFE'), 0)
       
   164         self.assertEqual(self.db.combining(u'a'), 0)
       
   165         self.assertEqual(self.db.combining(u'\u20e1'), 230)
       
   166 
       
   167         self.assertRaises(TypeError, self.db.combining)
       
   168         self.assertRaises(TypeError, self.db.combining, u'xx')
       
   169 
       
   170     def test_normalize(self):
       
   171         self.assertRaises(TypeError, self.db.normalize)
       
   172         self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
       
   173         self.assertEqual(self.db.normalize('NFKC', u''), u'')
       
   174         # The rest can be found in test_normalization.py
       
   175         # which requires an external file.
       
   176 
       
   177     def test_east_asian_width(self):
       
   178         eaw = self.db.east_asian_width
       
   179         self.assertRaises(TypeError, eaw, 'a')
       
   180         self.assertRaises(TypeError, eaw, u'')
       
   181         self.assertRaises(TypeError, eaw, u'ra')
       
   182         self.assertEqual(eaw(u'\x1e'), 'N')
       
   183         self.assertEqual(eaw(u'\x20'), 'Na')
       
   184         self.assertEqual(eaw(u'\uC894'), 'W')
       
   185         self.assertEqual(eaw(u'\uFF66'), 'H')
       
   186         self.assertEqual(eaw(u'\uFF1F'), 'F')
       
   187         self.assertEqual(eaw(u'\u2010'), 'A')
       
   188 
       
   189 class UnicodeMiscTest(UnicodeDatabaseTest):
       
   190 
       
   191     def test_decimal_numeric_consistent(self):
       
   192         # Test that decimal and numeric are consistent,
       
   193         # i.e. if a character has a decimal value,
       
   194         # its numeric value should be the same.
       
   195         count = 0
       
   196         for i in xrange(0x10000):
       
   197             c = unichr(i)
       
   198             dec = self.db.decimal(c, -1)
       
   199             if dec != -1:
       
   200                 self.assertEqual(dec, self.db.numeric(c))
       
   201                 count += 1
       
   202         self.assert_(count >= 10) # should have tested at least the ASCII digits
       
   203 
       
   204     def test_digit_numeric_consistent(self):
       
   205         # Test that digit and numeric are consistent,
       
   206         # i.e. if a character has a digit value,
       
   207         # its numeric value should be the same.
       
   208         count = 0
       
   209         for i in xrange(0x10000):
       
   210             c = unichr(i)
       
   211             dec = self.db.digit(c, -1)
       
   212             if dec != -1:
       
   213                 self.assertEqual(dec, self.db.numeric(c))
       
   214                 count += 1
       
   215         self.assert_(count >= 10) # should have tested at least the ASCII digits
       
   216 
       
   217     def test_bug_1704793(self):
       
   218         if sys.maxunicode == 65535:
       
   219             self.assertRaises(KeyError, self.db.lookup, "GOTHIC LETTER FAIHU")
       
   220 
       
   221 def test_main():
       
   222     test.test_support.run_unittest(
       
   223         UnicodeMiscTest,
       
   224         UnicodeMethodsTest,
       
   225         UnicodeFunctionsTest
       
   226     )
       
   227 
       
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()