|
1 """ Test script for the unicodedata module. |
|
2 |
|
3 Written by Marc-Andre Lemburg (mal@lemburg.com). |
|
4 |
|
5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. |
|
6 |
|
7 """#" |
|
8 import unittest, test.test_support |
|
9 import hashlib, sys |
|
10 |
|
11 encoding = 'utf-8' |
|
12 |
|
13 |
|
14 ### Run tests |
|
15 |
|
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum test for the unicode string predicates and case mappings.

    For every code point below 65536 the results of the ``is*()``
    predicates and of ``lower()``/``upper()``/``title()`` are collected,
    both for the single character and for the character followed by a
    short ASCII suffix.  Everything is fed into one SHA-1 hash whose hex
    digest must equal ``expectedchecksum``.
    """

    # update this, if the database changes
    expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'

    def test_method_checksum(self):
        sha = hashlib.sha1()
        for codepoint in range(65536):
            ch = unichr(codepoint)

            # The character followed by the fixed suffixes used below.
            with_abc = ch + u'abc'
            with_ABC = ch + u'ABC'
            with_123 = ch + u'123'
            with_blanks = ch + u' \t'

            # Boolean results; the order is part of the checksum.
            flags = [
                # Predicates (single char)
                ch.isalnum(),
                ch.isalpha(),
                ch.isdecimal(),
                ch.isdigit(),
                ch.islower(),
                ch.isnumeric(),
                ch.isspace(),
                ch.istitle(),
                ch.isupper(),

                # Predicates (multiple chars)
                with_abc.isalnum(),
                with_abc.isalpha(),
                with_123.isdecimal(),
                with_123.isdigit(),
                with_abc.islower(),
                with_123.isnumeric(),
                with_blanks.isspace(),
                with_abc.istitle(),
                with_ABC.isupper(),
            ]

            # Case-mapped strings; again the order matters.
            mapped = [
                # Mappings (single char)
                ch.lower(),
                ch.upper(),
                ch.title(),

                # Mappings (multiple chars)
                with_abc.lower(),
                with_ABC.upper(),
                with_abc.title(),
                with_ABC.title(),
            ]

            # Each flag becomes u"0" or u"1" by indexing with the bool.
            record = u''.join([u"01"[flag] for flag in flags] + mapped)
            sha.update(record.encode(encoding))

        self.assertEqual(sha.hexdigest(), self.expectedchecksum)
|
63 |
|
class UnicodeDatabaseTest(unittest.TestCase):
    """Base class that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        """Import unicodedata and store the module object on the instance."""
        # In case unicodedata is not available, this will raise an ImportError,
        # but the other test cases will still be run
        import unicodedata as database
        self.db = database

    def tearDown(self):
        """Remove the module reference stored by setUp()."""
        delattr(self, 'db')
|
74 |
|
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the per-character lookup functions of unicodedata.

    All calls go through ``self.db``, the module object that
    UnicodeDatabaseTest.setUp() stores on the instance.
    """

    # update this, if the database changes
    expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'

    def test_function_checksum(self):
        # Hash the property values of every code point below 0x10000 and
        # compare the SHA-1 hex digest with the recorded checksum.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # With a default, characters lacking a digit value return the default.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)

        # Missing argument / two-character string -> TypeError;
        # no digit value and no default -> ValueError.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)

        # Missing argument / two-character string -> TypeError;
        # no numeric value and no default -> ValueError.
        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)

        # Missing argument / two-character string -> TypeError;
        # no decimal value and no default -> ValueError.
        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A non-unicode string, an empty string and a two-character
        # string are all expected to raise TypeError.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
|
188 |
|
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Consistency checks between unicodedata functions, plus a lookup test.

    All calls go through ``self.db``, the module object that
    UnicodeDatabaseTest.setUp() stores on the instance.
    """

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            # -1 is the "no decimal value" marker passed as default.
            dec = self.db.decimal(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertTrue(count >= 10)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            # -1 is the "no digit value" marker passed as default.
            digit = self.db.digit(c, -1)
            if digit != -1:
                self.assertEqual(digit, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertTrue(count >= 10)

    def test_bug_1704793(self):
        # Only checked when sys.maxunicode is 65535: looking up this
        # character name is then expected to raise KeyError.
        if sys.maxunicode == 65535:
            self.assertRaises(KeyError, self.db.lookup, "GOTHIC LETTER FAIHU")
|
220 |
|
def test_main():
    """Run the three concrete test case classes of this module."""
    # Order is kept as passed to run_unittest() originally.
    test_classes = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*test_classes)
|
227 |
|
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()