|
""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
|
8 |
|
9 import sys |
|
10 import unittest |
|
11 import hashlib |
|
12 import subprocess |
|
13 import test.test_support |
|
14 |
|
# Encoding applied to the joined per-character unicode data before it is
# fed to the SHA-1 hash in UnicodeMethodsTest.test_method_checksum.
encoding = 'utf-8'
|
16 |
|
17 |
|
18 ### Run tests |
|
19 |
|
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum test for the unicode string predicates and case mappings."""

    # update this, if the database changes
    expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'

    def test_method_checksum(self):
        # Feed the predicate and case-mapping results of every code point in
        # range(65536) into one SHA-1 hash and compare its hex digest with
        # expectedchecksum.  Each predicate result (a bool) indexes u"01",
        # so it contributes a single u"0" or u"1" character.
        h = hashlib.sha1()
        for i in range(65536):
            char = unichr(i)
            data = [
                # Predicates (single char)
                u"01"[char.isalnum()],
                u"01"[char.isalpha()],
                u"01"[char.isdecimal()],
                u"01"[char.isdigit()],
                u"01"[char.islower()],
                u"01"[char.isnumeric()],
                u"01"[char.isspace()],
                u"01"[char.istitle()],
                u"01"[char.isupper()],

                # Predicates (multiple chars)
                u"01"[(char + u'abc').isalnum()],
                u"01"[(char + u'abc').isalpha()],
                u"01"[(char + u'123').isdecimal()],
                u"01"[(char + u'123').isdigit()],
                u"01"[(char + u'abc').islower()],
                u"01"[(char + u'123').isnumeric()],
                u"01"[(char + u' \t').isspace()],
                u"01"[(char + u'abc').istitle()],
                u"01"[(char + u'ABC').isupper()],

                # Mappings (single char)
                char.lower(),
                char.upper(),
                char.title(),

                # Mappings (multiple chars)
                (char + u'abc').lower(),
                (char + u'ABC').upper(),
                (char + u'abc').title(),
                (char + u'ABC').title(),

                ]
            # The hash consumes bytes, so the joined unicode data is encoded
            # with the module-level ``encoding`` first.
            h.update(u''.join(data).encode(encoding))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)
|
67 |
|
class UnicodeDatabaseTest(unittest.TestCase):
    """Base test case that exposes the unicodedata module as ``self.db``.

    The module is imported in setUp() rather than at file level so that a
    missing unicodedata only fails the tests derived from this class.
    """

    def setUp(self):
        # In case unicodedata is not available, this will raise an ImportError,
        # but the other test cases will still be run
        import unicodedata
        self.db = unicodedata

    def tearDown(self):
        # Drop the per-test reference created by setUp().
        del self.db
|
78 |
|
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the unicodedata functions, reached through ``self.db``."""

    # update this, if the database changes
    expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'

    def test_function_checksum(self):
        # Feed the property values of every code point in range(0x10000)
        # into one SHA-1 hash and compare its hex digest with
        # expectedchecksum.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # The supplied default comes back for characters without a digit
        # value; without a default such a character raises ValueError.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)
        self.assertEqual(self.db.digit(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        # Same contract as digit(), but fractional values are reported too.
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
        self.assertEqual(self.db.numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        # u'\u2468' has a digit value (see test_digit) but no decimal value.
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)
        self.assertEqual(self.db.decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')
        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')
        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)
        self.assertEqual(self.db.combining(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are all
        # rejected with TypeError.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
        self.assertEqual(eaw(u'\U00020000'), 'W')
|
200 |
|
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Regression and consistency tests around the unicodedata module."""

    def test_failed_import_during_compiling(self):
        # Issue 4367
        # Decoding \N escapes requires the unicodedata module. If it can't be
        # imported, we shouldn't segfault.

        # This program should raise a SyntaxError in the eval.
        # The backslash is written as "\\N" so the child program receives a
        # literal backslash-N regardless of how this literal is parsed.
        code = "import sys;" \
            "sys.modules['unicodedata'] = None;" \
            """eval("u'\\N{SOFT HYPHEN}'")"""
        args = [sys.executable, "-c", code]
        # We use a subprocess because the unicodedata module may already have
        # been loaded in this process.
        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting for the child, so a full
        # pipe buffer cannot block the child, and the pipe is closed after.
        stderr_output = popen.communicate()[1]
        self.assertEqual(popen.returncode, 1)
        error = "SyntaxError: (unicode error) \\N escapes not supported " \
            "(can't load unicodedata module)"
        self.assertTrue(error in stderr_output)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.decimal(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        self.assertTrue(count >= 10)  # should have tested at least the ASCII digits

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.digit(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        self.assertTrue(count >= 10)  # should have tested at least the ASCII digits

    def test_bug_1704793(self):
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')

    def test_ucd_510(self):
        import unicodedata
        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
        self.assertTrue(unicodedata.mirrored(u"\u0f3a"))
        self.assertFalse(unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
        # Also, we now have two ways of representing
        # the upper-case mapping: as delta, or as absolute value
        self.assertEqual(u"a".upper(), u'A')
        self.assertEqual(u"\u1d79".upper(), u'\ua77d')
|
260 |
|
def test_main():
    """Run the three test case classes of this file via test.test_support."""
    test.test_support.run_unittest(
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest
    )
|
267 |
|
# Run the tests only when this file is executed as a script.
if __name__ == "__main__":
    test_main()