|
1 from test.test_support import run_unittest, open_urlresource |
|
2 import unittest |
|
3 |
|
4 import sys |
|
5 import os |
|
6 from unicodedata import normalize, unidata_version |
|
7 |
|
# File name of the local copy of the normalization test data, built with
# os.extsep rather than a hard-coded ".".
TESTDATAFILE = "NormalizationTest" + os.extsep + "txt"
# Download location of that file for the Unicode database version this
# interpreter's unicodedata module reports.
TESTDATAURL = "http://www.unicode.org/Public/" + unidata_version + "/ucd/" + TESTDATAFILE

# A local copy whose first line does not mention the current
# unidata_version is removed (presumably so that a matching copy is fetched
# again later -- confirm against open_urlresource).
if os.path.exists(TESTDATAFILE):
    f = open(TESTDATAFILE)
    try:
        # Only the first line is inspected for the version string.
        l = f.readline()
    finally:
        # Close the handle even if reading fails.
        f.close()
    if unidata_version not in l:
        os.unlink(TESTDATAFILE)
|
17 |
|
class RangeError(Exception):
    """Raised by unistr() for a code point greater than sys.maxunicode."""
|
20 |
|
def NFC(str):
    """Return ``str`` converted to Unicode normalization form NFC."""
    form = "NFC"
    return normalize(form, str)
|
23 |
|
def NFKC(str):
    """Return ``str`` converted to Unicode normalization form NFKC."""
    form = "NFKC"
    return normalize(form, str)
|
26 |
|
def NFD(str):
    """Return ``str`` converted to Unicode normalization form NFD."""
    form = "NFD"
    return normalize(form, str)
|
29 |
|
def NFKD(str):
    """Return ``str`` converted to Unicode normalization form NFKD."""
    form = "NFKD"
    return normalize(form, str)
|
32 |
|
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    ``data`` is split on single spaces and each field is parsed as a
    base-16 integer.  RangeError is raised when any of the resulting code
    points is greater than sys.maxunicode; otherwise the characters for
    the code points are returned joined into one unicode string.
    """
    codepoints = []
    for field in data.split(" "):
        codepoints.append(int(field, 16))
    # All fields are parsed and range-checked before any character is
    # created.  split(" ") never returns an empty list, so max() is safe.
    if max(codepoints) > sys.maxunicode:
        raise RangeError
    return u"".join(map(unichr, codepoints))
|
39 |
|
class NormalizationTest(unittest.TestCase):
    """Checks unicodedata.normalize against the normalization test data."""

    def test_main(self):
        """Verify the normalization invariants for every usable data record.

        Anything after a '#' on a line is dropped, and blank lines are
        skipped.  A line starting with "@Part" selects the current part.
        Every other line is split on ';' into five columns c1..c5, each
        decoded with unistr(), and the equalities asserted below must hold
        between the columns and their NFC/NFD/NFKC/NFKD forms.  Records of
        "@Part3" are skipped.  Afterwards every code point up to
        sys.maxunicode whose character was not a c1 value of "@Part1" must
        be unchanged by all four normalization forms.
        """
        # Marker of the part being read.  Starts as None so that a record
        # found before the first "@Part" line is still checked instead of
        # failing with UnboundLocalError.
        part = None
        # c1 values seen in part 1; these are excluded from the final
        # "unchanged by normalization" sweep.
        part1_data = set()
        testdata = open_urlresource(TESTDATAURL)
        try:
            for line in testdata:
                if '#' in line:
                    line = line.split('#')[0]
                line = line.strip()
                if not line:
                    continue
                if line.startswith("@Part"):
                    part = line.split()[0]
                    continue
                if part == "@Part3":
                    # XXX we don't support PRI #29 yet, so skip these tests for now
                    continue
                try:
                    # The slice drops whatever follows the last ';'.
                    c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
                except RangeError:
                    # Skip unsupported characters;
                    # try at least adding c1 if we are in part1
                    if part == "@Part1":
                        try:
                            c1 = unistr(line.split(';')[0])
                        except RangeError:
                            # c1 itself is not representable; nothing to record.
                            pass
                        else:
                            part1_data.add(c1)
                    continue

                # Perform tests
                self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
                self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
                self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
                self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
                self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                                NFKC(c3) == NFKC(c4) == NFKC(c5),
                                line)
                self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                                NFKD(c3) == NFKD(c4) == NFKD(c5),
                                line)

                # Record part 1 data
                if part == "@Part1":
                    part1_data.add(c1)
        finally:
            # Release the data file even when an assertion above fails.
            testdata.close()

        # Perform tests for all other data
        for c in range(sys.maxunicode+1):
            X = unichr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Normalizing this two-character string must complete without error."""
        # Check for bug 834676
        normalize('NFC', u'\ud55c\uae00')
|
95 |
|
96 |
|
def test_main():
    """Run NormalizationTest through run_unittest.

    The test data is opened once beforehand so that a failure to obtain it
    is raised here, before the test case is started.
    """
    # Hit the exception early.  The handle itself is not needed, so it is
    # closed right away instead of being left open.
    open_urlresource(TESTDATAURL).close()
    run_unittest(NormalizationTest)


if __name__ == "__main__":
    test_main()