|
1 import formatter |
|
2 import unittest |
|
3 |
|
4 from test import test_support |
|
5 htmllib = test_support.import_module('htmllib', deprecated=True) |
|
6 |
|
7 |
|
class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that remembers every anchor_bgn() call it receives."""

    def __init__(self, *args, **kwargs):
        # Set up the storage first, then hand all arguments to the base
        # initializer unchanged.
        self.__anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kwargs)

    def get_anchor_info(self):
        """Return the list of argument tuples recorded by anchor_bgn()."""
        return self.__anchors

    def anchor_bgn(self, *anchor_args):
        """Store the positional arguments of this call as one tuple."""
        self.__anchors.append(anchor_args)
|
18 |
|
class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that remembers every unknown_decl() payload."""

    def __init__(self, *args, **kwargs):
        # Set up the storage first, then hand all arguments to the base
        # initializer unchanged.
        self.__decls = []
        htmllib.HTMLParser.__init__(self, *args, **kwargs)

    def get_decl_info(self):
        """Return the list of values recorded by unknown_decl()."""
        return self.__decls

    def unknown_decl(self, data):
        """Append the declaration text passed in *data* to the record."""
        self.__decls.append(data)
|
29 |
|
30 |
|
class HTMLParserTestCase(unittest.TestCase):
    """Regression tests for htmllib.HTMLParser callbacks.

    Each test feeds a small document to a collecting parser subclass and
    compares what the subclass recorded against the expected list.
    """

    def test_anchor_collection(self):
        # See SF bug #467059.
        # Every <a> start tag must be recorded as a 3-tuple; attributes
        # that are absent from the tag show up as empty strings.
        parser = AnchorCollector(formatter.NullFormatter(), verbose=1)
        parser.feed(
            """<a href='http://foo.org/' name='splat'> </a>
            <a href='http://www.python.org/'> </a>
            <a name='frob'> </a>
            """)
        parser.close()
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(parser.get_anchor_info(),
                         [('http://foo.org/', 'splat', ''),
                          ('http://www.python.org/', '', ''),
                          ('', 'frob', ''),
                          ])

    def test_decl_collection(self):
        # See SF patch #545300
        # The "<![...]>" constructs must reach unknown_decl() with the
        # surrounding "<![" and "]>" markers stripped.
        parser = DeclCollector(formatter.NullFormatter(), verbose=1)
        parser.feed(
            """<html>
            <body>
            hallo
            <![if !supportEmptyParas]> <![endif]>
            </body>
            </html>
            """)
        parser.close()
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(parser.get_decl_info(),
                         ["if !supportEmptyParas",
                          "endif"
                          ])
|
63 |
|
def test_main():
    """Run every test case class of this module through test_support."""
    case_classes = (HTMLParserTestCase,)
    test_support.run_unittest(*case_classes)
|
66 |
|
67 |
|
# Run the tests only when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()