FCL/sftools/dev/hostenv/pythontoolsplat: comparison python-2.5.2/win32/Lib/test/test

equal deleted inserted replaced

--1:000000000000
+:ae805ac0140d
+import htmlentitydefs
+import pprint
+import re
+import sgmllib
+import unittest
+from test import test_support
class EventCollector(sgmllib.SGMLParser):
    """SGML parser that records each handler callback as an event tuple.

    Every tuple starts with a string naming the kind of event ("starttag",
    "endtag", "data", ...) followed by the arguments the handler received.
    """

    def __init__(self):
        self.events = []
        # Handlers record through this bound method of the list created
        # above rather than looking up self.events on every call.
        self.append = self.events.append
        sgmllib.SGMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Normalizes the list of events so that buffer artefacts don't
        separate runs of contiguous characters.  The merged list is also
        stored back on self.events.
        """
        merged = []
        for event in self.events:
            # The tail of ``merged`` always has the same kind as the
            # previously seen event, so comparing against it is enough to
            # detect a run of consecutive "data" events.
            if event[0] == "data" and merged and merged[-1][0] == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
        self.events = merged
        return merged

    # structure markup

    def unknown_starttag(self, tag, attrs):
        """Record a start tag together with its attribute list."""
        self.append(("starttag", tag, attrs))

    def unknown_endtag(self, tag):
        """Record an end tag."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record the text of a comment."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a character reference without converting it."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a chunk of character data."""
        self.append(("data", data))

    def handle_decl(self, decl):
        """Record the contents of a declaration."""
        self.append(("decl", decl))

    def handle_entityref(self, data):
        """Record an entity reference without converting it."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record the contents of a processing instruction."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record a declaration the parser did not recognise."""
        self.append(("unknown decl", decl))
class CDATAEventCollector(EventCollector):
    """Event collector that switches the parser to literal mode on <cdata>."""

    def start_cdata(self, attrs):
        """Record the <cdata> start tag, then call setliteral()."""
        self.append(("starttag", "cdata", attrs))
        self.setliteral()
class HTMLEntityCollector(EventCollector):
    """Event collector that also records calls to the convert_*() hooks.

    Each override appends an event describing the call and then delegates
    to the inherited implementation.
    """

    # Pattern for entity and character references; this one also accepts
    # the hexadecimal ``&#x...`` form.
    entity_or_charref = re.compile('(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
        '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')

    def convert_charref(self, name):
        """Record the call; delegate unless *name* starts with "x"."""
        self.append(("charref", "convert", name))
        if name[0] == "x":
            # References starting with "x" are recorded but not converted.
            return None
        return EventCollector.convert_charref(self, name)

    def convert_codepoint(self, codepoint):
        """Record the call and invoke the inherited converter.

        The inherited result is not returned, so this method always
        returns None.
        """
        self.append(("codepoint", "convert", codepoint))
        EventCollector.convert_codepoint(self, codepoint)

    def convert_entityref(self, name):
        """Record the call and return the inherited conversion result."""
        self.append(("entityref", "convert", name))
        return EventCollector.convert_entityref(self, name)

    # These record that they were called, then pass the call along
    # to the default implementation so that its actions can be
    # recorded.

    def handle_charref(self, data):
        """Record the reference, then run SGMLParser's own handling."""
        self.append(("charref", data))
        sgmllib.SGMLParser.handle_charref(self, data)

    def handle_entityref(self, data):
        """Record the reference, then run SGMLParser's own handling."""
        self.append(("entityref", data))
        sgmllib.SGMLParser.handle_entityref(self, data)
class SGMLParserTestCase(unittest.TestCase):
    """Event-level tests for sgmllib.SGMLParser.

    Each test feeds markup to a collector parser and compares the list of
    recorded events with the expected list.
    """

    # Parser class instantiated by get_events().  Individual tests rebind
    # this on the instance to use a more specialised collector.
    collector = EventCollector

    def get_events(self, source):
        """Feed every item of *source* to a new collector; return its events.

        *source* is iterated, so a list is fed item by item and a plain
        string is fed one character at a time.
        """
        parser = self.collector()
        for s in source:
            parser.feed(s)
        parser.close()
        return parser.get_events()

    def check_events(self, source, expected_events):
        """Fail unless parsing *source* yields exactly *expected_events*."""
        events = self.get_events(source)
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def check_parse_error(self, source):
        """Fail unless feeding *source* raises sgmllib.SGMLParseError.

        Always uses a plain EventCollector, regardless of self.collector.
        """
        parser = EventCollector()
        try:
            parser.feed(source)
            parser.close()
        except sgmllib.SGMLParseError:
            pass
        else:
            self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                      % (source, pprint.pformat(parser.get_events())))

    def test_doctype_decl_internal(self):
        # A DOCTYPE with an internal subset is reported as one "decl" event.
        # The continuation lines are part of the string literal, so they
        # stay flush left.
        inside = """\
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED
profile CDATA 'DublinCore'>
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
<!ENTITY myEntity 'internal parsed entity'>
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
<!ENTITY % paramEntity 'name|name|name'>
%paramEntity;
<!-- comment -->
]"""
        self.check_events(["<!%s>" % inside], [
            ("decl", inside),
            ])

    def test_doctype_decl_external(self):
        # A plain string is passed here, so it is fed character by character.
        inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
        self.check_events("<!%s>" % inside, [
            ("decl", inside),
            ])

    def test_underscore_in_attrname(self):
        # SF bug #436621
        """Make sure attribute names with underscores are accepted"""
        self.check_events("<a has_under _under>", [
            ("starttag", "a", [("has_under", "has_under"),
                               ("_under", "_under")]),
            ])

    def test_underscore_in_tagname(self):
        # SF bug #436621
        """Make sure tag names with underscores are accepted"""
        self.check_events("<has_under></has_under>", [
            ("starttag", "has_under", []),
            ("endtag", "has_under"),
            ])

    def test_quotes_in_unquoted_attrs(self):
        # SF bug #436621
        """Be sure quotes in unquoted attributes are made part of the value"""
        self.check_events("<a href=foo'bar\"baz>", [
            ("starttag", "a", [("href", "foo'bar\"baz")]),
            ])

    def test_xhtml_empty_tag(self):
        """Handling of XHTML-style empty start tags"""
        self.check_events("<br />text<i></i>", [
            ("starttag", "br", []),
            ("data", "text"),
            ("starttag", "i", []),
            ("endtag", "i"),
            ])

    def test_processing_instruction_only(self):
        self.check_events("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def test_bad_nesting(self):
        # Mis-nested tags are reported in document order.
        self.check_events("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self.check_events("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self.check_events("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_attr_syntax(self):
        # The same attributes written with different quoting and whitespace
        # must all produce the same start tag event.
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
            ]
        self.check_events("""<a b='v' c="v" d=v e>""", output)
        self.check_events("""<a  b = 'v' c = "v" d = v e>""", output)
        self.check_events("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self.check_events("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self.check_events("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                          [("starttag", "a", [("b", "xxx\n\txxx"),
                                              ("c", "yyy\t\nyyy"),
                                              ("d", "\txyz\n")])
                           ])
        self.check_events("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])
        # URL construction stuff from RFC 1808:
        safe = "$-_.+"
        extra = "!*'(),"
        reserved = ";/?:@&="
        url = "http://example.com:8080/path/to/file?%s%s%s" % (
            safe, extra, reserved)
        self.check_events("""<e a=%s>""" % url, [
            ("starttag", "e", [("a", url)]),
            ])
        # Regression test for SF patch #669683.
        self.check_events("<e a=rgb(1,2,3)>", [
            ("starttag", "e", [("a", "rgb(1,2,3)")]),
            ])

    def test_attr_values_entities(self):
        """Substitution of entities and charrefs in attribute values"""
        # SF bug #1452246
        # The continuation lines are part of the string literal, so they
        # stay flush left.
        self.check_events("""<a b=&lt; c=&lt;&gt; d=&lt-&gt; e='&lt; '
f="&xxx;" g='&#32;&#33;' h='&#500;'
i='x?a=b&c=d;'
j='&amp;#42;' k='&#38;#42;'>""",
            [("starttag", "a", [("b", "<"),
                                ("c", "<>"),
                                ("d", "&lt->"),
                                ("e", "< "),
                                ("f", "&xxx;"),
                                ("g", " !"),
                                ("h", "&#500;"),
                                ("i", "x?a=b&c=d;"),
                                ("j", "&#42;"),
                                ("k", "&#42;"),
                                ])])

    def test_convert_overrides(self):
        # This checks that the character and entity reference
        # conversion helpers are called at the documented times.  No
        # attempt is made to really change what the parser accepts.
        #
        self.collector = HTMLEntityCollector
        self.check_events(('<a title="&ldquo;test&#x201d;">foo</a>'
                           '&foobar;&#42;'), [
            ('entityref', 'convert', 'ldquo'),
            ('charref', 'convert', 'x201d'),
            ('starttag', 'a', [('title', '&ldquo;test&#x201d;')]),
            ('data', 'foo'),
            ('endtag', 'a'),
            ('entityref', 'foobar'),
            ('entityref', 'convert', 'foobar'),
            ('charref', '42'),
            ('charref', 'convert', '42'),
            ('codepoint', 'convert', 42),
            ])

    def test_attr_funky_names(self):
        self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_attr_value_ip6_url(self):
        # http://www.python.org/sf/853506
        self.check_events(("<a href='http://[1080::8:800:200C:417A]/'>"
                           "<a href=http://[1080::8:800:200C:417A]/>"), [
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ])

    # NOTE: this test used to be defined twice in the class with identical
    # bodies; the later definition silently replaced the earlier one, so
    # only a single copy is kept.
    def test_illegal_declarations(self):
        s = 'abc<!spacer type="block" height="25">def'
        self.check_events(s, [
            ("data", "abc"),
            ("unknown decl", 'spacer type="block" height="25"'),
            ("data", "def"),
            ])

    def test_weird_starttags(self):
        self.check_events("<a<a>", [
            ("starttag", "a", []),
            ("starttag", "a", []),
            ])
        self.check_events("</a<a>", [
            ("endtag", "a"),
            ("starttag", "a", []),
            ])

    def test_declaration_junk_chars(self):
        self.check_parse_error("<!DOCTYPE foo $ >")

    def test_get_starttag_text(self):
        s = """<foobar   \n   one="1"\ttwo=2   >"""
        self.check_events(s, [
            ("starttag", "foobar", [("one", "1"), ("two", "2")]),
            ])

    def test_cdata_content(self):
        # Inside <cdata> (literal mode) markup is reported as plain data;
        # inside <notcdata> the comment is recognised as usual.
        s = ("<cdata> <!-- not a comment --> &not-an-entity-ref; </cdata>"
             "<notcdata> <!-- comment --> </notcdata>")
        self.collector = CDATAEventCollector
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "cdata"),
            ("starttag", "notcdata", []),
            ("data", " "),
            ("comment", " comment "),
            ("data", " "),
            ("endtag", "notcdata"),
            ])
        s = """<cdata> <not a='start tag'> </cdata>"""
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "cdata"),
            ])

    def test_enumerated_attr_type(self):
        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
        self.check_events(s, [
            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
            ])

    def test_read_chunks(self):
        # SF bug #1541697, this caused sgml parser to hang
        # Just verify this code doesn't cause a hang.
        CHUNK = 1024  # increasing this to 8212 makes the problem go away

        f = open(test_support.findfile('sgml_input.html'))
        # try/finally (rather than "with") keeps this valid on Python 2.5
        # and guarantees the input file is closed even if feed() raises.
        try:
            fp = sgmllib.SGMLParser()
            while 1:
                data = f.read(CHUNK)
                fp.feed(data)
                # A short read means the end of the file was reached.
                if len(data) != CHUNK:
                    break
        finally:
            f.close()

    # XXX These tests have been disabled by prefixing their names with
    # an underscore.  The first two exercise outstanding bugs in the
    # sgmllib module, and the third exhibits questionable behavior
    # that needs to be carefully considered before changing it.

    def _test_starttag_end_boundary(self):
        self.check_events("<a b='<'>", [("starttag", "a", [("b", "<")])])
        self.check_events("<a b='>'>", [("starttag", "a", [("b", ">")])])

    def _test_buffer_artefacts(self):
        # Each list splits the same markup at a different point between
        # two feed() calls.
        output = [("starttag", "a", [("b", "<")])]
        self.check_events(["<a b='<'>"], output)
        self.check_events(["<a ", "b='<'>"], output)
        self.check_events(["<a b", "='<'>"], output)
        self.check_events(["<a b=", "'<'>"], output)
        self.check_events(["<a b='<", "'>"], output)
        self.check_events(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self.check_events(["<a b='>'>"], output)
        self.check_events(["<a ", "b='>'>"], output)
        self.check_events(["<a b", "='>'>"], output)
        self.check_events(["<a b=", "'>'>"], output)
        self.check_events(["<a b='>", "'>"], output)
        self.check_events(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self.check_events(["", "<!--abc-->"], output)
        self.check_events(["<", "!--abc-->"], output)
        self.check_events(["<!", "--abc-->"], output)
        self.check_events(["<!-", "-abc-->"], output)
        self.check_events(["<!--", "abc-->"], output)
        self.check_events(["<!--a", "bc-->"], output)
        self.check_events(["<!--ab", "c-->"], output)
        self.check_events(["<!--abc", "-->"], output)
        self.check_events(["<!--abc-", "->"], output)
        self.check_events(["<!--abc--", ">"], output)
        self.check_events(["<!--abc-->", ""], output)

    def _test_starttag_junk_chars(self):
        self.check_parse_error("<")
        self.check_parse_error("<>")
        self.check_parse_error("</$>")
        self.check_parse_error("</")
        self.check_parse_error("</a")
        self.check_parse_error("<$")
        self.check_parse_error("<$>")
        self.check_parse_error("<!")
        self.check_parse_error("<a $>")
        self.check_parse_error("<a")
        self.check_parse_error("<a foo='bar'")
        self.check_parse_error("<a foo='bar")
        self.check_parse_error("<a foo='>'")
        self.check_parse_error("<a foo='>")
        self.check_parse_error("<a foo=>")
def test_main():
    """Run SGMLParserTestCase through test_support.run_unittest()."""
    test_support.run_unittest(SGMLParserTestCase)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()