python-2.5.2/win32/Lib/test/test_pyexpat.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 # Very simple test - Parse a file and print what happens
       
     2 
       
     3 # XXX TypeErrors on calling handlers, or on bad return values from a
       
     4 # handler, are obscure and unhelpful.
       
     5 
       
     6 import pyexpat
       
     7 from xml.parsers import expat
       
     8 
       
     9 from test.test_support import sortdict, TestFailed
       
    10 
       
class Outputter:
    """Expat callback collection that prints a trace of every event received.

    The method names match the pyexpat parser handler attributes, so the
    bound methods can be installed with setattr(parser, name, method).
    """

    def StartElementHandler(self, name, attrs):
        # sortdict() is imported from test.test_support; presumably it renders
        # the attribute dict with its keys in sorted order -- confirm there.
        print 'Start element:\n\t', repr(name), sortdict(attrs)

    def EndElementHandler(self, name):
        print 'End element:\n\t', repr(name)

    def CharacterDataHandler(self, data):
        # Surrounding whitespace is stripped, and chunks that are empty after
        # stripping produce no output at all.
        data = data.strip()
        if data:
            print 'Character data:'
            print '\t', repr(data)

    def ProcessingInstructionHandler(self, target, data):
        print 'PI:\n\t', repr(target), repr(data)

    def StartNamespaceDeclHandler(self, prefix, uri):
        print 'NS decl:\n\t', repr(prefix), repr(uri)

    def EndNamespaceDeclHandler(self, prefix):
        print 'End of NS decl:\n\t', repr(prefix)

    def StartCdataSectionHandler(self):
        print 'Start of CDATA section'

    def EndCdataSectionHandler(self):
        print 'End of CDATA section'

    def CommentHandler(self, text):
        print 'Comment:\n\t', repr(text)

    def NotationDeclHandler(self, *args):
        # The unpacking doubles as an arity check: it raises ValueError unless
        # exactly four arguments were passed.  The names are otherwise unused.
        name, base, sysid, pubid = args
        print 'Notation declared:', args

    def UnparsedEntityDeclHandler(self, *args):
        # Arity check as above: exactly five arguments are required.
        entityName, base, systemId, publicId, notationName = args
        print 'Unparsed entity decl:\n\t', args

    def NotStandaloneHandler(self, userData):
        # The matching HANDLER_NAMES entry is commented out, so this method
        # is not installed on any parser in this file.
        print 'Not standalone'
        # NOTE(review): a true return value presumably tells expat to keep
        # parsing -- confirm against the pyexpat documentation.
        return 1

    def ExternalEntityRefHandler(self, *args):
        # Arity check: exactly four arguments are required.
        context, base, sysId, pubId = args
        # The first argument (context) is left out of the printed trace.
        print 'External entity ref:', args[1:]
        # NOTE(review): presumably "continue parsing", as for
        # NotStandaloneHandler -- confirm against the pyexpat documentation.
        return 1

    def DefaultHandler(self, userData):
        # Accepts the callback and deliberately prints nothing.
        pass

    def DefaultHandlerExpand(self, userData):
        # Accepts the callback and deliberately prints nothing.
        pass
       
    64 
       
    65 
       
    66 def confirm(ok):
       
    67     if ok:
       
    68         print "OK."
       
    69     else:
       
    70         print "Not OK."
       
    71 
       
    72 out = Outputter()
       
    73 parser = expat.ParserCreate(namespace_separator='!')
       
    74 
       
    75 # Test getting/setting returns_unicode
       
    76 parser.returns_unicode = 0; confirm(parser.returns_unicode == 0)
       
    77 parser.returns_unicode = 1; confirm(parser.returns_unicode == 1)
       
    78 parser.returns_unicode = 2; confirm(parser.returns_unicode == 1)
       
    79 parser.returns_unicode = 0; confirm(parser.returns_unicode == 0)
       
    80 
       
    81 # Test getting/setting ordered_attributes
       
    82 parser.ordered_attributes = 0; confirm(parser.ordered_attributes == 0)
       
    83 parser.ordered_attributes = 1; confirm(parser.ordered_attributes == 1)
       
    84 parser.ordered_attributes = 2; confirm(parser.ordered_attributes == 1)
       
    85 parser.ordered_attributes = 0; confirm(parser.ordered_attributes == 0)
       
    86 
       
    87 # Test getting/setting specified_attributes
       
    88 parser.specified_attributes = 0; confirm(parser.specified_attributes == 0)
       
    89 parser.specified_attributes = 1; confirm(parser.specified_attributes == 1)
       
    90 parser.specified_attributes = 2; confirm(parser.specified_attributes == 1)
       
    91 parser.specified_attributes = 0; confirm(parser.specified_attributes == 0)
       
    92 
       
    93 HANDLER_NAMES = [
       
    94     'StartElementHandler', 'EndElementHandler',
       
    95     'CharacterDataHandler', 'ProcessingInstructionHandler',
       
    96     'UnparsedEntityDeclHandler', 'NotationDeclHandler',
       
    97     'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
       
    98     'CommentHandler', 'StartCdataSectionHandler',
       
    99     'EndCdataSectionHandler',
       
   100     'DefaultHandler', 'DefaultHandlerExpand',
       
   101     #'NotStandaloneHandler',
       
   102     'ExternalEntityRefHandler'
       
   103     ]
       
   104 for name in HANDLER_NAMES:
       
   105     setattr(parser, name, getattr(out, name))
       
   106 
       
# Sample document fed to every parse run below.  It contains a processing
# instruction, a comment, a DOCTYPE with notation / internal entity /
# external entity / unparsed entity declarations, attributes (one with a
# numeric character reference), a namespaced element, a CDATA section and
# an external entity reference.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
       
   128 
       
   129 # Produce UTF-8 output
       
   130 parser.returns_unicode = 0
       
   131 try:
       
   132     parser.Parse(data, 1)
       
   133 except expat.error:
       
   134     print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
       
   135     print '** Line', parser.ErrorLineNumber
       
   136     print '** Column', parser.ErrorColumnNumber
       
   137     print '** Byte', parser.ErrorByteIndex
       
   138 
       
   139 # Try the parse again, this time producing Unicode output
       
   140 parser = expat.ParserCreate(namespace_separator='!')
       
   141 parser.returns_unicode = 1
       
   142 
       
   143 for name in HANDLER_NAMES:
       
   144     setattr(parser, name, getattr(out, name))
       
   145 try:
       
   146     parser.Parse(data, 1)
       
   147 except expat.error:
       
   148     print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
       
   149     print '** Line', parser.ErrorLineNumber
       
   150     print '** Column', parser.ErrorColumnNumber
       
   151     print '** Byte', parser.ErrorByteIndex
       
   152 
       
   153 # Try parsing a file
       
   154 parser = expat.ParserCreate(namespace_separator='!')
       
   155 parser.returns_unicode = 1
       
   156 
       
   157 for name in HANDLER_NAMES:
       
   158     setattr(parser, name, getattr(out, name))
       
   159 import StringIO
       
   160 file = StringIO.StringIO(data)
       
   161 try:
       
   162     parser.ParseFile(file)
       
   163 except expat.error:
       
   164     print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
       
   165     print '** Line', parser.ErrorLineNumber
       
   166     print '** Column', parser.ErrorColumnNumber
       
   167     print '** Byte', parser.ErrorByteIndex
       
   168 
       
   169 
       
# Tests that make sure we get errors when the namespace_separator value
# is illegal, and that we don't for good values:
print
print "Testing constructor for proper handling of namespace_separator values:"
# Legal forms: argument omitted, explicit None, and a one-character string.
expat.ParserCreate()
expat.ParserCreate(namespace_separator=None)
expat.ParserCreate(namespace_separator=' ')
print "Legal values tested o.k."
# A non-string separator is expected to be rejected with TypeError.
try:
    expat.ParserCreate(namespace_separator=42)
except TypeError, e:
    print "Caught expected TypeError:"
    print e
else:
    print "Failed to catch expected TypeError."

# A separator longer than one character is expected to be rejected with
# ValueError.
try:
    expat.ParserCreate(namespace_separator='too long')
except ValueError, e:
    print "Caught expected ValueError:"
    print e
else:
    print "Failed to catch expected ValueError."

# ParserCreate() needs to accept a namespace_separator of zero length
# to satisfy the requirements of RDF applications that are required
# to simply glue together the namespace URI and the localname.  Though
# considered a wart of the RDF specifications, it needs to be supported.
#
# See XML-SIG mailing list thread starting with
# http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
#
expat.ParserCreate(namespace_separator='') # zero length: must be accepted
       
   203 
       
   204 # Test the interning machinery.
       
   205 p = expat.ParserCreate()
       
   206 L = []
       
   207 def collector(name, *args):
       
   208     L.append(name)
       
   209 p.StartElementHandler = collector
       
   210 p.EndElementHandler = collector
       
   211 p.Parse("<e> <e/> <e></e> </e>", 1)
       
   212 tag = L[0]
       
   213 if len(L) != 6:
       
   214     print "L should only contain 6 entries; found", len(L)
       
   215 for entry in L:
       
   216     if tag is not entry:
       
   217         print "expected L to contain many references to the same string",
       
   218         print "(it didn't)"
       
   219         print "L =", repr(L)
       
   220         break
       
   221 
       
   222 # Tests of the buffer_text attribute.
       
   223 import sys
       
   224 
       
   225 class TextCollector:
       
   226     def __init__(self, parser):
       
   227         self.stuff = []
       
   228 
       
   229     def check(self, expected, label):
       
   230         require(self.stuff == expected,
       
   231                 "%s\nstuff    = %r\nexpected = %r"
       
   232                 % (label, self.stuff, map(unicode, expected)))
       
   233 
       
   234     def CharacterDataHandler(self, text):
       
   235         self.stuff.append(text)
       
   236 
       
   237     def StartElementHandler(self, name, attrs):
       
   238         self.stuff.append("<%s>" % name)
       
   239         bt = attrs.get("buffer-text")
       
   240         if bt == "yes":
       
   241             parser.buffer_text = 1
       
   242         elif bt == "no":
       
   243             parser.buffer_text = 0
       
   244 
       
   245     def EndElementHandler(self, name):
       
   246         self.stuff.append("</%s>" % name)
       
   247 
       
   248     def CommentHandler(self, data):
       
   249         self.stuff.append("<!--%s-->" % data)
       
   250 
       
   251 def require(cond, label):
       
   252     # similar to confirm(), but no extraneous output
       
   253     if not cond:
       
   254         raise TestFailed(label)
       
   255 
       
   256 def setup(handlers=[]):
       
   257     parser = expat.ParserCreate()
       
   258     require(not parser.buffer_text,
       
   259             "buffer_text not disabled by default")
       
   260     parser.buffer_text = 1
       
   261     handler = TextCollector(parser)
       
   262     parser.CharacterDataHandler = handler.CharacterDataHandler
       
   263     for name in handlers:
       
   264         setattr(parser, name, getattr(handler, name))
       
   265     return parser, handler
       
   266 
       
   267 parser, handler = setup()
       
   268 require(parser.buffer_text,
       
   269         "text buffering either not acknowledged or not enabled")
       
   270 parser.Parse("<a>1<b/>2<c/>3</a>", 1)
       
   271 handler.check(["123"],
       
   272               "buffered text not properly collapsed")
       
   273 
       
   274 # XXX This test exposes more detail of Expat's text chunking than we
       
   275 # XXX like, but it tests what we need to concisely.
       
   276 parser, handler = setup(["StartElementHandler"])
       
   277 parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
       
   278 handler.check(["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
       
   279               "buffering control not reacting as expected")
       
   280 
       
   281 parser, handler = setup()
       
   282 parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
       
   283 handler.check(["1<2> \n 3"],
       
   284               "buffered text not properly collapsed")
       
   285 
       
   286 parser, handler = setup(["StartElementHandler"])
       
   287 parser.Parse("<a>1<b/>2<c/>3</a>", 1)
       
   288 handler.check(["<a>", "1", "<b>", "2", "<c>", "3"],
       
   289               "buffered text not properly split")
       
   290 
       
   291 parser, handler = setup(["StartElementHandler", "EndElementHandler"])
       
   292 parser.CharacterDataHandler = None
       
   293 parser.Parse("<a>1<b/>2<c/>3</a>", 1)
       
   294 handler.check(["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"],
       
   295               "huh?")
       
   296 
       
   297 parser, handler = setup(["StartElementHandler", "EndElementHandler"])
       
   298 parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
       
   299 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"],
       
   300               "huh?")
       
   301 
       
   302 parser, handler = setup(["CommentHandler", "EndElementHandler",
       
   303                          "StartElementHandler"])
       
   304 parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
       
   305 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
       
   306               "buffered text not properly split")
       
   307 
       
   308 parser, handler = setup(["CommentHandler", "EndElementHandler",
       
   309                          "StartElementHandler"])
       
   310 parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
       
   311 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
       
   312                "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
       
   313               "buffered text not properly split")
       
   314 
       
   315 # Test handling of exception from callback:
       
   316 def StartElementHandler(name, attrs):
       
   317     raise RuntimeError(name)
       
   318 
       
# Install the always-raising start-element callback on a fresh parser.
parser = expat.ParserCreate()
parser.StartElementHandler = StartElementHandler

# The callback raises RuntimeError(name) for the first start tag it sees, so
# the exception is expected to escape from Parse() with "a" as its argument.
try:
    parser.Parse("<a><b><c/></b></a>", 1)
except RuntimeError, e:
    if e.args[0] != "a":
        print "Expected RuntimeError for element 'a'; found %r" % e.args[0]
else:
    # Parse() returned normally: the callback's exception was lost.
    print "Expected RuntimeError for 'a'"
       
   329 
       
   330 # Test Current* members:
       
   331 class PositionTest:
       
   332 
       
   333     def __init__(self, expected_list, parser):
       
   334         self.parser = parser
       
   335         self.parser.StartElementHandler = self.StartElementHandler
       
   336         self.parser.EndElementHandler = self.EndElementHandler
       
   337         self.expected_list = expected_list
       
   338         self.upto = 0
       
   339 
       
   340     def StartElementHandler(self, name, attrs):
       
   341         self.check_pos('s')
       
   342 
       
   343     def EndElementHandler(self, name):
       
   344         self.check_pos('e')
       
   345 
       
   346     def check_pos(self, event):
       
   347         pos = (event,
       
   348                self.parser.CurrentByteIndex,
       
   349                self.parser.CurrentLineNumber,
       
   350                self.parser.CurrentColumnNumber)
       
   351         require(self.upto < len(self.expected_list),
       
   352                 'too many parser events')
       
   353         expected = self.expected_list[self.upto]
       
   354         require(pos == expected,
       
   355                 'expected position %s, got %s' % (expected, pos))
       
   356         self.upto += 1
       
   357 
       
   358 
       
parser = expat.ParserCreate()
# Each expected tuple is (event, byte index, line number, column number),
# with event 's' for a start tag and 'e' for an end tag, listed in the order
# the callbacks must fire.
handler = PositionTest([('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                        ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)],
                       parser)
# The newlines and leading spaces inside this literal are significant: the
# expected byte indexes and columns above are derived from them.
parser.Parse('''<a>
 <b>
  <c/>
 </b>
</a>''', 1)
       
   368 
       
   369 
       
   370 def test_parse_only_xml_data():
       
   371     # http://python.org/sf/1296433
       
   372     #
       
   373     xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
       
   374     # this one doesn't crash
       
   375     #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
       
   376 
       
   377     def handler(text):
       
   378         raise Exception
       
   379 
       
   380     parser = expat.ParserCreate()
       
   381     parser.CharacterDataHandler = handler
       
   382 
       
   383     try:
       
   384         parser.Parse(xml)
       
   385     except:
       
   386         pass
       
   387 
       
   388 test_parse_only_xml_data()