python-2.5.2/win32/Lib/xml/sax/expatreader.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """
       
     2 SAX driver for the pyexpat C module.  This driver works with
       
     3 pyexpat.__version__ == '2.22'.
       
     4 """
       
     5 
       
     6 version = "0.20"
       
     7 
       
     8 from xml.sax._exceptions import *
       
     9 from xml.sax.handler import feature_validation, feature_namespaces
       
    10 from xml.sax.handler import feature_namespace_prefixes
       
    11 from xml.sax.handler import feature_external_ges, feature_external_pes
       
    12 from xml.sax.handler import feature_string_interning
       
    13 from xml.sax.handler import property_xml_string, property_interning_dict
       
    14 
       
    15 # xml.parsers.expat does not raise ImportError in Jython
       
    16 import sys
       
    17 if sys.platform[:4] == "java":
       
    18     raise SAXReaderNotAvailable("expat not available in Java", None)
       
    19 del sys
       
    20 
       
    21 try:
       
    22     from xml.parsers import expat
       
    23 except ImportError:
       
    24     raise SAXReaderNotAvailable("expat not supported", None)
       
    25 else:
       
    26     if not hasattr(expat, "ParserCreate"):
       
    27         raise SAXReaderNotAvailable("expat not supported", None)
       
    28 from xml.sax import xmlreader, saxutils, handler
       
    29 
       
    30 AttributesImpl = xmlreader.AttributesImpl
       
    31 AttributesNSImpl = xmlreader.AttributesNSImpl
       
    32 
       
    33 # If we're using a sufficiently recent version of Python, we can use
       
    34 # weak references to avoid cycles between the parser and content
       
    35 # handler, otherwise we'll just have to pretend.
       
    36 try:
       
    37     import _weakref
       
    38 except ImportError:
       
    39     def _mkproxy(o):
       
    40         return o
       
    41 else:
       
    42     import weakref
       
    43     _mkproxy = weakref.proxy
       
    44     del weakref, _weakref
       
    45 
       
    46 # --- ExpatLocator
       
    47 
       
    48 class ExpatLocator(xmlreader.Locator):
       
    49     """Locator for use with the ExpatParser class.
       
    50 
       
    51     This uses a weak reference to the parser object to avoid creating
       
    52     a circular reference between the parser and the content handler.
       
    53     """
       
    54     def __init__(self, parser):
       
    55         self._ref = _mkproxy(parser)
       
    56 
       
    57     def getColumnNumber(self):
       
    58         parser = self._ref
       
    59         if parser._parser is None:
       
    60             return None
       
    61         return parser._parser.ErrorColumnNumber
       
    62 
       
    63     def getLineNumber(self):
       
    64         parser = self._ref
       
    65         if parser._parser is None:
       
    66             return 1
       
    67         return parser._parser.ErrorLineNumber
       
    68 
       
    69     def getPublicId(self):
       
    70         parser = self._ref
       
    71         if parser is None:
       
    72             return None
       
    73         return parser._source.getPublicId()
       
    74 
       
    75     def getSystemId(self):
       
    76         parser = self._ref
       
    77         if parser is None:
       
    78             return None
       
    79         return parser._source.getSystemId()
       
    80 
       
    81 
       
    82 # --- ExpatParser
       
    83 
       
    84 class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
       
    85     """SAX driver for the pyexpat C module."""
       
    86 
       
    87     def __init__(self, namespaceHandling=0, bufsize=2**16-20):
       
    88         xmlreader.IncrementalParser.__init__(self, bufsize)
       
    89         self._source = xmlreader.InputSource()
       
    90         self._parser = None
       
    91         self._namespaces = namespaceHandling
       
    92         self._lex_handler_prop = None
       
    93         self._parsing = 0
       
    94         self._entity_stack = []
       
    95         self._external_ges = 1
       
    96         self._interning = None
       
    97 
       
    98     # XMLReader methods
       
    99 
       
   100     def parse(self, source):
       
   101         "Parse an XML document from a URL or an InputSource."
       
   102         source = saxutils.prepare_input_source(source)
       
   103 
       
   104         self._source = source
       
   105         self.reset()
       
   106         self._cont_handler.setDocumentLocator(ExpatLocator(self))
       
   107         xmlreader.IncrementalParser.parse(self, source)
       
   108 
       
   109     def prepareParser(self, source):
       
   110         if source.getSystemId() != None:
       
   111             self._parser.SetBase(source.getSystemId())
       
   112 
       
   113     # Redefined setContentHandler to allow changing handlers during parsing
       
   114 
       
   115     def setContentHandler(self, handler):
       
   116         xmlreader.IncrementalParser.setContentHandler(self, handler)
       
   117         if self._parsing:
       
   118             self._reset_cont_handler()
       
   119 
       
   120     def getFeature(self, name):
       
   121         if name == feature_namespaces:
       
   122             return self._namespaces
       
   123         elif name == feature_string_interning:
       
   124             return self._interning is not None
       
   125         elif name in (feature_validation, feature_external_pes,
       
   126                       feature_namespace_prefixes):
       
   127             return 0
       
   128         elif name == feature_external_ges:
       
   129             return self._external_ges
       
   130         raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
       
   131 
       
   132     def setFeature(self, name, state):
       
   133         if self._parsing:
       
   134             raise SAXNotSupportedException("Cannot set features while parsing")
       
   135 
       
   136         if name == feature_namespaces:
       
   137             self._namespaces = state
       
   138         elif name == feature_external_ges:
       
   139             self._external_ges = state
       
   140         elif name == feature_string_interning:
       
   141             if state:
       
   142                 if self._interning is None:
       
   143                     self._interning = {}
       
   144             else:
       
   145                 self._interning = None
       
   146         elif name == feature_validation:
       
   147             if state:
       
   148                 raise SAXNotSupportedException(
       
   149                     "expat does not support validation")
       
   150         elif name == feature_external_pes:
       
   151             if state:
       
   152                 raise SAXNotSupportedException(
       
   153                     "expat does not read external parameter entities")
       
   154         elif name == feature_namespace_prefixes:
       
   155             if state:
       
   156                 raise SAXNotSupportedException(
       
   157                     "expat does not report namespace prefixes")
       
   158         else:
       
   159             raise SAXNotRecognizedException(
       
   160                 "Feature '%s' not recognized" % name)
       
   161 
       
   162     def getProperty(self, name):
       
   163         if name == handler.property_lexical_handler:
       
   164             return self._lex_handler_prop
       
   165         elif name == property_interning_dict:
       
   166             return self._interning
       
   167         elif name == property_xml_string:
       
   168             if self._parser:
       
   169                 if hasattr(self._parser, "GetInputContext"):
       
   170                     return self._parser.GetInputContext()
       
   171                 else:
       
   172                     raise SAXNotRecognizedException(
       
   173                         "This version of expat does not support getting"
       
   174                         " the XML string")
       
   175             else:
       
   176                 raise SAXNotSupportedException(
       
   177                     "XML string cannot be returned when not parsing")
       
   178         raise SAXNotRecognizedException("Property '%s' not recognized" % name)
       
   179 
       
   180     def setProperty(self, name, value):
       
   181         if name == handler.property_lexical_handler:
       
   182             self._lex_handler_prop = value
       
   183             if self._parsing:
       
   184                 self._reset_lex_handler_prop()
       
   185         elif name == property_interning_dict:
       
   186             self._interning = value
       
   187         elif name == property_xml_string:
       
   188             raise SAXNotSupportedException("Property '%s' cannot be set" %
       
   189                                            name)
       
   190         else:
       
   191             raise SAXNotRecognizedException("Property '%s' not recognized" %
       
   192                                             name)
       
   193 
       
   194     # IncrementalParser methods
       
   195 
       
   196     def feed(self, data, isFinal = 0):
       
   197         if not self._parsing:
       
   198             self.reset()
       
   199             self._parsing = 1
       
   200             self._cont_handler.startDocument()
       
   201 
       
   202         try:
       
   203             # The isFinal parameter is internal to the expat reader.
       
   204             # If it is set to true, expat will check validity of the entire
       
   205             # document. When feeding chunks, they are not normally final -
       
   206             # except when invoked from close.
       
   207             self._parser.Parse(data, isFinal)
       
   208         except expat.error, e:
       
   209             exc = SAXParseException(expat.ErrorString(e.code), e, self)
       
   210             # FIXME: when to invoke error()?
       
   211             self._err_handler.fatalError(exc)
       
   212 
       
   213     def close(self):
       
   214         if self._entity_stack:
       
   215             # If we are completing an external entity, do nothing here
       
   216             return
       
   217         self.feed("", isFinal = 1)
       
   218         self._cont_handler.endDocument()
       
   219         self._parsing = 0
       
   220         # break cycle created by expat handlers pointing to our methods
       
   221         self._parser = None
       
   222 
       
   223     def _reset_cont_handler(self):
       
   224         self._parser.ProcessingInstructionHandler = \
       
   225                                     self._cont_handler.processingInstruction
       
   226         self._parser.CharacterDataHandler = self._cont_handler.characters
       
   227 
       
   228     def _reset_lex_handler_prop(self):
       
   229         lex = self._lex_handler_prop
       
   230         parser = self._parser
       
   231         if lex is None:
       
   232             parser.CommentHandler = None
       
   233             parser.StartCdataSectionHandler = None
       
   234             parser.EndCdataSectionHandler = None
       
   235             parser.StartDoctypeDeclHandler = None
       
   236             parser.EndDoctypeDeclHandler = None
       
   237         else:
       
   238             parser.CommentHandler = lex.comment
       
   239             parser.StartCdataSectionHandler = lex.startCDATA
       
   240             parser.EndCdataSectionHandler = lex.endCDATA
       
   241             parser.StartDoctypeDeclHandler = self.start_doctype_decl
       
   242             parser.EndDoctypeDeclHandler = lex.endDTD
       
   243 
       
   244     def reset(self):
       
   245         if self._namespaces:
       
   246             self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
       
   247                                               intern=self._interning)
       
   248             self._parser.namespace_prefixes = 1
       
   249             self._parser.StartElementHandler = self.start_element_ns
       
   250             self._parser.EndElementHandler = self.end_element_ns
       
   251         else:
       
   252             self._parser = expat.ParserCreate(self._source.getEncoding(),
       
   253                                               intern = self._interning)
       
   254             self._parser.StartElementHandler = self.start_element
       
   255             self._parser.EndElementHandler = self.end_element
       
   256 
       
   257         self._reset_cont_handler()
       
   258         self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
       
   259         self._parser.NotationDeclHandler = self.notation_decl
       
   260         self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
       
   261         self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
       
   262 
       
   263         self._decl_handler_prop = None
       
   264         if self._lex_handler_prop:
       
   265             self._reset_lex_handler_prop()
       
   266 #         self._parser.DefaultHandler =
       
   267 #         self._parser.DefaultHandlerExpand =
       
   268 #         self._parser.NotStandaloneHandler =
       
   269         self._parser.ExternalEntityRefHandler = self.external_entity_ref
       
   270         try:
       
   271             self._parser.SkippedEntityHandler = self.skipped_entity_handler
       
   272         except AttributeError:
       
   273             # This pyexpat does not support SkippedEntity
       
   274             pass
       
   275         self._parser.SetParamEntityParsing(
       
   276             expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
       
   277 
       
   278         self._parsing = 0
       
   279         self._entity_stack = []
       
   280 
       
   281     # Locator methods
       
   282 
       
   283     def getColumnNumber(self):
       
   284         if self._parser is None:
       
   285             return None
       
   286         return self._parser.ErrorColumnNumber
       
   287 
       
   288     def getLineNumber(self):
       
   289         if self._parser is None:
       
   290             return 1
       
   291         return self._parser.ErrorLineNumber
       
   292 
       
   293     def getPublicId(self):
       
   294         return self._source.getPublicId()
       
   295 
       
   296     def getSystemId(self):
       
   297         return self._source.getSystemId()
       
   298 
       
   299     # event handlers
       
   300     def start_element(self, name, attrs):
       
   301         self._cont_handler.startElement(name, AttributesImpl(attrs))
       
   302 
       
   303     def end_element(self, name):
       
   304         self._cont_handler.endElement(name)
       
   305 
       
   306     def start_element_ns(self, name, attrs):
       
   307         pair = name.split()
       
   308         if len(pair) == 1:
       
   309             # no namespace
       
   310             pair = (None, name)
       
   311         elif len(pair) == 3:
       
   312             pair = pair[0], pair[1]
       
   313         else:
       
   314             # default namespace
       
   315             pair = tuple(pair)
       
   316 
       
   317         newattrs = {}
       
   318         qnames = {}
       
   319         for (aname, value) in attrs.items():
       
   320             parts = aname.split()
       
   321             length = len(parts)
       
   322             if length == 1:
       
   323                 # no namespace
       
   324                 qname = aname
       
   325                 apair = (None, aname)
       
   326             elif length == 3:
       
   327                 qname = "%s:%s" % (parts[2], parts[1])
       
   328                 apair = parts[0], parts[1]
       
   329             else:
       
   330                 # default namespace
       
   331                 qname = parts[1]
       
   332                 apair = tuple(parts)
       
   333 
       
   334             newattrs[apair] = value
       
   335             qnames[apair] = qname
       
   336 
       
   337         self._cont_handler.startElementNS(pair, None,
       
   338                                           AttributesNSImpl(newattrs, qnames))
       
   339 
       
   340     def end_element_ns(self, name):
       
   341         pair = name.split()
       
   342         if len(pair) == 1:
       
   343             pair = (None, name)
       
   344         elif len(pair) == 3:
       
   345             pair = pair[0], pair[1]
       
   346         else:
       
   347             pair = tuple(pair)
       
   348 
       
   349         self._cont_handler.endElementNS(pair, None)
       
   350 
       
   351     # this is not used (call directly to ContentHandler)
       
   352     def processing_instruction(self, target, data):
       
   353         self._cont_handler.processingInstruction(target, data)
       
   354 
       
   355     # this is not used (call directly to ContentHandler)
       
   356     def character_data(self, data):
       
   357         self._cont_handler.characters(data)
       
   358 
       
   359     def start_namespace_decl(self, prefix, uri):
       
   360         self._cont_handler.startPrefixMapping(prefix, uri)
       
   361 
       
   362     def end_namespace_decl(self, prefix):
       
   363         self._cont_handler.endPrefixMapping(prefix)
       
   364 
       
   365     def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
       
   366         self._lex_handler_prop.startDTD(name, pubid, sysid)
       
   367 
       
   368     def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
       
   369         self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
       
   370 
       
   371     def notation_decl(self, name, base, sysid, pubid):
       
   372         self._dtd_handler.notationDecl(name, pubid, sysid)
       
   373 
       
   374     def external_entity_ref(self, context, base, sysid, pubid):
       
   375         if not self._external_ges:
       
   376             return 1
       
   377 
       
   378         source = self._ent_handler.resolveEntity(pubid, sysid)
       
   379         source = saxutils.prepare_input_source(source,
       
   380                                                self._source.getSystemId() or
       
   381                                                "")
       
   382 
       
   383         self._entity_stack.append((self._parser, self._source))
       
   384         self._parser = self._parser.ExternalEntityParserCreate(context)
       
   385         self._source = source
       
   386 
       
   387         try:
       
   388             xmlreader.IncrementalParser.parse(self, source)
       
   389         except:
       
   390             return 0  # FIXME: save error info here?
       
   391 
       
   392         (self._parser, self._source) = self._entity_stack[-1]
       
   393         del self._entity_stack[-1]
       
   394         return 1
       
   395 
       
   396     def skipped_entity_handler(self, name, is_pe):
       
   397         if is_pe:
       
   398             # The SAX spec requires to report skipped PEs with a '%'
       
   399             name = '%'+name
       
   400         self._cont_handler.skippedEntity(name)
       
   401 
       
   402 # ---
       
   403 
       
   404 def create_parser(*args, **kwargs):
       
   405     return ExpatParser(*args, **kwargs)
       
   406 
       
   407 # ---
       
   408 
       
   409 if __name__ == "__main__":
       
   410     import xml.sax
       
   411     p = create_parser()
       
   412     p.setContentHandler(xml.sax.XMLGenerator())
       
   413     p.setErrorHandler(xml.sax.ErrorHandler())
       
   414     p.parse("../../../hamlet.xml")