src/3rdparty/webkit/WebCore/dom/XMLTokenizerLibxml2.cpp
changeset 19 fcece45ef507
parent 3 41300fa6a67c
child 30 5dc02b23752f
equal deleted inserted replaced
18:2f34d5167611 19:fcece45ef507
   463 }
   463 }
   464 #endif
   464 #endif
   465 
   465 
   466 static bool didInit = false;
   466 static bool didInit = false;
   467 
   467 
   468 static xmlParserCtxtPtr createStringParser(xmlSAXHandlerPtr handlers, void* userData)
   468 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
   469 {
   469 {
   470     if (!didInit) {
   470     if (!didInit) {
   471         xmlInitParser();
   471         xmlInitParser();
   472         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
   472         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
   473         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
   473         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
   480     parser->replaceEntities = true;
   480     parser->replaceEntities = true;
   481     const UChar BOM = 0xFEFF;
   481     const UChar BOM = 0xFEFF;
   482     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   482     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   483     xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
   483     xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
   484 
   484 
   485     return parser;
   485     return adoptRef(new XMLParserContext(parser));
   486 }
   486 }
   487 
   487 
   488 
   488 
   489 // Chunk should be encoded in UTF-8
   489 // Chunk should be encoded in UTF-8
   490 static xmlParserCtxtPtr createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
   490 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
   491 {
   491 {
   492     if (!didInit) {
   492     if (!didInit) {
   493         xmlInitParser();
   493         xmlInitParser();
   494         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
   494         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
   495         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
   495         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
   516     parser->depth = 0;
   516     parser->depth = 0;
   517     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
   517     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
   518     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
   518     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
   519     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
   519     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
   520     parser->_private = userData;
   520     parser->_private = userData;
   521 
   521     
   522     return parser;
   522     return adoptRef(new XMLParserContext(parser));
   523 }
   523 }
   524 
   524 
   525 // --------------------------------
   525 // --------------------------------
   526 
   526 
   527 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view)
   527 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view)
   607     // If the parent element is not in the document tree, there may be no xmlns attribute; just default to the parent's namespace.
   607     // If the parent element is not in the document tree, there may be no xmlns attribute; just default to the parent's namespace.
   608     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
   608     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
   609         m_defaultNamespaceURI = parentElement->namespaceURI();
   609         m_defaultNamespaceURI = parentElement->namespaceURI();
   610 }
   610 }
   611 
   611 
       
   612 XMLParserContext::~XMLParserContext()
       
   613 {
       
   614     if (m_context->myDoc)
       
   615         xmlFreeDoc(m_context->myDoc);
       
   616     xmlFreeParserCtxt(m_context);
       
   617 }
       
   618 
   612 XMLTokenizer::~XMLTokenizer()
   619 XMLTokenizer::~XMLTokenizer()
   613 {
   620 {
   614     clearCurrentNodeStack();
   621     clearCurrentNodeStack();
   615     if (m_parsingFragment && m_doc)
   622     if (m_parsingFragment && m_doc)
   616         m_doc->deref();
   623         m_doc->deref();
   617     if (m_pendingScript)
   624     if (m_pendingScript)
   618         m_pendingScript->removeClient(this);
   625         m_pendingScript->removeClient(this);
   619     if (m_context)
       
   620         xmlFreeParserCtxt(m_context);
       
   621 }
   626 }
   622 
   627 
   623 void XMLTokenizer::doWrite(const String& parseString)
   628 void XMLTokenizer::doWrite(const String& parseString)
   624 {
   629 {
   625     if (!m_context)
   630     if (!m_context)
   626         initializeParserContext();
   631         initializeParserContext();
   627     
   632 
       
   633     // Protect the libxml context from deletion during a callback
       
   634     RefPtr<XMLParserContext> context = m_context;
       
   635 
   628     // libXML throws an error if you try to switch the encoding for an empty string.
   636     // libXML throws an error if you try to switch the encoding for an empty string.
   629     if (parseString.length()) {
   637     if (parseString.length()) {
   630         // Hack around libxml2's lack of encoding override support by manually
   638         // Hack around libxml2's lack of encoding override support by manually
   631         // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
   639         // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
   632         // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 
   640         // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 
   633         // and switch encodings, causing the parse to fail.
   641         // and switch encodings, causing the parse to fail.
   634         const UChar BOM = 0xFEFF;
   642         const UChar BOM = 0xFEFF;
   635         const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   643         const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   636         xmlSwitchEncoding(m_context, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
   644         xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
   637 
   645 
   638         XMLTokenizerScope scope(m_doc->docLoader());
   646         XMLTokenizerScope scope(m_doc->docLoader());
   639         xmlParseChunk(m_context, reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
   647         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
   640     }
   648     }
   641     
   649     
   642     if (m_doc->decoder() && m_doc->decoder()->sawError()) {
   650     if (m_doc->decoder() && m_doc->decoder()->sawError()) {
   643         // If the decoder saw an error, report it as fatal (stops parsing)
   651         // If the decoder saw an error, report it as fatal (stops parsing)
   644         handleError(fatal, "Encoding error", lineNumber(), columnNumber());
   652         handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
   645     }
   653     }
   646 
   654 
   647     return;
   655     return;
   648 }
   656 }
   649 
   657 
  1275     m_sawXSLTransform = false;
  1283     m_sawXSLTransform = false;
  1276     m_sawFirstElement = false;
  1284     m_sawFirstElement = false;
  1277 
  1285 
  1278     XMLTokenizerScope scope(m_doc->docLoader());
  1286     XMLTokenizerScope scope(m_doc->docLoader());
  1279     if (m_parsingFragment)
  1287     if (m_parsingFragment)
  1280         m_context = createMemoryParser(&sax, this, chunk);
  1288         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
  1281     else
  1289     else
  1282         m_context = createStringParser(&sax, this);
  1290         m_context = XMLParserContext::createStringParser(&sax, this);
  1283 }
  1291 }
  1284 
  1292 
  1285 void XMLTokenizer::doEnd()
  1293 void XMLTokenizer::doEnd()
  1286 {
  1294 {
  1287 #if ENABLE(XSLT)
  1295 #if ENABLE(XSLT)
  1298 
  1306 
  1299     if (m_context) {
  1307     if (m_context) {
  1300         // Tell libxml we're done.
  1308         // Tell libxml we're done.
  1301         {
  1309         {
  1302             XMLTokenizerScope scope(m_doc->docLoader());
  1310             XMLTokenizerScope scope(m_doc->docLoader());
  1303             xmlParseChunk(m_context, 0, 0, 1);
  1311             xmlParseChunk(context(), 0, 0, 1);
  1304         }
  1312         }
  1305 
  1313 
  1306         if (m_context->myDoc)
       
  1307             xmlFreeDoc(m_context->myDoc);
       
  1308         xmlFreeParserCtxt(m_context);
       
  1309         m_context = 0;
  1314         m_context = 0;
  1310     }
  1315     }
  1311 }
  1316 }
  1312 
  1317 
  1313 #if ENABLE(XSLT)
  1318 #if ENABLE(XSLT)
  1332 }
  1337 }
  1333 #endif
  1338 #endif
  1334 
  1339 
  1335 int XMLTokenizer::lineNumber() const
  1340 int XMLTokenizer::lineNumber() const
  1336 {
  1341 {
  1337     return m_context ? m_context->input->line : 1;
  1342     return context() ? context()->input->line : 1;
  1338 }
  1343 }
  1339 
  1344 
  1340 int XMLTokenizer::columnNumber() const
  1345 int XMLTokenizer::columnNumber() const
  1341 {
  1346 {
  1342     return m_context ? m_context->input->col : 1;
  1347     return context() ? context()->input->col : 1;
  1343 }
  1348 }
  1344 
  1349 
  1345 void XMLTokenizer::stopParsing()
  1350 void XMLTokenizer::stopParsing()
  1346 {
  1351 {
  1347     Tokenizer::stopParsing();
  1352     Tokenizer::stopParsing();
  1348     xmlStopParser(m_context);
  1353     if (context())
       
  1354         xmlStopParser(context());
  1349 }
  1355 }
  1350 
  1356 
  1351 void XMLTokenizer::resumeParsing()
  1357 void XMLTokenizer::resumeParsing()
  1352 {
  1358 {
  1353     ASSERT(m_parserPaused);
  1359     ASSERT(m_parserPaused);
  1382     XMLTokenizer tokenizer(fragment, parent);
  1388     XMLTokenizer tokenizer(fragment, parent);
  1383     
  1389     
  1384     CString chunkAsUtf8 = chunk.utf8();
  1390     CString chunkAsUtf8 = chunk.utf8();
  1385     tokenizer.initializeParserContext(chunkAsUtf8.data());
  1391     tokenizer.initializeParserContext(chunkAsUtf8.data());
  1386 
  1392 
  1387     xmlParseContent(tokenizer.m_context);
  1393     xmlParseContent(tokenizer.context());
  1388 
  1394 
  1389     tokenizer.endDocument();
  1395     tokenizer.endDocument();
  1390 
  1396 
  1391     // Check if all the chunk has been processed.
  1397     // Check if all the chunk has been processed.
  1392     long bytesProcessed = xmlByteConsumed(tokenizer.m_context);
  1398     long bytesProcessed = xmlByteConsumed(tokenizer.context());
  1393     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length())
  1399     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length())
  1394         return false;
  1400         return false;
  1395 
  1401 
  1396     // No error if the chunk is well formed, or if it is not well formed but libxml recorded no error.
  1402     // No error if the chunk is well formed, or if it is not well formed but libxml recorded no error.
  1397     return tokenizer.m_context->wellFormed || xmlCtxtGetLastError(tokenizer.m_context) == 0;
  1403     return tokenizer.context()->wellFormed || xmlCtxtGetLastError(tokenizer.context()) == 0;
  1398 }
  1404 }
  1399 
  1405 
  1400 // --------------------------------
  1406 // --------------------------------
  1401 
  1407 
  1402 struct AttributeParseState {
  1408 struct AttributeParseState {
  1435 
  1441 
  1436     xmlSAXHandler sax;
  1442     xmlSAXHandler sax;
  1437     memset(&sax, 0, sizeof(sax));
  1443     memset(&sax, 0, sizeof(sax));
  1438     sax.startElementNs = attributesStartElementNsHandler;
  1444     sax.startElementNs = attributesStartElementNsHandler;
  1439     sax.initialized = XML_SAX2_MAGIC;
  1445     sax.initialized = XML_SAX2_MAGIC;
  1440     xmlParserCtxtPtr parser = createStringParser(&sax, &state);
  1446     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
  1441     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
  1447     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
  1442     xmlParseChunk(parser, reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
  1448     xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
  1443     if (parser->myDoc)
       
  1444         xmlFreeDoc(parser->myDoc);
       
  1445     xmlFreeParserCtxt(parser);
       
  1446     attrsOK = state.gotAttributes;
  1449     attrsOK = state.gotAttributes;
  1447     return state.attributes;
  1450     return state.attributes;
  1448 }
  1451 }
  1449 
  1452 
  1450 }
  1453 }