463 } |
463 } |
464 #endif |
464 #endif |
465 |
465 |
466 static bool didInit = false; |
466 static bool didInit = false; |
467 |
467 |
468 static xmlParserCtxtPtr createStringParser(xmlSAXHandlerPtr handlers, void* userData) |
468 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) |
469 { |
469 { |
470 if (!didInit) { |
470 if (!didInit) { |
471 xmlInitParser(); |
471 xmlInitParser(); |
472 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
472 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
473 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
473 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
480 parser->replaceEntities = true; |
480 parser->replaceEntities = true; |
481 const UChar BOM = 0xFEFF; |
481 const UChar BOM = 0xFEFF; |
482 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
482 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
483 xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
483 xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
484 |
484 |
485 return parser; |
485 return adoptRef(new XMLParserContext(parser)); |
486 } |
486 } |
487 |
487 |
488 |
488 |
489 // Chunk should be encoded in UTF-8 |
489 // Chunk should be encoded in UTF-8 |
490 static xmlParserCtxtPtr createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk) |
490 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk) |
491 { |
491 { |
492 if (!didInit) { |
492 if (!didInit) { |
493 xmlInitParser(); |
493 xmlInitParser(); |
494 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
494 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
495 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
495 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
516 parser->depth = 0; |
516 parser->depth = 0; |
517 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); |
517 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); |
518 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); |
518 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); |
519 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); |
519 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); |
520 parser->_private = userData; |
520 parser->_private = userData; |
521 |
521 |
522 return parser; |
522 return adoptRef(new XMLParserContext(parser)); |
523 } |
523 } |
524 |
524 |
525 // -------------------------------- |
525 // -------------------------------- |
526 |
526 |
527 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view) |
527 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view) |
607 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. |
607 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. |
608 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) |
608 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) |
609 m_defaultNamespaceURI = parentElement->namespaceURI(); |
609 m_defaultNamespaceURI = parentElement->namespaceURI(); |
610 } |
610 } |
611 |
611 |
|
612 XMLParserContext::~XMLParserContext() |
|
613 { |
|
614 if (m_context->myDoc) |
|
615 xmlFreeDoc(m_context->myDoc); |
|
616 xmlFreeParserCtxt(m_context); |
|
617 } |
|
618 |
612 XMLTokenizer::~XMLTokenizer() |
619 XMLTokenizer::~XMLTokenizer() |
613 { |
620 { |
614 clearCurrentNodeStack(); |
621 clearCurrentNodeStack(); |
615 if (m_parsingFragment && m_doc) |
622 if (m_parsingFragment && m_doc) |
616 m_doc->deref(); |
623 m_doc->deref(); |
617 if (m_pendingScript) |
624 if (m_pendingScript) |
618 m_pendingScript->removeClient(this); |
625 m_pendingScript->removeClient(this); |
619 if (m_context) |
|
620 xmlFreeParserCtxt(m_context); |
|
621 } |
626 } |
622 |
627 |
623 void XMLTokenizer::doWrite(const String& parseString) |
628 void XMLTokenizer::doWrite(const String& parseString) |
624 { |
629 { |
625 if (!m_context) |
630 if (!m_context) |
626 initializeParserContext(); |
631 initializeParserContext(); |
627 |
632 |
|
633 // Protect the libxml context from deletion during a callback |
|
634 RefPtr<XMLParserContext> context = m_context; |
|
635 |
628 // libXML throws an error if you try to switch the encoding for an empty string. |
636 // libXML throws an error if you try to switch the encoding for an empty string. |
629 if (parseString.length()) { |
637 if (parseString.length()) { |
630 // Hack around libxml2's lack of encoding override support by manually |
638 // Hack around libxml2's lack of encoding override support by manually |
631 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml |
639 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml |
632 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks |
640 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks |
633 // and switch encodings, causing the parse to fail. |
641 // and switch encodings, causing the parse to fail. |
634 const UChar BOM = 0xFEFF; |
642 const UChar BOM = 0xFEFF; |
635 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
643 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
636 xmlSwitchEncoding(m_context, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
644 xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
637 |
645 |
638 XMLTokenizerScope scope(m_doc->docLoader()); |
646 XMLTokenizerScope scope(m_doc->docLoader()); |
639 xmlParseChunk(m_context, reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0); |
647 xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0); |
640 } |
648 } |
641 |
649 |
642 if (m_doc->decoder() && m_doc->decoder()->sawError()) { |
650 if (m_doc->decoder() && m_doc->decoder()->sawError()) { |
643 // If the decoder saw an error, report it as fatal (stops parsing) |
651 // If the decoder saw an error, report it as fatal (stops parsing) |
644 handleError(fatal, "Encoding error", lineNumber(), columnNumber()); |
652 handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col); |
645 } |
653 } |
646 |
654 |
647 return; |
655 return; |
648 } |
656 } |
649 |
657 |
1275 m_sawXSLTransform = false; |
1283 m_sawXSLTransform = false; |
1276 m_sawFirstElement = false; |
1284 m_sawFirstElement = false; |
1277 |
1285 |
1278 XMLTokenizerScope scope(m_doc->docLoader()); |
1286 XMLTokenizerScope scope(m_doc->docLoader()); |
1279 if (m_parsingFragment) |
1287 if (m_parsingFragment) |
1280 m_context = createMemoryParser(&sax, this, chunk); |
1288 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); |
1281 else |
1289 else |
1282 m_context = createStringParser(&sax, this); |
1290 m_context = XMLParserContext::createStringParser(&sax, this); |
1283 } |
1291 } |
1284 |
1292 |
1285 void XMLTokenizer::doEnd() |
1293 void XMLTokenizer::doEnd() |
1286 { |
1294 { |
1287 #if ENABLE(XSLT) |
1295 #if ENABLE(XSLT) |
1332 } |
1337 } |
1333 #endif |
1338 #endif |
1334 |
1339 |
1335 int XMLTokenizer::lineNumber() const |
1340 int XMLTokenizer::lineNumber() const |
1336 { |
1341 { |
1337 return m_context ? m_context->input->line : 1; |
1342 return context() ? context()->input->line : 1; |
1338 } |
1343 } |
1339 |
1344 |
1340 int XMLTokenizer::columnNumber() const |
1345 int XMLTokenizer::columnNumber() const |
1341 { |
1346 { |
1342 return m_context ? m_context->input->col : 1; |
1347 return context() ? context()->input->col : 1; |
1343 } |
1348 } |
1344 |
1349 |
1345 void XMLTokenizer::stopParsing() |
1350 void XMLTokenizer::stopParsing() |
1346 { |
1351 { |
1347 Tokenizer::stopParsing(); |
1352 Tokenizer::stopParsing(); |
1348 xmlStopParser(m_context); |
1353 if (context()) |
|
1354 xmlStopParser(context()); |
1349 } |
1355 } |
1350 |
1356 |
1351 void XMLTokenizer::resumeParsing() |
1357 void XMLTokenizer::resumeParsing() |
1352 { |
1358 { |
1353 ASSERT(m_parserPaused); |
1359 ASSERT(m_parserPaused); |
1382 XMLTokenizer tokenizer(fragment, parent); |
1388 XMLTokenizer tokenizer(fragment, parent); |
1383 |
1389 |
1384 CString chunkAsUtf8 = chunk.utf8(); |
1390 CString chunkAsUtf8 = chunk.utf8(); |
1385 tokenizer.initializeParserContext(chunkAsUtf8.data()); |
1391 tokenizer.initializeParserContext(chunkAsUtf8.data()); |
1386 |
1392 |
1387 xmlParseContent(tokenizer.m_context); |
1393 xmlParseContent(tokenizer.context()); |
1388 |
1394 |
1389 tokenizer.endDocument(); |
1395 tokenizer.endDocument(); |
1390 |
1396 |
1391 // Check whether the entire chunk has been processed. |
1397 // Check whether the entire chunk has been processed. |
1392 long bytesProcessed = xmlByteConsumed(tokenizer.m_context); |
1398 long bytesProcessed = xmlByteConsumed(tokenizer.context()); |
1393 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) |
1399 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) |
1394 return false; |
1400 return false; |
1395 |
1401 |
1396 // No error if the chunk is well formed, or if it is not well formed but libxml recorded no error. |
1402 // No error if the chunk is well formed, or if it is not well formed but libxml recorded no error. |
1397 return tokenizer.m_context->wellFormed || xmlCtxtGetLastError(tokenizer.m_context) == 0; |
1403 return tokenizer.context()->wellFormed || xmlCtxtGetLastError(tokenizer.context()) == 0; |
1398 } |
1404 } |
1399 |
1405 |
1400 // -------------------------------- |
1406 // -------------------------------- |
1401 |
1407 |
1402 struct AttributeParseState { |
1408 struct AttributeParseState { |
1435 |
1441 |
1436 xmlSAXHandler sax; |
1442 xmlSAXHandler sax; |
1437 memset(&sax, 0, sizeof(sax)); |
1443 memset(&sax, 0, sizeof(sax)); |
1438 sax.startElementNs = attributesStartElementNsHandler; |
1444 sax.startElementNs = attributesStartElementNsHandler; |
1439 sax.initialized = XML_SAX2_MAGIC; |
1445 sax.initialized = XML_SAX2_MAGIC; |
1440 xmlParserCtxtPtr parser = createStringParser(&sax, &state); |
1446 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); |
1441 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; |
1447 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; |
1442 xmlParseChunk(parser, reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1); |
1448 xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1); |
1443 if (parser->myDoc) |
|
1444 xmlFreeDoc(parser->myDoc); |
|
1445 xmlFreeParserCtxt(parser); |
|
1446 attrsOK = state.gotAttributes; |
1449 attrsOK = state.gotAttributes; |
1447 return state.attributes; |
1450 return state.attributes; |
1448 } |
1451 } |
1449 |
1452 |
1450 } |
1453 } |