diff -r 000000000000 -r 4f2f89ce4247 WebCore/html/LegacyHTMLTreeBuilder.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/WebCore/html/LegacyHTMLTreeBuilder.cpp Fri Sep 17 09:02:29 2010 +0300 @@ -0,0 +1,1786 @@ +/* + Copyright (C) 1997 Martin Jones (mjones@kde.org) + (C) 1997 Torben Weis (weis@kde.org) + (C) 1999,2001 Lars Knoll (knoll@kde.org) + (C) 2000,2001 Dirk Mueller (mueller@kde.org) + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. 
+*/ + +#include "config.h" +#include "LegacyHTMLTreeBuilder.h" + +#include "CharacterNames.h" +#include "CSSPropertyNames.h" +#include "CSSValueKeywords.h" +#include "Chrome.h" +#include "ChromeClient.h" +#include "Comment.h" +#include "Console.h" +#include "DOMWindow.h" +#include "DocumentFragment.h" +#include "DocumentType.h" +#include "Frame.h" +#include "HTMLBodyElement.h" +#include "HTMLDocument.h" +#include "HTMLDivElement.h" +#include "HTMLDListElement.h" +#include "HTMLElementFactory.h" +#include "HTMLFormElement.h" +#include "HTMLHeadElement.h" +#include "HTMLHRElement.h" +#include "HTMLHtmlElement.h" +#include "HTMLIsIndexElement.h" +#include "HTMLMapElement.h" +#include "HTMLNames.h" +#include "HTMLParserQuirks.h" +#include "HTMLTableCellElement.h" +#include "HTMLTableRowElement.h" +#include "HTMLTableSectionElement.h" +#include "LegacyHTMLDocumentParser.h" +#include "LocalizedStrings.h" +#include "Page.h" +#include "Settings.h" +#include "Text.h" +#include "TreeDepthLimit.h" +#include +#include + +namespace WebCore { + +using namespace HTMLNames; + +static const unsigned cMaxRedundantTagDepth = 20; +static const unsigned cResidualStyleMaxDepth = 200; +static const unsigned cResidualStyleIterationLimit = 10; + + +static const int minBlockLevelTagPriority = 3; + +// A cap on the number of tags with priority minBlockLevelTagPriority or higher +// allowed in m_blockStack. The cap is enforced by adding such new elements as +// siblings instead of children once it is reached. 
+static const size_t cMaxBlockDepth = 4096; + + +typedef HashSet TagNameSet; + +template< size_t ArraySize > +static void addTags(TagNameSet& set, QualifiedName (&names)[ArraySize]) +{ + for (size_t x = 0; x < ArraySize; x++) { + const QualifiedName& name = names[x]; + set.add(name.localName().impl()); + } +} + +struct HTMLStackElem : Noncopyable { + HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) + : tagName(t) + , level(lvl) + , strayTableContent(false) + , node(n) + , didRefNode(r) + , next(nx) + { + } + + void derefNode() + { + if (didRefNode) + node->deref(); + } + + AtomicString tagName; + int level; + bool strayTableContent; + Node* node; + bool didRefNode; + HTMLStackElem* next; +}; + +/** + * The parser parses tokenized input into the document, building up the + * document tree. If the document is well-formed, parsing it is straightforward. + * + * Unfortunately, we have to handle many HTML documents that are not well-formed, + * so the parser has to be tolerant about errors. + * + * We have to take care of at least the following error conditions: + * + * 1. The element being added is explicitly forbidden inside some outer tag. + * In this case we should close all tags up to the one, which forbids + * the element, and add it afterwards. + * + * 2. We are not allowed to add the element directly. It could be that + * the person writing the document forgot some tag in between (or that the + * tag in between is optional). This could be the case with the following + * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). + * + * 3. We want to add a block element inside to an inline element. Close all + * inline elements up to the next higher block element. + * + * 4. If this doesn't help, close elements until we are allowed to add the + * element or ignore the tag. 
+ * + */ + +LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(HTMLDocument* doc, bool reportErrors) + : m_document(doc) + , m_current(doc) + , m_didRefCurrent(false) + , m_blockStack(0) + , m_blocksInStack(0) + , m_treeDepth(0) + , m_hasPElementInScope(NotInScope) + , m_inBody(false) + , m_haveContent(false) + , m_haveFrameSet(false) + , m_isParsingFragment(false) + , m_reportErrors(reportErrors) + , m_handlingResidualStyleAcrossBlocks(false) + , m_inStrayTableContent(0) + , m_scriptingPermission(FragmentScriptingAllowed) + , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) +{ +} + +LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) + : m_document(frag->document()) + , m_current(frag) + , m_didRefCurrent(true) + , m_blockStack(0) + , m_blocksInStack(0) + , m_treeDepth(0) + , m_hasPElementInScope(NotInScope) + , m_inBody(true) + , m_haveContent(false) + , m_haveFrameSet(false) + , m_isParsingFragment(true) + , m_reportErrors(false) + , m_handlingResidualStyleAcrossBlocks(false) + , m_inStrayTableContent(0) + , m_scriptingPermission(scriptingPermission) + , m_parserQuirks(m_document->page() ? 
m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) +{ + if (frag) + frag->ref(); +} + +LegacyHTMLTreeBuilder::~LegacyHTMLTreeBuilder() +{ + freeBlock(); + if (m_didRefCurrent) + m_current->deref(); +} + +void LegacyHTMLTreeBuilder::reset() +{ + ASSERT(!m_isParsingFragment); + + setCurrent(m_document); + + freeBlock(); + + m_treeDepth = 0; + m_inBody = false; + m_haveFrameSet = false; + m_haveContent = false; + m_inStrayTableContent = 0; + + m_currentFormElement = 0; + m_currentMapElement = 0; + m_head = 0; + m_isindexElement = 0; + + m_skipModeTag = nullAtom; + + if (m_parserQuirks) + m_parserQuirks->reset(); +} + +void LegacyHTMLTreeBuilder::setCurrent(Node* newCurrent) +{ + bool didRefNewCurrent = newCurrent && newCurrent != m_document; + if (didRefNewCurrent) + newCurrent->ref(); + if (m_didRefCurrent) + m_current->deref(); + m_current = newCurrent; + m_didRefCurrent = didRefNewCurrent; +} + +inline static int tagPriorityOfNode(Node* n) +{ + return n->isHTMLElement() ? static_cast(n)->tagPriority() : 0; +} + +inline void LegacyHTMLTreeBuilder::limitDepth(int tagPriority) +{ + while (m_treeDepth >= maxDOMTreeDepth) + popBlock(m_blockStack->tagName); + if (tagPriority >= minBlockLevelTagPriority) { + while (m_blocksInStack >= cMaxBlockDepth) + popBlock(m_blockStack->tagName); + } +} + +inline bool LegacyHTMLTreeBuilder::insertNodeAfterLimitDepth(Node* n, bool flat) +{ + limitDepth(tagPriorityOfNode(n)); + return insertNode(n, flat); +} + +PassRefPtr LegacyHTMLTreeBuilder::parseToken(Token* t) +{ + if (!m_skipModeTag.isNull()) { + if (!t->beginTag && t->tagName == m_skipModeTag) + // Found the end tag for the current skip mode, so we're done skipping. + m_skipModeTag = nullAtom; + else if (m_current->localName() == t->tagName) + // Do not skip . + // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? + ; + else + return 0; + } + + // Apparently some sites use
</br> instead of <br>
. Be compatible with IE and Firefox and treat this like <br>
. + if (t->isCloseTag(brTag) && m_document->inCompatMode()) { + reportError(MalformedBRError); + t->beginTag = true; + } + + if (!t->beginTag) { + processCloseTag(t); + return 0; + } + + // Ignore spaces, if we're not inside a paragraph or other inline code. + // Do not alter the text if it is part of a scriptTag. + if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { + if (m_inBody && !skipMode() && m_current->localName() != styleTag && + m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) + m_haveContent = true; + + // HTML5 requires text node coalescing. + // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#insert-a-character + Node* previousChild = m_current->lastChild(); + if (previousChild && previousChild->isTextNode()) { + // Only coalesce text nodes if the text node wouldn't be foster parented. + if (!m_current->hasTagName(htmlTag) + && !m_current->hasTagName(tableTag) + && !m_current->hasTagName(trTag) + && !m_current->hasTagName(theadTag) + && !m_current->hasTagName(tbodyTag) + && !m_current->hasTagName(tfootTag) + && !m_current->hasTagName(titleTag)) { + // Technically we're only supposed to merge into the previous + // text node if it was the last node inserted by the parser. + // (This was a spec modification made to make it easier for + // mozilla to run their parser in a thread.) + // In practice it does not seem to matter. 
+ CharacterData* textNode = static_cast(previousChild); + textNode->parserAppendData(t->text); + return textNode; + } + } + + RefPtr n; + String text = t->text.get(); + unsigned charsLeft = text.length(); + while (charsLeft) { + // split large blocks of text to nodes of manageable size + n = Text::createWithLengthLimit(m_document, text, charsLeft); + if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) + return 0; + } + return n; + } + + RefPtr n = getNode(t); + // just to be sure, and to catch currently unimplemented stuff + if (!n) + return 0; + + // set attributes + if (n->isHTMLElement()) { + HTMLElement* e = static_cast(n.get()); + if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) + e->setAttributeMap(t->attrs.get(), m_scriptingPermission); + + // take care of optional close tags + if (e->endTagRequirement() == TagStatusOptional) + popBlock(t->tagName); + + // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing + // syntax was used, report an error. + if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { + if (t->tagName == scriptTag) + reportError(IncorrectXMLCloseScriptWarning); + else + reportError(IncorrectXMLSelfCloseError, &t->tagName); + } + } + + if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) { + // we couldn't insert the node + + if (n->isElementNode()) { + Element* e = static_cast(n.get()); + e->setAttributeMap(0); + } + + if (m_currentMapElement == n) + m_currentMapElement = 0; + + if (m_currentFormElement == n) + m_currentFormElement = 0; + + if (m_head == n) + m_head = 0; + + return 0; + } + return n; +} + +void LegacyHTMLTreeBuilder::parseDoctypeToken(DoctypeToken* t) +{ + // Ignore any doctype after the first. Ignore doctypes in fragments. + if (m_document->doctype() || m_isParsingFragment || m_current != m_document) + return; + + // Make a new doctype node and set it as our doctype. 
+ m_document->legacyParserAddChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); + if (t->m_forceQuirks) + m_document->setParseMode(Document::Compat); +} + +static bool isTableSection(const Node* n) +{ + return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); +} + +static bool isTablePart(const Node* n) +{ + return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) + || isTableSection(n); +} + +static bool isTableRelated(const Node* n) +{ + return n->hasTagName(tableTag) || isTablePart(n); +} + +static bool isScopingTag(const AtomicString& tagName) +{ + return tagName == appletTag || tagName == captionTag || tagName == tdTag + || tagName == thTag || tagName == buttonTag || tagName == marqueeTag + || tagName == objectTag || tagName == tableTag || tagName == htmlTag; +} + +bool LegacyHTMLTreeBuilder::insertNode(Node* n, bool flat) +{ + RefPtr protectNode(n); + + const AtomicString& localName = n->localName(); + + // is never allowed inside stray table content. Always pop out of the stray table content + // and close up the first table, and then start the second table as a sibling. + if (m_inStrayTableContent && localName == tableTag) + popBlock(tableTag); + + if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) + return false; + + int tagPriority = tagPriorityOfNode(n); + + // let's be stupid and just try to insert it. + // this should work if the document is well-formed + Node* newNode = m_current->legacyParserAddChild(n); + if (!newNode) + return handleError(n, flat, localName, tagPriority); // Try to handle the error. + + // don't push elements without end tags (e.g., ) on the stack + bool parentAttached = m_current->attached(); + if (tagPriority > 0 && !flat) { + if (newNode == m_current) { + // This case should only be hit when a demoted is placed inside a table. 
+ ASSERT(localName == formTag); + reportError(FormInsideTablePartError, &m_current->localName()); + HTMLFormElement* form = static_cast(n); + form->setDemoted(true); + } else { + // The pushBlock function transfers ownership of current to the block stack + // so we're guaranteed that m_didRefCurrent is false. The code below is an + // optimized version of setCurrent that takes advantage of that fact and also + // assumes that newNode is neither 0 nor a pointer to the document. + pushBlock(localName, tagPriority); + newNode->beginParsingChildren(); + ASSERT(!m_didRefCurrent); + newNode->ref(); + m_current = newNode; + m_didRefCurrent = true; + } + if (parentAttached && !n->attached() && !m_isParsingFragment) + n->attach(); + } else { + if (parentAttached && !n->attached() && !m_isParsingFragment) + n->attach(); + n->finishParsingChildren(); + } + + if (localName == htmlTag && m_document->frame() && !m_isParsingFragment) + m_document->frame()->loader()->dispatchDocumentElementAvailable(); + + return true; +} + +bool LegacyHTMLTreeBuilder::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) +{ + // Error handling code. This is just ad hoc handling of specific parent/child combinations. + bool handled = false; + + // 1. Check out the element's tag name to decide how to deal with errors. + if (n->isHTMLElement()) { + HTMLElement* h = static_cast(n); + if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { + if (m_inStrayTableContent && !isTableRelated(m_current)) { + reportError(MisplacedTablePartError, &localName, &m_current->localName()); + // pop out to the nearest enclosing table-related tag. 
+ while (m_blockStack && !isTableRelated(m_current)) + popOneBlock(); + return insertNode(n); + } + } else if (h->hasLocalName(headTag)) { + if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { + reportError(MisplacedHeadError); + return false; + } + } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { + bool createdHead = false; + if (!m_head) { + createHead(); + createdHead = true; + } + if (m_head) { + if (!createdHead) + reportError(MisplacedHeadContentError, &localName, &m_current->localName()); + if (m_head->legacyParserAddChild(n)) { + if (!n->attached() && !m_isParsingFragment) + n->attach(); + return true; + } + return false; + } + } else if (h->hasLocalName(htmlTag)) { + if (!m_current->isDocumentNode() ) { + if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag) && !m_isParsingFragment) { + reportError(RedundantHTMLBodyError, &localName); + // we have another element.... apply attributes to existing one + // make sure we don't overwrite already existing attributes + NamedNodeMap* map = static_cast(n)->attributes(true); + Element* existingHTML = static_cast(m_document->documentElement()); + NamedNodeMap* bmap = existingHTML->attributes(false); + for (unsigned l = 0; map && l < map->length(); ++l) { + Attribute* it = map->attributeItem(l); + if (!bmap->getAttributeItem(it->name())) + existingHTML->setAttribute(it->name(), it->value()); + } + } + return false; + } + } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { + bool createdHead = false; + if (!m_head) { + createHead(); + createdHead = true; + } + if (m_head) { + Node* newNode = m_head->legacyParserAddChild(n); + if (!newNode) { + setSkipMode(h->tagQName()); + return false; + } + + if (!createdHead) + reportError(MisplacedHeadContentError, &localName, &m_current->localName()); + + pushBlock(localName, tagPriority); + newNode->beginParsingChildren(); + 
setCurrent(newNode); + if (!n->attached() && !m_isParsingFragment) + n->attach(); + return true; + } + if (m_inBody) { + setSkipMode(h->tagQName()); + return false; + } + } else if (h->hasLocalName(bodyTag)) { + if (m_inBody && m_document->body() && !m_isParsingFragment) { + // we have another element.... apply attributes to existing one + // make sure we don't overwrite already existing attributes + // some sites use ... + reportError(RedundantHTMLBodyError, &localName); + NamedNodeMap* map = static_cast(n)->attributes(true); + Element* existingBody = m_document->body(); + NamedNodeMap* bmap = existingBody->attributes(false); + for (unsigned l = 0; map && l < map->length(); ++l) { + Attribute* it = map->attributeItem(l); + if (!bmap->getAttributeItem(it->name())) + existingBody->setAttribute(it->name(), it->value()); + } + return false; + } else if (!m_current->isDocumentNode()) + return false; + } else if (h->hasLocalName(areaTag)) { + if (m_currentMapElement) { + reportError(MisplacedAreaError, &m_current->localName()); + m_currentMapElement->legacyParserAddChild(n); + if (!n->attached() && !m_isParsingFragment) + n->attach(); + handled = true; + return true; + } + return false; + } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { + if (isTableRelated(m_current)) { + while (m_blockStack && isTablePart(m_current)) + popOneBlock(); + return insertNode(n); + } + } + } else if (n->isCommentNode() && !m_head) + return false; + + // 2. Next we examine our currently active element to do some further error handling. + if (m_current->isHTMLElement()) { + HTMLElement* h = static_cast(m_current); + const AtomicString& currentTagName = h->localName(); + if (h->hasLocalName(htmlTag)) { + HTMLElement* elt = n->isHTMLElement() ? 
static_cast(n) : 0; + if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || + elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || + elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || + elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || + elt->hasLocalName(baseTag))) { + if (!m_head) { + m_head = HTMLHeadElement::create(m_document); + insertNode(m_head.get()); + handled = true; + } + } else { + if (n->isTextNode()) { + Text* t = static_cast(n); + if (t->containsOnlyWhitespace()) { + if (m_head && !m_inBody) { + // We're between and . According to + // the HTML5 parsing algorithm, we're supposed to + // insert whitespace text nodes into the HTML element. + ExceptionCode ec; + m_current->appendChild(n, ec); + return true; + } + return false; + } + } + if (!m_haveFrameSet) { + // Ensure that head exists. + // But not for older versions of Mail, where the implicit isn't expected - + if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) + createHead(); + + popBlock(headTag); + startBody(); + insertNode(HTMLBodyElement::create(m_document).get()); + handled = true; + } else + reportError(MisplacedFramesetContentError, &localName); + } + } else if (h->hasLocalName(headTag)) { + if (n->hasTagName(htmlTag)) + return false; + else { + // This means the body starts here... + if (!m_haveFrameSet) { + ASSERT(currentTagName == headTag); + popBlock(currentTagName); + startBody(); + insertNode(HTMLBodyElement::create(m_document).get()); + handled = true; + } else + reportError(MisplacedFramesetContentError, &localName); + } + } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) + || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(currentTagName); + handled = true; + } else if (h->hasLocalName(captionTag)) { + // Illegal content in a caption. Close the caption and try again. 
+ reportError(MisplacedCaptionContentError, &localName); + popBlock(currentTagName); + if (isTablePart(n)) + return insertNode(n, flat); + } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { + if (n->hasTagName(tableTag)) { + reportError(MisplacedTableError, ¤tTagName); + if (m_isParsingFragment && !h->hasLocalName(tableTag)) + // fragment may contain table parts without
ancestor, pop them one by one + popBlock(h->localName()); + popBlock(localName); // end the table + handled = true; // ...and start a new one + } else { + ExceptionCode ec = 0; + Node* node = m_current; + Node* parent = node->parentNode(); + // A script may have removed the current node's parent from the DOM + // http://bugs.webkit.org/show_bug.cgi?id=7137 + // FIXME: we should do real recovery here and re-parent with the correct node. + if (!parent) + return false; + Node* grandparent = parent->parentNode(); + + if (n->isTextNode() || + (h->hasLocalName(trTag) && + isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || + ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && + !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && + parent->hasTagName(tableTag))) { + node = (node->hasTagName(tableTag)) ? node : + ((node->hasTagName(trTag)) ? grandparent : parent); + // This can happen with fragments + if (!node) + return false; + Node* parent = node->parentNode(); + if (!parent) + return false; + parent->insertBefore(n, node, ec); + if (!ec) { + reportError(StrayTableContentError, &localName, ¤tTagName); + if (n->isHTMLElement() && tagPriority > 0 && + !flat && static_cast(n)->endTagRequirement() != TagStatusForbidden) + { + pushBlock(localName, tagPriority); + n->beginParsingChildren(); + setCurrent(n); + m_inStrayTableContent++; + m_blockStack->strayTableContent = true; + } + return true; + } + } + + if (!ec) { + if (m_current->hasTagName(trTag)) { + reportError(TablePartRequiredError, &localName, &tdTag.localName()); + insertNode(HTMLTableCellElement::create(tdTag, m_document).get()); + } else if (m_current->hasTagName(tableTag)) { + // Don't report an error in this case, since making a happens all the time when you have
, + // and it isn't really a parse error per se. + insertNode(HTMLTableSectionElement::create(tbodyTag, m_document).get()); + } else { + reportError(TablePartRequiredError, &localName, &trTag.localName()); + insertNode(HTMLTableRowElement::create(m_document).get()); + } + handled = true; + } + } + } else if (h->hasLocalName(objectTag)) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(objectTag); + handled = true; + } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { + if (!isInline(n)) { + popBlock(currentTagName); + handled = true; + } + } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { + if (localName == optgroupTag) { + popBlock(currentTagName); + handled = true; + } else if (localName == selectTag) { + // IE treats a nested select as . Let's do the same + popBlock(localName); + } + } else if (h->hasLocalName(selectTag)) { + if (localName == inputTag || localName == textareaTag) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(currentTagName); + handled = true; + } + } else if (h->hasLocalName(colgroupTag)) { + popBlock(currentTagName); + handled = true; + } else if (!h->hasLocalName(bodyTag)) { + if (isInline(m_current)) { + popInlineBlocks(); + handled = true; + } + } + } else if (m_current->isDocumentNode()) { + if (n->isTextNode()) { + Text* t = static_cast(n); + if (t->containsOnlyWhitespace()) + return false; + } + + if (!m_document->documentElement()) { + insertNode(HTMLHtmlElement::create(m_document).get()); + handled = true; + } + } + + // 3. If we couldn't handle the error, just return false and attempt to error-correct again. 
+ if (!handled) { + reportError(IgnoredContentError, &localName, &m_current->localName()); + return false; + } + return insertNode(n); +} + +typedef bool (LegacyHTMLTreeBuilder::*CreateErrorCheckFunc)(Token* t, RefPtr&); +typedef HashMap FunctionMap; + +bool LegacyHTMLTreeBuilder::textCreateErrorCheck(Token* t, RefPtr& result) +{ + result = Text::create(m_document, t->text.get()); + return false; +} + +bool LegacyHTMLTreeBuilder::commentCreateErrorCheck(Token* t, RefPtr& result) +{ + result = Comment::create(m_document, t->text.get()); + return false; +} + +bool LegacyHTMLTreeBuilder::headCreateErrorCheck(Token*, RefPtr& result) +{ + if (!m_head || m_current->localName() == htmlTag) { + m_head = HTMLHeadElement::create(m_document); + result = m_head; + } else + reportError(MisplacedHeadError); + return false; +} + +bool LegacyHTMLTreeBuilder::bodyCreateErrorCheck(Token*, RefPtr&) +{ + // body no longer allowed if we have a frameset + if (m_haveFrameSet) + return false; + + // Ensure that head exists (unless parsing a fragment). 
+ // But not for older versions of Mail, where the implicit isn't expected - + if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) + createHead(); + + popBlock(headTag); + startBody(); + return true; +} + +bool LegacyHTMLTreeBuilder::framesetCreateErrorCheck(Token*, RefPtr&) +{ + popBlock(headTag); + if (m_inBody && !m_haveFrameSet && !m_haveContent) { + popBlock(bodyTag); + // ### actually for IE document.body returns the now hidden "body" element + // we can't implement that behaviour now because it could cause too many + // regressions and the headaches are not worth the work as long as there is + // no site actually relying on that detail (Dirk) + if (m_document->body() && !m_isParsingFragment) + m_document->body()->setAttribute(styleAttr, "display:none"); + m_inBody = false; + } + if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) + return false; + m_haveFrameSet = true; + startBody(); + return true; +} + +bool LegacyHTMLTreeBuilder::formCreateErrorCheck(Token* t, RefPtr& result) +{ + // Only create a new form if we're not already inside one. + // This is consistent with other browsers' behavior. 
+ if (!m_currentFormElement) { + m_currentFormElement = HTMLFormElement::create(m_document); + result = m_currentFormElement; + pCloserCreateErrorCheck(t, result); + } + return false; +} + +bool LegacyHTMLTreeBuilder::isindexCreateErrorCheck(Token* t, RefPtr& result) +{ + RefPtr n = handleIsindex(t); + if (!m_inBody) + m_isindexElement = n.release(); + else { + t->selfClosingTag = true; + result = n.release(); + } + return false; +} + +bool LegacyHTMLTreeBuilder::selectCreateErrorCheck(Token*, RefPtr&) +{ + return true; +} + +bool LegacyHTMLTreeBuilder::ddCreateErrorCheck(Token* t, RefPtr& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(dtTag); + popBlock(ddTag); + return true; +} + +bool LegacyHTMLTreeBuilder::dtCreateErrorCheck(Token* t, RefPtr& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(ddTag); + popBlock(dtTag); + return true; +} + +bool LegacyHTMLTreeBuilder::rpCreateErrorCheck(Token*, RefPtr&) +{ + popBlock(rpTag); + popBlock(rtTag); + return true; +} + +bool LegacyHTMLTreeBuilder::rtCreateErrorCheck(Token*, RefPtr&) +{ + popBlock(rpTag); + popBlock(rtTag); + return true; +} + +bool LegacyHTMLTreeBuilder::nestedCreateErrorCheck(Token* t, RefPtr&) +{ + popBlock(t->tagName); + return true; +} + +bool LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck(Token* t, RefPtr& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(t->tagName); + return true; +} + +bool LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck(Token* t, RefPtr&) +{ + return allowNestedRedundantTag(t->tagName); +} + +bool LegacyHTMLTreeBuilder::colCreateErrorCheck(Token*, RefPtr&) +{ + if (!m_current->hasTagName(tableTag)) + return true; + RefPtr implicitColgroup = HTMLElementFactory::createHTMLElement(colgroupTag, m_document, 0, true); + insertNode(implicitColgroup.get()); + return true; +} + +bool LegacyHTMLTreeBuilder::tableCellCreateErrorCheck(Token*, RefPtr&) +{ + popBlock(tdTag); + popBlock(thTag); + return true; +} + +bool 
LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck(Token*, RefPtr&) +{ + popBlock(theadTag); + popBlock(tbodyTag); + popBlock(tfootTag); + return true; +} + +bool LegacyHTMLTreeBuilder::noembedCreateErrorCheck(Token*, RefPtr&) +{ + setSkipMode(noembedTag); + return true; +} + +bool LegacyHTMLTreeBuilder::noframesCreateErrorCheck(Token*, RefPtr&) +{ + setSkipMode(noframesTag); + return true; +} + +bool LegacyHTMLTreeBuilder::noscriptCreateErrorCheck(Token*, RefPtr&) +{ + if (!m_isParsingFragment) { + Frame* frame = m_document->frame(); + if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) + setSkipMode(noscriptTag); + } + return true; +} + +bool LegacyHTMLTreeBuilder::pCloserCreateErrorCheck(Token*, RefPtr&) +{ + if (hasPElementInScope()) + popBlock(pTag); + return true; +} + +bool LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck(Token*, RefPtr&) +{ + if (m_document->inCompatMode()) + return true; + if (hasPElementInScope()) + popBlock(pTag); + return true; +} + +bool LegacyHTMLTreeBuilder::mapCreateErrorCheck(Token*, RefPtr& result) +{ + m_currentMapElement = HTMLMapElement::create(m_document); + result = m_currentMapElement; + return false; +} + +static void mapTagToFunc(FunctionMap& map, const QualifiedName& tag, CreateErrorCheckFunc func) +{ + map.set(tag.localName().impl(), func); +} + +template< size_t ArraySize > +static void mapTagsToFunc(FunctionMap& map, QualifiedName (&names)[ArraySize], CreateErrorCheckFunc func) +{ + for (size_t x = 0; x < ArraySize; x++) { + const QualifiedName& name = names[x]; + mapTagToFunc(map, name, func); + } +} + +PassRefPtr LegacyHTMLTreeBuilder::getNode(Token* t) +{ + // Init our error handling table. 
+ DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); + if (gFunctionMap.isEmpty()) { + QualifiedName nestedCreateErrorTags[] = { aTag, buttonTag, nobrTag, trTag }; + mapTagsToFunc(gFunctionMap, nestedCreateErrorTags, &LegacyHTMLTreeBuilder::nestedCreateErrorCheck); + + QualifiedName nestedStyleCreateErrorTags[] = { bTag, bigTag, iTag, markTag, sTag, smallTag, strikeTag, ttTag, uTag }; + mapTagsToFunc(gFunctionMap, nestedStyleCreateErrorTags, &LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck); + + QualifiedName pCloserCreateErrorTags[] = { addressTag, articleTag, + asideTag, blockquoteTag, centerTag, dirTag, divTag, dlTag, + fieldsetTag, footerTag, h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag, + headerTag, hgroupTag, hrTag, listingTag, menuTag, navTag, olTag, + pTag, plaintextTag, preTag, sectionTag, ulTag }; + mapTagsToFunc(gFunctionMap, pCloserCreateErrorTags, &LegacyHTMLTreeBuilder::pCloserCreateErrorCheck); + + mapTagToFunc(gFunctionMap, bodyTag, &LegacyHTMLTreeBuilder::bodyCreateErrorCheck); + mapTagToFunc(gFunctionMap, colTag, &LegacyHTMLTreeBuilder::colCreateErrorCheck); + mapTagToFunc(gFunctionMap, ddTag, &LegacyHTMLTreeBuilder::ddCreateErrorCheck); + mapTagToFunc(gFunctionMap, dtTag, &LegacyHTMLTreeBuilder::dtCreateErrorCheck); + mapTagToFunc(gFunctionMap, formTag, &LegacyHTMLTreeBuilder::formCreateErrorCheck); + mapTagToFunc(gFunctionMap, framesetTag, &LegacyHTMLTreeBuilder::framesetCreateErrorCheck); + mapTagToFunc(gFunctionMap, headTag, &LegacyHTMLTreeBuilder::headCreateErrorCheck); + mapTagToFunc(gFunctionMap, isindexTag, &LegacyHTMLTreeBuilder::isindexCreateErrorCheck); + mapTagToFunc(gFunctionMap, mapTag, &LegacyHTMLTreeBuilder::mapCreateErrorCheck); + mapTagToFunc(gFunctionMap, liTag, &LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck); + mapTagToFunc(gFunctionMap, noembedTag, &LegacyHTMLTreeBuilder::noembedCreateErrorCheck); + mapTagToFunc(gFunctionMap, noframesTag, &LegacyHTMLTreeBuilder::noframesCreateErrorCheck); + mapTagToFunc(gFunctionMap, 
noscriptTag, &LegacyHTMLTreeBuilder::noscriptCreateErrorCheck); + mapTagToFunc(gFunctionMap, tableTag, &LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck); + mapTagToFunc(gFunctionMap, rpTag, &LegacyHTMLTreeBuilder::rpCreateErrorCheck); + mapTagToFunc(gFunctionMap, rtTag, &LegacyHTMLTreeBuilder::rtCreateErrorCheck); + mapTagToFunc(gFunctionMap, selectTag, &LegacyHTMLTreeBuilder::selectCreateErrorCheck); + mapTagToFunc(gFunctionMap, tdTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); + mapTagToFunc(gFunctionMap, thTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); + mapTagToFunc(gFunctionMap, tbodyTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); + mapTagToFunc(gFunctionMap, tfootTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); + mapTagToFunc(gFunctionMap, theadTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); + + gFunctionMap.set(commentAtom.impl(), &LegacyHTMLTreeBuilder::commentCreateErrorCheck); + gFunctionMap.set(textAtom.impl(), &LegacyHTMLTreeBuilder::textCreateErrorCheck); + } + + bool proceed = true; + RefPtr result; + if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) + proceed = (this->*errorCheckFunc)(t, result); + if (proceed) + result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get()); + return result.release(); +} + +bool LegacyHTMLTreeBuilder::allowNestedRedundantTag(const AtomicString& tagName) +{ + // www.liceo.edu.mx is an example of a site that achieves a level of nesting of + // about 1500 tags, all from a bunch of s. We will only allow at most 20 + // nested tags of the same type before just ignoring them all together. 
+ unsigned i = 0; + for (HTMLStackElem* curr = m_blockStack; + i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; + curr = curr->next, i++) { } + return i != cMaxRedundantTagDepth; +} + +void LegacyHTMLTreeBuilder::processCloseTag(Token* t) +{ + // Support for really broken html. + // we never close the body tag, since some stupid web pages close it before the actual end of the doc. + // let's rely on the end() call to close things. + if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) + return; + + bool checkForCloseTagErrors = true; + if (t->tagName == formTag && m_currentFormElement) { + m_currentFormElement = 0; + checkForCloseTagErrors = false; + } else if (t->tagName == mapTag) + m_currentMapElement = 0; + else if (t->tagName == pTag) + checkForCloseTagErrors = false; + + HTMLStackElem* oldElem = m_blockStack; + popBlock(t->tagName, checkForCloseTagErrors); + if (oldElem == m_blockStack && t->tagName == pTag) { + // We encountered a stray

. Amazingly Gecko, WinIE, and MacIE all treat + // this as a valid break, i.e.,

. So go ahead and make the empty + // paragraph. + t->beginTag = true; + parseToken(t); + popBlock(t->tagName); + reportError(StrayParagraphCloseError); + } +} + +bool LegacyHTMLTreeBuilder::isHeadingTag(const AtomicString& tagName) +{ + DEFINE_STATIC_LOCAL(TagNameSet, headingTags, ()); + if (headingTags.isEmpty()) { + QualifiedName tagNames[] = { h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag }; + addTags(headingTags, tagNames); + } + return headingTags.contains(tagName.impl()); +} + +bool LegacyHTMLTreeBuilder::isInline(Node* node) const +{ + if (node->isTextNode()) + return true; + + if (node->isHTMLElement()) { + HTMLElement* e = static_cast(node); + if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || + e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || + e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || + e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || + e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || + e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || + e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || + e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || + e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || + e->hasLocalName(noembedTag) || e->hasLocalName(markTag)) + return true; +#if !ENABLE(XHTMLMP) + if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { + Frame* frame = m_document->frame(); + if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) + return true; + } +#endif + } + + return false; +} + +bool LegacyHTMLTreeBuilder::isResidualStyleTag(const AtomicString& tagName) +{ + DEFINE_STATIC_LOCAL(HashSet, residualStyleTags, ()); + if (residualStyleTags.isEmpty()) { + QualifiedName tagNames[] = { aTag, fontTag, ttTag, uTag, bTag, iTag, + sTag, strikeTag, bigTag, 
smallTag, emTag, strongTag, dfnTag, + codeTag, sampTag, kbdTag, varTag, nobrTag, markTag }; + addTags(residualStyleTags, tagNames); + } + return residualStyleTags.contains(tagName.impl()); +} + +bool LegacyHTMLTreeBuilder::isAffectedByResidualStyle(const AtomicString& tagName) +{ + DEFINE_STATIC_LOCAL(HashSet, unaffectedTags, ()); + if (unaffectedTags.isEmpty()) { + QualifiedName tagNames[] = { bodyTag, tableTag, theadTag, tbodyTag, + tfootTag, trTag, thTag, tdTag, captionTag, colgroupTag, colTag, + optionTag, optgroupTag, selectTag, objectTag, datagridTag, datalistTag }; + addTags(unaffectedTags, tagNames); + } + return !unaffectedTags.contains(tagName.impl()); +} + +void LegacyHTMLTreeBuilder::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) +{ + HTMLStackElem* maxElem = 0; + bool finished = false; + bool strayTableContent = elem->strayTableContent; + + unsigned iterationCount = 0; + + m_handlingResidualStyleAcrossBlocks = true; + while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) { + // Find the outermost element that crosses over to a higher level. If there exists another higher-level + // element, we will do another pass, until we have corrected the innermost one. + ExceptionCode ec = 0; + HTMLStackElem* curr = m_blockStack; + HTMLStackElem* prev = 0; + HTMLStackElem* prevMaxElem = 0; + maxElem = 0; + finished = true; + while (curr && curr != elem) { + if (curr->level > elem->level) { + if (!isAffectedByResidualStyle(curr->tagName)) + return; + if (maxElem) + // We will need another pass. + finished = false; + maxElem = curr; + prevMaxElem = prev; + } + + prev = curr; + curr = curr->next; + } + + if (!curr || !maxElem) + return; + + Node* residualElem = prev->node; + Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current; + Node* parentElem = elem->node; + + // Check to see if the reparenting that is going to occur is allowed according to the DOM. 
+ // FIXME: We should either always allow it or perform an additional fixup instead of + // just bailing here. + // Example:

blah

isn't doing a fixup right now. + if (!parentElem->childAllowed(blockElem)) + return; + + m_hasPElementInScope = Unknown; + + if (maxElem->node->parentNode() != elem->node) { + // Walk the stack and remove any elements that aren't residual style tags. These + // are basically just being closed up. Example: + // Moo

Goo

. + // In the above example, the doesn't need to be reopened. It can just close. + HTMLStackElem* currElem = maxElem->next; + HTMLStackElem* prevElem = maxElem; + while (currElem != elem) { + HTMLStackElem* nextElem = currElem->next; + if (!isResidualStyleTag(currElem->tagName)) { + prevElem->next = nextElem; + prevElem->derefNode(); + prevElem->node = currElem->node; + prevElem->didRefNode = currElem->didRefNode; + delete currElem; + m_treeDepth--; + } else + prevElem = currElem; + currElem = nextElem; + } + + // We have to reopen residual tags in between maxElem and elem. An example of this case is: + // Moo

Foo. + // In this case, we need to transform the part before the

into: + // Moo + // so that the will remain open. This involves the modification of elements + // in the block stack. + // This will also affect how we ultimately reparent the block, since we want it to end up + // under the reopened residual tags (e.g., the in the above example.) + RefPtr prevNode = 0; + currElem = maxElem; + while (currElem->node != residualElem) { + if (isResidualStyleTag(currElem->node->localName())) { + // Create a clone of this element. + // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. + Node* currNode = currElem->node->cloneNode(false).releaseRef(); + reportError(ResidualStyleError, &currNode->localName()); + + // Change the stack element's node to point to the clone. + // The stack element adopts the reference we obtained above by calling release(). + currElem->derefNode(); + currElem->node = currNode; + currElem->didRefNode = true; + + // Attach the previous node as a child of this new node. + if (prevNode) + currNode->appendChild(prevNode, ec); + else // The new parent for the block element is going to be the innermost clone. + parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. + + prevNode = currNode; + } + + currElem = currElem->next; + } + + // Now append the chain of new residual style elements if one exists. + if (prevNode) + elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. + } + + // Check if the block is still in the tree. If it isn't, then we don't + // want to remove it from its parent (that would crash) or insert it into + // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 + bool isBlockStillInTree = blockElem->parentNode(); + + // We need to make a clone of |residualElem| and place it just inside |blockElem|. 
+ // All content of |blockElem| is reparented to be under this clone. We then + // reparent |blockElem| using real DOM calls so that attachment/detachment will + // be performed to fix up the rendering tree. + // So for this example: ...

FooGoo

+ // The end result will be: ...

FooGoo

+ // + // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. + if (isBlockStillInTree) + blockElem->parentNode()->removeChild(blockElem, ec); + + Node* newNodePtr = 0; + if (blockElem->firstChild()) { + // Step 2: Clone |residualElem|. + RefPtr newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. + newNodePtr = newNode.get(); + reportError(ResidualStyleError, &newNode->localName()); + + // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| + // before we've put |newElem| into the document. That way we'll only do one attachment of all + // the new content (instead of a bunch of individual attachments). + Node* currNode = blockElem->firstChild(); + while (currNode) { + Node* nextNode = currNode->nextSibling(); + newNode->appendChild(currNode, ec); + currNode = nextNode; + } + + // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no + // attachment can occur yet. + blockElem->appendChild(newNode.release(), ec); + } else + finished = true; + + // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. + if (isBlockStillInTree) + parentElem->appendChild(blockElem, ec); + + // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update + // the node associated with the previous stack element so that when it gets popped, + // it doesn't make the residual element the next current node. + HTMLStackElem* currElem = maxElem; + HTMLStackElem* prevElem = 0; + while (currElem != elem) { + prevElem = currElem; + currElem = currElem->next; + } + prevElem->next = elem->next; + prevElem->derefNode(); + prevElem->node = elem->node; + prevElem->didRefNode = elem->didRefNode; + m_treeDepth--; + if (!finished) { + // Repurpose |elem| to represent |newNode| and insert it at the appropriate position + // in the stack. 
We do not do this for the innermost block, because in that case the new + // node is effectively no longer open. + elem->next = maxElem; + elem->node = prevMaxElem->node; + elem->didRefNode = prevMaxElem->didRefNode; + elem->strayTableContent = false; + prevMaxElem->next = elem; + ASSERT(newNodePtr); + prevMaxElem->node = newNodePtr; + newNodePtr->ref(); + prevMaxElem->didRefNode = true; + m_treeDepth++; + } else + delete elem; + } + + // FIXME: If we ever make a case like this work: + //
+ // Then this check will be too simplistic. Right now the
chain will end up inside the , which is pretty crazy. + if (strayTableContent) + m_inStrayTableContent--; + + // Step 7: Reopen intermediate inlines, e.g.,

FooGoo

. + // In the above example, Goo should stay italic. + // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. + + HTMLStackElem* curr = m_blockStack; + HTMLStackElem* residualStyleStack = 0; + unsigned stackDepth = 1; + unsigned redundantStyleCount = 0; + while (curr && curr != maxElem) { + // We will actually schedule this tag for reopening + // after we complete the close of this entire block. + if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { + // We've overloaded the use of stack elements and are just reusing the + // struct with a slightly different meaning to the variables. Instead of chaining + // from innermost to outermost, we build up a list of all the tags we need to reopen + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing + // to the outermost tag we need to reopen. + // We also set curr->node to be the actual element that corresponds to the ID stored in + // curr->id rather than the node that you should pop to when the element gets pulled off + // the stack. + if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) + redundantStyleCount++; + else + redundantStyleCount = 0; + + if (redundantStyleCount < cMaxRedundantTagDepth) + moveOneBlockToStack(residualStyleStack); + else + popOneBlock(); + } else + popOneBlock(); + + curr = m_blockStack; + } + + reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. + + m_handlingResidualStyleAcrossBlocks = false; +} + +void LegacyHTMLTreeBuilder::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) +{ + // Loop for each tag that needs to be reopened. + while (elem) { + // Create a shallow clone of the DOM node for this element. 
+ RefPtr newNode = elem->node->cloneNode(false); + reportError(ResidualStyleError, &newNode->localName()); + + // Append the new node. In the malformed table case, we need to insert before the table, + // which will be the last child. + ExceptionCode ec = 0; + if (malformedTableParent) + malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); + else + m_current->appendChild(newNode, ec); + // FIXME: Is it really OK to ignore the exceptions here? + + // Now push a new stack element for this node we just created. + pushBlock(elem->tagName, elem->level); + newNode->beginParsingChildren(); + + // Set our strayTableContent boolean if needed, so that the reopened tag also knows + // that it is inside a malformed table. + m_blockStack->strayTableContent = malformedTableParent != 0; + if (m_blockStack->strayTableContent) + m_inStrayTableContent++; + + // Clear our malformed table parent variable. + malformedTableParent = 0; + + // Update |current| manually to point to the new node. + setCurrent(newNode.get()); + + // Advance to the next tag that needs to be reopened. 
+ HTMLStackElem* next = elem->next; + elem->derefNode(); + delete elem; + elem = next; + } +} + +void LegacyHTMLTreeBuilder::pushBlock(const AtomicString& tagName, int level) +{ + m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); + if (level >= minBlockLevelTagPriority) + m_blocksInStack++; + m_treeDepth++; + m_didRefCurrent = false; + if (tagName == pTag) + m_hasPElementInScope = InScope; + else if (isScopingTag(tagName)) + m_hasPElementInScope = NotInScope; +} + +void LegacyHTMLTreeBuilder::popBlock(const AtomicString& tagName, bool reportErrors) +{ + HTMLStackElem* elem = m_blockStack; + + if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) + return; + + int maxLevel = 0; + + while (elem && (elem->tagName != tagName)) { + if (maxLevel < elem->level) + maxLevel = elem->level; + elem = elem->next; + } + + if (!elem) { + if (reportErrors) + reportError(StrayCloseTagError, &tagName, 0, true); + return; + } + + if (maxLevel > elem->level) { + // We didn't match because the tag is in a different scope, e.g., + //

Foo. Try to correct the problem. + if (!isResidualStyleTag(tagName)) + return; + return handleResidualStyleCloseTagAcrossBlocks(elem); + } + + bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); + HTMLStackElem* residualStyleStack = 0; + Node* malformedTableParent = 0; + + elem = m_blockStack; + unsigned stackDepth = 1; + unsigned redundantStyleCount = 0; + while (elem) { + if (elem->tagName == tagName) { + int strayTable = m_inStrayTableContent; + popOneBlock(); + elem = 0; + + // This element was the root of some malformed content just inside an implicit or + // explicit or . + // If we end up needing to reopen residual style tags, the root of the reopened chain + // must also know that it is the root of malformed content inside a /. + if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { + Node* curr = m_current; + while (curr && !curr->hasTagName(tableTag)) + curr = curr->parentNode(); + malformedTableParent = curr ? curr->parentNode() : 0; + } + } + else { + if (m_currentFormElement && elem->tagName == formTag) + // A is being closed prematurely (and this is + // malformed HTML). Set an attribute on the form to clear out its + // bottom margin. + m_currentFormElement->setMalformed(true); + + // Schedule this tag for reopening + // after we complete the close of this entire block. + if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { + // We've overloaded the use of stack elements and are just reusing the + // struct with a slightly different meaning to the variables. Instead of chaining + // from innermost to outermost, we build up a list of all the tags we need to reopen + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing + // to the outermost tag we need to reopen. 
+ // We also set elem->node to be the actual element that corresponds to the ID stored in + // elem->id rather than the node that you should pop to when the element gets pulled off + // the stack. + if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) + redundantStyleCount++; + else + redundantStyleCount = 0; + + if (redundantStyleCount < cMaxRedundantTagDepth) + moveOneBlockToStack(residualStyleStack); + else + popOneBlock(); + } else + popOneBlock(); + elem = m_blockStack; + } + } + + reopenResidualStyleTags(residualStyleStack, malformedTableParent); +} + +inline HTMLStackElem* LegacyHTMLTreeBuilder::popOneBlockCommon() +{ + HTMLStackElem* elem = m_blockStack; + + // Form elements restore their state during the parsing process. + // Also, a few elements (, ) need to know when all child elements (s) are available. + if (m_current && elem->node != m_current) + m_current->finishParsingChildren(); + + if (m_blockStack->level >= minBlockLevelTagPriority) { + ASSERT(m_blocksInStack > 0); + m_blocksInStack--; + } + m_treeDepth--; + m_blockStack = elem->next; + m_current = elem->node; + m_didRefCurrent = elem->didRefNode; + + if (elem->strayTableContent) + m_inStrayTableContent--; + + if (elem->tagName == pTag) + m_hasPElementInScope = NotInScope; + else if (isScopingTag(elem->tagName)) + m_hasPElementInScope = Unknown; + + return elem; +} + +void LegacyHTMLTreeBuilder::popOneBlock() +{ + // Store the current node before popOneBlockCommon overwrites it. + Node* lastCurrent = m_current; + bool didRefLastCurrent = m_didRefCurrent; + + delete popOneBlockCommon(); + + if (didRefLastCurrent) + lastCurrent->deref(); +} + +void LegacyHTMLTreeBuilder::moveOneBlockToStack(HTMLStackElem*& head) +{ + // We'll be using the stack element we're popping, but for the current node. + // See the two callers for details. 
+ + // Store the current node before popOneBlockCommon overwrites it. + Node* lastCurrent = m_current; + bool didRefLastCurrent = m_didRefCurrent; + + // Pop the block, but don't deref the current node as popOneBlock does because + // we'll be using the pointer in the new stack element. + HTMLStackElem* elem = popOneBlockCommon(); + + // Transfer the current node into the stack element. + // No need to deref the old elem->node because popOneBlockCommon transferred + // it into the m_current/m_didRefCurrent fields. + elem->node = lastCurrent; + elem->didRefNode = didRefLastCurrent; + elem->next = head; + head = elem; +} + +void LegacyHTMLTreeBuilder::checkIfHasPElementInScope() +{ + m_hasPElementInScope = NotInScope; + HTMLStackElem* elem = m_blockStack; + while (elem) { + const AtomicString& tagName = elem->tagName; + if (tagName == pTag) { + m_hasPElementInScope = InScope; + return; + } else if (isScopingTag(tagName)) + return; + elem = elem->next; + } +} + +void LegacyHTMLTreeBuilder::popInlineBlocks() +{ + while (m_blockStack && isInline(m_current)) + popOneBlock(); +} + +void LegacyHTMLTreeBuilder::freeBlock() +{ + while (m_blockStack) + popOneBlock(); + ASSERT(!m_blocksInStack); + ASSERT(!m_treeDepth); +} + +void LegacyHTMLTreeBuilder::createHead() +{ + if (m_head) + return; + + if (!m_document->documentElement() && !m_isParsingFragment) { + insertNode(HTMLHtmlElement::create(m_document).get()); + ASSERT(m_document->documentElement() || m_isParsingFragment); + } + + m_head = HTMLHeadElement::create(m_document); + + if (m_isParsingFragment) + return; + + HTMLElement* body = m_document->body(); + ExceptionCode ec = 0; + m_document->documentElement()->insertBefore(m_head.get(), body, ec); + if (ec) + m_head = 0; + + // If the body does not exist yet, then the should be pushed as the current block. 
+ if (m_head && !body) { + pushBlock(m_head->localName(), m_head->tagPriority()); + setCurrent(m_head.get()); + } +} + +PassRefPtr LegacyHTMLTreeBuilder::handleIsindex(Token* t) +{ + RefPtr n = HTMLDivElement::create(m_document); + + NamedNodeMap* attrs = t->attrs.get(); + + RefPtr isIndex = HTMLIsIndexElement::create(m_document, m_currentFormElement.get()); + isIndex->setAttributeMap(attrs); + isIndex->setAttribute(typeAttr, "khtml_isindex"); + + String text = searchableIndexIntroduction(); + if (attrs) { + if (Attribute* a = attrs->getAttributeItem(promptAttr)) + text = a->value().string() + " "; + t->attrs = 0; + } + + n->legacyParserAddChild(HTMLHRElement::create(m_document)); + n->legacyParserAddChild(Text::create(m_document, text)); + n->legacyParserAddChild(isIndex.release()); + n->legacyParserAddChild(HTMLHRElement::create(m_document)); + + return n.release(); +} + +void LegacyHTMLTreeBuilder::startBody() +{ + if (m_inBody) + return; + + m_inBody = true; + + if (m_isindexElement) { + insertNode(m_isindexElement.get(), true /* don't descend into this node */); + m_isindexElement = 0; + } +} + +void LegacyHTMLTreeBuilder::finished() +{ + // In the case of a completely empty document, here's the place to create the HTML element. + if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) + insertNode(HTMLHtmlElement::create(m_document).get()); + + // This ensures that "current" is not left pointing to a node when the document is destroyed. + freeBlock(); + setCurrent(0); + + // Warning, this may delete the parser, so don't try to do anything else after this. 
+ if (!m_isParsingFragment) + m_document->finishedParsing(); +} + +void LegacyHTMLTreeBuilder::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) +{ + Frame* frame = m_document->frame(); + if (!frame) + return; + + ScriptableDocumentParser* parser = m_document->scriptableDocumentParser(); + int lineNumber = parser->lineNumber() + 1; + + AtomicString tag1; + AtomicString tag2; + if (tagName1) { + if (*tagName1 == "#text") + tag1 = "Text"; + else if (*tagName1 == "#comment") + tag1 = ""; + else + tag1 = (closeTags ? ""; + } + if (tagName2) { + if (*tagName2 == "#text") + tag2 = "Text"; + else if (*tagName2 == "#comment") + tag2 = ""; + else + tag2 = (closeTags ? ""; + } + + const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); + if (!errorMsg) + return; + + String message; + if (parser->processingContentWrittenByScript()) + message += htmlParserDocumentWriteMessage(); + message += errorMsg; + message.replace("%tag1", tag1); + message.replace("%tag2", tag2); + + frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, + isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, + message, lineNumber, m_document->url().string()); +} + +#ifdef BUILDING_ON_LEOPARD +bool shouldCreateImplicitHead(Document* document) +{ + ASSERT(document); + + Settings* settings = document->page() ? document->page()->settings() : 0; + return settings ? !settings->needsLeopardMailQuirks() : true; +} +#elif defined(BUILDING_ON_TIGER) +bool shouldCreateImplicitHead(Document* document) +{ + ASSERT(document); + + Settings* settings = document->page() ? document->page()->settings() : 0; + return settings ? !settings->needsTigerMailQuirks() : true; +} +#endif + + +String serializeForNumberType(double number) +{ + // According to HTML5, "the best representation of the number n as a floating + // point number" is a string produced by applying ToString() to n. 
+ DtoaBuffer buffer; + unsigned length; + doubleToStringInJavaScriptFormat(number, buffer, &length); + return String(buffer, length); +} + +bool parseToDoubleForNumberType(const String& src, double* out) +{ + // See HTML5 2.4.4.3 `Real numbers.' + + if (src.isEmpty()) + return false; + // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. + // So, check the first character. + if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) + return false; + + bool valid = false; + double value = src.toDouble(&valid); + if (!valid) + return false; + // NaN and Infinity are not valid numbers according to the standard. + if (!isfinite(value)) + return false; + // -0 -> 0 + if (!value) + value = 0; + if (out) + *out = value; + return true; +} + +}