diff -r 000000000000 -r 4f2f89ce4247 WebCore/html/HTMLTreeBuilder.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/WebCore/html/HTMLTreeBuilder.h Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLTreeBuilder_h
+#define HTMLTreeBuilder_h
+
+#include "Element.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLConstructionSite.h"
+#include "HTMLElementStack.h"
+#include "HTMLFormattingElementList.h"
+#include "HTMLTokenizer.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace WebCore {
+
+class AtomicHTMLToken;
+class Document;
+class DocumentFragment;
+class Frame;
+class HTMLToken;
+class HTMLDocument;
+class LegacyHTMLTreeBuilder;
+class Node;
+
+class HTMLTreeBuilder : public Noncopyable {
+public:
+ // FIXME: Replace constructors with create() functions returning PassOwnPtrs
+ HTMLTreeBuilder(HTMLTokenizer*, HTMLDocument*, bool reportErrors);
+ HTMLTreeBuilder(HTMLTokenizer*, DocumentFragment*, FragmentScriptingPermission);
+ ~HTMLTreeBuilder();
+
+ void setPaused(bool paused) { m_isPaused = paused; }
+ bool isPaused() const { return m_isPaused; }
+
+ // The token really should be passed as a const& since it's never modified.
+ void constructTreeFromToken(HTMLToken&);
+ // Must be called when parser is paused before calling the parser again.
+ PassRefPtr takeScriptToProcess(int& scriptStartLine);
+
+ // Done, close any open tags, etc.
+ void finished();
+
+ static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*);
+
+ // FIXME: This is a dirty, rotten hack to keep HTMLFormControlElement happy
+ // until we stop using the legacy parser. DO NOT CALL THIS METHOD.
+ LegacyHTMLTreeBuilder* legacyTreeBuilder() const { return m_legacyTreeBuilder.get(); }
+
+private:
+ class FakeInsertionMode;
+ class ExternalCharacterTokenBuffer;
+ // Represents HTML5 "insertion mode"
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
+ enum InsertionMode {
+ InitialMode,
+ BeforeHTMLMode,
+ BeforeHeadMode,
+ InHeadMode,
+ InHeadNoscriptMode,
+ AfterHeadMode,
+ InBodyMode,
+ TextMode,
+ InTableMode,
+ InTableTextMode,
+ InCaptionMode,
+ InColumnGroupMode,
+ InTableBodyMode,
+ InRowMode,
+ InCellMode,
+ InSelectMode,
+ InSelectInTableMode,
+ InForeignContentMode,
+ AfterBodyMode,
+ InFramesetMode,
+ AfterFramesetMode,
+ AfterAfterBodyMode,
+ AfterAfterFramesetMode,
+ };
+
+ void passTokenToLegacyParser(HTMLToken&);
+
+ void processToken(AtomicHTMLToken&);
+
+ void processDoctypeToken(AtomicHTMLToken&);
+ void processStartTag(AtomicHTMLToken&);
+ void processEndTag(AtomicHTMLToken&);
+ void processComment(AtomicHTMLToken&);
+ void processCharacter(AtomicHTMLToken&);
+ void processEndOfFile(AtomicHTMLToken&);
+
+ bool processStartTagForInHead(AtomicHTMLToken&);
+ void processStartTagForInBody(AtomicHTMLToken&);
+ void processStartTagForInTable(AtomicHTMLToken&);
+ void processEndTagForInBody(AtomicHTMLToken&);
+ void processEndTagForInTable(AtomicHTMLToken&);
+ void processEndTagForInTableBody(AtomicHTMLToken&);
+ void processEndTagForInRow(AtomicHTMLToken&);
+ void processEndTagForInCell(AtomicHTMLToken&);
+
+ void processIsindexStartTagForInBody(AtomicHTMLToken&);
+ bool processBodyEndTagForInBody(AtomicHTMLToken&);
+ bool processTableEndTagForInTable();
+ bool processCaptionEndTagForInCaption();
+ bool processColgroupEndTagForInColumnGroup();
+ bool processTrEndTagForInRow();
+ // FIXME: This function should be inlined into its one call site or it
+ // needs to assert which tokens it can be called with.
+ void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
+
+ void processCharacterBuffer(ExternalCharacterTokenBuffer&);
+
+ void processFakeStartTag(const QualifiedName&, PassRefPtr attributes = 0);
+ void processFakeEndTag(const QualifiedName&);
+ void processFakeCharacters(const String&);
+ void processFakePEndTagIfPInButtonScope();
+
+ void processGenericRCDATAStartTag(AtomicHTMLToken&);
+ void processGenericRawTextStartTag(AtomicHTMLToken&);
+ void processScriptStartTag(AtomicHTMLToken&);
+
+ // Default processing for the different insertion modes.
+ void defaultForInitial();
+ void defaultForBeforeHTML();
+ void defaultForBeforeHead();
+ void defaultForInHead();
+ void defaultForInHeadNoscript();
+ void defaultForAfterHead();
+ void defaultForInTableText();
+
+ void processUsingSecondaryInsertionModeAndAdjustInsertionMode(AtomicHTMLToken&);
+
+ PassRefPtr attributesForIsindexInput(AtomicHTMLToken&);
+
+ HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
+ void reparentChildren(Element* oldParent, Element* newParent);
+ void callTheAdoptionAgency(AtomicHTMLToken&);
+
+ void closeTheCell();
+
+ template
+ void processCloseWhenNestedTag(AtomicHTMLToken&);
+
+ bool m_framesetOk;
+
+ // FIXME: Implement error reporting.
+ void parseError(AtomicHTMLToken&) { }
+
+ void handleScriptStartTag();
+ void handleScriptEndTag(Element*, int scriptStartLine);
+
+ InsertionMode insertionMode() const { return m_insertionMode; }
+ void setInsertionMode(InsertionMode mode)
+ {
+ m_insertionMode = mode;
+ m_isFakeInsertionMode = false;
+ }
+
+ bool isFakeInsertionMode() { return m_isFakeInsertionMode; }
+ void setFakeInsertionMode(InsertionMode mode)
+ {
+ m_insertionMode = mode;
+ m_isFakeInsertionMode = true;
+ }
+
+ void setSecondaryInsertionMode(InsertionMode);
+
+ void setInsertionModeAndEnd(InsertionMode, bool foreign); // Helper for resetInsertionModeAppropriately
+ void resetInsertionModeAppropriately();
+
+ static bool isScriptingFlagEnabled(Frame* frame);
+
+ Document* m_document;
+ HTMLConstructionSite m_tree;
+
+ bool m_reportErrors;
+ bool m_isPaused;
+ bool m_isFakeInsertionMode;
+
+ // FIXME: InsertionModes should be a separate object to prevent direct
+ // manipulation of these variables. For now, be careful to always use
+ // setInsertionMode and never set m_insertionMode directly.
+ InsertionMode m_insertionMode;
+ InsertionMode m_originalInsertionMode;
+ InsertionMode m_secondaryInsertionMode;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
+ Vector m_pendingTableCharacters;
+
+ // HTML5 spec requires that we be able to change the state of the tokenizer
+ // from within parser actions.
+ HTMLTokenizer* m_tokenizer;
+
+ // We're re-using logic from the old LegacyHTMLTreeBuilder while this class is being written.
+ OwnPtr m_legacyTreeBuilder;
+
+ // These members are intentionally duplicated as the first set is a hack
+ // on top of the legacy parser which will eventually be removed.
+ RefPtr m_lastScriptElement; // FIXME: Hack for