WebCore/html/HTMLTreeBuilder.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    24  */
       
    25 
       
    26 #ifndef HTMLTreeBuilder_h
       
    27 #define HTMLTreeBuilder_h
       
    28 
       
    29 #include "Element.h"
       
    30 #include "FragmentScriptingPermission.h"
       
    31 #include "HTMLConstructionSite.h"
       
    32 #include "HTMLElementStack.h"
       
    33 #include "HTMLFormattingElementList.h"
       
    34 #include "HTMLTokenizer.h"
       
    35 #include <wtf/Noncopyable.h>
       
    36 #include <wtf/OwnPtr.h>
       
    37 #include <wtf/PassOwnPtr.h>
       
    38 #include <wtf/PassRefPtr.h>
       
    39 #include <wtf/RefPtr.h>
       
    40 #include <wtf/unicode/Unicode.h>
       
    41 
       
    42 namespace WebCore {
       
    43 
       
    44 class AtomicHTMLToken;
       
    45 class Document;
       
    46 class DocumentFragment;
       
    47 class Frame;
       
    48 class HTMLToken;
       
    49 class HTMLDocument;
       
    50 class LegacyHTMLTreeBuilder;
       
    51 class Node;
       
    52 
       
    53 class HTMLTreeBuilder : public Noncopyable {
       
    54 public:
       
    55     // FIXME: Replace constructors with create() functions returning PassOwnPtrs
       
    56     HTMLTreeBuilder(HTMLTokenizer*, HTMLDocument*, bool reportErrors);
       
    57     HTMLTreeBuilder(HTMLTokenizer*, DocumentFragment*, FragmentScriptingPermission);
       
    58     ~HTMLTreeBuilder();
       
    59 
       
    60     void setPaused(bool paused) { m_isPaused = paused; }
       
    61     bool isPaused() const { return m_isPaused; }
       
    62 
       
    63     // The token really should be passed as a const& since it's never modified.
       
    64     void constructTreeFromToken(HTMLToken&);
       
    65     // Must be called when parser is paused before calling the parser again.
       
    66     PassRefPtr<Element> takeScriptToProcess(int& scriptStartLine);
       
    67 
       
    68     // Done, close any open tags, etc.
       
    69     void finished();
       
    70 
       
    71     static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*);
       
    72 
       
    73     // FIXME: This is a dirty, rotten hack to keep HTMLFormControlElement happy
       
    74     // until we stop using the legacy parser. DO NOT CALL THIS METHOD.
       
    75     LegacyHTMLTreeBuilder* legacyTreeBuilder() const { return m_legacyTreeBuilder.get(); }
       
    76 
       
    77 private:
       
    78     class FakeInsertionMode;
       
    79     class ExternalCharacterTokenBuffer;
       
    80     // Represents HTML5 "insertion mode"
       
    81     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
       
    82     enum InsertionMode {
       
    83         InitialMode,
       
    84         BeforeHTMLMode,
       
    85         BeforeHeadMode,
       
    86         InHeadMode,
       
    87         InHeadNoscriptMode,
       
    88         AfterHeadMode,
       
    89         InBodyMode,
       
    90         TextMode,
       
    91         InTableMode,
       
    92         InTableTextMode,
       
    93         InCaptionMode,
       
    94         InColumnGroupMode,
       
    95         InTableBodyMode,
       
    96         InRowMode,
       
    97         InCellMode,
       
    98         InSelectMode,
       
    99         InSelectInTableMode,
       
   100         InForeignContentMode,
       
   101         AfterBodyMode,
       
   102         InFramesetMode,
       
   103         AfterFramesetMode,
       
   104         AfterAfterBodyMode,
       
   105         AfterAfterFramesetMode,
       
   106     };
       
   107 
       
   108     void passTokenToLegacyParser(HTMLToken&);
       
   109 
       
   110     void processToken(AtomicHTMLToken&);
       
   111 
       
   112     void processDoctypeToken(AtomicHTMLToken&);
       
   113     void processStartTag(AtomicHTMLToken&);
       
   114     void processEndTag(AtomicHTMLToken&);
       
   115     void processComment(AtomicHTMLToken&);
       
   116     void processCharacter(AtomicHTMLToken&);
       
   117     void processEndOfFile(AtomicHTMLToken&);
       
   118 
       
   119     bool processStartTagForInHead(AtomicHTMLToken&);
       
   120     void processStartTagForInBody(AtomicHTMLToken&);
       
   121     void processStartTagForInTable(AtomicHTMLToken&);
       
   122     void processEndTagForInBody(AtomicHTMLToken&);
       
   123     void processEndTagForInTable(AtomicHTMLToken&);
       
   124     void processEndTagForInTableBody(AtomicHTMLToken&);
       
   125     void processEndTagForInRow(AtomicHTMLToken&);
       
   126     void processEndTagForInCell(AtomicHTMLToken&);
       
   127 
       
   128     void processIsindexStartTagForInBody(AtomicHTMLToken&);
       
   129     bool processBodyEndTagForInBody(AtomicHTMLToken&);
       
   130     bool processTableEndTagForInTable();
       
   131     bool processCaptionEndTagForInCaption();
       
   132     bool processColgroupEndTagForInColumnGroup();
       
   133     bool processTrEndTagForInRow();
       
   134     // FIXME: This function should be inlined into its one call site or it
       
   135     // needs to assert which tokens it can be called with.
       
   136     void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
       
   137 
       
   138     void processCharacterBuffer(ExternalCharacterTokenBuffer&);
       
   139 
       
   140     void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
       
   141     void processFakeEndTag(const QualifiedName&);
       
   142     void processFakeCharacters(const String&);
       
   143     void processFakePEndTagIfPInButtonScope();
       
   144 
       
   145     void processGenericRCDATAStartTag(AtomicHTMLToken&);
       
   146     void processGenericRawTextStartTag(AtomicHTMLToken&);
       
   147     void processScriptStartTag(AtomicHTMLToken&);
       
   148 
       
   149     // Default processing for the different insertion modes.
       
   150     void defaultForInitial();
       
   151     void defaultForBeforeHTML();
       
   152     void defaultForBeforeHead();
       
   153     void defaultForInHead();
       
   154     void defaultForInHeadNoscript();
       
   155     void defaultForAfterHead();
       
   156     void defaultForInTableText();
       
   157 
       
   158     void processUsingSecondaryInsertionModeAndAdjustInsertionMode(AtomicHTMLToken&);
       
   159 
       
   160     PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
       
   161 
       
   162     HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
       
   163     void reparentChildren(Element* oldParent, Element* newParent);
       
   164     void callTheAdoptionAgency(AtomicHTMLToken&);
       
   165 
       
   166     void closeTheCell();
       
   167 
       
   168     template <bool shouldClose(const Element*)>
       
   169     void processCloseWhenNestedTag(AtomicHTMLToken&);
       
   170 
       
   171     bool m_framesetOk;
       
   172 
       
   173     // FIXME: Implement error reporting.
       
   174     void parseError(AtomicHTMLToken&) { }
       
   175 
       
   176     void handleScriptStartTag();
       
   177     void handleScriptEndTag(Element*, int scriptStartLine);
       
   178 
       
   179     InsertionMode insertionMode() const { return m_insertionMode; }
       
   180     void setInsertionMode(InsertionMode mode)
       
   181     {
       
   182         m_insertionMode = mode;
       
   183         m_isFakeInsertionMode = false;
       
   184     }
       
   185 
       
   186     bool isFakeInsertionMode() { return m_isFakeInsertionMode; }
       
   187     void setFakeInsertionMode(InsertionMode mode)
       
   188     {
       
   189         m_insertionMode = mode;
       
   190         m_isFakeInsertionMode = true;
       
   191     }
       
   192 
       
   193     void setSecondaryInsertionMode(InsertionMode);
       
   194 
       
   195     void setInsertionModeAndEnd(InsertionMode, bool foreign); // Helper for resetInsertionModeAppropriately
       
   196     void resetInsertionModeAppropriately();
       
   197 
       
   198     static bool isScriptingFlagEnabled(Frame* frame);
       
   199 
       
   200     Document* m_document;
       
   201     HTMLConstructionSite m_tree;
       
   202 
       
   203     bool m_reportErrors;
       
   204     bool m_isPaused;
       
   205     bool m_isFakeInsertionMode;
       
   206 
       
   207     // FIXME: InsertionModes should be a separate object to prevent direct
       
   208     // manipulation of these variables.  For now, be careful to always use
       
   209     // setInsertionMode and never set m_insertionMode directly.
       
   210     InsertionMode m_insertionMode;
       
   211     InsertionMode m_originalInsertionMode;
       
   212     InsertionMode m_secondaryInsertionMode;
       
   213 
       
   214     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
       
   215     Vector<UChar> m_pendingTableCharacters;
       
   216 
       
   217     // HTML5 spec requires that we be able to change the state of the tokenizer
       
   218     // from within parser actions.
       
   219     HTMLTokenizer* m_tokenizer;
       
   220 
       
   221     // We're re-using logic from the old LegacyHTMLTreeBuilder while this class is being written.
       
   222     OwnPtr<LegacyHTMLTreeBuilder> m_legacyTreeBuilder;
       
   223 
       
   224     // These members are intentionally duplicated as the first set is a hack
       
   225     // on top of the legacy parser which will eventually be removed.
       
   226     RefPtr<Element> m_lastScriptElement; // FIXME: Hack for <script> support on top of the old parser.
       
   227     int m_lastScriptElementStartLine; // FIXME: Hack for <script> support on top of the old parser.
       
   228 
       
   229     RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
       
   230     int m_scriptToProcessStartLine; // Starting line number of the script tag needing processing.
       
   231 
       
   232     // FIXME: FragmentScriptingPermission is a HACK for platform/Pasteboard.
       
   233     // FragmentScriptingNotAllowed causes the Parser to remove children
       
   234     // from <script> tags (so javascript doesn't show up in pastes).
       
   235     FragmentScriptingPermission m_fragmentScriptingPermission;
       
   236     bool m_isParsingFragment;
       
   237 };
       
   238 
       
   239 }
       
   240 
       
   241 #endif