|
1 /* |
|
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #ifndef HTMLTreeBuilder_h |
|
27 #define HTMLTreeBuilder_h |
|
28 |
|
29 #include "Element.h" |
|
30 #include "FragmentScriptingPermission.h" |
|
31 #include "HTMLConstructionSite.h" |
|
32 #include "HTMLElementStack.h" |
|
33 #include "HTMLFormattingElementList.h" |
|
34 #include "HTMLTokenizer.h" |
|
35 #include <wtf/Noncopyable.h> |
|
36 #include <wtf/OwnPtr.h> |
|
37 #include <wtf/PassOwnPtr.h> |
|
38 #include <wtf/PassRefPtr.h> |
|
39 #include <wtf/RefPtr.h> |
|
40 #include <wtf/unicode/Unicode.h> |
|
41 |
|
42 namespace WebCore { |
|
43 |
|
44 class AtomicHTMLToken; |
|
45 class Document; |
|
46 class DocumentFragment; |
|
47 class Frame; |
|
48 class HTMLToken; |
|
49 class HTMLDocument; |
|
50 class LegacyHTMLTreeBuilder; |
|
51 class Node; |
|
52 |
|
53 class HTMLTreeBuilder : public Noncopyable { |
|
54 public: |
|
55 // FIXME: Replace constructors with create() functions returning PassOwnPtrs |
|
56 HTMLTreeBuilder(HTMLTokenizer*, HTMLDocument*, bool reportErrors); |
|
57 HTMLTreeBuilder(HTMLTokenizer*, DocumentFragment*, FragmentScriptingPermission); |
|
58 ~HTMLTreeBuilder(); |
|
59 |
|
60 void setPaused(bool paused) { m_isPaused = paused; } |
|
61 bool isPaused() const { return m_isPaused; } |
|
62 |
|
63 // The token really should be passed as a const& since it's never modified. |
|
64 void constructTreeFromToken(HTMLToken&); |
|
65 // Must be called when parser is paused before calling the parser again. |
|
66 PassRefPtr<Element> takeScriptToProcess(int& scriptStartLine); |
|
67 |
|
68 // Done, close any open tags, etc. |
|
69 void finished(); |
|
70 |
|
71 static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*); |
|
72 |
|
73 // FIXME: This is a dirty, rotten hack to keep HTMLFormControlElement happy |
|
74 // until we stop using the legacy parser. DO NOT CALL THIS METHOD. |
|
75 LegacyHTMLTreeBuilder* legacyTreeBuilder() const { return m_legacyTreeBuilder.get(); } |
|
76 |
|
77 private: |
|
78 class FakeInsertionMode; |
|
79 class ExternalCharacterTokenBuffer; |
|
80 // Represents HTML5 "insertion mode" |
|
81 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode |
|
82 enum InsertionMode { |
|
83 InitialMode, |
|
84 BeforeHTMLMode, |
|
85 BeforeHeadMode, |
|
86 InHeadMode, |
|
87 InHeadNoscriptMode, |
|
88 AfterHeadMode, |
|
89 InBodyMode, |
|
90 TextMode, |
|
91 InTableMode, |
|
92 InTableTextMode, |
|
93 InCaptionMode, |
|
94 InColumnGroupMode, |
|
95 InTableBodyMode, |
|
96 InRowMode, |
|
97 InCellMode, |
|
98 InSelectMode, |
|
99 InSelectInTableMode, |
|
100 InForeignContentMode, |
|
101 AfterBodyMode, |
|
102 InFramesetMode, |
|
103 AfterFramesetMode, |
|
104 AfterAfterBodyMode, |
|
105 AfterAfterFramesetMode, |
|
106 }; |
|
107 |
|
108 void passTokenToLegacyParser(HTMLToken&); |
|
109 |
|
110 void processToken(AtomicHTMLToken&); |
|
111 |
|
112 void processDoctypeToken(AtomicHTMLToken&); |
|
113 void processStartTag(AtomicHTMLToken&); |
|
114 void processEndTag(AtomicHTMLToken&); |
|
115 void processComment(AtomicHTMLToken&); |
|
116 void processCharacter(AtomicHTMLToken&); |
|
117 void processEndOfFile(AtomicHTMLToken&); |
|
118 |
|
119 bool processStartTagForInHead(AtomicHTMLToken&); |
|
120 void processStartTagForInBody(AtomicHTMLToken&); |
|
121 void processStartTagForInTable(AtomicHTMLToken&); |
|
122 void processEndTagForInBody(AtomicHTMLToken&); |
|
123 void processEndTagForInTable(AtomicHTMLToken&); |
|
124 void processEndTagForInTableBody(AtomicHTMLToken&); |
|
125 void processEndTagForInRow(AtomicHTMLToken&); |
|
126 void processEndTagForInCell(AtomicHTMLToken&); |
|
127 |
|
128 void processIsindexStartTagForInBody(AtomicHTMLToken&); |
|
129 bool processBodyEndTagForInBody(AtomicHTMLToken&); |
|
130 bool processTableEndTagForInTable(); |
|
131 bool processCaptionEndTagForInCaption(); |
|
132 bool processColgroupEndTagForInColumnGroup(); |
|
133 bool processTrEndTagForInRow(); |
|
134 // FIXME: This function should be inlined into its one call site or it |
|
135 // needs to assert which tokens it can be called with. |
|
136 void processAnyOtherEndTagForInBody(AtomicHTMLToken&); |
|
137 |
|
138 void processCharacterBuffer(ExternalCharacterTokenBuffer&); |
|
139 |
|
140 void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0); |
|
141 void processFakeEndTag(const QualifiedName&); |
|
142 void processFakeCharacters(const String&); |
|
143 void processFakePEndTagIfPInButtonScope(); |
|
144 |
|
145 void processGenericRCDATAStartTag(AtomicHTMLToken&); |
|
146 void processGenericRawTextStartTag(AtomicHTMLToken&); |
|
147 void processScriptStartTag(AtomicHTMLToken&); |
|
148 |
|
149 // Default processing for the different insertion modes. |
|
150 void defaultForInitial(); |
|
151 void defaultForBeforeHTML(); |
|
152 void defaultForBeforeHead(); |
|
153 void defaultForInHead(); |
|
154 void defaultForInHeadNoscript(); |
|
155 void defaultForAfterHead(); |
|
156 void defaultForInTableText(); |
|
157 |
|
158 void processUsingSecondaryInsertionModeAndAdjustInsertionMode(AtomicHTMLToken&); |
|
159 |
|
160 PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&); |
|
161 |
|
162 HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*); |
|
163 void reparentChildren(Element* oldParent, Element* newParent); |
|
164 void callTheAdoptionAgency(AtomicHTMLToken&); |
|
165 |
|
166 void closeTheCell(); |
|
167 |
|
168 template <bool shouldClose(const Element*)> |
|
169 void processCloseWhenNestedTag(AtomicHTMLToken&); |
|
170 |
|
171 bool m_framesetOk; |
|
172 |
|
173 // FIXME: Implement error reporting. |
|
174 void parseError(AtomicHTMLToken&) { } |
|
175 |
|
176 void handleScriptStartTag(); |
|
177 void handleScriptEndTag(Element*, int scriptStartLine); |
|
178 |
|
179 InsertionMode insertionMode() const { return m_insertionMode; } |
|
180 void setInsertionMode(InsertionMode mode) |
|
181 { |
|
182 m_insertionMode = mode; |
|
183 m_isFakeInsertionMode = false; |
|
184 } |
|
185 |
|
186 bool isFakeInsertionMode() { return m_isFakeInsertionMode; } |
|
187 void setFakeInsertionMode(InsertionMode mode) |
|
188 { |
|
189 m_insertionMode = mode; |
|
190 m_isFakeInsertionMode = true; |
|
191 } |
|
192 |
|
193 void setSecondaryInsertionMode(InsertionMode); |
|
194 |
|
195 void setInsertionModeAndEnd(InsertionMode, bool foreign); // Helper for resetInsertionModeAppropriately |
|
196 void resetInsertionModeAppropriately(); |
|
197 |
|
198 static bool isScriptingFlagEnabled(Frame* frame); |
|
199 |
|
200 Document* m_document; |
|
201 HTMLConstructionSite m_tree; |
|
202 |
|
203 bool m_reportErrors; |
|
204 bool m_isPaused; |
|
205 bool m_isFakeInsertionMode; |
|
206 |
|
207 // FIXME: InsertionModes should be a separate object to prevent direct |
|
208 // manipulation of these variables. For now, be careful to always use |
|
209 // setInsertionMode and never set m_insertionMode directly. |
|
210 InsertionMode m_insertionMode; |
|
211 InsertionMode m_originalInsertionMode; |
|
212 InsertionMode m_secondaryInsertionMode; |
|
213 |
|
214 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens |
|
215 Vector<UChar> m_pendingTableCharacters; |
|
216 |
|
217 // HTML5 spec requires that we be able to change the state of the tokenizer |
|
218 // from within parser actions. |
|
219 HTMLTokenizer* m_tokenizer; |
|
220 |
|
221 // We're re-using logic from the old LegacyHTMLTreeBuilder while this class is being written. |
|
222 OwnPtr<LegacyHTMLTreeBuilder> m_legacyTreeBuilder; |
|
223 |
|
224 // These members are intentionally duplicated as the first set is a hack |
|
225 // on top of the legacy parser which will eventually be removed. |
|
226 RefPtr<Element> m_lastScriptElement; // FIXME: Hack for <script> support on top of the old parser. |
|
227 int m_lastScriptElementStartLine; // FIXME: Hack for <script> support on top of the old parser. |
|
228 |
|
229 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. |
|
230 int m_scriptToProcessStartLine; // Starting line number of the script tag needing processing. |
|
231 |
|
232 // FIXME: FragmentScriptingPermission is a HACK for platform/Pasteboard. |
|
233 // FragmentScriptingNotAllowed causes the Parser to remove children |
|
234 // from <script> tags (so javascript doesn't show up in pastes). |
|
235 FragmentScriptingPermission m_fragmentScriptingPermission; |
|
236 bool m_isParsingFragment; |
|
237 }; |
|
238 |
|
239 } |
|
240 |
|
241 #endif |