author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Mon, 03 May 2010 13:17:34 +0300 | |
changeset 19 | fcece45ef507 |
parent 0 | 1918ee327afb |
child 30 | 5dc02b23752f |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
Copyright (C) 1997 Martin Jones (mjones@kde.org) |
|
3 |
(C) 1997 Torben Weis (weis@kde.org) |
|
4 |
(C) 1999,2001 Lars Knoll (knoll@kde.org) |
|
5 |
(C) 2000,2001 Dirk Mueller (mueller@kde.org) |
|
6 |
Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
7 |
Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
|
8 |
||
9 |
This library is free software; you can redistribute it and/or |
|
10 |
modify it under the terms of the GNU Library General Public |
|
11 |
License as published by the Free Software Foundation; either |
|
12 |
version 2 of the License, or (at your option) any later version. |
|
13 |
||
14 |
This library is distributed in the hope that it will be useful, |
|
15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
17 |
Library General Public License for more details. |
|
18 |
||
19 |
You should have received a copy of the GNU Library General Public License |
|
20 |
along with this library; see the file COPYING.LIB. If not, write to |
|
21 |
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
22 |
Boston, MA 02110-1301, USA. |
|
23 |
*/ |
|
24 |
||
25 |
#include "config.h" |
|
26 |
#include "HTMLParser.h" |
|
27 |
||
28 |
#include "CharacterNames.h" |
|
29 |
#include "CSSPropertyNames.h" |
|
30 |
#include "CSSValueKeywords.h" |
|
31 |
#include "ChromeClient.h" |
|
32 |
#include "Comment.h" |
|
33 |
#include "Console.h" |
|
34 |
#include "DOMWindow.h" |
|
35 |
#include "DocumentFragment.h" |
|
36 |
#include "DocumentType.h" |
|
37 |
#include "Frame.h" |
|
38 |
#include "HTMLBodyElement.h" |
|
39 |
#include "HTMLDocument.h" |
|
40 |
#include "HTMLDivElement.h" |
|
41 |
#include "HTMLDListElement.h" |
|
42 |
#include "HTMLElementFactory.h" |
|
43 |
#include "HTMLFormElement.h" |
|
44 |
#include "HTMLHeadElement.h" |
|
45 |
#include "HTMLHRElement.h" |
|
46 |
#include "HTMLHtmlElement.h" |
|
47 |
#include "HTMLIsIndexElement.h" |
|
48 |
#include "HTMLMapElement.h" |
|
49 |
#include "HTMLNames.h" |
|
50 |
#include "HTMLParserQuirks.h" |
|
51 |
#include "HTMLTableCellElement.h" |
|
52 |
#include "HTMLTableRowElement.h" |
|
53 |
#include "HTMLTableSectionElement.h" |
|
54 |
#include "HTMLTokenizer.h" |
|
55 |
#include "LocalizedStrings.h" |
|
56 |
#include "Page.h" |
|
57 |
#include "Settings.h" |
|
58 |
#include "Text.h" |
|
59 |
#include <wtf/StdLibExtras.h> |
|
60 |
||
61 |
namespace WebCore { |
|
62 |
||
63 |
using namespace HTMLNames; |
|
64 |
||
65 |
// Depth limits for redundant tags and residual style handling. Their uses are
// outside this part of the file.
// NOTE(review): presumably consumed by allowNestedRedundantTag() and the
// residual-style code - confirm.
static const unsigned cMaxRedundantTagDepth = 20;
static const unsigned cResidualStyleMaxDepth = 200;

// Elements whose tagPriority() is at least this value count as block level
// for the purposes of the depth cap enforced in insertNode().
static const int minBlockLevelTagPriority = 3;

// A cap on the number of tags with priority minBlockLevelTagPriority or higher
// allowed in m_blockStack. The cap is enforced by adding such new elements as
// siblings instead of children once it is reached.
static const size_t cMaxBlockDepth = 4096;
|
74 |
||
75 |
struct HTMLStackElem : Noncopyable { |
|
76 |
HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) |
|
77 |
: tagName(t) |
|
78 |
, level(lvl) |
|
79 |
, strayTableContent(false) |
|
80 |
, node(n) |
|
81 |
, didRefNode(r) |
|
82 |
, next(nx) |
|
83 |
{ |
|
84 |
} |
|
85 |
||
86 |
void derefNode() |
|
87 |
{ |
|
88 |
if (didRefNode) |
|
89 |
node->deref(); |
|
90 |
} |
|
91 |
||
92 |
AtomicString tagName; |
|
93 |
int level; |
|
94 |
bool strayTableContent; |
|
95 |
Node* node; |
|
96 |
bool didRefNode; |
|
97 |
HTMLStackElem* next; |
|
98 |
}; |
|
99 |
||
100 |
/** |
|
101 |
* The parser parses tokenized input into the document, building up the |
|
102 |
* document tree. If the document is well-formed, parsing it is straightforward. |
|
103 |
* |
|
104 |
* Unfortunately, we have to handle many HTML documents that are not well-formed, |
|
105 |
* so the parser has to be tolerant about errors. |
|
106 |
* |
|
107 |
* We have to take care of at least the following error conditions: |
|
108 |
* |
|
109 |
* 1. The element being added is explicitly forbidden inside some outer tag. |
|
110 |
* In this case we should close all tags up to the one that forbids |
|
111 |
* the element, and add it afterwards. |
|
112 |
* |
|
113 |
* 2. We are not allowed to add the element directly. It could be that |
|
114 |
* the person writing the document forgot some tag in between (or that the |
|
115 |
* tag in between is optional). This could be the case with the following |
|
116 |
* tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). |
|
117 |
* |
|
118 |
* 3. We want to add a block element inside an inline element. Close all |
|
119 |
* inline elements up to the next higher block element. |
|
120 |
* |
|
121 |
* 4. If this doesn't help, close elements until we are allowed to add the |
|
122 |
* element or ignore the tag. |
|
123 |
* |
|
124 |
*/ |
|
125 |
||
126 |
// Builds a parser that inserts directly into |doc|. The document itself is the
// initial current node and is not ref'd by the parser (m_didRefCurrent starts
// out false). Parser quirks are only requested when the document has a page.
HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
    : m_document(doc)
    , m_current(doc)
    , m_didRefCurrent(false)
    , m_blockStack(0)
    , m_blocksInStack(0)
    , m_hasPElementInScope(NotInScope)
    , m_inBody(false)
    , m_haveContent(false)
    , m_haveFrameSet(false)
    , m_isParsingFragment(false)
    , m_reportErrors(reportErrors)
    , m_handlingResidualStyleAcrossBlocks(false)
    , m_inStrayTableContent(0)
    , m_parserQuirks(!m_document->page() ? 0 : m_document->page()->chrome()->client()->createHTMLParserQuirks())
{
}
|
143 |
||
144 |
// Builds a parser that inserts into the document fragment |frag|.
//
// |frag| must be non-null: it is dereferenced in the initializer list to find
// the owning document. The parser starts "in body", never reports errors, and
// holds a reference on the fragment for as long as it is the current node
// (m_didRefCurrent is true; the matching deref happens in setCurrent() or the
// destructor).
HTMLParser::HTMLParser(DocumentFragment* frag)
    : m_document(frag->document())
    , m_current(frag)
    , m_didRefCurrent(true)
    , m_blockStack(0)
    , m_blocksInStack(0)
    , m_hasPElementInScope(NotInScope)
    , m_inBody(true)
    , m_haveContent(false)
    , m_haveFrameSet(false)
    , m_isParsingFragment(true)
    , m_reportErrors(false)
    , m_handlingResidualStyleAcrossBlocks(false)
    , m_inStrayTableContent(0)
    , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0)
{
    // A null fragment can never reach this point (it was already dereferenced
    // above), so state the precondition instead of testing for it. The ref
    // must be unconditional to stay balanced with m_didRefCurrent(true).
    ASSERT(frag);
    frag->ref();
}
|
163 |
||
164 |
// Frees the block stack, then drops the parser's reference on the current
// node if one is held.
HTMLParser::~HTMLParser()
{
    freeBlock();

    if (!m_didRefCurrent)
        return;
    m_current->deref();
}
|
170 |
||
171 |
void HTMLParser::reset() |
|
172 |
{ |
|
173 |
ASSERT(!m_isParsingFragment); |
|
174 |
||
175 |
setCurrent(m_document); |
|
176 |
||
177 |
freeBlock(); |
|
178 |
||
179 |
m_inBody = false; |
|
180 |
m_haveFrameSet = false; |
|
181 |
m_haveContent = false; |
|
182 |
m_inStrayTableContent = 0; |
|
183 |
||
184 |
m_currentFormElement = 0; |
|
185 |
m_currentMapElement = 0; |
|
186 |
m_head = 0; |
|
187 |
m_isindexElement = 0; |
|
188 |
||
189 |
m_skipModeTag = nullAtom; |
|
190 |
||
191 |
if (m_parserQuirks) |
|
192 |
m_parserQuirks->reset(); |
|
193 |
} |
|
194 |
||
195 |
// Makes |newCurrent| the node that new content is inserted into, keeping the
// parser's manual ref-count bookkeeping balanced. Neither a null pointer nor
// the document itself is ref'd.
void HTMLParser::setCurrent(Node* newCurrent)
{
    const bool takeRef = newCurrent && newCurrent != m_document;

    // Acquire the new reference before releasing the old one.
    if (takeRef)
        newCurrent->ref();
    if (m_didRefCurrent)
        m_current->deref();

    m_didRefCurrent = takeRef;
    m_current = newCurrent;
}
|
205 |
||
206 |
// Processes a single token and returns the node that was inserted for it.
// Returns 0 when nothing was inserted: the token was skipped, was a close tag,
// produced no node, or could not be inserted.
PassRefPtr<Node> HTMLParser::parseToken(Token* t)
{
    if (!m_skipModeTag.isNull()) {
        const bool closesSkippedTag = !t->beginTag && t->tagName == m_skipModeTag;
        if (closesSkippedTag) {
            // Found the end tag for the current skip mode, so we're done skipping.
            m_skipModeTag = nullAtom;
        } else if (!(m_current->localName() == t->tagName)) {
            // Still in skip mode and the token does not name the current element: drop it.
            return 0;
        }
        // Otherwise the token names the current element and is parsed normally
        // while skip mode stays on ("Do not skip </iframe>").
        // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
    }

    // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
    if (t->isCloseTag(brTag) && m_document->inCompatMode()) {
        reportError(MalformedBRError);
        t->beginTag = true;
    }

    // Close tags never produce a node.
    if (!t->beginTag) {
        processCloseTag(t);
        return 0;
    }

    // Ignore spaces, if we're not inside a paragraph or other inline code.
    // Do not alter the text if it is part of a scriptTag.
    if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) {
        // Non-whitespace text in the body (outside <style>/<title> and outside skip mode) counts as content.
        if (m_inBody && !skipMode() && m_current->localName() != styleTag
            && m_current->localName() != titleTag && !t->text->containsOnlyWhitespace())
            m_haveContent = true;

        RefPtr<Node> textNode;
        String text = t->text.get();
        unsigned charsLeft = text.length();
        // Split large blocks of text into nodes of manageable size; the last node created is returned.
        while (charsLeft) {
            textNode = Text::createWithLengthLimit(m_document, text, charsLeft);
            if (!insertNode(textNode.get(), t->selfClosingTag))
                return 0;
        }
        return textNode;
    }

    RefPtr<Node> node = getNode(t);
    // Just to be sure, and to catch currently unimplemented stuff.
    if (!node)
        return 0;

    if (node->isHTMLElement()) {
        HTMLElement* element = static_cast<HTMLElement*>(node.get());

        // Set attributes.
        element->setAttributeMap(t->attrs.get());

        // Take care of optional close tags.
        if (element->endTagRequirement() == TagStatusOptional)
            popBlock(t->tagName);

        // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
        // syntax was used, report an error.
        if (t->brokenXMLStyle && element->endTagRequirement() != TagStatusForbidden) {
            if (t->tagName == scriptTag)
                reportError(IncorrectXMLCloseScriptWarning);
            else
                reportError(IncorrectXMLSelfCloseError, &t->tagName);
        }
    }

    if (insertNode(node.get(), t->selfClosingTag))
        return node;

    // We couldn't insert the node: strip its attributes and forget any
    // parser-level pointers that still refer to it.
    if (node->isElementNode())
        static_cast<Element*>(node.get())->setAttributeMap(0);

    if (m_currentMapElement == node)
        m_currentMapElement = 0;

    if (m_currentFormElement == node)
        m_currentFormElement = 0;

    if (m_head == node)
        m_head = 0;

    return 0;
}
|
295 |
||
296 |
// Turns a doctype token into the document's DocumentType node. Only the first
// doctype is honoured, and only while the parser is still positioned directly
// on the document; doctypes in fragments are ignored.
void HTMLParser::parseDoctypeToken(DoctypeToken* t)
{
    // Ignore any doctype after the first.
    if (m_document->doctype())
        return;
    // Ignore doctypes in fragments, or once we have descended below the document.
    if (m_isParsingFragment || m_current != m_document)
        return;

    // Make a new doctype node and set it as our doctype.
    m_document->addChild(DocumentType::create(m_document,
                                              String::adopt(t->m_name),
                                              String::adopt(t->m_publicID),
                                              String::adopt(t->m_systemID)));
}
|
305 |
||
306 |
static bool isTableSection(const Node* n) |
|
307 |
{ |
|
308 |
return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); |
|
309 |
} |
|
310 |
||
311 |
static bool isTablePart(const Node* n) |
|
312 |
{ |
|
313 |
return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || |
|
314 |
isTableSection(n); |
|
315 |
} |
|
316 |
||
317 |
static bool isTableRelated(const Node* n) |
|
318 |
{ |
|
319 |
return n->hasTagName(tableTag) || isTablePart(n); |
|
320 |
} |
|
321 |
||
322 |
static bool isScopingTag(const AtomicString& tagName) |
|
323 |
{ |
|
324 |
return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag; |
|
325 |
} |
|
326 |
||
327 |
// Tries to add |n| as a child of the current node, falling back to
// handleError() when the current node refuses it.
//
// n:    node to insert; kept alive for the duration of the call.
// flat: when true the node is never pushed onto the block stack, even if its
//       tag priority would otherwise make it the new current node.
// Returns true if the node ended up in the tree.
bool HTMLParser::insertNode(Node* n, bool flat)
{
    RefPtr<Node> protectNode(n);

    const AtomicString& localName = n->localName();
    int tagPriority = 0;
    if (n->isHTMLElement())
        tagPriority = static_cast<HTMLElement*>(n)->tagPriority();

    // <table> is never allowed inside stray table content.  Always pop out of the stray table content
    // and close up the first table, and then start the second table as a sibling.
    if (m_inStrayTableContent && localName == tableTag)
        popBlock(tableTag);

    // Enforce the block depth cap: close open blocks until there is room, so
    // the new block-level element is added as a sibling rather than a child.
    while (tagPriority >= minBlockLevelTagPriority && m_blocksInStack >= cMaxBlockDepth)
        popBlock(m_blockStack->tagName);

    // Give the embedder's quirks object a chance to veto the insertion.
    if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n))
        return false;

    // Let's be stupid and just try to insert it.
    // This should work if the document is well-formed.
    Node* newNode = m_current->addChild(n);
    if (!newNode)
        return handleError(n, flat, localName, tagPriority); // Try to handle the error.

    // Sample the parent's attached state before m_current is possibly replaced below.
    const bool parentAttached = m_current->attached();

    // Don't push elements without end tags (e.g., <img>) on the stack.
    const bool opensBlock = tagPriority > 0 && !flat;
    if (opensBlock) {
        if (newNode == m_current) {
            // This case should only be hit when a demoted <form> is placed inside a table.
            ASSERT(localName == formTag);
            reportError(FormInsideTablePartError, &m_current->localName());
            HTMLFormElement* form = static_cast<HTMLFormElement*>(n);
            form->setDemoted(true);
        } else {
            // The pushBlock function transfers ownership of current to the block stack
            // so we're guaranteed that m_didRefCurrent is false. The code below is an
            // optimized version of setCurrent that takes advantage of that fact and also
            // assumes that newNode is neither 0 nor a pointer to the document.
            pushBlock(localName, tagPriority);
            newNode->beginParsingChildren();
            ASSERT(!m_didRefCurrent);
            newNode->ref();
            m_current = newNode;
            m_didRefCurrent = true;
        }
    }

    // Attach the new node if its parent is attached (never while parsing a fragment).
    if (parentAttached && !n->attached() && !m_isParsingFragment)
        n->attach();

    // A node that does not open a block will receive no children from the parser.
    if (!opensBlock)
        n->finishParsingChildren();

    if (localName == htmlTag && m_document->frame())
        m_document->frame()->loader()->dispatchDocumentElementAvailable();

    return true;
}
|
387 |
||
388 |
// Error recovery for insertNode(): called when the current node refused |n|
// as a child.
//
// n:           the node that could not be inserted.
// flat:        forwarded from insertNode(); a flat node is never pushed onto the block stack.
// localName:   local name of |n|.
// tagPriority: tag priority of |n| (0 for non-HTML elements).
//
// Returns true if |n| was inserted somewhere, false if it was dropped.
//
// The recovery runs in three stages:
//   1. Special cases keyed on the tag of the node being inserted.
//   2. Special cases keyed on the tag of the current node. These usually close
//      the current element or synthesize a missing one and set |handled|.
//   3. If something was handled, retry the insertion; otherwise report the
//      content as ignored.
bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
{
    // Error handling code.  This is just ad hoc handling of specific parent/child combinations.
    HTMLElement* e = 0;
    bool handled = false;

    // 1. Check out the element's tag name to decide how to deal with errors.
    if (n->isHTMLElement()) {
        HTMLElement* h = static_cast<HTMLElement*>(n);
        if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
            if (m_inStrayTableContent && !isTableRelated(m_current)) {
                reportError(MisplacedTablePartError, &localName, &m_current->localName());
                // pop out to the nearest enclosing table-related tag.
                while (m_blockStack && !isTableRelated(m_current))
                    popOneBlock();
                return insertNode(n);
            }
        } else if (h->hasLocalName(headTag)) {
            if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) {
                reportError(MisplacedHeadError);
                return false;
            }
        } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
            // Head-only content: put it in the <head>, creating one if necessary.
            bool createdHead = false;
            if (!m_head) {
                createHead();
                createdHead = true;
            }
            if (m_head) {
                if (!createdHead)
                    reportError(MisplacedHeadContentError, &localName, &m_current->localName());
                if (m_head->addChild(n)) {
                    if (!n->attached() && !m_isParsingFragment)
                        n->attach();
                    return true;
                } else
                    return false;
            }
        } else if (h->hasLocalName(htmlTag)) {
            if (!m_current->isDocumentNode()) {
                if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag)) {
                    reportError(RedundantHTMLBodyError, &localName);
                    // we have another <HTML> element.... apply attributes to existing one
                    // make sure we don't overwrite already existing attributes
                    NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
                    Element* existingHTML = static_cast<Element*>(m_document->documentElement());
                    NamedNodeMap* bmap = existingHTML->attributes(false);
                    for (unsigned l = 0; map && l < map->length(); ++l) {
                        Attribute* it = map->attributeItem(l);
                        if (!bmap->getAttributeItem(it->name()))
                            existingHTML->setAttribute(it->name(), it->value());
                    }
                }
                return false;
            }
        } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) {
            bool createdHead = false;
            if (!m_head) {
                createHead();
                createdHead = true;
            }
            if (m_head) {
                Node* newNode = m_head->addChild(n);
                if (!newNode) {
                    // The head refused the element: skip everything up to its end tag.
                    setSkipMode(h->tagQName());
                    return false;
                }

                if (!createdHead)
                    reportError(MisplacedHeadContentError, &localName, &m_current->localName());

                pushBlock(localName, tagPriority);
                newNode->beginParsingChildren();
                setCurrent(newNode);
                if (!n->attached() && !m_isParsingFragment)
                    n->attach();
                return true;
            }
            if (m_inBody) {
                setSkipMode(h->tagQName());
                return false;
            }
        } else if (h->hasLocalName(bodyTag)) {
            if (m_inBody && m_document->body()) {
                // we have another <BODY> element.... apply attributes to existing one
                // make sure we don't overwrite already existing attributes
                // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
                reportError(RedundantHTMLBodyError, &localName);
                NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
                Element* existingBody = m_document->body();
                NamedNodeMap* bmap = existingBody->attributes(false);
                for (unsigned l = 0; map && l < map->length(); ++l) {
                    Attribute* it = map->attributeItem(l);
                    if (!bmap->getAttributeItem(it->name()))
                        existingBody->setAttribute(it->name(), it->value());
                }
                return false;
            }
            else if (!m_current->isDocumentNode())
                return false;
        } else if (h->hasLocalName(areaTag)) {
            // A misplaced <area> is moved into the current <map>, if there is one.
            if (m_currentMapElement) {
                reportError(MisplacedAreaError, &m_current->localName());
                m_currentMapElement->addChild(n);
                if (!n->attached() && !m_isParsingFragment)
                    n->attach();
                return true;
            }
            return false;
        } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
            if (isTableRelated(m_current)) {
                // Close open table parts until we are back at the table, then retry.
                while (m_blockStack && isTablePart(m_current))
                    popOneBlock();
                return insertNode(n);
            }
        }
    } else if (n->isCommentNode() && !m_head)
        return false;

    // 2. Next we examine our currently active element to do some further error handling.
    if (m_current->isHTMLElement()) {
        HTMLElement* h = static_cast<HTMLElement*>(m_current);
        const AtomicString& currentTagName = h->localName();
        if (h->hasLocalName(htmlTag)) {
            HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
            if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
                elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
                elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
                elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
                elt->hasLocalName(baseTag))) {
                if (!m_head) {
                    m_head = new HTMLHeadElement(headTag, m_document);
                    insertNode(m_head.get());
                    handled = true;
                }
            } else {
                if (n->isTextNode()) {
                    Text* t = static_cast<Text*>(n);
                    if (t->containsOnlyWhitespace())
                        return false;
                }
                if (!m_haveFrameSet) {
                    // Ensure that head exists.
                    // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
                    if (shouldCreateImplicitHead(m_document))
                        createHead();

                    popBlock(headTag);
                    e = new HTMLBodyElement(bodyTag, m_document);
                    startBody();
                    insertNode(e);
                    handled = true;
                } else
                    reportError(MisplacedFramesetContentError, &localName);
            }
        } else if (h->hasLocalName(headTag)) {
            if (n->hasTagName(htmlTag))
                return false;
            else {
                // This means the body starts here...
                if (!m_haveFrameSet) {
                    ASSERT(currentTagName == headTag);
                    popBlock(currentTagName);
                    e = new HTMLBodyElement(bodyTag, m_document);
                    startBody();
                    insertNode(e);
                    handled = true;
                } else
                    reportError(MisplacedFramesetContentError, &localName);
            }
        } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
                   || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
            reportError(MisplacedContentRetryError, &localName, &currentTagName);
            popBlock(currentTagName);
            handled = true;
        } else if (h->hasLocalName(captionTag)) {
            // Illegal content in a caption. Close the caption and try again.
            reportError(MisplacedCaptionContentError, &localName);
            popBlock(currentTagName);
            if (isTablePart(n))
                return insertNode(n, flat);
        } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
            if (n->hasTagName(tableTag)) {
                reportError(MisplacedTableError, &currentTagName);
                if (m_isParsingFragment && !h->hasLocalName(tableTag))
                    // fragment may contain table parts without <table> ancestor, pop them one by one
                    popBlock(h->localName());
                popBlock(localName); // end the table
                handled = true;      // ...and start a new one
            } else {
                ExceptionCode ec = 0;
                Node* node = m_current;
                Node* parent = node->parentNode();
                // A script may have removed the current node's parent from the DOM
                // http://bugs.webkit.org/show_bug.cgi?id=7137
                // FIXME: we should do real recovery here and re-parent with the correct node.
                if (!parent)
                    return false;
                Node* grandparent = parent->parentNode();

                if (n->isTextNode() ||
                    (h->hasLocalName(trTag) &&
                     isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
                     ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
                       !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
                     parent->hasTagName(tableTag))) {
                    // Stray content: hoist it out and insert it before the enclosing table.
                    node = (node->hasTagName(tableTag)) ? node :
                            ((node->hasTagName(trTag)) ? grandparent : parent);
                    // This can happen with fragments
                    if (!node)
                        return false;
                    Node* insertionParent = node->parentNode();
                    if (!insertionParent)
                        return false;
                    insertionParent->insertBefore(n, node, ec);
                    if (!ec) {
                        reportError(StrayTableContentError, &localName, &currentTagName);
                        if (n->isHTMLElement() && tagPriority > 0 &&
                            !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
                        {
                            pushBlock(localName, tagPriority);
                            n->beginParsingChildren();
                            setCurrent(n);
                            m_inStrayTableContent++;
                            m_blockStack->strayTableContent = true;
                        }
                        return true;
                    }
                }

                if (!ec) {
                    // Synthesize the missing table part and retry the insertion inside it.
                    if (m_current->hasTagName(trTag)) {
                        reportError(TablePartRequiredError, &localName, &tdTag.localName());
                        e = new HTMLTableCellElement(tdTag, m_document);
                    } else if (m_current->hasTagName(tableTag)) {
                        // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
                        // and it isn't really a parse error per se.
                        e = new HTMLTableSectionElement(tbodyTag, m_document);
                    } else {
                        reportError(TablePartRequiredError, &localName, &trTag.localName());
                        e = new HTMLTableRowElement(trTag, m_document);
                    }

                    insertNode(e);
                    handled = true;
                }
            }
        } else if (h->hasLocalName(objectTag)) {
            reportError(MisplacedContentRetryError, &localName, &currentTagName);
            popBlock(objectTag);
            handled = true;
        } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
            if (!isInline(n)) {
                popBlock(currentTagName);
                handled = true;
            }
        } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
            if (localName == optgroupTag) {
                popBlock(currentTagName);
                handled = true;
            } else if (localName == selectTag) {
                // IE treats a nested select as </select>. Let's do the same
                popBlock(localName);
            }
        } else if (h->hasLocalName(selectTag)) {
            if (localName == inputTag || localName == textareaTag) {
                reportError(MisplacedContentRetryError, &localName, &currentTagName);
                popBlock(currentTagName);
                handled = true;
            }
        } else if (h->hasLocalName(colgroupTag)) {
            popBlock(currentTagName);
            handled = true;
        } else if (!h->hasLocalName(bodyTag)) {
            if (isInline(m_current)) {
                popInlineBlocks();
                handled = true;
            }
        }
    } else if (m_current->isDocumentNode()) {
        if (n->isTextNode()) {
            Text* t = static_cast<Text*>(n);
            if (t->containsOnlyWhitespace())
                return false;
        }

        if (!m_document->documentElement()) {
            e = new HTMLHtmlElement(htmlTag, m_document);
            insertNode(e);
            handled = true;
        }
    }

    // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
    if (!handled) {
        reportError(IgnoredContentError, &localName, &m_current->localName());
        return false;
    }
    return insertNode(n);
}
|
689 |
||
690 |
// Pointer to one of the HTMLParser::*CreateErrorCheck member functions below.
// Each takes the token being processed and an out-parameter for a node it may create.
typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
// Lookup table from an AtomicStringImpl* key to a check function.
// NOTE(review): presumably keyed by tag local name and populated in getNode(), which is outside this view - confirm.
typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
|
692 |
||
693 |
// Creates a Text node from the token's text and hands it back through |result|.
// Always returns false.
// NOTE(review): false presumably tells the caller not to create an element from the tag name itself - confirm against getNode().
bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    StringImpl* tokenText = t->text.get();
    result = Text::create(m_document, tokenText);
    return false;
}
|
698 |
||
699 |
// Creates a Comment node from the token's text and hands it back through |result|.
// Always returns false.
bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    StringImpl* commentText = t->text.get();
    result = Comment::create(m_document, commentText);
    return false;
}
|
704 |
||
705 |
// Handles a <head> start tag. A new head element is created (and remembered in
// m_head) when there is no head yet or when the current node is <html>;
// otherwise the tag is reported as misplaced and |result| is left untouched.
// Always returns false.
bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
{
    if (m_head && m_current->localName() != htmlTag) {
        reportError(MisplacedHeadError);
        return false;
    }

    m_head = new HTMLHeadElement(headTag, m_document);
    result = m_head;
    return false;
}
|
714 |
||
715 |
// Prepares for a <body> start tag. Returns false when a frameset has already
// been seen (body no longer allowed); otherwise closes the head, enters body
// state and returns true.
bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
{
    // body no longer allowed if we have a frameset
    if (m_haveFrameSet)
        return false;

    // Ensure that head exists (unless parsing a fragment).
    // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
    const bool wantImplicitHead = !m_isParsingFragment && shouldCreateImplicitHead(m_document);
    if (wantImplicitHead)
        createHead();

    popBlock(headTag);
    startBody();
    return true;
}
|
730 |
||
731 |
// Prepares for a <frameset> start tag. A body that has not received any
// content yet is closed and hidden so the frameset can take over. Returns
// false when the frameset must be ignored, true otherwise.
bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
{
    popBlock(headTag);

    const bool emptyBodyIsOpen = m_inBody && !m_haveFrameSet && !m_haveContent;
    if (emptyBodyIsOpen) {
        popBlock(bodyTag);
        // ### actually for IE document.body returns the now hidden "body" element
        // we can't implement that behaviour now because it could cause too many
        // regressions and the headaches are not worth the work as long as there is
        // no site actually relying on that detail (Dirk)
        if (m_document->body())
            m_document->body()->setAttribute(styleAttr, "display:none");
        m_inBody = false;
    }

    // With content or an earlier frameset already present directly under <html>, drop this one.
    if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag)
        return false;

    m_haveFrameSet = true;
    startBody();
    return true;
}
|
750 |
||
751 |
// Handles a <form> start tag. Nested forms are not created: when a form is
// already current, nothing happens and |result| stays untouched. Otherwise a
// new form element is created, remembered as the current form, returned
// through |result|, and any <p> in scope is closed. Always returns false.
bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    // Only create a new form if we're not already inside one.
    // This is consistent with other browsers' behavior.
    if (m_currentFormElement)
        return false;

    m_currentFormElement = new HTMLFormElement(formTag, m_document);
    result = m_currentFormElement;
    pCloserCreateErrorCheck(t, result);
    return false;
}
|
762 |
||
763 |
// Handles an <isindex> start tag. The replacement node comes from
// handleIsindex(). Inside the body it is returned through |result| and the
// token is marked self-closing; before the body it is stored in
// m_isindexElement instead. Always returns false.
bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    RefPtr<Node> isindexNode = handleIsindex(t);
    if (m_inBody) {
        t->selfClosingTag = true;
        result = isindexNode.release();
    } else
        m_isindexElement = isindexNode.release();
    return false;
}
|
774 |
||
775 |
// <select> needs no preparation; always returns true.
bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
{
    const bool proceed = true;
    return proceed;
}
|
779 |
||
780 |
// Prepares for a <dd> start tag: closes a <p> in scope, then pops <dt> and
// <dd> in that order. Always returns true.
bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    pCloserCreateErrorCheck(t, result);

    // A new definition description ends the preceding term or description.
    popBlock(dtTag);
    popBlock(ddTag);

    return true;
}
|
787 |
||
788 |
// Prepares for a <dt> start tag: closes a <p> in scope, then pops <dd> and
// <dt> in that order. Always returns true.
bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    pCloserCreateErrorCheck(t, result);

    // A new definition term ends the preceding description or term.
    popBlock(ddTag);
    popBlock(dtTag);

    return true;
}
|
795 |
||
796 |
// Prepares for an <rp> start tag by popping <rp> and then <rt>.
// Always returns true.
bool HTMLParser::rpCreateErrorCheck(Token*, RefPtr<Node>&)
{
    // A new ruby parenthesis ends the preceding <rp> or <rt>.
    popBlock(rpTag);
    popBlock(rtTag);

    return true;
}
|
802 |
||
803 |
// Prepares for an <rt> start tag by popping <rp> and then <rt>.
// Always returns true.
bool HTMLParser::rtCreateErrorCheck(Token*, RefPtr<Node>&)
{
    // A new ruby text ends the preceding <rp> or <rt>.
    popBlock(rpTag);
    popBlock(rtTag);

    return true;
}
|
809 |
||
810 |
// Error check for tags that may not nest inside themselves: an already open
// element with the same tag name is popped before the new one is created.
// Returns true so that getNode() creates the element.
bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
{
    const AtomicString& sameTag = t->tagName;
    popBlock(sameTag);
    return true;
}
|
815 |
||
816 |
// Error check combining the <p>-closing fix-up with the "no self nesting"
// fix-up: an open <p> in scope is closed first, then an open element with the
// token's own tag name is popped. Returns true so that getNode() creates the element.
bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
{
    pCloserCreateErrorCheck(t, result);
    return nestedCreateErrorCheck(t, result);
}
|
822 |
||
823 |
// Error check for style tags that pages sometimes nest excessively. The element
// is only created while allowNestedRedundantTag() still permits another level
// of the same tag; otherwise false is returned and getNode() yields no node.
bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
{
    const bool mayNestFurther = allowNestedRedundantTag(t->tagName);
    return mayNestFurther;
}
|
827 |
||
828 |
// Error check for table cells (<td>/<th>): any open <td> and then any open <th>
// is popped before the new cell is created. Returns true so that getNode()
// creates the element.
bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
{
    popBlock(tdTag);
    popBlock(thTag);
    return true;
}
|
834 |
||
835 |
// Error check for table sections (<thead>/<tbody>/<tfoot>): any open section is
// popped, in the order thead, tbody, tfoot, before the new section is created.
// Returns true so that getNode() creates the element.
bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
{
    popBlock(theadTag);
    popBlock(tbodyTag);
    popBlock(tfootTag);
    return true;
}
|
842 |
||
843 |
// Error check for <noembed>: switches the parser into skip mode for this tag
// and returns true so that getNode() still creates the element itself.
bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
{
    setSkipMode(noembedTag);
    return true;
}
|
848 |
||
849 |
// Error check for <noframes>: switches the parser into skip mode for this tag
// and returns true so that getNode() still creates the element itself.
bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
{
    setSkipMode(noframesTag);
    return true;
}
|
854 |
||
855 |
// Error check for <noscript>. Skip mode is only entered when we are parsing a
// full document (not a fragment) and the document's settings report JavaScript
// as enabled. The element itself is always created (returns true).
bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
{
    if (m_isParsingFragment)
        return true;

    Settings* settings = m_document->settings();
    if (!settings || !settings->isJavaScriptEnabled())
        return true;

    setSkipMode(noscriptTag);
    return true;
}
|
864 |
||
865 |
// Error check for tags that implicitly end a paragraph: if a <p> is in scope
// it is popped. Always returns true so that getNode() creates the element.
bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
{
    const bool paragraphIsOpen = hasPElementInScope();
    if (paragraphIsOpen)
        popBlock(pTag);
    return true;
}
|
871 |
||
872 |
// Variant of pCloserCreateErrorCheck() that leaves an open <p> alone when the
// document is in compat mode; otherwise a <p> in scope is popped. Always
// returns true so that getNode() creates the element.
bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
{
    // The scope query is only made when we are not in compat mode.
    if (!m_document->inCompatMode() && hasPElementInScope())
        popBlock(pTag);
    return true;
}
|
880 |
||
881 |
// Error check for <map>. The element is created here (rather than by the
// generic factory) so that the parser can remember it as the current map.
// Returns false, so getNode() hands back |result| unchanged.
bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
{
    m_currentMapElement = new HTMLMapElement(mapTag, m_document);
    result = m_currentMapElement;
    return false;
}
|
887 |
||
888 |
// Creates the DOM node for token |t|.
//
// A per-tag "create error check" handler, if one is registered for the token's
// tag name, runs first. The handler may perform stack fix-ups and may fill in
// the node itself. If it returns true (or if no handler is registered), the
// node is created through HTMLElementFactory; otherwise whatever the handler
// stored (possibly nothing) is returned.
PassRefPtr<Node> HTMLParser::getNode(Token* t)
{
    // The dispatch table is shared by all parsers and filled in on first use.
    DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
    if (gFunctionMap.isEmpty()) {
        // Tags that share a handler are registered as a group.
        const QualifiedName* const pCloserTags[] = {
            &addressTag, &blockquoteTag, &centerTag, &dirTag, &divTag, &dlTag,
            &fieldsetTag, &h1Tag, &h2Tag, &h3Tag, &h4Tag, &h5Tag, &h6Tag, &hrTag,
            &listingTag, &menuTag, &navTag, &olTag, &pTag, &plaintextTag, &preTag,
            &ulTag
        };
        for (size_t i = 0; i < sizeof(pCloserTags) / sizeof(pCloserTags[0]); ++i)
            gFunctionMap.set(pCloserTags[i]->localName().impl(), &HTMLParser::pCloserCreateErrorCheck);

        const QualifiedName* const nestedTags[] = { &aTag, &buttonTag, &nobrTag, &trTag };
        for (size_t i = 0; i < sizeof(nestedTags) / sizeof(nestedTags[0]); ++i)
            gFunctionMap.set(nestedTags[i]->localName().impl(), &HTMLParser::nestedCreateErrorCheck);

        const QualifiedName* const nestedStyleTags[] = {
            &bTag, &bigTag, &iTag, &sTag, &smallTag, &strikeTag, &ttTag, &uTag
        };
        for (size_t i = 0; i < sizeof(nestedStyleTags) / sizeof(nestedStyleTags[0]); ++i)
            gFunctionMap.set(nestedStyleTags[i]->localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);

        const QualifiedName* const tableSectionTags[] = { &tbodyTag, &tfootTag, &theadTag };
        for (size_t i = 0; i < sizeof(tableSectionTags) / sizeof(tableSectionTags[0]); ++i)
            gFunctionMap.set(tableSectionTags[i]->localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);

        const QualifiedName* const tableCellTags[] = { &tdTag, &thTag };
        for (size_t i = 0; i < sizeof(tableCellTags) / sizeof(tableCellTags[0]); ++i)
            gFunctionMap.set(tableCellTags[i]->localName().impl(), &HTMLParser::tableCellCreateErrorCheck);

        // Tags with a handler of their own.
        gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
        gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
        gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
        gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
        gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
        gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
        gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
        gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
        gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
        gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
        gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
#if !ENABLE(XHTMLMP)
        gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
#endif
        gFunctionMap.set(rpTag.localName().impl(), &HTMLParser::rpCreateErrorCheck);
        gFunctionMap.set(rtTag.localName().impl(), &HTMLParser::rtCreateErrorCheck);
        gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
        gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);

        // Pseudo tag names used for comment and text tokens.
        gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
        gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
    }

    RefPtr<Node> node;
    CreateErrorCheckFunc errorCheck = gFunctionMap.get(t->tagName.impl());

    // Without a registered handler the element is always created by the factory.
    const bool createWithFactory = errorCheck ? (this->*errorCheck)(t, node) : true;
    if (createWithFactory)
        node = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get());
    return node.release();
}
|
962 |
||
963 |
bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName) |
|
964 |
{ |
|
965 |
// www.liceo.edu.mx is an example of a site that achieves a level of nesting of |
|
966 |
// about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 |
|
967 |
// nested tags of the same type before just ignoring them all together. |
|
968 |
unsigned i = 0; |
|
969 |
for (HTMLStackElem* curr = m_blockStack; |
|
970 |
i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; |
|
971 |
curr = curr->next, i++) { } |
|
972 |
return i != cMaxRedundantTagDepth; |
|
973 |
} |
|
974 |
||
975 |
// Handles an end tag token by popping the matching block off the stack, with a
// few special cases for </html>, </body>, comments, </form>, </map> and </p>.
void HTMLParser::processCloseTag(Token* t)
{
    const AtomicString& closedTag = t->tagName;

    // Support for really broken html.
    // We never close the html or body tag, since some stupid web pages close them
    // before the actual end of the doc. Let's rely on the end() call to close things.
    if (closedTag == htmlTag || closedTag == bodyTag || closedTag == commentAtom)
        return;

    // Closing the form that is currently open forgets it; closing a map forgets
    // the current map.
    const bool closesCurrentForm = closedTag == formTag && m_currentFormElement;
    if (closesCurrentForm)
        m_currentFormElement = 0;
    else if (closedTag == mapTag)
        m_currentMapElement = 0;

    // Stray-close-tag errors are not reported for the current form's </form>
    // nor for </p>.
    const bool checkForCloseTagErrors = !closesCurrentForm && !(closedTag == pTag);

    HTMLStackElem* stackTopBefore = m_blockStack;
    popBlock(closedTag, checkForCloseTagErrors);

    const bool nothingWasPopped = stackTopBefore == m_blockStack;
    if (nothingWasPopped && closedTag == pTag) {
        // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
        // this as a valid break, i.e., <p></p>. So go ahead and make the empty
        // paragraph by replaying the token as a start tag and closing it again.
        t->beginTag = true;
        parseToken(t);
        popBlock(t->tagName);
        reportError(StrayParagraphCloseError);
    }
}
|
1004 |
||
1005 |
bool HTMLParser::isHeaderTag(const AtomicString& tagName) |
|
1006 |
{ |
|
1007 |
DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ()); |
|
1008 |
if (headerTags.isEmpty()) { |
|
1009 |
headerTags.add(h1Tag.localName().impl()); |
|
1010 |
headerTags.add(h2Tag.localName().impl()); |
|
1011 |
headerTags.add(h3Tag.localName().impl()); |
|
1012 |
headerTags.add(h4Tag.localName().impl()); |
|
1013 |
headerTags.add(h5Tag.localName().impl()); |
|
1014 |
headerTags.add(h6Tag.localName().impl()); |
|
1015 |
} |
|
1016 |
||
1017 |
return headerTags.contains(tagName.impl()); |
|
1018 |
} |
|
1019 |
||
1020 |
// Returns true if |node| is treated as inline content by the parser: text
// nodes, a fixed list of inline HTML elements, and (unless XHTMLMP is enabled)
// <noscript> when parsing a full document with JavaScript enabled.
bool HTMLParser::isInline(Node* node) const
{
    if (node->isTextNode())
        return true;

    if (!node->isHTMLElement())
        return false;

    HTMLElement* element = static_cast<HTMLElement*>(node);

    const QualifiedName* const inlineTags[] = {
        &aTag, &fontTag, &ttTag, &uTag, &bTag, &iTag, &sTag, &strikeTag, &bigTag,
        &smallTag, &emTag, &strongTag, &dfnTag, &codeTag, &sampTag, &kbdTag, &varTag,
        &citeTag, &abbrTag, &acronymTag, &subTag, &supTag, &spanTag, &nobrTag,
        &noframesTag, &nolayerTag, &noembedTag
    };
    for (size_t i = 0; i < sizeof(inlineTags) / sizeof(inlineTags[0]); ++i) {
        if (element->hasLocalName(*inlineTags[i]))
            return true;
    }

#if !ENABLE(XHTMLMP)
    // <noscript> only counts as inline when its content is going to be skipped,
    // i.e. outside fragment parsing and with JavaScript enabled.
    if (element->hasLocalName(noscriptTag) && !m_isParsingFragment) {
        Settings* settings = m_document->settings();
        if (settings && settings->isJavaScriptEnabled())
            return true;
    }
#endif

    return false;
}
|
1049 |
||
1050 |
bool HTMLParser::isResidualStyleTag(const AtomicString& tagName) |
|
1051 |
{ |
|
1052 |
DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ()); |
|
1053 |
if (residualStyleTags.isEmpty()) { |
|
1054 |
residualStyleTags.add(aTag.localName().impl()); |
|
1055 |
residualStyleTags.add(fontTag.localName().impl()); |
|
1056 |
residualStyleTags.add(ttTag.localName().impl()); |
|
1057 |
residualStyleTags.add(uTag.localName().impl()); |
|
1058 |
residualStyleTags.add(bTag.localName().impl()); |
|
1059 |
residualStyleTags.add(iTag.localName().impl()); |
|
1060 |
residualStyleTags.add(sTag.localName().impl()); |
|
1061 |
residualStyleTags.add(strikeTag.localName().impl()); |
|
1062 |
residualStyleTags.add(bigTag.localName().impl()); |
|
1063 |
residualStyleTags.add(smallTag.localName().impl()); |
|
1064 |
residualStyleTags.add(emTag.localName().impl()); |
|
1065 |
residualStyleTags.add(strongTag.localName().impl()); |
|
1066 |
residualStyleTags.add(dfnTag.localName().impl()); |
|
1067 |
residualStyleTags.add(codeTag.localName().impl()); |
|
1068 |
residualStyleTags.add(sampTag.localName().impl()); |
|
1069 |
residualStyleTags.add(kbdTag.localName().impl()); |
|
1070 |
residualStyleTags.add(varTag.localName().impl()); |
|
1071 |
residualStyleTags.add(nobrTag.localName().impl()); |
|
1072 |
} |
|
1073 |
||
1074 |
return residualStyleTags.contains(tagName.impl()); |
|
1075 |
} |
|
1076 |
||
1077 |
bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName) |
|
1078 |
{ |
|
1079 |
DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ()); |
|
1080 |
if (unaffectedTags.isEmpty()) { |
|
1081 |
unaffectedTags.add(bodyTag.localName().impl()); |
|
1082 |
unaffectedTags.add(tableTag.localName().impl()); |
|
1083 |
unaffectedTags.add(theadTag.localName().impl()); |
|
1084 |
unaffectedTags.add(tbodyTag.localName().impl()); |
|
1085 |
unaffectedTags.add(tfootTag.localName().impl()); |
|
1086 |
unaffectedTags.add(trTag.localName().impl()); |
|
1087 |
unaffectedTags.add(thTag.localName().impl()); |
|
1088 |
unaffectedTags.add(tdTag.localName().impl()); |
|
1089 |
unaffectedTags.add(captionTag.localName().impl()); |
|
1090 |
unaffectedTags.add(colgroupTag.localName().impl()); |
|
1091 |
unaffectedTags.add(colTag.localName().impl()); |
|
1092 |
unaffectedTags.add(optionTag.localName().impl()); |
|
1093 |
unaffectedTags.add(optgroupTag.localName().impl()); |
|
1094 |
unaffectedTags.add(selectTag.localName().impl()); |
|
1095 |
unaffectedTags.add(objectTag.localName().impl()); |
|
1096 |
unaffectedTags.add(datagridTag.localName().impl()); |
|
1097 |
unaffectedTags.add(datalistTag.localName().impl()); |
|
1098 |
} |
|
1099 |
||
1100 |
return !unaffectedTags.contains(tagName.impl()); |
|
1101 |
} |
|
1102 |
||
1103 |
// Fixes up the DOM and the block stack when a residual style element is closed
// while elements of a higher level are still open inside it, e.g. <b><p>Foo</b>.
// popBlock() calls this with |elem| being the stack entry of the style element
// that the close tag matched.
//
// For every block that crosses the style element (outermost first) the block is
// moved out of the style element and a shallow clone of the style element is
// placed just inside the block, wrapping the block's existing children. Finally,
// the inline style elements that were open inside the innermost block are
// closed and reopened.
//
// m_handlingResidualStyleAcrossBlocks is true for the duration of the fix-up and
// is cleared again on every exit path, including the early bail-outs.
void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
{
    HTMLStackElem* maxElem = 0;
    bool finished = false;
    bool strayTableContent = elem->strayTableContent;

    m_handlingResidualStyleAcrossBlocks = true;
    while (!finished) {
        // Find the outermost element that crosses over to a higher level. If there exists another higher-level
        // element, we will do another pass, until we have corrected the innermost one.
        ExceptionCode ec = 0;
        HTMLStackElem* curr = m_blockStack;
        HTMLStackElem* prev = 0;
        HTMLStackElem* prevMaxElem = 0;
        maxElem = 0;
        finished = true;
        while (curr && curr != elem) {
            if (curr->level > elem->level) {
                if (!isAffectedByResidualStyle(curr->tagName)) {
                    // Bailing out: do not leave the "handling" flag set behind us.
                    m_handlingResidualStyleAcrossBlocks = false;
                    return;
                }
                if (maxElem)
                    // We will need another pass.
                    finished = false;
                maxElem = curr;
                prevMaxElem = prev;
            }

            prev = curr;
            curr = curr->next;
        }

        if (!curr || !maxElem) {
            // |elem| is not on the stack, or nothing crosses it: nothing to fix up.
            m_handlingResidualStyleAcrossBlocks = false;
            return;
        }

        Node* residualElem = prev->node;
        Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current;
        Node* parentElem = elem->node;

        // Check to see if the reparenting that is going to occur is allowed according to the DOM.
        // FIXME: We should either always allow it or perform an additional fixup instead of
        // just bailing here.
        // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
        if (!parentElem->childAllowed(blockElem)) {
            m_handlingResidualStyleAcrossBlocks = false;
            return;
        }

        m_hasPElementInScope = Unknown;

        if (maxElem->node->parentNode() != elem->node) {
            // Walk the stack and remove any elements that aren't residual style tags. These
            // are basically just being closed up. Example:
            // <font><span>Moo<p>Goo</font></p>.
            // In the above example, the <span> doesn't need to be reopened. It can just close.
            HTMLStackElem* currElem = maxElem->next;
            HTMLStackElem* prevElem = maxElem;
            while (currElem != elem) {
                HTMLStackElem* nextElem = currElem->next;
                if (!isResidualStyleTag(currElem->tagName)) {
                    // Unlink |currElem|; the previous entry takes over its node (and its reference).
                    prevElem->next = nextElem;
                    prevElem->derefNode();
                    prevElem->node = currElem->node;
                    prevElem->didRefNode = currElem->didRefNode;
                    delete currElem;
                } else
                    prevElem = currElem;
                currElem = nextElem;
            }

            // We have to reopen residual tags in between maxElem and elem. An example of this case is:
            // <font><i>Moo<p>Foo</font>.
            // In this case, we need to transform the part before the <p> into:
            // <font><i>Moo</i></font><i>
            // so that the <i> will remain open. This involves the modification of elements
            // in the block stack.
            // This will also affect how we ultimately reparent the block, since we want it to end up
            // under the reopened residual tags (e.g., the <i> in the above example.)
            RefPtr<Node> prevNode = 0;
            currElem = maxElem;
            while (currElem->node != residualElem) {
                if (isResidualStyleTag(currElem->node->localName())) {
                    // Create a clone of this element.
                    // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
                    Node* currNode = currElem->node->cloneNode(false).releaseRef();
                    reportError(ResidualStyleError, &currNode->localName());

                    // Change the stack element's node to point to the clone.
                    // The stack element adopts the reference we obtained above by calling releaseRef().
                    currElem->derefNode();
                    currElem->node = currNode;
                    currElem->didRefNode = true;

                    // Attach the previous node as a child of this new node.
                    if (prevNode)
                        currNode->appendChild(prevNode, ec);
                    else // The new parent for the block element is going to be the innermost clone.
                        parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.

                    prevNode = currNode;
                }

                currElem = currElem->next;
            }

            // Now append the chain of new residual style elements if one exists.
            if (prevNode)
                elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
        }

        // Check if the block is still in the tree. If it isn't, then we don't
        // want to remove it from its parent (that would crash) or insert it into
        // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
        bool isBlockStillInTree = blockElem->parentNode();

        // We need to make a clone of |residualElem| and place it just inside |blockElem|.
        // All content of |blockElem| is reparented to be under this clone. We then
        // reparent |blockElem| using real DOM calls so that attachment/detachment will
        // be performed to fix up the rendering tree.
        // So for this example: <b>...<p>Foo</b>Goo</p>
        // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
        //
        // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
        if (isBlockStillInTree)
            blockElem->parentNode()->removeChild(blockElem, ec);

        Node* newNodePtr = 0;
        if (blockElem->firstChild()) {
            // Step 2: Clone |residualElem|.
            RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
            newNodePtr = newNode.get();
            reportError(ResidualStyleError, &newNode->localName());

            // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
            // before we've put |newElem| into the document. That way we'll only do one attachment of all
            // the new content (instead of a bunch of individual attachments).
            Node* currNode = blockElem->firstChild();
            while (currNode) {
                Node* nextNode = currNode->nextSibling();
                newNode->appendChild(currNode, ec);
                currNode = nextNode;
            }

            // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
            // attachment can occur yet.
            blockElem->appendChild(newNode.release(), ec);
        } else
            finished = true;

        // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
        if (isBlockStillInTree)
            parentElem->appendChild(blockElem, ec);

        // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
        // the node associated with the previous stack element so that when it gets popped,
        // it doesn't make the residual element the next current node.
        HTMLStackElem* currElem = maxElem;
        HTMLStackElem* prevElem = 0;
        while (currElem != elem) {
            prevElem = currElem;
            currElem = currElem->next;
        }
        prevElem->next = elem->next;
        prevElem->derefNode();
        prevElem->node = elem->node;
        prevElem->didRefNode = elem->didRefNode;
        if (!finished) {
            // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
            // in the stack. We do not do this for the innermost block, because in that case the new
            // node is effectively no longer open.
            elem->next = maxElem;
            elem->node = prevMaxElem->node;
            elem->didRefNode = prevMaxElem->didRefNode;
            elem->strayTableContent = false;
            prevMaxElem->next = elem;
            ASSERT(newNodePtr);
            prevMaxElem->node = newNodePtr;
            // The stack entry holds its own reference on the clone.
            newNodePtr->ref();
            prevMaxElem->didRefNode = true;
        } else
            delete elem;
    }

    // FIXME: If we ever make a case like this work:
    // <table><b><i><form></b></form></i></table>
    // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
    if (strayTableContent)
        m_inStrayTableContent--;

    // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
    // In the above example, Goo should stay italic.
    // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.

    HTMLStackElem* curr = m_blockStack;
    HTMLStackElem* residualStyleStack = 0;
    unsigned stackDepth = 1;
    unsigned redundantStyleCount = 0;
    while (curr && curr != maxElem) {
        // We will actually schedule this tag for reopening
        // after we complete the close of this entire block.
        if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
            // We've overloaded the use of stack elements and are just reusing the
            // struct with a slightly different meaning to the variables. Instead of chaining
            // from innermost to outermost, we build up a list of all the tags we need to reopen
            // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
            // to the outermost tag we need to reopen.
            // We also set curr->node to be the actual element that corresponds to the ID stored in
            // curr->id rather than the node that you should pop to when the element gets pulled off
            // the stack.
            if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
                redundantStyleCount++;
            else
                redundantStyleCount = 0;

            if (redundantStyleCount < cMaxRedundantTagDepth)
                moveOneBlockToStack(residualStyleStack);
            else
                popOneBlock();
        } else
            popOneBlock();

        curr = m_blockStack;
    }

    reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.

    m_handlingResidualStyleAcrossBlocks = false;
}
|
1329 |
||
1330 |
// Reopens every residual style tag in the list starting at |elem| by inserting
// a shallow clone of its node and pushing a matching stack entry. The list
// entries are consumed (their node is dereferenced and the entry is deleted).
//
// If |malformedTableParent| is non-null, the first clone is inserted into it
// just before its last child and is flagged as stray table content; all further
// clones are appended to the current node.
void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
{
    for (HTMLStackElem* following = 0; elem; elem = following) {
        // Shallow clone of the DOM node for the tag being reopened.
        RefPtr<Node> clone = elem->node->cloneNode(false);
        reportError(ResidualStyleError, &clone->localName());

        // In the malformed table case we need to insert before the table,
        // which will be the last child.
        // FIXME: Is it really OK to ignore the exceptions here?
        const bool insideMalformedTable = malformedTableParent != 0;
        ExceptionCode ec = 0;
        if (insideMalformedTable)
            malformedTableParent->insertBefore(clone, malformedTableParent->lastChild(), ec);
        else
            m_current->appendChild(clone, ec);

        // Push a stack entry for the clone we just inserted.
        pushBlock(elem->tagName, elem->level);
        clone->beginParsingChildren();

        // The reopened tag must also know that it lives inside a malformed table.
        m_blockStack->strayTableContent = insideMalformedTable;
        if (m_blockStack->strayTableContent)
            m_inStrayTableContent++;

        // Only the first reopened tag goes into the malformed table parent.
        malformedTableParent = 0;

        // Update |current| manually to point to the clone.
        setCurrent(clone.get());

        // Release this list entry and move on to the next tag to reopen.
        following = elem->next;
        elem->derefNode();
        delete elem;
    }
}
|
1370 |
||
1371 |
void HTMLParser::pushBlock(const AtomicString& tagName, int level) |
|
1372 |
{ |
|
1373 |
m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); |
|
1374 |
if (level >= minBlockLevelTagPriority) |
|
1375 |
m_blocksInStack++; |
|
1376 |
m_didRefCurrent = false; |
|
1377 |
if (tagName == pTag) |
|
1378 |
m_hasPElementInScope = InScope; |
|
1379 |
else if (isScopingTag(tagName)) |
|
1380 |
m_hasPElementInScope = NotInScope; |
|
1381 |
} |
|
1382 |
||
1383 |
void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors) |
|
1384 |
{ |
|
1385 |
HTMLStackElem* elem = m_blockStack; |
|
1386 |
||
1387 |
if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) |
|
1388 |
return; |
|
1389 |
||
1390 |
int maxLevel = 0; |
|
1391 |
||
1392 |
while (elem && (elem->tagName != tagName)) { |
|
1393 |
if (maxLevel < elem->level) |
|
1394 |
maxLevel = elem->level; |
|
1395 |
elem = elem->next; |
|
1396 |
} |
|
1397 |
||
1398 |
if (!elem) { |
|
1399 |
if (reportErrors) |
|
1400 |
reportError(StrayCloseTagError, &tagName, 0, true); |
|
1401 |
return; |
|
1402 |
} |
|
1403 |
||
1404 |
if (maxLevel > elem->level) { |
|
1405 |
// We didn't match because the tag is in a different scope, e.g., |
|
1406 |
// <b><p>Foo</b>. Try to correct the problem. |
|
1407 |
if (!isResidualStyleTag(tagName)) |
|
1408 |
return; |
|
1409 |
return handleResidualStyleCloseTagAcrossBlocks(elem); |
|
1410 |
} |
|
1411 |
||
1412 |
bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); |
|
1413 |
HTMLStackElem* residualStyleStack = 0; |
|
1414 |
Node* malformedTableParent = 0; |
|
1415 |
||
1416 |
elem = m_blockStack; |
|
1417 |
unsigned stackDepth = 1; |
|
1418 |
unsigned redundantStyleCount = 0; |
|
1419 |
while (elem) { |
|
1420 |
if (elem->tagName == tagName) { |
|
1421 |
int strayTable = m_inStrayTableContent; |
|
1422 |
popOneBlock(); |
|
1423 |
elem = 0; |
|
1424 |
||
1425 |
// This element was the root of some malformed content just inside an implicit or |
|
1426 |
// explicit <tbody> or <tr>. |
|
1427 |
// If we end up needing to reopen residual style tags, the root of the reopened chain |
|
1428 |
// must also know that it is the root of malformed content inside a <tbody>/<tr>. |
|
1429 |
if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { |
|
1430 |
Node* curr = m_current; |
|
1431 |
while (curr && !curr->hasTagName(tableTag)) |
|
1432 |
curr = curr->parentNode(); |
|
1433 |
malformedTableParent = curr ? curr->parentNode() : 0; |
|
1434 |
} |
|
1435 |
} |
|
1436 |
else { |
|
1437 |
if (m_currentFormElement && elem->tagName == formTag) |
|
1438 |
// A <form> is being closed prematurely (and this is |
|
1439 |
// malformed HTML). Set an attribute on the form to clear out its |
|
1440 |
// bottom margin. |
|
1441 |
m_currentFormElement->setMalformed(true); |
|
1442 |
||
1443 |
// Schedule this tag for reopening |
|
1444 |
// after we complete the close of this entire block. |
|
1445 |
if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { |
|
1446 |
// We've overloaded the use of stack elements and are just reusing the |
|
1447 |
// struct with a slightly different meaning to the variables. Instead of chaining |
|
1448 |
// from innermost to outermost, we build up a list of all the tags we need to reopen |
|
1449 |
// from the outermost to the innermost, i.e., residualStyleStack will end up pointing |
|
1450 |
// to the outermost tag we need to reopen. |
|
1451 |
// We also set elem->node to be the actual element that corresponds to the ID stored in |
|
1452 |
// elem->id rather than the node that you should pop to when the element gets pulled off |
|
1453 |
// the stack. |
|
1454 |
if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) |
|
1455 |
redundantStyleCount++; |
|
1456 |
else |
|
1457 |
redundantStyleCount = 0; |
|
1458 |
||
1459 |
if (redundantStyleCount < cMaxRedundantTagDepth) |
|
1460 |
moveOneBlockToStack(residualStyleStack); |
|
1461 |
else |
|
1462 |
popOneBlock(); |
|
1463 |
} else |
|
1464 |
popOneBlock(); |
|
1465 |
elem = m_blockStack; |
|
1466 |
} |
|
1467 |
} |
|
1468 |
||
1469 |
reopenResidualStyleTags(residualStyleStack, malformedTableParent); |
|
1470 |
} |
|
1471 |
||
1472 |
// Unlinks the top entry of m_blockStack and restores the parser state recorded in it
// (current node, ref flag, stray-table depth, <p>-in-scope cache).
// The unlinked entry is returned, not freed: the caller decides whether to delete or reuse it.
// Callers must ensure m_blockStack is non-null.
inline HTMLStackElem* HTMLParser::popOneBlockCommon()
{
    HTMLStackElem* top = m_blockStack;

    // We are leaving m_current, so let it know all of its children have been parsed.
    // Form elements restore their state at this point, and a few elements
    // (<applet>, <object>) need to know when their <param> children are available.
    if (m_current && top->node != m_current)
        m_current->finishParsingChildren();

    // Only entries at or above the block-level priority are counted in m_blocksInStack.
    if (m_blockStack->level >= minBlockLevelTagPriority) {
        ASSERT(m_blocksInStack > 0);
        --m_blocksInStack;
    }

    // Unlink the entry and make the node stored in it the current node again.
    m_blockStack = top->next;
    m_current = top->node;
    m_didRefCurrent = top->didRefNode;

    if (top->strayTableContent)
        --m_inStrayTableContent;

    // Keep the cached "is there a <p> in scope" answer in sync with the stack.
    if (top->tagName == pTag)
        m_hasPElementInScope = NotInScope;
    else if (isScopingTag(top->tagName))
        m_hasPElementInScope = Unknown;

    return top;
}
|
1499 |
||
1500 |
void HTMLParser::popOneBlock() |
|
1501 |
{ |
|
1502 |
// Store the current node before popOneBlockCommon overwrites it. |
|
1503 |
Node* lastCurrent = m_current; |
|
1504 |
bool didRefLastCurrent = m_didRefCurrent; |
|
1505 |
||
1506 |
delete popOneBlockCommon(); |
|
1507 |
||
1508 |
if (didRefLastCurrent) |
|
1509 |
lastCurrent->deref(); |
|
1510 |
} |
|
1511 |
||
1512 |
// Pops the top block, but instead of freeing its stack entry, pushes that entry onto
// the list rooted at |head|. The entry is re-purposed to hold the node that was
// current before the pop rather than the node to pop back to.
void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
{
    // popOneBlockCommon() overwrites m_current/m_didRefCurrent, so capture them up front.
    Node* nodeBeingLeft = m_current;
    bool nodeBeingLeftWasRefed = m_didRefCurrent;

    // Unlike popOneBlock(), the node is not deref'ed here: the pointer (and the
    // reference, if one was taken) moves into the recycled stack entry below.
    HTMLStackElem* recycled = popOneBlockCommon();

    // The entry's previous node needs no deref either; popOneBlockCommon() already
    // handed it over to m_current/m_didRefCurrent.
    recycled->node = nodeBeingLeft;
    recycled->didRefNode = nodeBeingLeftWasRefed;

    // Link the entry in as the new head of the caller's list.
    recycled->next = head;
    head = recycled;
}
|
1533 |
||
1534 |
void HTMLParser::checkIfHasPElementInScope() |
|
1535 |
{ |
|
1536 |
m_hasPElementInScope = NotInScope; |
|
1537 |
HTMLStackElem* elem = m_blockStack; |
|
1538 |
while (elem) { |
|
1539 |
const AtomicString& tagName = elem->tagName; |
|
1540 |
if (tagName == pTag) { |
|
1541 |
m_hasPElementInScope = InScope; |
|
1542 |
return; |
|
1543 |
} else if (isScopingTag(tagName)) |
|
1544 |
return; |
|
1545 |
elem = elem->next; |
|
1546 |
} |
|
1547 |
} |
|
1548 |
||
1549 |
void HTMLParser::popInlineBlocks() |
|
1550 |
{ |
|
1551 |
while (m_blockStack && isInline(m_current)) |
|
1552 |
popOneBlock(); |
|
1553 |
} |
|
1554 |
||
1555 |
void HTMLParser::freeBlock() |
|
1556 |
{ |
|
1557 |
while (m_blockStack) |
|
1558 |
popOneBlock(); |
|
1559 |
ASSERT(!m_blocksInStack); |
|
1560 |
} |
|
1561 |
||
1562 |
void HTMLParser::createHead() |
|
1563 |
{ |
|
1564 |
if (m_head) |
|
1565 |
return; |
|
1566 |
||
1567 |
if (!m_document->documentElement()) { |
|
1568 |
insertNode(new HTMLHtmlElement(htmlTag, m_document)); |
|
1569 |
ASSERT(m_document->documentElement()); |
|
1570 |
} |
|
1571 |
||
1572 |
m_head = new HTMLHeadElement(headTag, m_document); |
|
1573 |
HTMLElement* body = m_document->body(); |
|
1574 |
ExceptionCode ec = 0; |
|
1575 |
m_document->documentElement()->insertBefore(m_head.get(), body, ec); |
|
1576 |
if (ec) |
|
1577 |
m_head = 0; |
|
1578 |
||
1579 |
// If the body does not exist yet, then the <head> should be pushed as the current block. |
|
1580 |
if (m_head && !body) { |
|
1581 |
pushBlock(m_head->localName(), m_head->tagPriority()); |
|
1582 |
setCurrent(m_head.get()); |
|
1583 |
} |
|
1584 |
} |
|
1585 |
||
1586 |
// Builds the replacement subtree for an <isindex> token:
//   <div> <hr> prompt-text <isindex-element> <hr> </div>
// The token's attributes are handed to the isindex element, and the token's
// attribute map pointer is cleared afterwards. Returns the wrapping <div>.
PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
{
    RefPtr<Node> wrapper = new HTMLDivElement(divTag, m_document);

    NamedMappedAttrMap* tokenAttrs = t->attrs.get();

    RefPtr<HTMLIsIndexElement> isIndexElement = new HTMLIsIndexElement(isindexTag, m_document, m_currentFormElement.get());
    isIndexElement->setAttributeMap(tokenAttrs);
    isIndexElement->setAttribute(typeAttr, "khtml_isindex");

    // Use the default introduction text unless the token supplies a prompt attribute.
    String promptText = searchableIndexIntroduction();
    if (tokenAttrs) {
        Attribute* promptAttribute = tokenAttrs->getAttributeItem(promptAttr);
        if (promptAttribute)
            promptText = promptAttribute->value().string() + " ";
        // The attributes now belong to the isindex element; detach them from the token.
        t->attrs = 0;
    }

    wrapper->addChild(new HTMLHRElement(hrTag, m_document));
    wrapper->addChild(Text::create(m_document, promptText));
    wrapper->addChild(isIndexElement.release());
    wrapper->addChild(new HTMLHRElement(hrTag, m_document));

    return wrapper.release();
}
|
1610 |
||
1611 |
void HTMLParser::startBody() |
|
1612 |
{ |
|
1613 |
if (m_inBody) |
|
1614 |
return; |
|
1615 |
||
1616 |
m_inBody = true; |
|
1617 |
||
1618 |
if (m_isindexElement) { |
|
1619 |
insertNode(m_isindexElement.get(), true /* don't descend into this node */); |
|
1620 |
m_isindexElement = 0; |
|
1621 |
} |
|
1622 |
} |
|
1623 |
||
1624 |
void HTMLParser::finished() |
|
1625 |
{ |
|
1626 |
// In the case of a completely empty document, here's the place to create the HTML element. |
|
1627 |
if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) |
|
1628 |
insertNode(new HTMLHtmlElement(htmlTag, m_document)); |
|
1629 |
||
1630 |
// This ensures that "current" is not left pointing to a node when the document is destroyed. |
|
1631 |
freeBlock(); |
|
1632 |
setCurrent(0); |
|
1633 |
||
1634 |
// Warning, this may delete the tokenizer and parser, so don't try to do anything else after this. |
|
1635 |
if (!m_isParsingFragment) |
|
1636 |
m_document->finishedParsing(); |
|
1637 |
} |
|
1638 |
||
1639 |
void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) |
|
1640 |
{ |
|
1641 |
Frame* frame = m_document->frame(); |
|
1642 |
if (!frame) |
|
1643 |
return; |
|
1644 |
||
1645 |
HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(m_document->tokenizer()); |
|
1646 |
int lineNumber = htmlTokenizer->lineNumber() + 1; |
|
1647 |
||
1648 |
AtomicString tag1; |
|
1649 |
AtomicString tag2; |
|
1650 |
if (tagName1) { |
|
1651 |
if (*tagName1 == "#text") |
|
1652 |
tag1 = "Text"; |
|
1653 |
else if (*tagName1 == "#comment") |
|
1654 |
tag1 = "<!-- comment -->"; |
|
1655 |
else |
|
1656 |
tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; |
|
1657 |
} |
|
1658 |
if (tagName2) { |
|
1659 |
if (*tagName2 == "#text") |
|
1660 |
tag2 = "Text"; |
|
1661 |
else if (*tagName2 == "#comment") |
|
1662 |
tag2 = "<!-- comment -->"; |
|
1663 |
else |
|
1664 |
tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; |
|
1665 |
} |
|
1666 |
||
1667 |
const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); |
|
1668 |
if (!errorMsg) |
|
1669 |
return; |
|
1670 |
||
1671 |
String message; |
|
1672 |
if (htmlTokenizer->processingContentWrittenByScript()) |
|
1673 |
message += htmlParserDocumentWriteMessage(); |
|
1674 |
message += errorMsg; |
|
1675 |
message.replace("%tag1", tag1); |
|
1676 |
message.replace("%tag2", tag2); |
|
1677 |
||
1678 |
frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, |
|
1679 |
isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, |
|
1680 |
message, lineNumber, m_document->url().string()); |
|
1681 |
} |
|
1682 |
||
1683 |
#ifdef BUILDING_ON_LEOPARD |
|
1684 |
// Leopard build: returns false only when the document's page settings report
// needsLeopardMailQuirks(); returns true when there is no page or no settings.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    if (!document->page())
        return true;

    Settings* settings = document->page()->settings();
    return !settings || !settings->needsLeopardMailQuirks();
}
|
1691 |
#elif defined(BUILDING_ON_TIGER) |
|
1692 |
// Tiger build: returns false only when the document's page settings report
// needsTigerMailQuirks(); returns true when there is no page or no settings.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    if (!document->page())
        return true;

    Settings* settings = document->page()->settings();
    return !settings || !settings->needsTigerMailQuirks();
}
|
1699 |
#endif |
|
1700 |
||
1701 |
} |