|
1 /* |
|
2 Copyright (C) 1997 Martin Jones (mjones@kde.org) |
|
3 (C) 1997 Torben Weis (weis@kde.org) |
|
4 (C) 1999,2001 Lars Knoll (knoll@kde.org) |
|
5 (C) 2000,2001 Dirk Mueller (mueller@kde.org) |
|
6 Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. |
|
7 |
|
8 This library is free software; you can redistribute it and/or |
|
9 modify it under the terms of the GNU Library General Public |
|
10 License as published by the Free Software Foundation; either |
|
11 version 2 of the License, or (at your option) any later version. |
|
12 |
|
13 This library is distributed in the hope that it will be useful, |
|
14 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
16 Library General Public License for more details. |
|
17 |
|
18 You should have received a copy of the GNU Library General Public License |
|
19 along with this library; see the file COPYING.LIB. If not, write to |
|
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
21 Boston, MA 02110-1301, USA. |
|
22 */ |
|
23 |
|
24 #include "config.h" |
|
25 #include "HTMLParser.h" |
|
26 |
|
27 #include "CharacterNames.h" |
|
28 #include "CSSPropertyNames.h" |
|
29 #include "CSSValueKeywords.h" |
|
30 #include "Comment.h" |
|
31 #include "DocumentFragment.h" |
|
32 #include "Frame.h" |
|
33 #include "HTMLBodyElement.h" |
|
34 #include "HTMLDocument.h" |
|
35 #include "HTMLDivElement.h" |
|
36 #include "HTMLDListElement.h" |
|
37 #include "HTMLElementFactory.h" |
|
38 #include "HTMLFormElement.h" |
|
39 #include "HTMLHeadElement.h" |
|
40 #include "HTMLHRElement.h" |
|
41 #include "HTMLHtmlElement.h" |
|
42 #include "HTMLIsIndexElement.h" |
|
43 #include "HTMLMapElement.h" |
|
44 #include "HTMLNames.h" |
|
45 #include "HTMLTableCellElement.h" |
|
46 #include "HTMLTableRowElement.h" |
|
47 #include "HTMLTableSectionElement.h" |
|
48 #include "HTMLTokenizer.h" |
|
49 #include "LocalizedStrings.h" |
|
50 #include "Page.h" |
|
51 #include "Settings.h" |
|
52 #include "Text.h" |
|
53 |
|
54 namespace WebCore { |
|
55 |
|
56 using namespace HTMLNames; |
|
57 |
|
// Upper bound on how many consecutive identical entries at the top of the
// block stack allowNestedRedundantTag() tolerates before refusing another one.
static const unsigned int cMaxRedundantTagDepth = 20;

// NOTE(review): not referenced in this part of the file; presumably bounds the
// residual-style fix-up depth -- confirm against its users.
static const unsigned int cResidualStyleMaxDepth = 200;
|
60 |
|
61 struct HTMLStackElem : Noncopyable { |
|
62 HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) |
|
63 : tagName(t) |
|
64 , level(lvl) |
|
65 , strayTableContent(false) |
|
66 , node(n) |
|
67 , didRefNode(r) |
|
68 , next(nx) |
|
69 { |
|
70 } |
|
71 |
|
72 void derefNode() |
|
73 { |
|
74 if (didRefNode) |
|
75 node->deref(); |
|
76 } |
|
77 |
|
78 AtomicString tagName; |
|
79 int level; |
|
80 bool strayTableContent; |
|
81 Node* node; |
|
82 bool didRefNode; |
|
83 HTMLStackElem* next; |
|
84 }; |
|
85 |
|
/**
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is straightforward.
 *
 * Unfortunately, we have to handle many HTML documents that are not well-formed,
 * so the parser has to be tolerant of errors.
 *
 * We have to take care of at least the following error conditions:
 *
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one that forbids
 *    the element, and add it afterwards.
 *
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
 *
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 *
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element, or ignore the tag.
 *
 */
|
111 |
|
112 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors) |
|
113 : document(doc) |
|
114 , current(doc) |
|
115 , didRefCurrent(false) |
|
116 , blockStack(0) |
|
117 , head(0) |
|
118 , inBody(false) |
|
119 , haveContent(false) |
|
120 , haveFrameSet(false) |
|
121 , m_isParsingFragment(false) |
|
122 , m_reportErrors(reportErrors) |
|
123 , m_handlingResidualStyleAcrossBlocks(false) |
|
124 , inStrayTableContent(0) |
|
125 { |
|
126 } |
|
127 |
|
128 HTMLParser::HTMLParser(DocumentFragment* frag) |
|
129 : document(frag->document()) |
|
130 , current(frag) |
|
131 , didRefCurrent(true) |
|
132 , blockStack(0) |
|
133 , head(0) |
|
134 , inBody(true) |
|
135 , haveContent(false) |
|
136 , haveFrameSet(false) |
|
137 , m_isParsingFragment(true) |
|
138 , m_reportErrors(false) |
|
139 , m_handlingResidualStyleAcrossBlocks(false) |
|
140 , inStrayTableContent(0) |
|
141 { |
|
142 if (frag) |
|
143 frag->ref(); |
|
144 } |
|
145 |
|
146 HTMLParser::~HTMLParser() |
|
147 { |
|
148 freeBlock(); |
|
149 if (didRefCurrent) |
|
150 current->deref(); |
|
151 } |
|
152 |
|
153 void HTMLParser::reset() |
|
154 { |
|
155 ASSERT(!m_isParsingFragment); |
|
156 |
|
157 setCurrent(document); |
|
158 |
|
159 freeBlock(); |
|
160 |
|
161 inBody = false; |
|
162 haveFrameSet = false; |
|
163 haveContent = false; |
|
164 inStrayTableContent = 0; |
|
165 |
|
166 m_currentFormElement = 0; |
|
167 m_currentMapElement = 0; |
|
168 head = 0; |
|
169 m_isindexElement = 0; |
|
170 |
|
171 m_skipModeTag = nullAtom; |
|
172 } |
|
173 |
|
174 void HTMLParser::setCurrent(Node* newCurrent) |
|
175 { |
|
176 bool didRefNewCurrent = newCurrent && newCurrent != document; |
|
177 if (didRefNewCurrent) |
|
178 newCurrent->ref(); |
|
179 if (didRefCurrent) |
|
180 current->deref(); |
|
181 current = newCurrent; |
|
182 didRefCurrent = didRefNewCurrent; |
|
183 } |
|
184 |
|
185 PassRefPtr<Node> HTMLParser::parseToken(Token* t) |
|
186 { |
|
187 if (!m_skipModeTag.isNull()) { |
|
188 if (!t->beginTag && t->tagName == m_skipModeTag) |
|
189 // Found the end tag for the current skip mode, so we're done skipping. |
|
190 m_skipModeTag = nullAtom; |
|
191 else if (current->localName() == t->tagName) |
|
192 // Do not skip </iframe>. |
|
193 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? |
|
194 ; |
|
195 else |
|
196 return 0; |
|
197 } |
|
198 |
|
199 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>. |
|
200 if (t->isCloseTag(brTag) && document->inCompatMode()) { |
|
201 reportError(MalformedBRError); |
|
202 t->beginTag = true; |
|
203 } |
|
204 |
|
205 if (!t->beginTag) { |
|
206 processCloseTag(t); |
|
207 return 0; |
|
208 } |
|
209 |
|
210 // ignore spaces, if we're not inside a paragraph or other inline code |
|
211 if (t->tagName == textAtom && t->text) { |
|
212 if (inBody && !skipMode() && current->localName() != styleTag && current->localName() != titleTag && |
|
213 current->localName() != scriptTag && !t->text->containsOnlyWhitespace()) |
|
214 haveContent = true; |
|
215 |
|
216 RefPtr<Node> n; |
|
217 String text = t->text.get(); |
|
218 unsigned charsLeft = text.length(); |
|
219 while (charsLeft) { |
|
220 // split large blocks of text to nodes of manageable size |
|
221 n = Text::createWithLengthLimit(document, text, charsLeft); |
|
222 if (!insertNode(n.get(), t->flat)) |
|
223 return 0; |
|
224 } |
|
225 return n; |
|
226 } |
|
227 |
|
228 RefPtr<Node> n = getNode(t); |
|
229 // just to be sure, and to catch currently unimplemented stuff |
|
230 if (!n) |
|
231 return 0; |
|
232 |
|
233 // set attributes |
|
234 if (n->isHTMLElement()) { |
|
235 HTMLElement* e = static_cast<HTMLElement*>(n.get()); |
|
236 e->setAttributeMap(t->attrs.get()); |
|
237 |
|
238 // take care of optional close tags |
|
239 if (e->endTagRequirement() == TagStatusOptional) |
|
240 popBlock(t->tagName); |
|
241 |
|
242 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing |
|
243 // syntax was used, report an error. |
|
244 if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { |
|
245 if (t->tagName == scriptTag) |
|
246 reportError(IncorrectXMLCloseScriptWarning); |
|
247 else |
|
248 reportError(IncorrectXMLSelfCloseError, &t->tagName); |
|
249 } |
|
250 } |
|
251 |
|
252 if (!insertNode(n.get(), t->flat)) { |
|
253 // we couldn't insert the node |
|
254 |
|
255 if (n->isElementNode()) { |
|
256 Element* e = static_cast<Element*>(n.get()); |
|
257 e->setAttributeMap(0); |
|
258 } |
|
259 |
|
260 if (m_currentMapElement == n) |
|
261 m_currentMapElement = 0; |
|
262 |
|
263 if (m_currentFormElement == n) |
|
264 m_currentFormElement = 0; |
|
265 |
|
266 if (head == n) |
|
267 head = 0; |
|
268 |
|
269 return 0; |
|
270 } |
|
271 return n; |
|
272 } |
|
273 |
|
274 static bool isTableSection(Node* n) |
|
275 { |
|
276 return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); |
|
277 } |
|
278 |
|
279 static bool isTablePart(Node* n) |
|
280 { |
|
281 return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || |
|
282 isTableSection(n); |
|
283 } |
|
284 |
|
285 static bool isTableRelated(Node* n) |
|
286 { |
|
287 return n->hasTagName(tableTag) || isTablePart(n); |
|
288 } |
|
289 |
|
290 bool HTMLParser::insertNode(Node* n, bool flat) |
|
291 { |
|
292 RefPtr<Node> protectNode(n); |
|
293 |
|
294 const AtomicString& localName = n->localName(); |
|
295 int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0; |
|
296 |
|
297 // <table> is never allowed inside stray table content. Always pop out of the stray table content |
|
298 // and close up the first table, and then start the second table as a sibling. |
|
299 if (inStrayTableContent && localName == tableTag) |
|
300 popBlock(tableTag); |
|
301 |
|
302 // let's be stupid and just try to insert it. |
|
303 // this should work if the document is well-formed |
|
304 Node* newNode = current->addChild(n); |
|
305 if (!newNode) |
|
306 return handleError(n, flat, localName, tagPriority); // Try to handle the error. |
|
307 |
|
308 // don't push elements without end tags (e.g., <img>) on the stack |
|
309 bool parentAttached = current->attached(); |
|
310 if (tagPriority > 0 && !flat) { |
|
311 if (newNode == current) { |
|
312 // This case should only be hit when a demoted <form> is placed inside a table. |
|
313 ASSERT(localName == formTag); |
|
314 reportError(FormInsideTablePartError, ¤t->localName()); |
|
315 } else { |
|
316 // The pushBlock function transfers ownership of current to the block stack |
|
317 // so we're guaranteed that didRefCurrent is false. The code below is an |
|
318 // optimized version of setCurrent that takes advantage of that fact and also |
|
319 // assumes that newNode is neither 0 nor a pointer to the document. |
|
320 pushBlock(localName, tagPriority); |
|
321 ASSERT(!didRefCurrent); |
|
322 newNode->ref(); |
|
323 current = newNode; |
|
324 didRefCurrent = true; |
|
325 } |
|
326 if (parentAttached && !n->attached() && !m_isParsingFragment) |
|
327 n->attach(); |
|
328 } else { |
|
329 if (parentAttached && !n->attached() && !m_isParsingFragment) |
|
330 n->attach(); |
|
331 n->finishedParsing(); |
|
332 } |
|
333 |
|
334 return true; |
|
335 } |
|
336 |
|
337 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) |
|
338 { |
|
339 // Error handling code. This is just ad hoc handling of specific parent/child combinations. |
|
340 HTMLElement* e; |
|
341 bool handled = false; |
|
342 |
|
343 // 1. Check out the element's tag name to decide how to deal with errors. |
|
344 if (n->isHTMLElement()) { |
|
345 HTMLElement* h = static_cast<HTMLElement*>(n); |
|
346 if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { |
|
347 if (inStrayTableContent && !isTableRelated(current)) { |
|
348 reportError(MisplacedTablePartError, &localName, ¤t->localName()); |
|
349 // pop out to the nearest enclosing table-related tag. |
|
350 while (blockStack && !isTableRelated(current)) |
|
351 popOneBlock(); |
|
352 return insertNode(n); |
|
353 } |
|
354 } else if (h->hasLocalName(headTag)) { |
|
355 if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) { |
|
356 reportError(MisplacedHeadError); |
|
357 return false; |
|
358 } |
|
359 } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { |
|
360 bool createdHead = false; |
|
361 if (!head) { |
|
362 createHead(); |
|
363 createdHead = true; |
|
364 } |
|
365 if (head) { |
|
366 if (!createdHead) |
|
367 reportError(MisplacedHeadContentError, &localName, ¤t->localName()); |
|
368 if (head->addChild(n)) { |
|
369 if (!n->attached() && !m_isParsingFragment) |
|
370 n->attach(); |
|
371 return true; |
|
372 } else |
|
373 return false; |
|
374 } |
|
375 } else if (h->hasLocalName(htmlTag)) { |
|
376 if (!current->isDocumentNode() ) { |
|
377 if (document->documentElement()->hasTagName(htmlTag)) { |
|
378 reportError(RedundantHTMLBodyError, &localName); |
|
379 // we have another <HTML> element.... apply attributes to existing one |
|
380 // make sure we don't overwrite already existing attributes |
|
381 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true); |
|
382 Element* existingHTML = static_cast<Element*>(document->documentElement()); |
|
383 NamedAttrMap* bmap = existingHTML->attributes(false); |
|
384 for (unsigned l = 0; map && l < map->length(); ++l) { |
|
385 Attribute* it = map->attributeItem(l); |
|
386 if (!bmap->getAttributeItem(it->name())) |
|
387 existingHTML->setAttribute(it->name(), it->value()); |
|
388 } |
|
389 } |
|
390 return false; |
|
391 } |
|
392 } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) { |
|
393 bool createdHead = false; |
|
394 if (!head) { |
|
395 createHead(); |
|
396 createdHead = true; |
|
397 } |
|
398 if (head) { |
|
399 Node* newNode = head->addChild(n); |
|
400 if (!newNode) { |
|
401 setSkipMode(h->tagQName()); |
|
402 return false; |
|
403 } |
|
404 |
|
405 if (!createdHead) |
|
406 reportError(MisplacedHeadContentError, &localName, ¤t->localName()); |
|
407 |
|
408 pushBlock(localName, tagPriority); |
|
409 setCurrent(newNode); |
|
410 if (!n->attached() && !m_isParsingFragment) |
|
411 n->attach(); |
|
412 return true; |
|
413 } |
|
414 if (inBody) { |
|
415 setSkipMode(h->tagQName()); |
|
416 return false; |
|
417 } |
|
418 } else if (h->hasLocalName(bodyTag)) { |
|
419 if (inBody && document->body()) { |
|
420 // we have another <BODY> element.... apply attributes to existing one |
|
421 // make sure we don't overwrite already existing attributes |
|
422 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> |
|
423 reportError(RedundantHTMLBodyError, &localName); |
|
424 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true); |
|
425 Element* existingBody = document->body(); |
|
426 NamedAttrMap* bmap = existingBody->attributes(false); |
|
427 for (unsigned l = 0; map && l < map->length(); ++l) { |
|
428 Attribute* it = map->attributeItem(l); |
|
429 if (!bmap->getAttributeItem(it->name())) |
|
430 existingBody->setAttribute(it->name(), it->value()); |
|
431 } |
|
432 return false; |
|
433 } |
|
434 else if (!current->isDocumentNode()) |
|
435 return false; |
|
436 } else if (h->hasLocalName(areaTag)) { |
|
437 if (m_currentMapElement) { |
|
438 reportError(MisplacedAreaError, ¤t->localName()); |
|
439 m_currentMapElement->addChild(n); |
|
440 if (!n->attached() && !m_isParsingFragment) |
|
441 n->attach(); |
|
442 handled = true; |
|
443 return true; |
|
444 } |
|
445 return false; |
|
446 } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { |
|
447 if (isTableRelated(current)) { |
|
448 while (blockStack && isTablePart(current)) |
|
449 popOneBlock(); |
|
450 return insertNode(n); |
|
451 } |
|
452 } |
|
453 } else if (n->isCommentNode() && !head) |
|
454 return false; |
|
455 |
|
456 // 2. Next we examine our currently active element to do some further error handling. |
|
457 if (current->isHTMLElement()) { |
|
458 HTMLElement* h = static_cast<HTMLElement*>(current); |
|
459 const AtomicString& currentTagName = current->localName(); |
|
460 if (h->hasLocalName(htmlTag)) { |
|
461 HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0; |
|
462 if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || |
|
463 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || |
|
464 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || |
|
465 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || |
|
466 elt->hasLocalName(baseTag) |
|
467 |
|
468 #if PLATFORM(SYMBIAN) |
|
469 || elt->hasLocalName(bgsoundTag) |
|
470 #endif |
|
471 )) { |
|
472 if (!head) { |
|
473 head = new HTMLHeadElement(document); |
|
474 e = head; |
|
475 insertNode(e); |
|
476 handled = true; |
|
477 } |
|
478 } else { |
|
479 if (n->isTextNode()) { |
|
480 Text* t = static_cast<Text*>(n); |
|
481 if (t->containsOnlyWhitespace()) |
|
482 return false; |
|
483 } |
|
484 if (!haveFrameSet) { |
|
485 e = new HTMLBodyElement(document); |
|
486 startBody(); |
|
487 insertNode(e); |
|
488 handled = true; |
|
489 } else |
|
490 reportError(MisplacedFramesetContentError, &localName); |
|
491 } |
|
492 } else if (h->hasLocalName(headTag)) { |
|
493 if (n->hasTagName(htmlTag)) |
|
494 return false; |
|
495 else { |
|
496 // This means the body starts here... |
|
497 if (!haveFrameSet) { |
|
498 popBlock(currentTagName); |
|
499 e = new HTMLBodyElement(document); |
|
500 startBody(); |
|
501 insertNode(e); |
|
502 handled = true; |
|
503 } else |
|
504 reportError(MisplacedFramesetContentError, &localName); |
|
505 } |
|
506 } else if (h->hasLocalName(addressTag) || h->hasLocalName(dlTag) || h->hasLocalName(dtTag) |
|
507 || h->hasLocalName(fontTag) || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { |
|
508 reportError(MisplacedContentRetryError, &localName, ¤tTagName); |
|
509 popBlock(currentTagName); |
|
510 handled = true; |
|
511 } else if (h->hasLocalName(captionTag)) { |
|
512 // Illegal content in a caption. Close the caption and try again. |
|
513 reportError(MisplacedCaptionContentError, &localName); |
|
514 popBlock(currentTagName); |
|
515 if (isTablePart(n)) |
|
516 return insertNode(n, flat); |
|
517 } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { |
|
518 if (n->hasTagName(tableTag)) { |
|
519 reportError(MisplacedTableError, ¤tTagName); |
|
520 if (m_isParsingFragment && !h->hasLocalName(tableTag)) |
|
521 // fragment may contain table parts without <table> ancestor, pop them one by one |
|
522 popBlock(h->localName()); |
|
523 popBlock(localName); // end the table |
|
524 handled = true; // ...and start a new one |
|
525 } else { |
|
526 ExceptionCode ec = 0; |
|
527 Node* node = current; |
|
528 Node* parent = node->parentNode(); |
|
529 // A script may have removed the current node's parent from the DOM |
|
530 // http://bugs.webkit.org/show_bug.cgi?id=7137 |
|
531 // FIXME: we should do real recovery here and re-parent with the correct node. |
|
532 if (!parent) |
|
533 return false; |
|
534 Node* grandparent = parent->parentNode(); |
|
535 |
|
536 if (n->isTextNode() || |
|
537 (h->hasLocalName(trTag) && |
|
538 isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || |
|
539 ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && |
|
540 !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && |
|
541 parent->hasTagName(tableTag))) { |
|
542 node = (node->hasTagName(tableTag)) ? node : |
|
543 ((node->hasTagName(trTag)) ? grandparent : parent); |
|
544 // This can happen with fragments |
|
545 if (!node) |
|
546 return false; |
|
547 Node* parent = node->parentNode(); |
|
548 if (!parent) |
|
549 return false; |
|
550 parent->insertBefore(n, node, ec); |
|
551 if (!ec) { |
|
552 reportError(StrayTableContentError, &localName, ¤tTagName); |
|
553 if (n->isHTMLElement() && tagPriority > 0 && |
|
554 !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden) |
|
555 { |
|
556 pushBlock(localName, tagPriority); |
|
557 setCurrent(n); |
|
558 inStrayTableContent++; |
|
559 blockStack->strayTableContent = true; |
|
560 } |
|
561 return true; |
|
562 } |
|
563 } |
|
564 |
|
565 if (!ec) { |
|
566 if (current->hasTagName(trTag)) { |
|
567 reportError(TablePartRequiredError, &localName, &tdTag.localName()); |
|
568 e = new HTMLTableCellElement(tdTag, document); |
|
569 } else if (current->hasTagName(tableTag)) { |
|
570 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>, |
|
571 // and it isn't really a parse error per se. |
|
572 e = new HTMLTableSectionElement(tbodyTag, document); |
|
573 } else { |
|
574 reportError(TablePartRequiredError, &localName, &trTag.localName()); |
|
575 e = new HTMLTableRowElement(document); |
|
576 } |
|
577 |
|
578 insertNode(e); |
|
579 handled = true; |
|
580 } |
|
581 } |
|
582 } else if (h->hasLocalName(objectTag)) { |
|
583 reportError(MisplacedContentRetryError, &localName, ¤tTagName); |
|
584 popBlock(objectTag); |
|
585 handled = true; |
|
586 } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) { |
|
587 if (!isInline(n)) { |
|
588 popBlock(currentTagName); |
|
589 handled = true; |
|
590 } |
|
591 } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { |
|
592 if (localName == optgroupTag) { |
|
593 popBlock(currentTagName); |
|
594 handled = true; |
|
595 } else if (localName == selectTag) { |
|
596 // IE treats a nested select as </select>. Let's do the same |
|
597 popBlock(localName); |
|
598 } |
|
599 } else if (h->hasLocalName(colgroupTag)) { |
|
600 popBlock(currentTagName); |
|
601 handled = true; |
|
602 } else if (!h->hasLocalName(bodyTag)) { |
|
603 if (isInline(current)) { |
|
604 popInlineBlocks(); |
|
605 handled = true; |
|
606 } |
|
607 } |
|
608 } else if (current->isDocumentNode()) { |
|
609 if (n->isTextNode()) { |
|
610 Text* t = static_cast<Text*>(n); |
|
611 if (t->containsOnlyWhitespace()) |
|
612 return false; |
|
613 } |
|
614 |
|
615 if (!document->documentElement()) { |
|
616 e = new HTMLHtmlElement(document); |
|
617 insertNode(e); |
|
618 handled = true; |
|
619 } |
|
620 } |
|
621 |
|
622 // 3. If we couldn't handle the error, just return false and attempt to error-correct again. |
|
623 if (!handled) { |
|
624 reportError(IgnoredContentError, &localName, ¤t->localName()); |
|
625 return false; |
|
626 } |
|
627 return insertNode(n); |
|
628 } |
|
629 |
|
630 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&); |
|
631 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap; |
|
632 |
|
633 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
634 { |
|
635 result = new Text(document, t->text.get()); |
|
636 return false; |
|
637 } |
|
638 |
|
639 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
640 { |
|
641 result = new Comment(document, t->text.get()); |
|
642 return false; |
|
643 } |
|
644 |
|
645 bool HTMLParser::headCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
646 { |
|
647 if (!head || current->localName() == htmlTag) { |
|
648 head = new HTMLHeadElement(document); |
|
649 result = head; |
|
650 } else |
|
651 reportError(MisplacedHeadError); |
|
652 return false; |
|
653 } |
|
654 |
|
655 bool HTMLParser::bodyCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
656 { |
|
657 // body no longer allowed if we have a frameset |
|
658 if (haveFrameSet) |
|
659 return false; |
|
660 popBlock(headTag); |
|
661 startBody(); |
|
662 return true; |
|
663 } |
|
664 |
|
665 bool HTMLParser::framesetCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
666 { |
|
667 popBlock(headTag); |
|
668 if (inBody && !haveFrameSet && !haveContent) { |
|
669 popBlock(bodyTag); |
|
670 // ### actually for IE document.body returns the now hidden "body" element |
|
671 // we can't implement that behaviour now because it could cause too many |
|
672 // regressions and the headaches are not worth the work as long as there is |
|
673 // no site actually relying on that detail (Dirk) |
|
674 if (document->body()) |
|
675 document->body()->setAttribute(styleAttr, "display:none"); |
|
676 inBody = false; |
|
677 } |
|
678 if ((haveContent || haveFrameSet) && current->localName() == htmlTag) |
|
679 return false; |
|
680 haveFrameSet = true; |
|
681 startBody(); |
|
682 return true; |
|
683 } |
|
684 |
|
685 bool HTMLParser::iframeCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
686 { |
|
687 // a bit of a special case, since the frame is inlined |
|
688 setSkipMode(iframeTag); |
|
689 return true; |
|
690 } |
|
691 |
|
692 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
693 { |
|
694 // Only create a new form if we're not already inside one. |
|
695 // This is consistent with other browsers' behavior. |
|
696 if (!m_currentFormElement) { |
|
697 m_currentFormElement = new HTMLFormElement(document); |
|
698 result = m_currentFormElement; |
|
699 } |
|
700 return false; |
|
701 } |
|
702 |
|
703 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
704 { |
|
705 RefPtr<Node> n = handleIsindex(t); |
|
706 if (!inBody) { |
|
707 m_isindexElement = n.release(); |
|
708 } else { |
|
709 t->flat = true; |
|
710 result = n.release(); |
|
711 } |
|
712 return false; |
|
713 } |
|
714 |
|
715 bool HTMLParser::selectCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
716 { |
|
717 return true; |
|
718 } |
|
719 |
|
720 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
721 { |
|
722 popBlock(dtTag); |
|
723 popBlock(ddTag); |
|
724 return true; |
|
725 } |
|
726 |
|
727 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
728 { |
|
729 popBlock(ddTag); |
|
730 popBlock(dtTag); |
|
731 return true; |
|
732 } |
|
733 |
|
734 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
735 { |
|
736 popBlock(t->tagName); |
|
737 return true; |
|
738 } |
|
739 |
|
740 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
741 { |
|
742 return allowNestedRedundantTag(t->tagName); |
|
743 } |
|
744 |
|
745 bool HTMLParser::tableCellCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
746 { |
|
747 popBlock(tdTag); |
|
748 popBlock(thTag); |
|
749 return true; |
|
750 } |
|
751 |
|
752 bool HTMLParser::tableSectionCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
753 { |
|
754 popBlock(theadTag); |
|
755 popBlock(tbodyTag); |
|
756 popBlock(tfootTag); |
|
757 return true; |
|
758 } |
|
759 |
|
760 bool HTMLParser::noembedCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
761 { |
|
762 setSkipMode(noembedTag); |
|
763 return true; |
|
764 } |
|
765 |
|
766 bool HTMLParser::noframesCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
767 { |
|
768 setSkipMode(noframesTag); |
|
769 return true; |
|
770 } |
|
771 |
|
772 bool HTMLParser::noscriptCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
773 { |
|
774 if (!m_isParsingFragment) { |
|
775 Settings* settings = document->settings(); |
|
776 if (settings && settings->isJavaScriptEnabled()) |
|
777 setSkipMode(noscriptTag); |
|
778 } |
|
779 return true; |
|
780 } |
|
781 |
|
782 bool HTMLParser::mapCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
783 { |
|
784 m_currentMapElement = new HTMLMapElement(document); |
|
785 result = m_currentMapElement; |
|
786 return false; |
|
787 } |
|
788 |
|
789 PassRefPtr<Node> HTMLParser::getNode(Token* t) |
|
790 { |
|
791 // Init our error handling table. |
|
792 static FunctionMap gFunctionMap; |
|
793 if (gFunctionMap.isEmpty()) { |
|
794 gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); |
|
795 gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
796 gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
797 gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck); |
|
798 gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); |
|
799 gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck); |
|
800 gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck); |
|
801 gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck); |
|
802 gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck); |
|
803 gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck); |
|
804 gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck); |
|
805 gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
806 gFunctionMap.set(iframeTag.localName().impl(), &HTMLParser::iframeCreateErrorCheck); |
|
807 gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck); |
|
808 gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); |
|
809 gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck); |
|
810 gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); |
|
811 gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck); |
|
812 gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck); |
|
813 gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck); |
|
814 gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
815 gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck); |
|
816 gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
817 gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
818 gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); |
|
819 gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); |
|
820 gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck); |
|
821 gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); |
|
822 gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); |
|
823 gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); |
|
824 gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); |
|
825 gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
826 gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); |
|
827 } |
|
828 |
|
829 bool proceed = true; |
|
830 RefPtr<Node> result; |
|
831 if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) |
|
832 proceed = (this->*errorCheckFunc)(t, result); |
|
833 if (proceed) |
|
834 result = HTMLElementFactory::createHTMLElement(t->tagName, document, m_currentFormElement.get()); |
|
835 return result.release(); |
|
836 } |
|
837 |
|
838 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName) |
|
839 { |
|
840 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of |
|
841 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 |
|
842 // nested tags of the same type before just ignoring them all together. |
|
843 unsigned i = 0; |
|
844 for (HTMLStackElem* curr = blockStack; |
|
845 i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; |
|
846 curr = curr->next, i++); |
|
847 return i != cMaxRedundantTagDepth; |
|
848 } |
|
849 |
|
850 void HTMLParser::processCloseTag(Token* t) |
|
851 { |
|
852 // Support for really broken html. |
|
853 // we never close the body tag, since some stupid web pages close it before the actual end of the doc. |
|
854 // let's rely on the end() call to close things. |
|
855 if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) |
|
856 return; |
|
857 |
|
858 bool checkForCloseTagErrors = true; |
|
859 if (t->tagName == formTag && m_currentFormElement) { |
|
860 m_currentFormElement = 0; |
|
861 checkForCloseTagErrors = false; |
|
862 } else if (t->tagName == mapTag) |
|
863 m_currentMapElement = 0; |
|
864 else if (t->tagName == pTag) |
|
865 checkForCloseTagErrors = false; |
|
866 |
|
867 HTMLStackElem* oldElem = blockStack; |
|
868 popBlock(t->tagName, checkForCloseTagErrors); |
|
869 if (oldElem == blockStack && t->tagName == pTag) { |
|
870 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat |
|
871 // this as a valid break, i.e., <p></p>. So go ahead and make the empty |
|
872 // paragraph. |
|
873 t->beginTag = true; |
|
874 parseToken(t); |
|
875 popBlock(t->tagName); |
|
876 reportError(StrayParagraphCloseError); |
|
877 } |
|
878 } |
|
879 |
|
880 bool HTMLParser::isHeaderTag(const AtomicString& tagName) |
|
881 { |
|
882 static HashSet<AtomicStringImpl*> headerTags; |
|
883 if (headerTags.isEmpty()) { |
|
884 headerTags.add(h1Tag.localName().impl()); |
|
885 headerTags.add(h2Tag.localName().impl()); |
|
886 headerTags.add(h3Tag.localName().impl()); |
|
887 headerTags.add(h4Tag.localName().impl()); |
|
888 headerTags.add(h5Tag.localName().impl()); |
|
889 headerTags.add(h6Tag.localName().impl()); |
|
890 } |
|
891 |
|
892 return headerTags.contains(tagName.impl()); |
|
893 } |
|
894 |
|
895 bool HTMLParser::isInline(Node* node) const |
|
896 { |
|
897 if (node->isTextNode()) |
|
898 return true; |
|
899 |
|
900 if (node->isHTMLElement()) { |
|
901 HTMLElement* e = static_cast<HTMLElement*>(node); |
|
902 if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || |
|
903 e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || |
|
904 e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || |
|
905 e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || |
|
906 e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || |
|
907 e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || |
|
908 e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || |
|
909 e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || |
|
910 e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || |
|
911 e->hasLocalName(noembedTag)) |
|
912 return true; |
|
913 if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { |
|
914 Settings* settings = document->settings(); |
|
915 if (settings && settings->isJavaScriptEnabled()) |
|
916 return true; |
|
917 } |
|
918 } |
|
919 |
|
920 return false; |
|
921 } |
|
922 |
|
923 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName) |
|
924 { |
|
925 static HashSet<AtomicStringImpl*> residualStyleTags; |
|
926 if (residualStyleTags.isEmpty()) { |
|
927 residualStyleTags.add(aTag.localName().impl()); |
|
928 residualStyleTags.add(fontTag.localName().impl()); |
|
929 residualStyleTags.add(ttTag.localName().impl()); |
|
930 residualStyleTags.add(uTag.localName().impl()); |
|
931 residualStyleTags.add(bTag.localName().impl()); |
|
932 residualStyleTags.add(iTag.localName().impl()); |
|
933 residualStyleTags.add(sTag.localName().impl()); |
|
934 residualStyleTags.add(strikeTag.localName().impl()); |
|
935 residualStyleTags.add(bigTag.localName().impl()); |
|
936 residualStyleTags.add(smallTag.localName().impl()); |
|
937 residualStyleTags.add(emTag.localName().impl()); |
|
938 residualStyleTags.add(strongTag.localName().impl()); |
|
939 residualStyleTags.add(dfnTag.localName().impl()); |
|
940 residualStyleTags.add(codeTag.localName().impl()); |
|
941 residualStyleTags.add(sampTag.localName().impl()); |
|
942 residualStyleTags.add(kbdTag.localName().impl()); |
|
943 residualStyleTags.add(varTag.localName().impl()); |
|
944 residualStyleTags.add(nobrTag.localName().impl()); |
|
945 } |
|
946 |
|
947 return residualStyleTags.contains(tagName.impl()); |
|
948 } |
|
949 |
|
950 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName) |
|
951 { |
|
952 static HashSet<AtomicStringImpl*> unaffectedTags; |
|
953 if (unaffectedTags.isEmpty()) { |
|
954 unaffectedTags.add(bodyTag.localName().impl()); |
|
955 unaffectedTags.add(tableTag.localName().impl()); |
|
956 unaffectedTags.add(theadTag.localName().impl()); |
|
957 unaffectedTags.add(tbodyTag.localName().impl()); |
|
958 unaffectedTags.add(tfootTag.localName().impl()); |
|
959 unaffectedTags.add(trTag.localName().impl()); |
|
960 unaffectedTags.add(thTag.localName().impl()); |
|
961 unaffectedTags.add(tdTag.localName().impl()); |
|
962 unaffectedTags.add(captionTag.localName().impl()); |
|
963 unaffectedTags.add(colgroupTag.localName().impl()); |
|
964 unaffectedTags.add(colTag.localName().impl()); |
|
965 unaffectedTags.add(optionTag.localName().impl()); |
|
966 unaffectedTags.add(optgroupTag.localName().impl()); |
|
967 unaffectedTags.add(selectTag.localName().impl()); |
|
968 unaffectedTags.add(objectTag.localName().impl()); |
|
969 } |
|
970 |
|
971 return !unaffectedTags.contains(tagName.impl()); |
|
972 } |
|
973 |
|
974 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) |
|
975 { |
|
976 HTMLStackElem* maxElem = 0; |
|
977 bool finished = false; |
|
978 m_handlingResidualStyleAcrossBlocks = true; |
|
979 while (!finished) { |
|
980 // Find the outermost element that crosses over to a higher level. If there exists another higher-level |
|
981 // element, we will do another pass, until we have corrected the innermost one. |
|
982 ExceptionCode ec = 0; |
|
983 HTMLStackElem* curr = blockStack; |
|
984 HTMLStackElem* prev = 0; |
|
985 HTMLStackElem* prevMaxElem = 0; |
|
986 maxElem = 0; |
|
987 finished = true; |
|
988 while (curr && curr != elem) { |
|
989 if (curr->level > elem->level) { |
|
990 if (!isAffectedByResidualStyle(curr->tagName)) |
|
991 return; |
|
992 if (maxElem) |
|
993 // We will need another pass. |
|
994 finished = false; |
|
995 maxElem = curr; |
|
996 prevMaxElem = prev; |
|
997 } |
|
998 |
|
999 prev = curr; |
|
1000 curr = curr->next; |
|
1001 } |
|
1002 |
|
1003 if (!curr || !maxElem) |
|
1004 return; |
|
1005 |
|
1006 Node* residualElem = prev->node; |
|
1007 Node* blockElem = prevMaxElem ? prevMaxElem->node : current; |
|
1008 Node* parentElem = elem->node; |
|
1009 |
|
1010 // Check to see if the reparenting that is going to occur is allowed according to the DOM. |
|
1011 // FIXME: We should either always allow it or perform an additional fixup instead of |
|
1012 // just bailing here. |
|
1013 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. |
|
1014 if (!parentElem->childAllowed(blockElem)) |
|
1015 return; |
|
1016 |
|
1017 if (maxElem->node->parentNode() != elem->node) { |
|
1018 // Walk the stack and remove any elements that aren't residual style tags. These |
|
1019 // are basically just being closed up. Example: |
|
1020 // <font><span>Moo<p>Goo</font></p>. |
|
1021 // In the above example, the <span> doesn't need to be reopened. It can just close. |
|
1022 HTMLStackElem* currElem = maxElem->next; |
|
1023 HTMLStackElem* prevElem = maxElem; |
|
1024 while (currElem != elem) { |
|
1025 HTMLStackElem* nextElem = currElem->next; |
|
1026 if (!isResidualStyleTag(currElem->tagName)) { |
|
1027 prevElem->next = nextElem; |
|
1028 prevElem->derefNode(); |
|
1029 prevElem->node = currElem->node; |
|
1030 prevElem->didRefNode = currElem->didRefNode; |
|
1031 delete currElem; |
|
1032 } |
|
1033 else |
|
1034 prevElem = currElem; |
|
1035 currElem = nextElem; |
|
1036 } |
|
1037 |
|
1038 // We have to reopen residual tags in between maxElem and elem. An example of this case is: |
|
1039 // <font><i>Moo<p>Foo</font>. |
|
1040 // In this case, we need to transform the part before the <p> into: |
|
1041 // <font><i>Moo</i></font><i> |
|
1042 // so that the <i> will remain open. This involves the modification of elements |
|
1043 // in the block stack. |
|
1044 // This will also affect how we ultimately reparent the block, since we want it to end up |
|
1045 // under the reopened residual tags (e.g., the <i> in the above example.) |
|
1046 RefPtr<Node> prevNode = 0; |
|
1047 currElem = maxElem; |
|
1048 while (currElem->node != residualElem) { |
|
1049 if (isResidualStyleTag(currElem->node->localName())) { |
|
1050 // Create a clone of this element. |
|
1051 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. |
|
1052 Node* currNode = currElem->node->cloneNode(false).releaseRef(); |
|
1053 reportError(ResidualStyleError, &currNode->localName()); |
|
1054 |
|
1055 // Change the stack element's node to point to the clone. |
|
1056 // The stack element adopts the reference we obtained above by calling release(). |
|
1057 currElem->derefNode(); |
|
1058 currElem->node = currNode; |
|
1059 currElem->didRefNode = true; |
|
1060 |
|
1061 // Attach the previous node as a child of this new node. |
|
1062 if (prevNode) |
|
1063 currNode->appendChild(prevNode, ec); |
|
1064 else // The new parent for the block element is going to be the innermost clone. |
|
1065 parentElem = currNode; |
|
1066 |
|
1067 prevNode = currNode; |
|
1068 } |
|
1069 |
|
1070 currElem = currElem->next; |
|
1071 } |
|
1072 |
|
1073 // Now append the chain of new residual style elements if one exists. |
|
1074 if (prevNode) |
|
1075 elem->node->appendChild(prevNode, ec); |
|
1076 } |
|
1077 |
|
1078 // Check if the block is still in the tree. If it isn't, then we don't |
|
1079 // want to remove it from its parent (that would crash) or insert it into |
|
1080 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 |
|
1081 bool isBlockStillInTree = blockElem->parentNode(); |
|
1082 |
|
1083 // We need to make a clone of |residualElem| and place it just inside |blockElem|. |
|
1084 // All content of |blockElem| is reparented to be under this clone. We then |
|
1085 // reparent |blockElem| using real DOM calls so that attachment/detachment will |
|
1086 // be performed to fix up the rendering tree. |
|
1087 // So for this example: <b>...<p>Foo</b>Goo</p> |
|
1088 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> |
|
1089 // |
|
1090 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. |
|
1091 if (isBlockStillInTree) |
|
1092 blockElem->parentNode()->removeChild(blockElem, ec); |
|
1093 |
|
1094 Node* newNodePtr = 0; |
|
1095 ASSERT(finished || blockElem->firstChild()); |
|
1096 if (blockElem->firstChild()) { |
|
1097 // Step 2: Clone |residualElem|. |
|
1098 RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. |
|
1099 newNodePtr = newNode.get(); |
|
1100 reportError(ResidualStyleError, &newNode->localName()); |
|
1101 |
|
1102 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| |
|
1103 // before we've put |newElem| into the document. That way we'll only do one attachment of all |
|
1104 // the new content (instead of a bunch of individual attachments). |
|
1105 Node* currNode = blockElem->firstChild(); |
|
1106 while (currNode) { |
|
1107 Node* nextNode = currNode->nextSibling(); |
|
1108 newNode->appendChild(currNode, ec); |
|
1109 currNode = nextNode; |
|
1110 } |
|
1111 |
|
1112 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no |
|
1113 // attachment can occur yet. |
|
1114 blockElem->appendChild(newNode.release(), ec); |
|
1115 } |
|
1116 |
|
1117 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. |
|
1118 if (isBlockStillInTree) |
|
1119 parentElem->appendChild(blockElem, ec); |
|
1120 |
|
1121 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update |
|
1122 // the node associated with the previous stack element so that when it gets popped, |
|
1123 // it doesn't make the residual element the next current node. |
|
1124 HTMLStackElem* currElem = maxElem; |
|
1125 HTMLStackElem* prevElem = 0; |
|
1126 while (currElem != elem) { |
|
1127 prevElem = currElem; |
|
1128 currElem = currElem->next; |
|
1129 } |
|
1130 prevElem->next = elem->next; |
|
1131 prevElem->derefNode(); |
|
1132 prevElem->node = elem->node; |
|
1133 prevElem->didRefNode = elem->didRefNode; |
|
1134 if (!finished) { |
|
1135 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position |
|
1136 // in the stack. We do not do this for the innermost block, because in that case the new |
|
1137 // node is effectively no longer open. |
|
1138 elem->next = maxElem; |
|
1139 elem->node = prevMaxElem->node; |
|
1140 elem->didRefNode = prevMaxElem->didRefNode; |
|
1141 prevMaxElem->next = elem; |
|
1142 ASSERT(newNodePtr); |
|
1143 prevMaxElem->node = newNodePtr; |
|
1144 prevMaxElem->didRefNode = false; |
|
1145 } else |
|
1146 delete elem; |
|
1147 } |
|
1148 |
|
1149 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. |
|
1150 // In the above example, Goo should stay italic. |
|
1151 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. |
|
1152 HTMLStackElem* curr = blockStack; |
|
1153 HTMLStackElem* residualStyleStack = 0; |
|
1154 unsigned stackDepth = 1; |
|
1155 while (curr && curr != maxElem) { |
|
1156 // We will actually schedule this tag for reopening |
|
1157 // after we complete the close of this entire block. |
|
1158 if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) |
|
1159 // We've overloaded the use of stack elements and are just reusing the |
|
1160 // struct with a slightly different meaning to the variables. Instead of chaining |
|
1161 // from innermost to outermost, we build up a list of all the tags we need to reopen |
|
1162 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing |
|
1163 // to the outermost tag we need to reopen. |
|
1164 // We also set curr->node to be the actual element that corresponds to the ID stored in |
|
1165 // curr->id rather than the node that you should pop to when the element gets pulled off |
|
1166 // the stack. |
|
1167 moveOneBlockToStack(residualStyleStack); |
|
1168 else |
|
1169 popOneBlock(); |
|
1170 |
|
1171 curr = blockStack; |
|
1172 } |
|
1173 |
|
1174 reopenResidualStyleTags(residualStyleStack, 0); // FIXME: Deal with stray table content some day |
|
1175 // if it becomes necessary to do so. |
|
1176 |
|
1177 m_handlingResidualStyleAcrossBlocks = false; |
|
1178 } |
|
1179 |
|
1180 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) |
|
1181 { |
|
1182 // Loop for each tag that needs to be reopened. |
|
1183 while (elem) { |
|
1184 // Create a shallow clone of the DOM node for this element. |
|
1185 RefPtr<Node> newNode = elem->node->cloneNode(false); |
|
1186 reportError(ResidualStyleError, &newNode->localName()); |
|
1187 |
|
1188 // Append the new node. In the malformed table case, we need to insert before the table, |
|
1189 // which will be the last child. |
|
1190 ExceptionCode ec = 0; |
|
1191 if (malformedTableParent) |
|
1192 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); |
|
1193 else |
|
1194 current->appendChild(newNode, ec); |
|
1195 // FIXME: Is it really OK to ignore the exceptions here? |
|
1196 |
|
1197 // Now push a new stack element for this node we just created. |
|
1198 pushBlock(elem->tagName, elem->level); |
|
1199 |
|
1200 // Set our strayTableContent boolean if needed, so that the reopened tag also knows |
|
1201 // that it is inside a malformed table. |
|
1202 blockStack->strayTableContent = malformedTableParent != 0; |
|
1203 if (blockStack->strayTableContent) |
|
1204 inStrayTableContent++; |
|
1205 |
|
1206 // Clear our malformed table parent variable. |
|
1207 malformedTableParent = 0; |
|
1208 |
|
1209 // Update |current| manually to point to the new node. |
|
1210 setCurrent(newNode.get()); |
|
1211 |
|
1212 // Advance to the next tag that needs to be reopened. |
|
1213 HTMLStackElem* next = elem->next; |
|
1214 elem->derefNode(); |
|
1215 delete elem; |
|
1216 elem = next; |
|
1217 } |
|
1218 } |
|
1219 |
|
1220 void HTMLParser::pushBlock(const AtomicString& tagName, int level) |
|
1221 { |
|
1222 blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack); |
|
1223 didRefCurrent = false; |
|
1224 } |
|
1225 |
|
1226 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors) |
|
1227 { |
|
1228 HTMLStackElem* elem = blockStack; |
|
1229 |
|
1230 int maxLevel = 0; |
|
1231 |
|
1232 while (elem && (elem->tagName != tagName)) { |
|
1233 if (maxLevel < elem->level) |
|
1234 maxLevel = elem->level; |
|
1235 elem = elem->next; |
|
1236 } |
|
1237 |
|
1238 if (!elem) { |
|
1239 if (reportErrors) |
|
1240 reportError(StrayCloseTagError, &tagName, 0, true); |
|
1241 return; |
|
1242 } |
|
1243 |
|
1244 if (maxLevel > elem->level) { |
|
1245 // We didn't match because the tag is in a different scope, e.g., |
|
1246 // <b><p>Foo</b>. Try to correct the problem. |
|
1247 if (!isResidualStyleTag(tagName)) |
|
1248 return; |
|
1249 return handleResidualStyleCloseTagAcrossBlocks(elem); |
|
1250 } |
|
1251 |
|
1252 bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); |
|
1253 HTMLStackElem* residualStyleStack = 0; |
|
1254 Node* malformedTableParent = 0; |
|
1255 |
|
1256 elem = blockStack; |
|
1257 unsigned stackDepth = 1; |
|
1258 while (elem) { |
|
1259 if (elem->tagName == tagName) { |
|
1260 int strayTable = inStrayTableContent; |
|
1261 popOneBlock(); |
|
1262 elem = 0; |
|
1263 |
|
1264 // This element was the root of some malformed content just inside an implicit or |
|
1265 // explicit <tbody> or <tr>. |
|
1266 // If we end up needing to reopen residual style tags, the root of the reopened chain |
|
1267 // must also know that it is the root of malformed content inside a <tbody>/<tr>. |
|
1268 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) { |
|
1269 Node* curr = current; |
|
1270 while (curr && !curr->hasTagName(tableTag)) |
|
1271 curr = curr->parentNode(); |
|
1272 malformedTableParent = curr ? curr->parentNode() : 0; |
|
1273 } |
|
1274 } |
|
1275 else { |
|
1276 if (m_currentFormElement && elem->tagName == formTag) |
|
1277 // A <form> is being closed prematurely (and this is |
|
1278 // malformed HTML). Set an attribute on the form to clear out its |
|
1279 // bottom margin. |
|
1280 m_currentFormElement->setMalformed(true); |
|
1281 |
|
1282 // Schedule this tag for reopening |
|
1283 // after we complete the close of this entire block. |
|
1284 if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) |
|
1285 // We've overloaded the use of stack elements and are just reusing the |
|
1286 // struct with a slightly different meaning to the variables. Instead of chaining |
|
1287 // from innermost to outermost, we build up a list of all the tags we need to reopen |
|
1288 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing |
|
1289 // to the outermost tag we need to reopen. |
|
1290 // We also set elem->node to be the actual element that corresponds to the ID stored in |
|
1291 // elem->id rather than the node that you should pop to when the element gets pulled off |
|
1292 // the stack. |
|
1293 moveOneBlockToStack(residualStyleStack); |
|
1294 else |
|
1295 popOneBlock(); |
|
1296 elem = blockStack; |
|
1297 } |
|
1298 } |
|
1299 |
|
1300 reopenResidualStyleTags(residualStyleStack, malformedTableParent); |
|
1301 } |
|
1302 |
|
1303 inline HTMLStackElem* HTMLParser::popOneBlockCommon() |
|
1304 { |
|
1305 HTMLStackElem* elem = blockStack; |
|
1306 |
|
1307 // Form elements restore their state during the parsing process. |
|
1308 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available. |
|
1309 if (current && elem->node != current) |
|
1310 current->finishedParsing(); |
|
1311 |
|
1312 blockStack = elem->next; |
|
1313 current = elem->node; |
|
1314 didRefCurrent = elem->didRefNode; |
|
1315 |
|
1316 if (elem->strayTableContent) |
|
1317 inStrayTableContent--; |
|
1318 |
|
1319 return elem; |
|
1320 } |
|
1321 |
|
1322 void HTMLParser::popOneBlock() |
|
1323 { |
|
1324 // Store the current node before popOneBlockCommon overwrites it. |
|
1325 Node* lastCurrent = current; |
|
1326 bool didRefLastCurrent = didRefCurrent; |
|
1327 |
|
1328 delete popOneBlockCommon(); |
|
1329 |
|
1330 if (didRefLastCurrent) |
|
1331 lastCurrent->deref(); |
|
1332 } |
|
1333 |
|
1334 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head) |
|
1335 { |
|
1336 // We'll be using the stack element we're popping, but for the current node. |
|
1337 // See the two callers for details. |
|
1338 |
|
1339 // Store the current node before popOneBlockCommon overwrites it. |
|
1340 Node* lastCurrent = current; |
|
1341 bool didRefLastCurrent = didRefCurrent; |
|
1342 |
|
1343 // Pop the block, but don't deref the current node as popOneBlock does because |
|
1344 // we'll be using the pointer in the new stack element. |
|
1345 HTMLStackElem* elem = popOneBlockCommon(); |
|
1346 |
|
1347 // Transfer the current node into the stack element. |
|
1348 // No need to deref the old elem->node because popOneBlockCommon transferred |
|
1349 // it into the current/didRefCurrent fields. |
|
1350 elem->node = lastCurrent; |
|
1351 elem->didRefNode = didRefLastCurrent; |
|
1352 elem->next = head; |
|
1353 head = elem; |
|
1354 } |
|
1355 |
|
1356 void HTMLParser::popInlineBlocks() |
|
1357 { |
|
1358 while (blockStack && isInline(current)) |
|
1359 popOneBlock(); |
|
1360 } |
|
1361 |
|
1362 void HTMLParser::freeBlock() |
|
1363 { |
|
1364 while (blockStack) |
|
1365 popOneBlock(); |
|
1366 } |
|
1367 |
|
1368 void HTMLParser::createHead() |
|
1369 { |
|
1370 if (head || !document->documentElement()) |
|
1371 return; |
|
1372 |
|
1373 head = new HTMLHeadElement(document); |
|
1374 HTMLElement* body = document->body(); |
|
1375 ExceptionCode ec = 0; |
|
1376 document->documentElement()->insertBefore(head, body, ec); |
|
1377 if (ec) |
|
1378 head = 0; |
|
1379 } |
|
1380 |
|
1381 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t) |
|
1382 { |
|
1383 RefPtr<Node> n = new HTMLDivElement(document); |
|
1384 |
|
1385 NamedMappedAttrMap* attrs = t->attrs.get(); |
|
1386 |
|
1387 RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(document, m_currentFormElement.get()); |
|
1388 isIndex->setAttributeMap(attrs); |
|
1389 isIndex->setAttribute(typeAttr, "khtml_isindex"); |
|
1390 |
|
1391 String text = searchableIndexIntroduction(); |
|
1392 if (attrs) { |
|
1393 if (Attribute* a = attrs->getAttributeItem(promptAttr)) |
|
1394 text = a->value().domString() + " "; |
|
1395 t->attrs = 0; |
|
1396 } |
|
1397 |
|
1398 n->addChild(new HTMLHRElement(document)); |
|
1399 n->addChild(new Text(document, text)); |
|
1400 n->addChild(isIndex.release()); |
|
1401 n->addChild(new HTMLHRElement(document)); |
|
1402 |
|
1403 return n.release(); |
|
1404 } |
|
1405 |
|
1406 void HTMLParser::startBody() |
|
1407 { |
|
1408 if (inBody) |
|
1409 return; |
|
1410 |
|
1411 inBody = true; |
|
1412 |
|
1413 if (m_isindexElement) { |
|
1414 insertNode(m_isindexElement.get(), true /* don't descend into this node */); |
|
1415 m_isindexElement = 0; |
|
1416 } |
|
1417 } |
|
1418 |
|
1419 void HTMLParser::finished() |
|
1420 { |
|
1421 // In the case of a completely empty document, here's the place to create the HTML element. |
|
1422 if (current && current->isDocumentNode() && !document->documentElement()) |
|
1423 insertNode(new HTMLHtmlElement(document)); |
|
1424 |
|
1425 // This ensures that "current" is not left pointing to a node when the document is destroyed. |
|
1426 freeBlock(); |
|
1427 setCurrent(0); |
|
1428 |
|
1429 // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this. |
|
1430 if (!m_isParsingFragment) |
|
1431 document->finishedParsing(); |
|
1432 } |
|
1433 |
|
1434 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) |
|
1435 { |
|
1436 Frame* frame = document->frame(); |
|
1437 if (!frame) |
|
1438 return; |
|
1439 |
|
1440 Page* page = frame->page(); |
|
1441 if (!page) |
|
1442 return; |
|
1443 |
|
1444 HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer()); |
|
1445 int lineNumber = htmlTokenizer->lineNumber() + 1; |
|
1446 |
|
1447 AtomicString tag1; |
|
1448 AtomicString tag2; |
|
1449 if (tagName1) { |
|
1450 if (*tagName1 == "#text") |
|
1451 tag1 = "Text"; |
|
1452 else if (*tagName1 == "#comment") |
|
1453 tag1 = "<!-- comment -->"; |
|
1454 else |
|
1455 tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; |
|
1456 } |
|
1457 if (tagName2) { |
|
1458 if (*tagName2 == "#text") |
|
1459 tag2 = "Text"; |
|
1460 else if (*tagName2 == "#comment") |
|
1461 tag2 = "<!-- comment -->"; |
|
1462 else |
|
1463 tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; |
|
1464 } |
|
1465 |
|
1466 const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); |
|
1467 if (!errorMsg) |
|
1468 return; |
|
1469 |
|
1470 String message; |
|
1471 if (htmlTokenizer->processingContentWrittenByScript()) |
|
1472 message += htmlParserDocumentWriteMessage(); |
|
1473 message += errorMsg; |
|
1474 message.replace("%tag1", tag1); |
|
1475 message.replace("%tag2", tag2); |
|
1476 |
|
1477 page->chrome()->addMessageToConsole(HTMLMessageSource, isWarning(errorCode) ? WarningMessageLevel: ErrorMessageLevel, message, lineNumber, document->URL()); |
|
1478 } |
|
1479 |
|
1480 } |