webengine/osswebengine/WebCore/html/HTMLParser.cpp
changeset 0 dd21522fd290
child 8 7c90e6132015
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2     Copyright (C) 1997 Martin Jones (mjones@kde.org)
       
     3               (C) 1997 Torben Weis (weis@kde.org)
       
     4               (C) 1999,2001 Lars Knoll (knoll@kde.org)
       
     5               (C) 2000,2001 Dirk Mueller (mueller@kde.org)
       
     6     Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
       
     7 
       
     8     This library is free software; you can redistribute it and/or
       
     9     modify it under the terms of the GNU Library General Public
       
    10     License as published by the Free Software Foundation; either
       
    11     version 2 of the License, or (at your option) any later version.
       
    12 
       
    13     This library is distributed in the hope that it will be useful,
       
    14     but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    16     Library General Public License for more details.
       
    17 
       
    18     You should have received a copy of the GNU Library General Public License
       
    19     along with this library; see the file COPYING.LIB.  If not, write to
       
    20     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    21     Boston, MA 02110-1301, USA.
       
    22 */
       
    23 
       
    24 #include "config.h"
       
    25 #include "HTMLParser.h"
       
    26 
       
    27 #include "CharacterNames.h"
       
    28 #include "CSSPropertyNames.h"
       
    29 #include "CSSValueKeywords.h"
       
    30 #include "Comment.h"
       
    31 #include "DocumentFragment.h"
       
    32 #include "Frame.h"
       
    33 #include "HTMLBodyElement.h"
       
    34 #include "HTMLDocument.h"
       
    35 #include "HTMLDivElement.h"
       
    36 #include "HTMLDListElement.h"
       
    37 #include "HTMLElementFactory.h"
       
    38 #include "HTMLFormElement.h"
       
    39 #include "HTMLHeadElement.h"
       
    40 #include "HTMLHRElement.h"
       
    41 #include "HTMLHtmlElement.h"
       
    42 #include "HTMLIsIndexElement.h"
       
    43 #include "HTMLMapElement.h"
       
    44 #include "HTMLNames.h"
       
    45 #include "HTMLTableCellElement.h"
       
    46 #include "HTMLTableRowElement.h"
       
    47 #include "HTMLTableSectionElement.h"
       
    48 #include "HTMLTokenizer.h"
       
    49 #include "LocalizedStrings.h"
       
    50 #include "Page.h"
       
    51 #include "Settings.h"
       
    52 #include "Text.h"
       
    53 
       
    54 namespace WebCore {
       
    55 
       
    56 using namespace HTMLNames;
       
    57 
       
     58 static const unsigned cMaxRedundantTagDepth = 20; // NOTE(review): not referenced in this part of the file; presumably caps nesting of identical redundant tags - confirm at the use site.
       
     59 static const unsigned cResidualStyleMaxDepth = 200; // NOTE(review): not referenced in this part of the file; presumably a depth limit for residual style handling - confirm at the use site.
       
    60 
       
    61 struct HTMLStackElem : Noncopyable {
       
    62     HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
       
    63         : tagName(t)
       
    64         , level(lvl)
       
    65         , strayTableContent(false)
       
    66         , node(n)
       
    67         , didRefNode(r)
       
    68         , next(nx)
       
    69     {
       
    70     }
       
    71 
       
    72     void derefNode()
       
    73     {
       
    74         if (didRefNode)
       
    75             node->deref();
       
    76     }
       
    77 
       
    78     AtomicString tagName;
       
    79     int level;
       
    80     bool strayTableContent;
       
    81     Node* node;
       
    82     bool didRefNode;
       
    83     HTMLStackElem* next;
       
    84 };
       
    85 
       
    86 /**
       
    87  * The parser parses tokenized input into the document, building up the
       
    88  * document tree. If the document is well-formed, parsing it is straightforward.
       
    89  *
       
    90  * Unfortunately, we have to handle many HTML documents that are not well-formed,
       
    91  * so the parser has to be tolerant about errors.
       
    92  *
       
    93  * We have to take care of at least the following error conditions:
       
    94  *
       
    95  * 1. The element being added is explicitly forbidden inside some outer tag.
       
     96  *    In this case we should close all tags up to the one that forbids
       
    97  *    the element, and add it afterwards.
       
    98  *
       
    99  * 2. We are not allowed to add the element directly. It could be that
       
   100  *    the person writing the document forgot some tag in between (or that the
       
   101  *    tag in between is optional). This could be the case with the following
       
   102  *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
       
   103  *
       
    104  * 3. We want to add a block element inside an inline element. Close all
       
   105  *    inline elements up to the next higher block element.
       
   106  *
       
   107  * 4. If this doesn't help, close elements until we are allowed to add the
       
   108  *    element or ignore the tag.
       
   109  *
       
   110  */
       
   111 
       
   112 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
       
   113     : document(doc)
       
   114     , current(doc)
       
   115     , didRefCurrent(false)
       
   116     , blockStack(0)
       
   117     , head(0)
       
   118     , inBody(false)
       
   119     , haveContent(false)
       
   120     , haveFrameSet(false)
       
   121     , m_isParsingFragment(false)
       
   122     , m_reportErrors(reportErrors)
       
   123     , m_handlingResidualStyleAcrossBlocks(false)
       
   124     , inStrayTableContent(0)
       
   125 {
       
   126 }
       
   127 
       
   128 HTMLParser::HTMLParser(DocumentFragment* frag)
       
   129     : document(frag->document())
       
   130     , current(frag)
       
   131     , didRefCurrent(true)
       
   132     , blockStack(0)
       
   133     , head(0)
       
   134     , inBody(true)
       
   135     , haveContent(false)
       
   136     , haveFrameSet(false)
       
   137     , m_isParsingFragment(true)
       
   138     , m_reportErrors(false)
       
   139     , m_handlingResidualStyleAcrossBlocks(false)
       
   140     , inStrayTableContent(0)
       
   141 {
       
   142     if (frag)
       
   143         frag->ref();
       
   144 }
       
   145 
       
   146 HTMLParser::~HTMLParser()
       
   147 {
       
   148     freeBlock();
       
   149     if (didRefCurrent) 
       
   150         current->deref(); 
       
   151 }
       
   152 
       
   153 void HTMLParser::reset()
       
   154 {
       
   155     ASSERT(!m_isParsingFragment);
       
   156 
       
   157     setCurrent(document);
       
   158 
       
   159     freeBlock();
       
   160 
       
   161     inBody = false;
       
   162     haveFrameSet = false;
       
   163     haveContent = false;
       
   164     inStrayTableContent = 0;
       
   165 
       
   166     m_currentFormElement = 0;
       
   167     m_currentMapElement = 0;
       
   168     head = 0;
       
   169     m_isindexElement = 0;
       
   170 
       
   171     m_skipModeTag = nullAtom;
       
   172 }
       
   173 
       
   174 void HTMLParser::setCurrent(Node* newCurrent) 
       
   175 {
       
   176     bool didRefNewCurrent = newCurrent && newCurrent != document;
       
   177     if (didRefNewCurrent) 
       
   178         newCurrent->ref(); 
       
   179     if (didRefCurrent) 
       
   180         current->deref(); 
       
   181     current = newCurrent;
       
   182     didRefCurrent = didRefNewCurrent;
       
   183 }
       
   184 
       
   185 PassRefPtr<Node> HTMLParser::parseToken(Token* t)
       
   186 {
       
   187     if (!m_skipModeTag.isNull()) {
       
   188         if (!t->beginTag && t->tagName == m_skipModeTag)
       
   189             // Found the end tag for the current skip mode, so we're done skipping.
       
   190             m_skipModeTag = nullAtom;
       
   191         else if (current->localName() == t->tagName)
       
   192             // Do not skip </iframe>.
       
   193             // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
       
   194             ;
       
   195         else
       
   196             return 0;
       
   197     }
       
   198 
       
   199     // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
       
   200     if (t->isCloseTag(brTag) && document->inCompatMode()) {
       
   201         reportError(MalformedBRError);
       
   202         t->beginTag = true;
       
   203     }
       
   204 
       
   205     if (!t->beginTag) {
       
   206         processCloseTag(t);
       
   207         return 0;
       
   208     }
       
   209 
       
   210     // ignore spaces, if we're not inside a paragraph or other inline code
       
   211     if (t->tagName == textAtom && t->text) {
       
   212         if (inBody && !skipMode() && current->localName() != styleTag && current->localName() != titleTag && 
       
   213             current->localName() != scriptTag && !t->text->containsOnlyWhitespace()) 
       
   214             haveContent = true;
       
   215         
       
   216         RefPtr<Node> n;
       
   217         String text = t->text.get();
       
   218         unsigned charsLeft = text.length();
       
   219         while (charsLeft) {
       
   220             // split large blocks of text to nodes of manageable size
       
   221             n = Text::createWithLengthLimit(document, text, charsLeft);
       
   222             if (!insertNode(n.get(), t->flat))
       
   223                 return 0;
       
   224         }
       
   225         return n;
       
   226     }
       
   227 
       
   228     RefPtr<Node> n = getNode(t);
       
   229     // just to be sure, and to catch currently unimplemented stuff
       
   230     if (!n)
       
   231         return 0;
       
   232 
       
   233     // set attributes
       
   234     if (n->isHTMLElement()) {
       
   235         HTMLElement* e = static_cast<HTMLElement*>(n.get());
       
   236         e->setAttributeMap(t->attrs.get());
       
   237 
       
   238         // take care of optional close tags
       
   239         if (e->endTagRequirement() == TagStatusOptional)
       
   240             popBlock(t->tagName);
       
   241             
       
   242         // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
       
   243         // syntax was used, report an error.
       
   244         if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
       
   245             if (t->tagName == scriptTag)
       
   246                 reportError(IncorrectXMLCloseScriptWarning);
       
   247             else
       
   248                 reportError(IncorrectXMLSelfCloseError, &t->tagName);
       
   249         }
       
   250     }
       
   251 
       
   252     if (!insertNode(n.get(), t->flat)) {
       
   253         // we couldn't insert the node
       
   254 
       
   255         if (n->isElementNode()) {
       
   256             Element* e = static_cast<Element*>(n.get());
       
   257             e->setAttributeMap(0);
       
   258         }
       
   259 
       
   260         if (m_currentMapElement == n)
       
   261             m_currentMapElement = 0;
       
   262 
       
   263         if (m_currentFormElement == n)
       
   264             m_currentFormElement = 0;
       
   265 
       
   266         if (head == n)
       
   267             head = 0;
       
   268 
       
   269         return 0;
       
   270     }
       
   271     return n;
       
   272 }
       
   273 
       
   274 static bool isTableSection(Node* n)
       
   275 {
       
   276     return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
       
   277 }
       
   278 
       
   279 static bool isTablePart(Node* n)
       
   280 {
       
   281     return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
       
   282            isTableSection(n);
       
   283 }
       
   284 
       
   285 static bool isTableRelated(Node* n)
       
   286 {
       
   287     return n->hasTagName(tableTag) || isTablePart(n);
       
   288 }
       
   289 
       
   290 bool HTMLParser::insertNode(Node* n, bool flat)
       
   291 {
       
   292     RefPtr<Node> protectNode(n);
       
   293 
       
   294     const AtomicString& localName = n->localName();
       
   295     int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
       
   296     
       
   297     // <table> is never allowed inside stray table content.  Always pop out of the stray table content
       
   298     // and close up the first table, and then start the second table as a sibling.
       
   299     if (inStrayTableContent && localName == tableTag)
       
   300         popBlock(tableTag);
       
   301     
       
   302     // let's be stupid and just try to insert it.
       
   303     // this should work if the document is well-formed
       
   304     Node* newNode = current->addChild(n);
       
   305     if (!newNode)
       
   306         return handleError(n, flat, localName, tagPriority); // Try to handle the error.
       
   307 
       
   308     // don't push elements without end tags (e.g., <img>) on the stack
       
   309     bool parentAttached = current->attached();
       
   310     if (tagPriority > 0 && !flat) {
       
   311         if (newNode == current) {
       
   312             // This case should only be hit when a demoted <form> is placed inside a table.
       
   313             ASSERT(localName == formTag);
       
   314             reportError(FormInsideTablePartError, &current->localName());
       
   315         } else {
       
   316             // The pushBlock function transfers ownership of current to the block stack
       
   317             // so we're guaranteed that didRefCurrent is false. The code below is an
       
   318             // optimized version of setCurrent that takes advantage of that fact and also
       
   319             // assumes that newNode is neither 0 nor a pointer to the document.
       
   320             pushBlock(localName, tagPriority);
       
   321             ASSERT(!didRefCurrent);
       
   322             newNode->ref(); 
       
   323             current = newNode;
       
   324             didRefCurrent = true;
       
   325         }
       
   326         if (parentAttached && !n->attached() && !m_isParsingFragment)
       
   327             n->attach();
       
   328     } else {
       
   329         if (parentAttached && !n->attached() && !m_isParsingFragment)
       
   330             n->attach();
       
   331         n->finishedParsing();
       
   332     }
       
   333 
       
   334     return true;
       
   335 }
       
   336 
       
   337 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
       
   338 {
       
   339     // Error handling code.  This is just ad hoc handling of specific parent/child combinations.
       
   340     HTMLElement* e;
       
   341     bool handled = false;
       
   342 
       
   343     // 1. Check out the element's tag name to decide how to deal with errors.
       
   344     if (n->isHTMLElement()) {
       
   345         HTMLElement* h = static_cast<HTMLElement*>(n);
       
   346         if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
       
   347             if (inStrayTableContent && !isTableRelated(current)) {
       
   348                 reportError(MisplacedTablePartError, &localName, &current->localName());
       
   349                 // pop out to the nearest enclosing table-related tag.
       
   350                 while (blockStack && !isTableRelated(current))
       
   351                     popOneBlock();
       
   352                 return insertNode(n);
       
   353             }
       
   354         } else if (h->hasLocalName(headTag)) {
       
   355             if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
       
   356                 reportError(MisplacedHeadError);
       
   357                 return false;
       
   358             }
       
   359         } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
       
   360             bool createdHead = false;
       
   361             if (!head) {
       
   362                 createHead();
       
   363                 createdHead = true;
       
   364             }
       
   365             if (head) {
       
   366                 if (!createdHead)
       
   367                     reportError(MisplacedHeadContentError, &localName, &current->localName());
       
   368                 if (head->addChild(n)) {
       
   369                     if (!n->attached() && !m_isParsingFragment)
       
   370                         n->attach();
       
   371                     return true;
       
   372                 } else
       
   373                     return false;
       
   374             }
       
   375         } else if (h->hasLocalName(htmlTag)) {
       
   376             if (!current->isDocumentNode() ) {
       
   377                 if (document->documentElement()->hasTagName(htmlTag)) {
       
   378                     reportError(RedundantHTMLBodyError, &localName);
       
   379                     // we have another <HTML> element.... apply attributes to existing one
       
   380                     // make sure we don't overwrite already existing attributes
       
   381                     NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
       
   382                     Element* existingHTML = static_cast<Element*>(document->documentElement());
       
   383                     NamedAttrMap* bmap = existingHTML->attributes(false);
       
   384                     for (unsigned l = 0; map && l < map->length(); ++l) {
       
   385                         Attribute* it = map->attributeItem(l);
       
   386                         if (!bmap->getAttributeItem(it->name()))
       
   387                             existingHTML->setAttribute(it->name(), it->value());
       
   388                     }
       
   389                 }
       
   390                 return false;
       
   391             }
       
   392         } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
       
   393             bool createdHead = false;
       
   394             if (!head) {
       
   395                 createHead();
       
   396                 createdHead = true;
       
   397             }
       
   398             if (head) {
       
   399                 Node* newNode = head->addChild(n);
       
   400                 if (!newNode) {
       
   401                     setSkipMode(h->tagQName());
       
   402                     return false;
       
   403                 }
       
   404                 
       
   405                 if (!createdHead)
       
   406                     reportError(MisplacedHeadContentError, &localName, &current->localName());
       
   407                 
       
   408                 pushBlock(localName, tagPriority);
       
   409                 setCurrent(newNode);
       
   410                 if (!n->attached() && !m_isParsingFragment)
       
   411                     n->attach();
       
   412                 return true;
       
   413             }
       
   414             if (inBody) {
       
   415                 setSkipMode(h->tagQName());
       
   416                 return false;
       
   417             }
       
   418         } else if (h->hasLocalName(bodyTag)) {
       
   419             if (inBody && document->body()) {
       
   420                 // we have another <BODY> element.... apply attributes to existing one
       
   421                 // make sure we don't overwrite already existing attributes
       
   422                 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
       
   423                 reportError(RedundantHTMLBodyError, &localName);
       
   424                 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
       
   425                 Element* existingBody = document->body();
       
   426                 NamedAttrMap* bmap = existingBody->attributes(false);
       
   427                 for (unsigned l = 0; map && l < map->length(); ++l) {
       
   428                     Attribute* it = map->attributeItem(l);
       
   429                     if (!bmap->getAttributeItem(it->name()))
       
   430                         existingBody->setAttribute(it->name(), it->value());
       
   431                 }
       
   432                 return false;
       
   433             }
       
   434             else if (!current->isDocumentNode())
       
   435                 return false;
       
   436         } else if (h->hasLocalName(areaTag)) {
       
   437             if (m_currentMapElement) {
       
   438                 reportError(MisplacedAreaError, &current->localName());
       
   439                 m_currentMapElement->addChild(n);
       
   440                 if (!n->attached() && !m_isParsingFragment)
       
   441                     n->attach();
       
   442                 handled = true;
       
   443                 return true;
       
   444             }
       
   445             return false;
       
   446         } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
       
   447             if (isTableRelated(current)) {
       
   448                 while (blockStack && isTablePart(current))
       
   449                     popOneBlock();
       
   450                 return insertNode(n);
       
   451             }
       
   452         }
       
   453     } else if (n->isCommentNode() && !head)
       
   454         return false;
       
   455 
       
   456     // 2. Next we examine our currently active element to do some further error handling.
       
   457     if (current->isHTMLElement()) {
       
   458         HTMLElement* h = static_cast<HTMLElement*>(current);
       
   459         const AtomicString& currentTagName = current->localName();
       
   460         if (h->hasLocalName(htmlTag)) {
       
   461             HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
       
   462             if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
       
   463                 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
       
   464                 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
       
   465                 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
       
   466                 elt->hasLocalName(baseTag) 
       
   467 
       
   468 #if PLATFORM(SYMBIAN)
       
   469                 || elt->hasLocalName(bgsoundTag) 
       
   470 #endif                                
       
   471                 )) {
       
   472                 if (!head) {
       
   473                     head = new HTMLHeadElement(document);
       
   474                     e = head;
       
   475                     insertNode(e);
       
   476                     handled = true;
       
   477                 }
       
   478             } else {
       
   479                 if (n->isTextNode()) {
       
   480                     Text* t = static_cast<Text*>(n);
       
   481                     if (t->containsOnlyWhitespace())
       
   482                         return false;
       
   483                 }
       
   484                 if (!haveFrameSet) {
       
   485                     e = new HTMLBodyElement(document);
       
   486                     startBody();
       
   487                     insertNode(e);
       
   488                     handled = true;
       
   489                 } else
       
   490                     reportError(MisplacedFramesetContentError, &localName);
       
   491             }
       
   492         } else if (h->hasLocalName(headTag)) {
       
   493             if (n->hasTagName(htmlTag))
       
   494                 return false;
       
   495             else {
       
   496                 // This means the body starts here...
       
   497                 if (!haveFrameSet) {
       
   498                     popBlock(currentTagName);
       
   499                     e = new HTMLBodyElement(document);
       
   500                     startBody();
       
   501                     insertNode(e);
       
   502                     handled = true;
       
   503                 } else
       
   504                     reportError(MisplacedFramesetContentError, &localName);
       
   505             }
       
   506         } else if (h->hasLocalName(addressTag) || h->hasLocalName(dlTag) || h->hasLocalName(dtTag)
       
   507                    || h->hasLocalName(fontTag) || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
       
   508             reportError(MisplacedContentRetryError, &localName, &currentTagName);
       
   509             popBlock(currentTagName);
       
   510             handled = true;
       
   511         } else if (h->hasLocalName(captionTag)) {
       
   512             // Illegal content in a caption. Close the caption and try again.
       
   513             reportError(MisplacedCaptionContentError, &localName);
       
   514             popBlock(currentTagName);
       
   515             if (isTablePart(n))
       
   516                 return insertNode(n, flat);
       
   517         } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
       
   518             if (n->hasTagName(tableTag)) {
       
   519                 reportError(MisplacedTableError, &currentTagName);
       
   520                 if (m_isParsingFragment && !h->hasLocalName(tableTag))
       
   521                     // fragment may contain table parts without <table> ancestor, pop them one by one
       
   522                     popBlock(h->localName());
       
   523                 popBlock(localName); // end the table
       
   524                 handled = true;      // ...and start a new one
       
   525             } else {
       
   526                 ExceptionCode ec = 0;
       
   527                 Node* node = current;
       
   528                 Node* parent = node->parentNode();
       
   529                 // A script may have removed the current node's parent from the DOM
       
   530                 // http://bugs.webkit.org/show_bug.cgi?id=7137
       
   531                 // FIXME: we should do real recovery here and re-parent with the correct node.
       
   532                 if (!parent)
       
   533                     return false;
       
   534                 Node* grandparent = parent->parentNode();
       
   535 
       
   536                 if (n->isTextNode() ||
       
   537                     (h->hasLocalName(trTag) &&
       
   538                      isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
       
   539                      ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
       
   540                        !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
       
   541                      parent->hasTagName(tableTag))) {
       
   542                     node = (node->hasTagName(tableTag)) ? node :
       
   543                             ((node->hasTagName(trTag)) ? grandparent : parent);
       
   544                     // This can happen with fragments
       
   545                     if (!node)
       
   546                         return false;
       
   547                     Node* parent = node->parentNode();
       
   548                     if (!parent)
       
   549                         return false;
       
   550                     parent->insertBefore(n, node, ec);
       
   551                     if (!ec) {
       
   552                         reportError(StrayTableContentError, &localName, &currentTagName);
       
   553                         if (n->isHTMLElement() && tagPriority > 0 && 
       
   554                             !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
       
   555                         {
       
   556                             pushBlock(localName, tagPriority);
       
   557                             setCurrent(n);
       
   558                             inStrayTableContent++;
       
   559                             blockStack->strayTableContent = true;
       
   560                         }
       
   561                         return true;
       
   562                     }
       
   563                 }
       
   564 
       
   565                 if (!ec) {
       
   566                     if (current->hasTagName(trTag)) {
       
   567                         reportError(TablePartRequiredError, &localName, &tdTag.localName());
       
   568                         e = new HTMLTableCellElement(tdTag, document);
       
   569                     } else if (current->hasTagName(tableTag)) {
       
   570                         // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
       
   571                         // and it isn't really a parse error per se.
       
   572                         e = new HTMLTableSectionElement(tbodyTag, document); 
       
   573                     } else {
       
   574                         reportError(TablePartRequiredError, &localName, &trTag.localName());
       
   575                         e = new HTMLTableRowElement(document);
       
   576                     }
       
   577 
       
   578                     insertNode(e);
       
   579                     handled = true;
       
   580                 }
       
   581             }
       
   582         } else if (h->hasLocalName(objectTag)) {
       
   583             reportError(MisplacedContentRetryError, &localName, &currentTagName);
       
   584             popBlock(objectTag);
       
   585             handled = true;
       
   586         } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
       
   587             if (!isInline(n)) {
       
   588                 popBlock(currentTagName);
       
   589                 handled = true;
       
   590             }
       
   591         } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
       
   592             if (localName == optgroupTag) {
       
   593                 popBlock(currentTagName);
       
   594                 handled = true;
       
   595             } else if (localName == selectTag) {
       
   596                 // IE treats a nested select as </select>. Let's do the same
       
   597                 popBlock(localName);
       
   598             }
       
   599         } else if (h->hasLocalName(colgroupTag)) {
       
   600             popBlock(currentTagName);
       
   601             handled = true;
       
   602         } else if (!h->hasLocalName(bodyTag)) {
       
   603             if (isInline(current)) {
       
   604                 popInlineBlocks();
       
   605                 handled = true;
       
   606             }
       
   607         }
       
   608     } else if (current->isDocumentNode()) {
       
   609         if (n->isTextNode()) {
       
   610             Text* t = static_cast<Text*>(n);
       
   611             if (t->containsOnlyWhitespace())
       
   612                 return false;
       
   613         }
       
   614 
       
   615         if (!document->documentElement()) {
       
   616             e = new HTMLHtmlElement(document);
       
   617             insertNode(e);
       
   618             handled = true;
       
   619         }
       
   620     }
       
   621 
       
   622     // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
       
   623     if (!handled) {
       
   624         reportError(IgnoredContentError, &localName, &current->localName());
       
   625         return false;
       
   626     }
       
   627     return insertNode(n);
       
   628 }
       
   629 
       
// Pointer-to-member type for the per-tag "create error check" hooks defined below.
// Each hook receives the token being processed and a RefPtr<Node> it may fill in,
// and returns a bool that getNode() uses to decide whether to fall back to
// HTMLElementFactory for creating the element.
   630 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
       
// Lookup table from a tag's (or pseudo-tag's) AtomicStringImpl to its hook.
   631 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
       
   632 
       
   633 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   634 {
       
   635     result = new Text(document, t->text.get());
       
   636     return false;
       
   637 }
       
   638 
       
   639 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   640 {
       
   641     result = new Comment(document, t->text.get());
       
   642     return false;
       
   643 }
       
   644 
       
   645 bool HTMLParser::headCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   646 {
       
   647     if (!head || current->localName() == htmlTag) {
       
   648         head = new HTMLHeadElement(document);
       
   649         result = head;
       
   650     } else
       
   651         reportError(MisplacedHeadError);
       
   652     return false;
       
   653 }
       
   654 
       
   655 bool HTMLParser::bodyCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   656 {
       
   657     // body no longer allowed if we have a frameset
       
   658     if (haveFrameSet)
       
   659         return false;
       
   660     popBlock(headTag);
       
   661     startBody();
       
   662     return true;
       
   663 }
       
   664 
       
   665 bool HTMLParser::framesetCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   666 {
       
   667     popBlock(headTag);
       
   668     if (inBody && !haveFrameSet && !haveContent) {
       
   669         popBlock(bodyTag);
       
   670         // ### actually for IE document.body returns the now hidden "body" element
       
   671         // we can't implement that behaviour now because it could cause too many
       
   672         // regressions and the headaches are not worth the work as long as there is
       
   673         // no site actually relying on that detail (Dirk)
       
   674         if (document->body())
       
   675             document->body()->setAttribute(styleAttr, "display:none");
       
   676         inBody = false;
       
   677     }
       
   678     if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
       
   679         return false;
       
   680     haveFrameSet = true;
       
   681     startBody();
       
   682     return true;
       
   683 }
       
   684 
       
   685 bool HTMLParser::iframeCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   686 {
       
   687     // a bit of a special case, since the frame is inlined
       
   688     setSkipMode(iframeTag);
       
   689     return true;
       
   690 }
       
   691 
       
   692 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   693 {
       
   694     // Only create a new form if we're not already inside one.
       
   695     // This is consistent with other browsers' behavior.
       
   696     if (!m_currentFormElement) {
       
   697         m_currentFormElement = new HTMLFormElement(document);
       
   698         result = m_currentFormElement;
       
   699     }
       
   700     return false;
       
   701 }
       
   702 
       
   703 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   704 {
       
   705     RefPtr<Node> n = handleIsindex(t);
       
   706     if (!inBody) {
       
   707         m_isindexElement = n.release();
       
   708     } else {
       
   709         t->flat = true;
       
   710         result = n.release();
       
   711     }
       
   712     return false;
       
   713 }
       
   714 
       
   715 bool HTMLParser::selectCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   716 {
       
   717     return true;
       
   718 }
       
   719 
       
   720 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   721 {
       
   722     popBlock(dtTag);
       
   723     popBlock(ddTag);
       
   724     return true;
       
   725 }
       
   726 
       
   727 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   728 {
       
   729     popBlock(ddTag);
       
   730     popBlock(dtTag);
       
   731     return true;
       
   732 }
       
   733 
       
   734 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   735 {
       
   736     popBlock(t->tagName);
       
   737     return true;
       
   738 }
       
   739 
       
   740 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   741 {
       
   742     return allowNestedRedundantTag(t->tagName);
       
   743 }
       
   744 
       
   745 bool HTMLParser::tableCellCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   746 {
       
   747     popBlock(tdTag);
       
   748     popBlock(thTag);
       
   749     return true;
       
   750 }
       
   751 
       
   752 bool HTMLParser::tableSectionCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   753 {
       
   754     popBlock(theadTag);
       
   755     popBlock(tbodyTag);
       
   756     popBlock(tfootTag);
       
   757     return true;
       
   758 }
       
   759 
       
   760 bool HTMLParser::noembedCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   761 {
       
   762     setSkipMode(noembedTag);
       
   763     return true;
       
   764 }
       
   765 
       
   766 bool HTMLParser::noframesCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   767 {
       
   768     setSkipMode(noframesTag);
       
   769     return true;
       
   770 }
       
   771 
       
   772 bool HTMLParser::noscriptCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   773 {
       
   774     if (!m_isParsingFragment) {
       
   775         Settings* settings = document->settings();
       
   776         if (settings && settings->isJavaScriptEnabled())
       
   777             setSkipMode(noscriptTag);
       
   778     }
       
   779     return true;
       
   780 }
       
   781 
       
   782 bool HTMLParser::mapCreateErrorCheck(Token* t, RefPtr<Node>& result)
       
   783 {
       
   784     m_currentMapElement = new HTMLMapElement(document);
       
   785     result = m_currentMapElement;
       
   786     return false;
       
   787 }
       
   788 
       
   789 PassRefPtr<Node> HTMLParser::getNode(Token* t)
       
   790 {
       
   791     // Init our error handling table.
       
   792     static FunctionMap gFunctionMap;
       
   793     if (gFunctionMap.isEmpty()) {
       
   794         gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
       
   795         gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   796         gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   797         gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
       
   798         gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
       
   799         gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
       
   800         gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
       
   801         gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
       
   802         gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
       
   803         gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
       
   804         gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
       
   805         gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   806         gFunctionMap.set(iframeTag.localName().impl(), &HTMLParser::iframeCreateErrorCheck);
       
   807         gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
       
   808         gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
       
   809         gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
       
   810         gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
       
   811         gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
       
   812         gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
       
   813         gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
       
   814         gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   815         gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
       
   816         gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   817         gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   818         gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
       
   819         gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
       
   820         gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
       
   821         gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
       
   822         gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
       
   823         gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
       
   824         gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
       
   825         gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   826         gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
       
   827     }
       
   828 
       
   829     bool proceed = true;
       
   830     RefPtr<Node> result;
       
   831     if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
       
   832         proceed = (this->*errorCheckFunc)(t, result);
       
   833     if (proceed)
       
   834         result = HTMLElementFactory::createHTMLElement(t->tagName, document, m_currentFormElement.get());
       
   835     return result.release();
       
   836 }
       
   837 
       
   838 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
       
   839 {
       
   840     // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
       
   841     // about 1500 tags, all from a bunch of <b>s.  We will only allow at most 20
       
   842     // nested tags of the same type before just ignoring them all together.
       
   843     unsigned i = 0;
       
   844     for (HTMLStackElem* curr = blockStack;
       
   845          i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
       
   846          curr = curr->next, i++);
       
   847     return i != cMaxRedundantTagDepth;
       
   848 }
       
   849 
       
   850 void HTMLParser::processCloseTag(Token* t)
       
   851 {
       
   852     // Support for really broken html.
       
   853     // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
       
   854     // let's rely on the end() call to close things.
       
   855     if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
       
   856         return;
       
   857     
       
   858     bool checkForCloseTagErrors = true;
       
   859     if (t->tagName == formTag && m_currentFormElement) {
       
   860         m_currentFormElement = 0;
       
   861         checkForCloseTagErrors = false;
       
   862     } else if (t->tagName == mapTag)
       
   863         m_currentMapElement = 0;
       
   864     else if (t->tagName == pTag)
       
   865         checkForCloseTagErrors = false;
       
   866         
       
   867     HTMLStackElem* oldElem = blockStack;
       
   868     popBlock(t->tagName, checkForCloseTagErrors);
       
   869     if (oldElem == blockStack && t->tagName == pTag) {
       
   870         // We encountered a stray </p>.  Amazingly Gecko, WinIE, and MacIE all treat
       
   871         // this as a valid break, i.e., <p></p>.  So go ahead and make the empty
       
   872         // paragraph.
       
   873         t->beginTag = true;
       
   874         parseToken(t);
       
   875         popBlock(t->tagName);
       
   876         reportError(StrayParagraphCloseError);
       
   877     }
       
   878 }
       
   879 
       
   880 bool HTMLParser::isHeaderTag(const AtomicString& tagName)
       
   881 {
       
   882     static HashSet<AtomicStringImpl*> headerTags;
       
   883     if (headerTags.isEmpty()) {
       
   884         headerTags.add(h1Tag.localName().impl());
       
   885         headerTags.add(h2Tag.localName().impl());
       
   886         headerTags.add(h3Tag.localName().impl());
       
   887         headerTags.add(h4Tag.localName().impl());
       
   888         headerTags.add(h5Tag.localName().impl());
       
   889         headerTags.add(h6Tag.localName().impl());
       
   890     }
       
   891     
       
   892     return headerTags.contains(tagName.impl());
       
   893 }
       
   894 
       
   895 bool HTMLParser::isInline(Node* node) const
       
   896 {
       
   897     if (node->isTextNode())
       
   898         return true;
       
   899 
       
   900     if (node->isHTMLElement()) {
       
   901         HTMLElement* e = static_cast<HTMLElement*>(node);
       
   902         if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||
       
   903             e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||
       
   904             e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||
       
   905             e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||
       
   906             e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||
       
   907             e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||
       
   908             e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||
       
   909             e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||
       
   910             e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
       
   911             e->hasLocalName(noembedTag))
       
   912             return true;
       
   913         if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
       
   914             Settings* settings = document->settings();
       
   915             if (settings && settings->isJavaScriptEnabled())
       
   916                 return true;
       
   917         }
       
   918     }
       
   919     
       
   920     return false;
       
   921 }
       
   922 
       
   923 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
       
   924 {
       
   925     static HashSet<AtomicStringImpl*> residualStyleTags;
       
   926     if (residualStyleTags.isEmpty()) {
       
   927         residualStyleTags.add(aTag.localName().impl());
       
   928         residualStyleTags.add(fontTag.localName().impl());
       
   929         residualStyleTags.add(ttTag.localName().impl());
       
   930         residualStyleTags.add(uTag.localName().impl());
       
   931         residualStyleTags.add(bTag.localName().impl());
       
   932         residualStyleTags.add(iTag.localName().impl());
       
   933         residualStyleTags.add(sTag.localName().impl());
       
   934         residualStyleTags.add(strikeTag.localName().impl());
       
   935         residualStyleTags.add(bigTag.localName().impl());
       
   936         residualStyleTags.add(smallTag.localName().impl());
       
   937         residualStyleTags.add(emTag.localName().impl());
       
   938         residualStyleTags.add(strongTag.localName().impl());
       
   939         residualStyleTags.add(dfnTag.localName().impl());
       
   940         residualStyleTags.add(codeTag.localName().impl());
       
   941         residualStyleTags.add(sampTag.localName().impl());
       
   942         residualStyleTags.add(kbdTag.localName().impl());
       
   943         residualStyleTags.add(varTag.localName().impl());
       
   944         residualStyleTags.add(nobrTag.localName().impl());
       
   945     }
       
   946     
       
   947     return residualStyleTags.contains(tagName.impl());
       
   948 }
       
   949 
       
   950 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
       
   951 {
       
   952     static HashSet<AtomicStringImpl*> unaffectedTags;
       
   953     if (unaffectedTags.isEmpty()) {
       
   954         unaffectedTags.add(bodyTag.localName().impl());
       
   955         unaffectedTags.add(tableTag.localName().impl());
       
   956         unaffectedTags.add(theadTag.localName().impl());
       
   957         unaffectedTags.add(tbodyTag.localName().impl());
       
   958         unaffectedTags.add(tfootTag.localName().impl());
       
   959         unaffectedTags.add(trTag.localName().impl());
       
   960         unaffectedTags.add(thTag.localName().impl());
       
   961         unaffectedTags.add(tdTag.localName().impl());
       
   962         unaffectedTags.add(captionTag.localName().impl());
       
   963         unaffectedTags.add(colgroupTag.localName().impl());
       
   964         unaffectedTags.add(colTag.localName().impl());
       
   965         unaffectedTags.add(optionTag.localName().impl());
       
   966         unaffectedTags.add(optgroupTag.localName().impl());
       
   967         unaffectedTags.add(selectTag.localName().impl());
       
   968         unaffectedTags.add(objectTag.localName().impl());
       
   969     }
       
   970     
       
   971     return !unaffectedTags.contains(tagName.impl());
       
   972 }
       
   973 
       
   974 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
       
   975 {
       
   976     HTMLStackElem* maxElem = 0;
       
   977     bool finished = false;
       
   978     m_handlingResidualStyleAcrossBlocks = true;
       
   979     while (!finished) {
       
   980         // Find the outermost element that crosses over to a higher level. If there exists another higher-level
       
   981         // element, we will do another pass, until we have corrected the innermost one.
       
   982         ExceptionCode ec = 0;
       
   983         HTMLStackElem* curr = blockStack;
       
   984         HTMLStackElem* prev = 0;
       
   985         HTMLStackElem* prevMaxElem = 0;
       
   986         maxElem = 0;
       
   987         finished = true;
       
   988         while (curr && curr != elem) {
       
   989             if (curr->level > elem->level) {
       
   990                 if (!isAffectedByResidualStyle(curr->tagName))
       
   991                     return;
       
   992                 if (maxElem)
       
   993                     // We will need another pass.
       
   994                     finished = false;
       
   995                 maxElem = curr;
       
   996                 prevMaxElem = prev;
       
   997             }
       
   998 
       
   999             prev = curr;
       
  1000             curr = curr->next;
       
  1001         }
       
  1002 
       
  1003         if (!curr || !maxElem)
       
  1004             return;
       
  1005 
       
  1006         Node* residualElem = prev->node;
       
  1007         Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
       
  1008         Node* parentElem = elem->node;
       
  1009 
       
  1010         // Check to see if the reparenting that is going to occur is allowed according to the DOM.
       
  1011         // FIXME: We should either always allow it or perform an additional fixup instead of
       
  1012         // just bailing here.
       
  1013         // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
       
  1014         if (!parentElem->childAllowed(blockElem))
       
  1015             return;
       
  1016 
       
  1017         if (maxElem->node->parentNode() != elem->node) {
       
  1018             // Walk the stack and remove any elements that aren't residual style tags.  These
       
  1019             // are basically just being closed up.  Example:
       
  1020             // <font><span>Moo<p>Goo</font></p>.
       
  1021             // In the above example, the <span> doesn't need to be reopened.  It can just close.
       
  1022             HTMLStackElem* currElem = maxElem->next;
       
  1023             HTMLStackElem* prevElem = maxElem;
       
  1024             while (currElem != elem) {
       
  1025                 HTMLStackElem* nextElem = currElem->next;
       
  1026                 if (!isResidualStyleTag(currElem->tagName)) {
       
  1027                     prevElem->next = nextElem;
       
  1028                     prevElem->derefNode();
       
  1029                     prevElem->node = currElem->node;
       
  1030                     prevElem->didRefNode = currElem->didRefNode;
       
  1031                     delete currElem;
       
  1032                 }
       
  1033                 else
       
  1034                     prevElem = currElem;
       
  1035                 currElem = nextElem;
       
  1036             }
       
  1037 
       
  1038             // We have to reopen residual tags in between maxElem and elem.  An example of this case is:
       
  1039             // <font><i>Moo<p>Foo</font>.
       
  1040             // In this case, we need to transform the part before the <p> into:
       
  1041             // <font><i>Moo</i></font><i>
       
  1042             // so that the <i> will remain open.  This involves the modification of elements
       
  1043             // in the block stack.
       
  1044             // This will also affect how we ultimately reparent the block, since we want it to end up
       
  1045             // under the reopened residual tags (e.g., the <i> in the above example.)
       
  1046             RefPtr<Node> prevNode = 0;
       
  1047             currElem = maxElem;
       
  1048             while (currElem->node != residualElem) {
       
  1049                 if (isResidualStyleTag(currElem->node->localName())) {
       
  1050                     // Create a clone of this element.
       
  1051                     // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
       
  1052                     Node* currNode = currElem->node->cloneNode(false).releaseRef();
       
  1053                     reportError(ResidualStyleError, &currNode->localName());
       
  1054     
       
  1055                     // Change the stack element's node to point to the clone.
       
  1056                     // The stack element adopts the reference we obtained above by calling release().
       
  1057                     currElem->derefNode();
       
  1058                     currElem->node = currNode;
       
  1059                     currElem->didRefNode = true;
       
  1060 
       
  1061                     // Attach the previous node as a child of this new node.
       
  1062                     if (prevNode)
       
  1063                         currNode->appendChild(prevNode, ec);
       
  1064                     else // The new parent for the block element is going to be the innermost clone.
       
  1065                         parentElem = currNode;
       
  1066 
       
  1067                     prevNode = currNode;
       
  1068                 }
       
  1069 
       
  1070                 currElem = currElem->next;
       
  1071             }
       
  1072 
       
  1073             // Now append the chain of new residual style elements if one exists.
       
  1074             if (prevNode)
       
  1075                 elem->node->appendChild(prevNode, ec);
       
  1076         }
       
  1077 
       
  1078         // Check if the block is still in the tree. If it isn't, then we don't
       
  1079         // want to remove it from its parent (that would crash) or insert it into
       
  1080         // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
       
  1081         bool isBlockStillInTree = blockElem->parentNode();
       
  1082 
       
  1083         // We need to make a clone of |residualElem| and place it just inside |blockElem|.
       
  1084         // All content of |blockElem| is reparented to be under this clone.  We then
       
  1085         // reparent |blockElem| using real DOM calls so that attachment/detachment will
       
  1086         // be performed to fix up the rendering tree.
       
  1087         // So for this example: <b>...<p>Foo</b>Goo</p>
       
  1088         // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
       
  1089         //
       
  1090         // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
       
  1091         if (isBlockStillInTree)
       
  1092             blockElem->parentNode()->removeChild(blockElem, ec);
       
  1093 
       
  1094         Node* newNodePtr = 0;
       
  1095         ASSERT(finished || blockElem->firstChild());
       
  1096         if (blockElem->firstChild()) {
       
  1097             // Step 2: Clone |residualElem|.
       
  1098             RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
       
  1099             newNodePtr = newNode.get();
       
  1100             reportError(ResidualStyleError, &newNode->localName());
       
  1101 
       
  1102             // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|
       
  1103             // before we've put |newElem| into the document.  That way we'll only do one attachment of all
       
  1104             // the new content (instead of a bunch of individual attachments).
       
  1105             Node* currNode = blockElem->firstChild();
       
  1106             while (currNode) {
       
  1107                 Node* nextNode = currNode->nextSibling();
       
  1108                 newNode->appendChild(currNode, ec);
       
  1109                 currNode = nextNode;
       
  1110             }
       
  1111 
       
  1112             // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no
       
  1113             // attachment can occur yet.
       
  1114             blockElem->appendChild(newNode.release(), ec);
       
  1115         }
       
  1116 
       
  1117         // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.
       
  1118         if (isBlockStillInTree)
       
  1119             parentElem->appendChild(blockElem, ec);
       
  1120 
       
  1121         // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us.  Also update
       
  1122         // the node associated with the previous stack element so that when it gets popped,
       
  1123         // it doesn't make the residual element the next current node.
       
  1124         HTMLStackElem* currElem = maxElem;
       
  1125         HTMLStackElem* prevElem = 0;
       
  1126         while (currElem != elem) {
       
  1127             prevElem = currElem;
       
  1128             currElem = currElem->next;
       
  1129         }
       
  1130         prevElem->next = elem->next;
       
  1131         prevElem->derefNode();
       
  1132         prevElem->node = elem->node;
       
  1133         prevElem->didRefNode = elem->didRefNode;
       
  1134         if (!finished) {
       
  1135             // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
       
  1136             // in the stack. We do not do this for the innermost block, because in that case the new
       
  1137             // node is effectively no longer open.
       
  1138             elem->next = maxElem;
       
  1139             elem->node = prevMaxElem->node;
       
  1140             elem->didRefNode = prevMaxElem->didRefNode;
       
  1141             prevMaxElem->next = elem;
       
  1142             ASSERT(newNodePtr);
       
  1143             prevMaxElem->node = newNodePtr;
       
  1144             prevMaxElem->didRefNode = false;
       
  1145         } else
       
  1146             delete elem;
       
  1147     }
       
  1148 
       
  1149     // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
       
  1150     // In the above example, Goo should stay italic.
       
  1151     // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
       
  1152     HTMLStackElem* curr = blockStack;
       
  1153     HTMLStackElem* residualStyleStack = 0;
       
  1154     unsigned stackDepth = 1;
       
  1155     while (curr && curr != maxElem) {
       
  1156         // We will actually schedule this tag for reopening
       
  1157         // after we complete the close of this entire block.
       
  1158         if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth)
       
  1159             // We've overloaded the use of stack elements and are just reusing the
       
  1160             // struct with a slightly different meaning to the variables.  Instead of chaining
       
  1161             // from innermost to outermost, we build up a list of all the tags we need to reopen
       
  1162             // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
       
  1163             // to the outermost tag we need to reopen.
       
  1164             // We also set curr->node to be the actual element that corresponds to the ID stored in
       
  1165             // curr->id rather than the node that you should pop to when the element gets pulled off
       
  1166             // the stack.
       
  1167             moveOneBlockToStack(residualStyleStack);
       
  1168         else
       
  1169             popOneBlock();
       
  1170 
       
  1171         curr = blockStack;
       
  1172     }
       
  1173 
       
  1174     reopenResidualStyleTags(residualStyleStack, 0); // FIXME: Deal with stray table content some day
       
  1175                                                     // if it becomes necessary to do so.
       
  1176 
       
  1177     m_handlingResidualStyleAcrossBlocks = false;
       
  1178 }
       
  1179 
       
  1180 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
       
  1181 {
       
  1182     // Loop for each tag that needs to be reopened.
       
  1183     while (elem) {
       
  1184         // Create a shallow clone of the DOM node for this element.
       
  1185         RefPtr<Node> newNode = elem->node->cloneNode(false); 
       
  1186         reportError(ResidualStyleError, &newNode->localName());
       
  1187 
       
  1188         // Append the new node. In the malformed table case, we need to insert before the table,
       
  1189         // which will be the last child.
       
  1190         ExceptionCode ec = 0;
       
  1191         if (malformedTableParent)
       
  1192             malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
       
  1193         else
       
  1194             current->appendChild(newNode, ec);
       
  1195         // FIXME: Is it really OK to ignore the exceptions here?
       
  1196 
       
  1197         // Now push a new stack element for this node we just created.
       
  1198         pushBlock(elem->tagName, elem->level);
       
  1199 
       
  1200         // Set our strayTableContent boolean if needed, so that the reopened tag also knows
       
  1201         // that it is inside a malformed table.
       
  1202         blockStack->strayTableContent = malformedTableParent != 0;
       
  1203         if (blockStack->strayTableContent)
       
  1204             inStrayTableContent++;
       
  1205 
       
  1206         // Clear our malformed table parent variable.
       
  1207         malformedTableParent = 0;
       
  1208 
       
  1209         // Update |current| manually to point to the new node.
       
  1210         setCurrent(newNode.get());
       
  1211         
       
  1212         // Advance to the next tag that needs to be reopened.
       
  1213         HTMLStackElem* next = elem->next;
       
  1214         elem->derefNode();
       
  1215         delete elem;
       
  1216         elem = next;
       
  1217     }
       
  1218 }
       
  1219 
       
  1220 void HTMLParser::pushBlock(const AtomicString& tagName, int level)
       
  1221 {
       
  1222     blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
       
  1223     didRefCurrent = false;
       
  1224 }
       
  1225 
       
  1226 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
       
  1227 {
       
  1228     HTMLStackElem* elem = blockStack;
       
  1229     
       
  1230     int maxLevel = 0;
       
  1231 
       
  1232     while (elem && (elem->tagName != tagName)) {
       
  1233         if (maxLevel < elem->level)
       
  1234             maxLevel = elem->level;
       
  1235         elem = elem->next;
       
  1236     }
       
  1237 
       
  1238     if (!elem) {
       
  1239         if (reportErrors)
       
  1240             reportError(StrayCloseTagError, &tagName, 0, true);
       
  1241         return;
       
  1242     }
       
  1243 
       
  1244     if (maxLevel > elem->level) {
       
  1245         // We didn't match because the tag is in a different scope, e.g.,
       
  1246         // <b><p>Foo</b>.  Try to correct the problem.
       
  1247         if (!isResidualStyleTag(tagName))
       
  1248             return;
       
  1249         return handleResidualStyleCloseTagAcrossBlocks(elem);
       
  1250     }
       
  1251 
       
  1252     bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
       
  1253     HTMLStackElem* residualStyleStack = 0;
       
  1254     Node* malformedTableParent = 0;
       
  1255     
       
  1256     elem = blockStack;
       
  1257     unsigned stackDepth = 1;
       
  1258     while (elem) {
       
  1259         if (elem->tagName == tagName) {
       
  1260             int strayTable = inStrayTableContent;
       
  1261             popOneBlock();
       
  1262             elem = 0;
       
  1263 
       
  1264             // This element was the root of some malformed content just inside an implicit or
       
  1265             // explicit <tbody> or <tr>.
       
  1266             // If we end up needing to reopen residual style tags, the root of the reopened chain
       
  1267             // must also know that it is the root of malformed content inside a <tbody>/<tr>.
       
  1268             if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
       
  1269                 Node* curr = current;
       
  1270                 while (curr && !curr->hasTagName(tableTag))
       
  1271                     curr = curr->parentNode();
       
  1272                 malformedTableParent = curr ? curr->parentNode() : 0;
       
  1273             }
       
  1274         }
       
  1275         else {
       
  1276             if (m_currentFormElement && elem->tagName == formTag)
       
  1277                 // A <form> is being closed prematurely (and this is
       
  1278                 // malformed HTML).  Set an attribute on the form to clear out its
       
  1279                 // bottom margin.
       
  1280                 m_currentFormElement->setMalformed(true);
       
  1281 
       
  1282             // Schedule this tag for reopening
       
  1283             // after we complete the close of this entire block.
       
  1284             if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth)
       
  1285                 // We've overloaded the use of stack elements and are just reusing the
       
  1286                 // struct with a slightly different meaning to the variables.  Instead of chaining
       
  1287                 // from innermost to outermost, we build up a list of all the tags we need to reopen
       
  1288                 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
       
  1289                 // to the outermost tag we need to reopen.
       
  1290                 // We also set elem->node to be the actual element that corresponds to the ID stored in
       
  1291                 // elem->id rather than the node that you should pop to when the element gets pulled off
       
  1292                 // the stack.
       
  1293                 moveOneBlockToStack(residualStyleStack);
       
  1294             else
       
  1295                 popOneBlock();
       
  1296             elem = blockStack;
       
  1297         }
       
  1298     }
       
  1299 
       
  1300     reopenResidualStyleTags(residualStyleStack, malformedTableParent);
       
  1301 }
       
  1302 
       
  1303 inline HTMLStackElem* HTMLParser::popOneBlockCommon()
       
  1304 {
       
  1305     HTMLStackElem* elem = blockStack;
       
  1306 
       
  1307     // Form elements restore their state during the parsing process.
       
  1308     // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
       
  1309     if (current && elem->node != current)
       
  1310         current->finishedParsing();
       
  1311 
       
  1312     blockStack = elem->next;
       
  1313     current = elem->node;
       
  1314     didRefCurrent = elem->didRefNode;
       
  1315 
       
  1316     if (elem->strayTableContent)
       
  1317         inStrayTableContent--;
       
  1318 
       
  1319     return elem;
       
  1320 }
       
  1321 
       
  1322 void HTMLParser::popOneBlock()
       
  1323 {
       
  1324     // Store the current node before popOneBlockCommon overwrites it.
       
  1325     Node* lastCurrent = current;
       
  1326     bool didRefLastCurrent = didRefCurrent;
       
  1327 
       
  1328     delete popOneBlockCommon();
       
  1329 
       
  1330     if (didRefLastCurrent)
       
  1331         lastCurrent->deref();
       
  1332 }
       
  1333 
       
  1334 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
       
  1335 {
       
  1336     // We'll be using the stack element we're popping, but for the current node.
       
  1337     // See the two callers for details.
       
  1338 
       
  1339     // Store the current node before popOneBlockCommon overwrites it.
       
  1340     Node* lastCurrent = current;
       
  1341     bool didRefLastCurrent = didRefCurrent;
       
  1342 
       
  1343     // Pop the block, but don't deref the current node as popOneBlock does because
       
  1344     // we'll be using the pointer in the new stack element.
       
  1345     HTMLStackElem* elem = popOneBlockCommon();
       
  1346 
       
  1347     // Transfer the current node into the stack element.
       
  1348     // No need to deref the old elem->node because popOneBlockCommon transferred
       
  1349     // it into the current/didRefCurrent fields.
       
  1350     elem->node = lastCurrent;
       
  1351     elem->didRefNode = didRefLastCurrent;
       
  1352     elem->next = head;
       
  1353     head = elem;
       
  1354 }
       
  1355 
       
  1356 void HTMLParser::popInlineBlocks()
       
  1357 {
       
  1358     while (blockStack && isInline(current))
       
  1359         popOneBlock();
       
  1360 }
       
  1361 
       
  1362 void HTMLParser::freeBlock()
       
  1363 {
       
  1364     while (blockStack)
       
  1365         popOneBlock();
       
  1366 }
       
  1367 
       
  1368 void HTMLParser::createHead()
       
  1369 {
       
  1370     if (head || !document->documentElement())
       
  1371         return;
       
  1372 
       
  1373     head = new HTMLHeadElement(document);
       
  1374     HTMLElement* body = document->body();
       
  1375     ExceptionCode ec = 0;
       
  1376     document->documentElement()->insertBefore(head, body, ec);
       
  1377     if (ec)
       
  1378         head = 0;
       
  1379 }
       
  1380 
       
  1381 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
       
  1382 {
       
  1383     RefPtr<Node> n = new HTMLDivElement(document);
       
  1384 
       
  1385     NamedMappedAttrMap* attrs = t->attrs.get();
       
  1386 
       
  1387     RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(document, m_currentFormElement.get());
       
  1388     isIndex->setAttributeMap(attrs);
       
  1389     isIndex->setAttribute(typeAttr, "khtml_isindex");
       
  1390 
       
  1391     String text = searchableIndexIntroduction();
       
  1392     if (attrs) {
       
  1393         if (Attribute* a = attrs->getAttributeItem(promptAttr))
       
  1394             text = a->value().domString() + " ";
       
  1395         t->attrs = 0;
       
  1396     }
       
  1397 
       
  1398     n->addChild(new HTMLHRElement(document));
       
  1399     n->addChild(new Text(document, text));
       
  1400     n->addChild(isIndex.release());
       
  1401     n->addChild(new HTMLHRElement(document));
       
  1402 
       
  1403     return n.release();
       
  1404 }
       
  1405 
       
  1406 void HTMLParser::startBody()
       
  1407 {
       
  1408     if (inBody)
       
  1409         return;
       
  1410 
       
  1411     inBody = true;
       
  1412 
       
  1413     if (m_isindexElement) {
       
  1414         insertNode(m_isindexElement.get(), true /* don't descend into this node */);
       
  1415         m_isindexElement = 0;
       
  1416     }
       
  1417 }
       
  1418 
       
  1419 void HTMLParser::finished()
       
  1420 {
       
  1421     // In the case of a completely empty document, here's the place to create the HTML element.
       
  1422     if (current && current->isDocumentNode() && !document->documentElement())
       
  1423         insertNode(new HTMLHtmlElement(document));
       
  1424 
       
  1425     // This ensures that "current" is not left pointing to a node when the document is destroyed.
       
  1426     freeBlock();
       
  1427     setCurrent(0);
       
  1428 
       
  1429     // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
       
  1430     if (!m_isParsingFragment)
       
  1431         document->finishedParsing();
       
  1432 }
       
  1433 
       
  1434 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
       
  1435 {    
       
  1436     Frame* frame = document->frame();
       
  1437     if (!frame)
       
  1438         return;
       
  1439     
       
  1440     Page* page = frame->page();
       
  1441     if (!page)
       
  1442         return;
       
  1443 
       
  1444     HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
       
  1445     int lineNumber = htmlTokenizer->lineNumber() + 1;
       
  1446 
       
  1447     AtomicString tag1;
       
  1448     AtomicString tag2;
       
  1449     if (tagName1) {
       
  1450         if (*tagName1 == "#text")
       
  1451             tag1 = "Text";
       
  1452         else if (*tagName1 == "#comment")
       
  1453             tag1 = "<!-- comment -->";
       
  1454         else
       
  1455             tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
       
  1456     }
       
  1457     if (tagName2) {
       
  1458         if (*tagName2 == "#text")
       
  1459             tag2 = "Text";
       
  1460         else if (*tagName2 == "#comment")
       
  1461             tag2 = "<!-- comment -->";
       
  1462         else
       
  1463             tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
       
  1464     }
       
  1465         
       
  1466     const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
       
  1467     if (!errorMsg)
       
  1468         return;
       
  1469         
       
  1470     String message;
       
  1471     if (htmlTokenizer->processingContentWrittenByScript())
       
  1472         message += htmlParserDocumentWriteMessage();
       
  1473     message += errorMsg;
       
  1474     message.replace("%tag1", tag1);
       
  1475     message.replace("%tag2", tag2);
       
  1476 
       
  1477     page->chrome()->addMessageToConsole(HTMLMessageSource, isWarning(errorCode) ? WarningMessageLevel: ErrorMessageLevel, message, lineNumber, document->URL());
       
  1478 }
       
  1479 
       
  1480 }