webengine/osswebengine/WebCore/html/PreloadScanner.cpp
changeset 0 dd21522fd290
child 68 92a765b5b3e7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebCore/html/PreloadScanner.cpp	Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,835 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "PreloadScanner.h"
+
+#include "AtomicString.h"
+#include "Cache.h"
+#include "CachedCSSStyleSheet.h"
+#include "CachedImage.h"
+#include "CachedResource.h"
+#include "CachedResourceClient.h"
+#include "CachedScript.h"
+#include "CSSHelper.h"
+#include "CString.h"
+#include "DocLoader.h"
+#include "Document.h"
+#include "Frame.h"
+#include "FrameLoader.h"
+#include "HTMLLinkElement.h"
+#include "HTMLNames.h"
+#include "SystemTime.h"
+#include <wtf/unicode/Unicode.h>
+
+#ifdef __GNUC__
+// The main tokenizer includes this too so we are getting two copies of the data. However, this way the code gets inlined.
+#include "HTMLEntityNames.c"
+#else
+// Not inlined for non-GCC compilers
+struct Entity { // Named character reference record; declared here because HTMLEntityNames.c is only included under __GNUC__.
+    const char* name; // Entity name; presumably without the leading '&' and trailing ';' (TODO confirm against HTMLEntityNames.c).
+    int code; // Value consumeEntity() returns for this entity (presumably its Unicode code point).
+};
+const struct Entity* findEntity(register const char* str, register unsigned int len); // Looks up the len characters at str; consumeEntity() treats a null result as "no such entity".
+#endif
+
+#define PRELOAD_DEBUG 0
+
+using namespace WTF;
+
+namespace WebCore {
+    
+using namespace HTMLNames;
+    
+PreloadScanner::PreloadScanner(Document* doc) // Creates an idle scanner for doc; the tokenizer state itself is set up later by begin() via reset().
+    : m_inProgress(false)
+    , m_timeUsed(0) // Accumulated by write(); only reported when PRELOAD_DEBUG is enabled.
+    , m_document(doc) // Used for the docLoader()->preload() calls; ownership is not visible from this file.
+{
+#if PRELOAD_DEBUG
+    printf("CREATING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
+#endif
+}
+    
+PreloadScanner::~PreloadScanner() // No explicit cleanup here; only prints diagnostics when PRELOAD_DEBUG is enabled.
+{
+#if PRELOAD_DEBUG
+    printf("DELETING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
+    printf("TOTAL TIME USED %.4fs\n", m_timeUsed); // Sum of the time spent inside write() calls.
+#endif
+}
+    
+void PreloadScanner::begin() // Starts a scanning session: clears all tokenizer state, then marks the scanner active.
+{ 
+    ASSERT(!m_inProgress); // begin() must not be called again before end().
+    reset(); 
+    m_inProgress = true; // tokenize() asserts on this flag.
+}
+    
+void PreloadScanner::end() // Ends the scanning session; buffered state is left in place until the next begin() resets it.
+{ 
+    ASSERT(m_inProgress); // end() is only valid after a matching begin().
+    m_inProgress = false; 
+}
+
+void PreloadScanner::reset()
+{
+    m_source.clear(); // Drop any input that was buffered but not yet tokenized.
+    // HTML tokenizer: back to the Data state with the PCDATA content model.
+    m_contentModel = PCDATA;
+    m_state = Data;
+    m_commentPos = 0;
+    m_escape = false;
+    // Forget the tag and attribute that were being assembled.
+    m_lastStartTag = AtomicString();
+    m_attributeValue.clear();
+    m_attributeName.clear();
+    m_tagName.clear();
+    m_closeTag = false;
+    // No preload candidate is pending and the recent-character history is empty.
+    m_linkIsStyleSheet = false;
+    m_urlToLoad = "";
+    clearLastCharacters();
+    m_lastCharacterIndex = 0;
+    // Inline stylesheet (@import) scanner.
+    m_cssRuleValue.clear();
+    m_cssRule.clear();
+    m_cssState = CSSInitial;
+}
+    
+void PreloadScanner::write(const SegmentedString& source) // Tokenizes source and adds the elapsed currentTime() to m_timeUsed.
+{
+    const double scanStartedAt = currentTime();
+    tokenize(source);
+    m_timeUsed += currentTime() - scanStartedAt;
+}
+    
+static inline bool isWhitespace(UChar c) // True for the HTML space characters: space, LF, CR, tab and form feed.
+{
+    return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\f'; // Form feed (U+000C) is whitespace for both the HTML5 tokenizer and CSS.
+}
+    
+inline void PreloadScanner::clearLastCharacters() // Zeroes the recent-character history so earlier input cannot satisfy lastCharactersMatch().
+{
+    memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(UChar)); // Assumes m_lastCharacters holds lastCharactersBufferSize UChars (declared in PreloadScanner.h); TODO confirm.
+}
+    
+inline void PreloadScanner::rememberCharacter(UChar c) // Records c in the m_lastCharacters ring buffer, overwriting the oldest entry.
+{
+    m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize; // Advance first, so the index always points at the newest character.
+    m_lastCharacters[m_lastCharacterIndex] = c;
+}
+    
+inline bool PreloadScanner::lastCharactersMatch(const char* chars, unsigned count) const
+{
+    // Compares the count most recently remembered characters with chars, walking chars from
+    // its last character to its first while stepping backwards through the ring buffer.
+    unsigned slot = m_lastCharacterIndex;
+    for (unsigned remaining = count; remaining; --remaining) {
+        if (m_lastCharacters[slot] != chars[remaining - 1])
+            return false;
+        // Step to the previous slot, wrapping from 0 to the end of the buffer.
+        slot = slot ? slot - 1 : lastCharactersBufferSize - 1;
+    }
+    return true;
+}
+    
+static inline unsigned legalEntityFor(unsigned value)
+{
+    // Maps NUL, surrogates (U+D800..U+DFFF) and anything above U+10FFFF to U+FFFD.
+    // FIXME There is a table for more exceptions in the HTML5 specification.
+    const bool isSurrogate = value >= 0xD800 && value <= 0xDFFF;
+    return (!value || isSurrogate || value > 0x10FFFF) ? 0xFFFD : value;
+}
+    
+unsigned PreloadScanner::consumeEntity(SegmentedString& source, bool& notEnoughCharacters)
+{
+    // Decodes the character reference following an already-consumed '&'. Returns its code point, or
+    // 0 with source rewound to just after the '&' if there is none. When input runs out first, sets
+    // notEnoughCharacters, puts back everything read and returns 0 so the caller can retry later.
+    enum EntityState { Initial, NumberType, MaybeHex, Hex, Decimal, Named };
+    const unsigned outOfRange = 0x110000; // Numeric values saturate here (legalEntityFor() maps it to U+FFFD) instead of wrapping.
+    EntityState entityState = Initial;
+    unsigned result = 0;
+    Vector<UChar, 10> seenChars;
+    Vector<char, 10> entityName;
+    while (!source.isEmpty()) {
+        UChar cc = *source;
+        seenChars.append(cc); // Everything read so far, kept so it can be put back on failure.
+        switch (entityState) {
+        case Initial:
+            if (isWhitespace(cc) || cc == '<' || cc == '&')
+                return 0; // A bare '&'; nothing has been consumed yet.
+            else if (cc == '#')
+                entityState = NumberType;
+            else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
+                entityName.append(cc);
+                entityState = Named;
+            } else
+                return 0;
+            break;
+        case NumberType:
+            if (cc == 'x' || cc == 'X')
+                entityState = MaybeHex;
+            else if (cc >= '0' && cc <= '9') {
+                entityState = Decimal;
+                result = cc - '0';
+            } else {
+                source.push('#'); // "&#" without a digit: give the '#' back.
+                return 0;
+            }
+            break;
+        case MaybeHex:
+            if (cc >= '0' && cc <= '9')
+                result = cc - '0';
+            else if (cc >= 'a' && cc <= 'f')
+                result = 10 + cc - 'a';
+            else if (cc >= 'A' && cc <= 'F')
+                result = 10 + cc - 'A';
+            else {
+                // NOTE(review): order kept from the original; it relies on SegmentedString::push()
+                source.push(seenChars[1]); // making the last pushed character the next one read. Confirm.
+                source.push('#');
+                return 0;
+            }
+            entityState = Hex;
+            break;
+        case Hex:
+            if (cc >= '0' && cc <= '9')
+                result = result * 16 + cc - '0';
+            else if (cc >= 'a' && cc <= 'f')
+                result = result * 16 + 10 + cc - 'a';
+            else if (cc >= 'A' && cc <= 'F')
+                result = result * 16 + 10 + cc - 'A';
+            else {
+                if (cc == ';')
+                    source.advance(); // The terminating ';' belongs to the reference.
+                return legalEntityFor(result);
+            }
+            if (result > outOfRange)
+                result = outOfRange; // Saturate so the next "result * 16 + digit" cannot overflow.
+            break;
+        case Decimal:
+            if (cc >= '0' && cc <= '9')
+                result = result * 10 + cc - '0';
+            else {
+                if (cc == ';')
+                    source.advance(); // The terminating ';' belongs to the reference.
+                return legalEntityFor(result);
+            }
+            if (result > outOfRange)
+                result = outOfRange; // Saturate so the next "result * 10 + digit" cannot overflow.
+            break;
+        case Named:
+            // This is the attribute only version, generic version matches somewhat differently
+            while (entityName.size() <= 8) { // Gives up once the collected name grows past 8 characters.
+                if (cc == ';') {
+                    const Entity* entity = findEntity(entityName.data(), entityName.size());
+                    if (entity) {
+                        source.advance();
+                        return entity->code;
+                    }
+                    break;
+                }
+                if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
+                    // Name ended without ';': accept a known entity and leave cc unconsumed.
+                    const Entity* entity = findEntity(entityName.data(), entityName.size());
+                    if (entity)
+                        return entity->code;
+                    break;
+                }
+                entityName.append(cc);
+                source.advance();
+                if (source.isEmpty())
+                    goto outOfCharacters;
+                cc = *source;
+                seenChars.append(cc);
+            }
+            // Not an entity: put back what was consumed (all of seenChars except the current cc).
+            if (seenChars.size() == 2)
+                source.push(seenChars[0]);
+            else if (seenChars.size() == 3) {
+                source.push(seenChars[1]); // NOTE(review): same push-order assumption as in MaybeHex. Confirm.
+                source.push(seenChars[0]);
+            } else
+                source.prepend(SegmentedString(String(seenChars.data(), seenChars.size() - 1)));
+            return 0;
+        }
+        source.advance();
+    }
+outOfCharacters:
+    notEnoughCharacters = true; // Every character in seenChars has been consumed at this point.
+    source.prepend(SegmentedString(String(seenChars.data(), seenChars.size())));
+    return 0;
+}
+
+void PreloadScanner::tokenize(const SegmentedString& source)
+{
+    ASSERT(m_inProgress);
+    // Resumable: unconsumed input stays in m_source and the tokenizer state lives in members.
+    m_source.append(source);
+    // Each pass looks at *m_source in the current state; `break` consumes it, `continue` re-examines it.
+    // This is a simplified HTML5 Tokenizer
+    // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
+    while (!m_source.isEmpty()) {
+        UChar cc = *m_source;
+        switch (m_state) {
+        case Data:
+            while (1) {
+                rememberCharacter(cc); // History for the "<!--" / "-->" escape checks below.
+                if (cc == '&') {
+                    if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
+                        m_state = EntityData;
+                        break;
+                    }
+                } else if (cc == '-') {
+                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
+                        if (lastCharactersMatch("<!--", 4))
+                            m_escape = true;
+                    }
+                } else if (cc == '<') {
+                    if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
+                        m_state = TagOpen;
+                        break;
+                    }
+                } else if (cc == '>') {
+                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
+                        if (lastCharactersMatch("-->", 3))
+                            m_escape = false;
+                    }
+                }
+                emitCharacter(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case EntityData:
+            // should try to consume the entity but we only care about entities in attributes
+            m_state = Data;
+            continue; // Re-examine cc in Data; `break` would swallow the character after '&' (e.g. the '<' of "&<img").
+        case TagOpen:
+            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
+                // Only an end tag can be recognised inside RCDATA/CDATA content.
+                if (cc == '/')
+                    m_state = CloseTagOpen;
+                else {
+                    m_state = Data;
+                    continue;
+                }
+            } else if (m_contentModel == PCDATA) {
+                if (cc == '!')
+                    m_state = MarkupDeclarationOpen;
+                else if (cc == '/')
+                    m_state = CloseTagOpen;
+                else if (cc >= 'A' && cc <= 'Z') {
+                    m_tagName.clear();
+                    m_tagName.append(cc + 0x20); // ASCII lower-casing.
+                    m_closeTag = false;
+                    m_state = TagName;
+                } else if (cc >= 'a' && cc <= 'z') {
+                    m_tagName.clear();
+                    m_tagName.append(cc);
+                    m_closeTag = false;
+                    m_state = TagName;
+                } else if (cc == '>') {
+                    m_state = Data;
+                } else if (cc == '?') {
+                    m_state = BogusComment;
+                } else {
+                    m_state = Data;
+                    continue;
+                }
+            }
+            break;
+        case CloseTagOpen:
+            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
+                // The end tag only counts if it names the element that opened this content.
+                if (!m_lastStartTag.length()) {
+                    m_state = Data;
+                    continue;
+                }
+                if (m_source.length() < m_lastStartTag.length() + 1)
+                    return; // Need the whole name plus one delimiter; wait for more input.
+                Vector<UChar> tmpString;
+                UChar tmpChar = 0;
+                bool match = true;
+                for (unsigned n = 0; n < m_lastStartTag.length() + 1; n++) {
+                    tmpChar = Unicode::toLower(*m_source);
+                    if (n < m_lastStartTag.length() && tmpChar != m_lastStartTag[n])
+                        match = false;
+                    tmpString.append(tmpChar);
+                    m_source.advance();
+                }
+                // Put the peeked characters back (lower-cased) before deciding.
+                m_source.prepend(SegmentedString(String(tmpString.data(), tmpString.size())));
+                if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
+                    m_state = Data;
+                    continue;
+                }
+            }
+            if (cc >= 'A' && cc <= 'Z') {
+                m_tagName.clear();
+                m_tagName.append(cc + 0x20);
+                m_closeTag = true;
+                m_state = TagName;
+            } else if (cc >= 'a' && cc <= 'z') {
+                m_tagName.clear();
+                m_tagName.append(cc);
+                m_closeTag = true;
+                m_state = TagName;
+            } else if (cc == '>') {
+                m_state = Data;
+            } else
+                m_state = BogusComment;
+            break;
+        case TagName:
+            while (1) {
+                if (isWhitespace(cc)) {
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc == '>') {
+                    emitTag();
+                    m_state = Data;
+                    break;
+                }
+                if (cc == '/') {
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc >= 'A' && cc <= 'Z')
+                    m_tagName.append(cc + 0x20);
+                else
+                    m_tagName.append(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case BeforeAttributeName:
+            if (isWhitespace(cc))
+                ;
+            else if (cc == '>') {
+                emitTag();
+                m_state = Data;
+            } else if (cc >= 'A' && cc <= 'Z') {
+                m_attributeName.clear();
+                m_attributeValue.clear();
+                m_attributeName.append(cc + 0x20);
+                m_state = AttributeName;
+            } else if (cc == '/')
+                ;
+            else {
+                m_attributeName.clear();
+                m_attributeValue.clear();
+                m_attributeName.append(cc);
+                m_state = AttributeName;
+            }
+            break;
+        case AttributeName:
+            while (1) {
+                if (isWhitespace(cc)) {
+                    m_state = AfterAttributeName;
+                    break;
+                }
+                if (cc == '=') {
+                    m_state = BeforeAttributeValue;
+                    break;
+                }
+                if (cc == '>') {
+                    emitTag(); // Value-less attribute: there is nothing for processAttribute() to look at.
+                    m_state = Data;
+                    break;
+                }
+                if (cc == '/') {
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc >= 'A' && cc <= 'Z')
+                    m_attributeName.append(cc + 0x20);
+                else
+                    m_attributeName.append(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case AfterAttributeName:
+            if (isWhitespace(cc))
+                ;
+            else if (cc == '=')
+                m_state = BeforeAttributeValue;
+            else if (cc == '>') {
+                emitTag();
+                m_state = Data;
+            } else if (cc >= 'A' && cc <= 'Z') {
+                m_attributeName.clear();
+                m_attributeValue.clear();
+                m_attributeName.append(cc + 0x20);
+                m_state = AttributeName;
+            } else if (cc == '/')
+                m_state = BeforeAttributeName;
+            else {
+                m_attributeName.clear();
+                m_attributeValue.clear();
+                m_attributeName.append(cc);
+                m_state = AttributeName;
+            }
+            break;
+        case BeforeAttributeValue:
+            if (isWhitespace(cc))
+                ;
+            else if (cc == '"')
+                m_state = AttributeValueDoubleQuoted;
+            else if (cc == '&') {
+                m_state = AttributeValueUnquoted; // Let the unquoted state see the '&' itself.
+                continue;
+            } else if (cc == '\'')
+                m_state = AttributeValueSingleQuoted;
+            else if (cc == '>') {
+                emitTag();
+                m_state = Data;
+            } else {
+                m_attributeValue.append(cc);
+                m_state = AttributeValueUnquoted;
+            }
+            break;
+        case AttributeValueDoubleQuoted:
+            while (1) {
+                if (cc == '"') {
+                    processAttribute();
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc == '&') {
+                    m_stateBeforeEntityInAttributeValue = m_state; // Where to resume after the entity.
+                    m_state = EntityInAttributeValue;
+                    break;
+                }
+                m_attributeValue.append(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case AttributeValueSingleQuoted:
+            while (1) {
+                if (cc == '\'') {
+                    processAttribute();
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc == '&') {
+                    m_stateBeforeEntityInAttributeValue = m_state;
+                    m_state = EntityInAttributeValue;
+                    break;
+                }
+                m_attributeValue.append(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case AttributeValueUnquoted:
+            while (1) {
+                if (isWhitespace(cc)) {
+                    processAttribute();
+                    m_state = BeforeAttributeName;
+                    break;
+                }
+                if (cc == '&') {
+                    m_stateBeforeEntityInAttributeValue = m_state;
+                    m_state = EntityInAttributeValue;
+                    break;
+                }
+                if (cc == '>') {
+                    processAttribute();
+                    emitTag();
+                    m_state = Data;
+                    break;
+                }
+                m_attributeValue.append(cc);
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case EntityInAttributeValue:
+            {
+                bool notEnoughCharacters = false;
+                unsigned entity = consumeEntity(m_source, notEnoughCharacters);
+                if (notEnoughCharacters)
+                    return; // Stay in this state and retry once more input has arrived.
+                if (entity > 0xFFFF) {
+                    m_attributeValue.append(U16_LEAD(entity)); // Outside the BMP: store as a surrogate pair.
+                    m_attributeValue.append(U16_TRAIL(entity));
+                } else if (entity)
+                    m_attributeValue.append(entity);
+                else
+                    m_attributeValue.append('&'); // Not an entity: keep the literal ampersand.
+            }
+            m_state = m_stateBeforeEntityInAttributeValue;
+            continue;
+        case BogusComment:
+            while (1) {
+                if (cc == '>') {
+                    m_state = Data;
+                    break;
+                }
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case MarkupDeclarationOpen: {
+            if (cc == '-') {
+                if (m_source.length() < 2)
+                    return; // Need both characters of "--" before deciding.
+                m_source.advance();
+                cc = *m_source;
+                if (cc == '-')
+                    m_state = CommentStart;
+                else {
+                    m_state = BogusComment;
+                    continue;
+                }
+            // If we cared about the DOCTYPE we would test to enter those states here
+            } else {
+                m_state = BogusComment;
+                continue;
+            }
+            break;
+        }
+        case CommentStart:
+            if (cc == '-')
+                m_state = CommentStartDash;
+            else if (cc == '>')
+                m_state = Data;
+            else
+                m_state = Comment;
+            break;
+        case CommentStartDash:
+            if (cc == '-')
+                m_state = CommentEnd;
+            else if (cc == '>')
+                m_state = Data;
+            else
+                m_state = Comment;
+            break;
+        case Comment:
+            while (1) {
+                if (cc == '-') {
+                    m_state = CommentEndDash;
+                    break;
+                }
+                m_source.advance();
+                if (m_source.isEmpty())
+                    return;
+                cc = *m_source;
+            }
+            break;
+        case CommentEndDash:
+            if (cc == '-')
+                m_state = CommentEnd;
+            else
+                m_state = Comment;
+            break;
+        case CommentEnd:
+            if (cc == '>')
+                m_state = Data;
+            else if (cc == '-')
+                ;
+            else
+                m_state = Comment;
+            break;
+        }
+        m_source.advance(); // Reached by `break` from a state: the current character has been handled.
+    }
+}
+    
+void PreloadScanner::processAttribute()
+{
+    // Called when an attribute value is complete. Remembers the first src/href URL of a
+    // preloadable tag and the rel classification of a <link>, for emitTag() to act on.
+    AtomicString tagName(m_tagName.data(), m_tagName.size());
+    AtomicString attrName(m_attributeName.data(), m_attributeName.size());
+    String attrValue(m_attributeValue.data(), m_attributeValue.size());
+    const bool isLink = tagName == linkTag;
+    const bool isScriptOrImage = tagName == scriptTag || tagName == imgTag;
+    const bool carriesURL = isScriptOrImage ? attrName == srcAttr : (isLink && attrName == hrefAttr);
+    if (carriesURL && m_urlToLoad.isEmpty()) {
+        m_urlToLoad = parseURL(attrValue);
+        return;
+    }
+    if (!isLink || isScriptOrImage || !(attrName == relAttr))
+        return;
+    bool isStyleSheet = false, isAlternate = false, isIcon = false;
+    HTMLLinkElement::tokenizeRelAttribute(attrValue, isStyleSheet, isAlternate, isIcon);
+    // Only plain stylesheet links are preloaded; alternate stylesheets and icons are not.
+    m_linkIsStyleSheet = isStyleSheet && !isAlternate && !isIcon;
+}
+    
+inline void PreloadScanner::emitCharacter(UChar c) // Character sink for the Data state; only text inside <style> is of interest.
+{
+    if (m_contentModel == CDATA && m_lastStartTag == styleTag) 
+        tokenizeCSS(c); // Feed inline stylesheet text to the @import scanner.
+}
+    
+inline void PreloadScanner::tokenizeCSS(UChar c)
+{
+    // We are just interested in @import rules, no need for real tokenization here: this only
+    // tracks comments and "@rule value;" well enough to hand at-rules to emitCSSRule().
+    const bool isSpace = isWhitespace(c);
+    const bool endsRule = c == ';';
+    switch (m_cssState) {
+    case CSSInitial:
+        if (c == '/')
+            m_cssState = CSSMaybeComment;
+        else if (c == '@')
+            m_cssState = CSSRuleStart;
+        break;
+    case CSSMaybeComment:
+        // Saw '/': only "/*" opens a comment.
+        m_cssState = c == '*' ? CSSComment : CSSInitial;
+        break;
+    case CSSComment:
+        if (c == '*')
+            m_cssState = CSSMaybeCommentEnd;
+        break;
+    case CSSMaybeCommentEnd:
+        // Saw '*' inside a comment: '/' closes it, another '*' keeps waiting.
+        if (c == '/')
+            m_cssState = CSSInitial;
+        else if (c != '*')
+            m_cssState = CSSComment;
+        break;
+    case CSSRuleStart:
+        // Saw '@': a rule name has to begin with an ASCII letter.
+        if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
+            m_cssState = CSSInitial;
+            break;
+        }
+        m_cssRuleValue.clear();
+        m_cssRule.clear();
+        m_cssRule.append(c);
+        m_cssState = CSSRule;
+        break;
+    case CSSRule:
+        // Collecting the rule name; whitespace ends it and ';' abandons the rule.
+        if (isSpace)
+            m_cssState = CSSAfterRule;
+        else if (endsRule)
+            m_cssState = CSSInitial;
+        else
+            m_cssRule.append(c);
+        break;
+    case CSSAfterRule:
+        // Skipping whitespace between the rule name and its value.
+        if (isSpace)
+            break;
+        if (endsRule)
+            m_cssState = CSSInitial;
+        else {
+            m_cssRuleValue.append(c);
+            m_cssState = CSSRuleValue;
+        }
+        break;
+    case CSSRuleValue:
+        if (isSpace)
+            m_cssState = CSSAferRuleValue;
+        else if (endsRule) {
+            emitCSSRule();
+            m_cssState = CSSInitial;
+        } else
+            m_cssRuleValue.append(c);
+        break;
+    case CSSAferRuleValue:
+        if (isSpace)
+            break;
+        // FIXME media rules: anything other than ';' after the value just abandons the rule.
+        if (endsRule)
+            emitCSSRule();
+        m_cssState = CSSInitial;
+        break;
+    }
+}
+    
+void PreloadScanner::emitTag()
+{
+    // Runs at the '>' of a tag: updates the content model and issues any pending preload.
+    if (m_closeTag) {
+        // processAttribute() also runs for attributes on an end tag; drop what it recorded so a
+        // stale URL can neither be preloaded for the next tag nor block that tag's own src/href.
+        m_urlToLoad = String();
+        m_linkIsStyleSheet = false;
+        m_contentModel = PCDATA;
+        m_cssState = CSSInitial;
+        clearLastCharacters();
+        return;
+    }
+    AtomicString tag(m_tagName.data(), m_tagName.size());
+    m_lastStartTag = tag; // The CloseTagOpen state matches "</name" against this while in RCDATA/CDATA.
+    if (tag == textareaTag || tag == titleTag)
+        m_contentModel = RCDATA;
+    else if (tag == styleTag || tag == xmpTag || tag == scriptTag || tag == iframeTag || tag == noembedTag || tag == noframesTag)
+        m_contentModel = CDATA;
+    else if (tag == noscriptTag)
+        // we wouldn't be here if scripts were disabled
+        m_contentModel = CDATA;
+    else if (tag == plaintextTag)
+        m_contentModel = PLAINTEXT;
+    else
+        m_contentModel = PCDATA;
+    if (m_urlToLoad.isEmpty()) {
+        m_linkIsStyleSheet = false; // e.g. <link rel=stylesheet> without an href.
+        return;
+    }
+    if (tag == scriptTag)
+        m_document->docLoader()->preload(CachedResource::Script, m_urlToLoad);
+    else if (tag == imgTag)
+        m_document->docLoader()->preload(CachedResource::ImageResource, m_urlToLoad);
+    else if (tag == linkTag && m_linkIsStyleSheet)
+        m_document->docLoader()->preload(CachedResource::CSSStyleSheet, m_urlToLoad);
+    m_urlToLoad = String(); // Per-tag state: the next tag starts with no candidate.
+    m_linkIsStyleSheet = false;
+}
+    
+void PreloadScanner::emitCSSRule()
+{
+    // Preloads the target of a completed "@import <value>;" rule, then resets the rule buffers.
+    const bool isImport = String(m_cssRule.data(), m_cssRule.size()).lower() == "import";
+    if (isImport && !m_cssRuleValue.isEmpty()) {
+        String target = parseURL(String(m_cssRuleValue.data(), m_cssRuleValue.size()));
+        if (!target.isEmpty())
+            m_document->docLoader()->preload(CachedResource::CSSStyleSheet, target);
+    }
+    m_cssRuleValue.clear();
+    m_cssRule.clear();
+}
+                
+}