WebCore/platform/LinkHash.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
       
     3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
       
     4  *           (C) 2001 Dirk Mueller (mueller@kde.org)
       
     5  *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
       
     6  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
       
     7  *
       
     8  * This library is free software; you can redistribute it and/or
       
     9  * modify it under the terms of the GNU Library General Public
       
    10  * License as published by the Free Software Foundation; either
       
    11  * version 2 of the License, or (at your option) any later version.
       
    12  *
       
    13  * This library is distributed in the hope that it will be useful,
       
    14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    16  * Library General Public License for more details.
       
    17  *
       
    18  * You should have received a copy of the GNU Library General Public License
       
    19  * along with this library; see the file COPYING.LIB.  If not, write to
       
    20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    21  * Boston, MA 02110-1301, USA.
       
    22  */
       
    23 
       
    24 #include "config.h"
       
    25 #include "AtomicString.h"
       
    26 #include "KURL.h"
       
    27 #include "LinkHash.h"
       
    28 #include "PlatformString.h"
       
    29 #include "StringHash.h"
       
    30 #include "StringImpl.h"
       
    31 
       
    32 namespace WebCore {
       
    33 
       
    34 static inline int findSlashDotDotSlash(const UChar* characters, size_t length)
       
    35 {
       
    36     if (length < 4)
       
    37         return -1;
       
    38     unsigned loopLimit = length - 3;
       
    39     for (unsigned i = 0; i < loopLimit; ++i) {
       
    40         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
       
    41             return i;
       
    42     }
       
    43     return -1;
       
    44 }
       
    45 
       
    46 static inline int findSlashSlash(const UChar* characters, size_t length, int position)
       
    47 {
       
    48     if (length < 2)
       
    49         return -1;
       
    50     unsigned loopLimit = length - 1;
       
    51     for (unsigned i = position; i < loopLimit; ++i) {
       
    52         if (characters[i] == '/' && characters[i + 1] == '/')
       
    53             return i;
       
    54     }
       
    55     return -1;
       
    56 }
       
    57 
       
    58 static inline int findSlashDotSlash(const UChar* characters, size_t length)
       
    59 {
       
    60     if (length < 3)
       
    61         return -1;
       
    62     unsigned loopLimit = length - 2;
       
    63     for (unsigned i = 0; i < loopLimit; ++i) {
       
    64         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
       
    65             return i;
       
    66     }
       
    67     return -1;
       
    68 }
       
    69 
       
    70 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
       
    71 {
       
    72     if (length < 3)
       
    73         return false;
       
    74     unsigned loopLimit = length - 2;
       
    75     for (unsigned i = 0; i < loopLimit; ++i) {
       
    76         if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
       
    77             return true;
       
    78     }
       
    79     return false;
       
    80 }
       
    81 
       
    82 static inline void cleanPath(Vector<UChar, 512>& path)
       
    83 {
       
    84     // FIXME: Should not do this in the query or anchor part.
       
    85     int pos;
       
    86     while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) {
       
    87         int prev = reverseFind(path.data(), path.size(), '/', pos - 1);
       
    88         // don't remove the host, i.e. http://foo.org/../foo.html
       
    89         if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/'))
       
    90             path.remove(pos, 3);
       
    91         else
       
    92             path.remove(prev, pos - prev + 3);
       
    93     }
       
    94 
       
    95     // FIXME: Should not do this in the query part.
       
    96     // Set refPos to -2 to mean "I haven't looked for the anchor yet".
       
    97     // We don't want to waste a function call on the search for the the anchor
       
    98     // in the vast majority of cases where there is no "//" in the path.
       
    99     pos = 0;
       
   100     int refPos = -2;
       
   101     while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) {
       
   102         if (refPos == -2)
       
   103             refPos = find(path.data(), path.size(), '#');
       
   104         if (refPos > 0 && pos >= refPos)
       
   105             break;
       
   106 
       
   107         if (pos == 0 || path[pos - 1] != ':')
       
   108             path.remove(pos);
       
   109         else
       
   110             pos += 2;
       
   111     }
       
   112 
       
   113     // FIXME: Should not do this in the query or anchor part.
       
   114     while ((pos = findSlashDotSlash(path.data(), path.size())) != -1)
       
   115         path.remove(pos, 2);
       
   116 }
       
   117 
       
   118 
       
   119 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
       
   120 {
       
   121     return (c | 0x20) == lowercaseLetter;
       
   122 }
       
   123 
       
   124 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
       
   125 {
       
   126     if (length < 6)
       
   127         return false;
       
   128     if (!matchLetter(characters[0], 'h')
       
   129             || !matchLetter(characters[1], 't')
       
   130             || !matchLetter(characters[2], 't')
       
   131             || !matchLetter(characters[3], 'p'))
       
   132         return false;
       
   133     if (!(characters[4] == ':'
       
   134             || (matchLetter(characters[4], 's') && characters[5] == ':')))
       
   135         return false;
       
   136 
       
   137     unsigned pos = characters[4] == ':' ? 5 : 6;
       
   138 
       
   139     // Skip initial two slashes if present.
       
   140     if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
       
   141         pos += 2;
       
   142 
       
   143     // Find next slash.
       
   144     while (pos < length && characters[pos] != '/')
       
   145         ++pos;
       
   146 
       
   147     return pos == length;
       
   148 }
       
   149 
       
   150 static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
       
   151 {
       
   152     return AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length));
       
   153 }
       
   154 
       
   155 LinkHash visitedLinkHash(const UChar* url, unsigned length)
       
   156 {
       
   157     return visitedLinkHashInline(url, length);
       
   158 }
       
   159 
       
   160 static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
       
   161 {
       
   162     if (attributeURL.isNull())
       
   163         return;
       
   164 
       
   165     const UChar* characters = attributeURL.characters();
       
   166     unsigned length = attributeURL.length();
       
   167 
       
   168     // This is a poor man's completeURL. Faster with less memory allocation.
       
   169     // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
       
   170     // For example, it does not handle international domain names properly.
       
   171 
       
   172     // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
       
   173     //    1) The "://" could be in the query or anchor.
       
   174     //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
       
   175 
       
   176     // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
       
   177     // have a query or anchor.
       
   178 
       
   179     bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
       
   180 
       
   181     if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
       
   182         buffer.append(attributeURL.characters(), attributeURL.length());
       
   183         return;
       
   184     }
       
   185 
       
   186 
       
   187     if (hasColonSlashSlash) {
       
   188         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
       
   189         // end of the path, *before* the query or anchor.
       
   190         buffer.append(characters, length);
       
   191         buffer.append('/');
       
   192         return;
       
   193     }
       
   194 
       
   195     if (!length)
       
   196         buffer.append(base.string().characters(), base.string().length());
       
   197     else {
       
   198         switch (characters[0]) {
       
   199             case '/':
       
   200                 buffer.append(base.string().characters(), base.pathStart());
       
   201                 break;
       
   202             case '#':
       
   203                 buffer.append(base.string().characters(), base.pathEnd());
       
   204                 break;
       
   205             default:
       
   206                 buffer.append(base.string().characters(), base.pathAfterLastSlash());
       
   207                 break;
       
   208         }
       
   209     }
       
   210     buffer.append(characters, length);
       
   211     cleanPath(buffer);
       
   212     if (needsTrailingSlash(buffer.data(), buffer.size())) {
       
   213         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
       
   214         // end of the path, *before* the query or anchor.
       
   215         buffer.append('/');
       
   216     }
       
   217 
       
   218     return;
       
   219 }
       
   220 
       
   221 void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
       
   222 {
       
   223     return visitedURLInline(base, attributeURL, buffer);
       
   224 }
       
   225 
       
   226 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
       
   227 {
       
   228     Vector<UChar, 512> url;
       
   229     visitedURLInline(base, attributeURL, url);
       
   230     if (url.isEmpty())
       
   231         return 0;
       
   232 
       
   233     return visitedLinkHashInline(url.data(), url.size());
       
   234 }
       
   235 
       
   236 }  // namespace WebCore