diff -r 000000000000 -r 4f2f89ce4247 WebCore/platform/LinkHash.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/WebCore/platform/LinkHash.cpp Fri Sep 17 09:02:29 2010 +0300 @@ -0,0 +1,236 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * (C) 1999 Antti Koivisto (koivisto@kde.org) + * (C) 2001 Dirk Mueller (mueller@kde.org) + * (C) 2006 Alexey Proskuryakov (ap@webkit.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "AtomicString.h" +#include "KURL.h" +#include "LinkHash.h" +#include "PlatformString.h" +#include "StringHash.h" +#include "StringImpl.h" + +namespace WebCore { + +static inline int findSlashDotDotSlash(const UChar* characters, size_t length) +{ + if (length < 4) + return -1; + unsigned loopLimit = length - 3; + for (unsigned i = 0; i < loopLimit; ++i) { + if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') + return i; + } + return -1; +} + +static inline int findSlashSlash(const UChar* characters, size_t length, int position) +{ + if (length < 2) + return -1; + unsigned loopLimit = length - 1; + for (unsigned i = position; i < loopLimit; ++i) { + if (characters[i] == '/' && characters[i + 1] == '/') + return i; + } + return -1; +} + +static inline int findSlashDotSlash(const UChar* characters, size_t length) +{ + if (length < 3) + return -1; + unsigned loopLimit = length - 2; + for (unsigned i = 0; i < loopLimit; ++i) { + if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') + return i; + } + return -1; +} + +static inline bool containsColonSlashSlash(const UChar* characters, unsigned length) +{ + if (length < 3) + return false; + unsigned loopLimit = length - 2; + for (unsigned i = 0; i < loopLimit; ++i) { + if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') + return true; + } + return false; +} + +static inline void cleanPath(Vector& path) +{ + // FIXME: Should not do this in the query or anchor part. + int pos; + while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) { + int prev = reverseFind(path.data(), path.size(), '/', pos - 1); + // don't remove the host, i.e. http://foo.org/../foo.html + if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/')) + path.remove(pos, 3); + else + path.remove(prev, pos - prev + 3); + } + + // FIXME: Should not do this in the query part. + // Set refPos to -2 to mean "I haven't looked for the anchor yet". + // We don't want to waste a function call on the search for the the anchor + // in the vast majority of cases where there is no "//" in the path. + pos = 0; + int refPos = -2; + while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) { + if (refPos == -2) + refPos = find(path.data(), path.size(), '#'); + if (refPos > 0 && pos >= refPos) + break; + + if (pos == 0 || path[pos - 1] != ':') + path.remove(pos); + else + pos += 2; + } + + // FIXME: Should not do this in the query or anchor part. + while ((pos = findSlashDotSlash(path.data(), path.size())) != -1) + path.remove(pos, 2); +} + + +static inline bool matchLetter(UChar c, UChar lowercaseLetter) +{ + return (c | 0x20) == lowercaseLetter; +} + +static inline bool needsTrailingSlash(const UChar* characters, unsigned length) +{ + if (length < 6) + return false; + if (!matchLetter(characters[0], 'h') + || !matchLetter(characters[1], 't') + || !matchLetter(characters[2], 't') + || !matchLetter(characters[3], 'p')) + return false; + if (!(characters[4] == ':' + || (matchLetter(characters[4], 's') && characters[5] == ':'))) + return false; + + unsigned pos = characters[4] == ':' ? 5 : 6; + + // Skip initial two slashes if present. + if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') + pos += 2; + + // Find next slash. + while (pos < length && characters[pos] != '/') + ++pos; + + return pos == length; +} + +static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length) +{ + return AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length)); +} + +LinkHash visitedLinkHash(const UChar* url, unsigned length) +{ + return visitedLinkHashInline(url, length); +} + +static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector& buffer) +{ + if (attributeURL.isNull()) + return; + + const UChar* characters = attributeURL.characters(); + unsigned length = attributeURL.length(); + + // This is a poor man's completeURL. Faster with less memory allocation. + // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does. + // For example, it does not handle international domain names properly. + + // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: + // 1) The "://" could be in the query or anchor. + // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. + + // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does + // have a query or anchor. + + bool hasColonSlashSlash = containsColonSlashSlash(characters, length); + + if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { + buffer.append(attributeURL.characters(), attributeURL.length()); + return; + } + + + if (hasColonSlashSlash) { + // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the + // end of the path, *before* the query or anchor. + buffer.append(characters, length); + buffer.append('/'); + return; + } + + if (!length) + buffer.append(base.string().characters(), base.string().length()); + else { + switch (characters[0]) { + case '/': + buffer.append(base.string().characters(), base.pathStart()); + break; + case '#': + buffer.append(base.string().characters(), base.pathEnd()); + break; + default: + buffer.append(base.string().characters(), base.pathAfterLastSlash()); + break; + } + } + buffer.append(characters, length); + cleanPath(buffer); + if (needsTrailingSlash(buffer.data(), buffer.size())) { + // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the + // end of the path, *before* the query or anchor. + buffer.append('/'); + } + + return; +} + +void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector& buffer) +{ + return visitedURLInline(base, attributeURL, buffer); +} + +LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL) +{ + Vector url; + visitedURLInline(base, attributeURL, url); + if (url.isEmpty()) + return 0; + + return visitedLinkHashInline(url.data(), url.size()); +} + +} // namespace WebCore