--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebKit/Misc/WebNSURLExtras.mm Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,1032 @@
+/*
+ * Copyright (C) 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import <WebKit/WebNSURLExtras.h>
+
+#import <JavaScriptCore/Assertions.h>
+#import <WebKit/WebKitNSStringExtras.h>
+#import <WebKit/WebNSDataExtras.h>
+#import <WebKit/WebNSObjectExtras.h>
+#import <WebKit/WebLocalizableStrings.h>
+#import <WebCore/KURL.h>
+#import <WebCore/LoaderNSURLExtras.h>
+
+#import <WebKitSystemInterface.h>
+#import "WebSystemInterface.h"
+
+#import <Foundation/NSURLRequest.h>
+
+#import <unicode/uchar.h>
+#import <unicode/uidna.h>
+#import <unicode/uscript.h>
+
+using namespace WebCore;
+
+typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
+
+// Needs to be big enough to hold an IDN-encoded name.
+// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
+#define HOST_NAME_BUFFER_LENGTH 2048
+
+#define URL_BYTES_BUFFER_LENGTH 2048
+
+static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
+static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
+
+static inline BOOL isLookalikeCharacter(int charCode)
+{
+// FIXME: Move this code down into WebCore so it can be shared with other platforms.
+
+// This function treats the following as unsafe, lookalike characters:
+// any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU,
+// and any ignorable character.
+
+// We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars),
+// and included all of these characters that ICU can encode.
+
+ if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
+ return YES;
+
+ switch (charCode) {
+ case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
+ case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
+ case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
+ case 0x05B4: /* HEBREW POINT HIRIQ */
+ case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
+ case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
+ case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
+ case 0x0660: /* ARABIC INDIC DIGIT ZERO */
+ case 0x06D4: /* ARABIC FULL STOP */
+ case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
+ case 0x2027: /* HYPHENATION POINT */
+ case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
+ case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
+ case 0x2044: /* FRACTION SLASH */
+ case 0x2215: /* DIVISION SLASH */
+ case 0x23ae: /* INTEGRAL EXTENSION */
+ case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
+ case 0x29F8: /* BIG SOLIDUS */
+ case 0x29f6: /* SOLIDUS WITH OVERBAR */
+ case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
+ case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
+ case 0x3008: /* LEFT ANGLE BRACKET */
+ case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
+ case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
+ case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
+ case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
+ case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
+ case 0x33DF: /* SQUARE A OVER M */
+ case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
+ case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+ case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
+ case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
+ case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
+ return YES;
+ default:
+ return NO;
+ }
+}
+
+static char hexDigit(int i)
+{
+ if (i < 0 || i > 16) {
+ LOG_ERROR("illegal hex digit");
+ return '0';
+ }
+ int h = i;
+ if (h >= 10) {
+ h = h - 10 + 'A';
+ }
+ else {
+ h += '0';
+ }
+ return h;
+}
+
+static BOOL isHexDigit(char c)
+{
+ return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+}
+
+static int hexDigitValue(char c)
+{
+ if (c >= '0' && c <= '9') {
+ return c - '0';
+ }
+ if (c >= 'A' && c <= 'F') {
+ return c - 'A' + 10;
+ }
+ if (c >= 'a' && c <= 'f') {
+ return c - 'a' + 10;
+ }
+ LOG_ERROR("illegal hex digit");
+ return 0;
+}
+
+static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+ // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
+ // Skip quoted strings so that characters in them don't confuse us.
+ // When we find a '?' character, we are past the part of the URL that contains host names.
+
+ static NSCharacterSet *hostNameOrStringStartCharacters;
+ if (hostNameOrStringStartCharacters == nil) {
+ hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
+ CFRetain(hostNameOrStringStartCharacters);
+ }
+ static NSCharacterSet *hostNameEndCharacters;
+ if (hostNameEndCharacters == nil) {
+ hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
+ CFRetain(hostNameEndCharacters);
+ }
+ static NSCharacterSet *quotedStringCharacters;
+ if (quotedStringCharacters == nil) {
+ quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
+ CFRetain(quotedStringCharacters);
+ }
+
+ unsigned stringLength = [string length];
+ NSRange remaining = NSMakeRange(0, stringLength);
+
+ while (1) {
+ // Find start of host name or of quoted string.
+ NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
+ if (hostNameOrStringStart.location == NSNotFound) {
+ return;
+ }
+ unichar c = [string characterAtIndex:hostNameOrStringStart.location];
+ remaining.location = NSMaxRange(hostNameOrStringStart);
+ remaining.length = stringLength - remaining.location;
+
+ if (c == '?') {
+ return;
+ }
+
+ if (c == '@') {
+ // Find end of host name.
+ unsigned hostNameStart = remaining.location;
+ NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
+ BOOL done;
+ if (hostNameEnd.location == NSNotFound) {
+ hostNameEnd.location = stringLength;
+ done = YES;
+ } else {
+ remaining.location = hostNameEnd.location;
+ remaining.length = stringLength - remaining.location;
+ done = NO;
+ }
+
+ // Process host name range.
+ f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
+
+ if (done) {
+ return;
+ }
+ } else {
+ // Skip quoted string.
+ ASSERT(c == '"');
+ while (1) {
+ NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
+ if (escapedCharacterOrStringEnd.location == NSNotFound) {
+ return;
+ }
+ c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
+ remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
+ remaining.length = stringLength - remaining.location;
+
+ // If we are the end of the string, then break from the string loop back to the host name loop.
+ if (c == '"') {
+ break;
+ }
+
+ // Skip escaped character.
+ ASSERT(c == '\\');
+ if (remaining.length == 0) {
+ return;
+ }
+ remaining.location += 1;
+ remaining.length -= 1;
+ }
+ }
+ }
+}
+
+static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
+{
+ // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
+ // but we have to do it before doing all the %-escaping, and this is the only
+ // code we have that parses mailto URLs anyway.
+
+ // Maybe we should implement this using a character buffer instead?
+
+ if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
+ applyHostNameFunctionToMailToURLString(string, f, context);
+ return;
+ }
+
+ // Find the host name in a hierarchical URL.
+ // It comes after a "://" sequence, with scheme characters preceding.
+ // If ends with the end of the string or a ":", "/", or a "?".
+ // If there is a "@" character, the host part is just the part after the "@".
+ NSRange separatorRange = [string rangeOfString:@"://"];
+ if (separatorRange.location == NSNotFound) {
+ return;
+ }
+
+ // Check that all characters before the :// are valid scheme characters.
+ static NSCharacterSet *nonSchemeCharacters;
+ if (nonSchemeCharacters == nil) {
+ nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
+ CFRetain(nonSchemeCharacters);
+ }
+ if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
+ return;
+ }
+
+ unsigned stringLength = [string length];
+
+ static NSCharacterSet *hostTerminators;
+ if (hostTerminators == nil) {
+ hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
+ CFRetain(hostTerminators);
+ }
+
+ // Start after the separator.
+ unsigned authorityStart = NSMaxRange(separatorRange);
+
+ // Find terminating character.
+ NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
+ unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
+
+ // Find "@" for the start of the host name.
+ NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
+ unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
+
+ f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
+}
+
+@implementation NSURL (WebNSURLExtras)
+
+static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
+{
+ BOOL needsMapping = encode
+ ? [string _web_hostNameNeedsEncodingWithRange:range]
+ : [string _web_hostNameNeedsDecodingWithRange:range];
+ if (!needsMapping) {
+ return;
+ }
+
+ NSMutableArray **array = (NSMutableArray **)context;
+ if (*array == nil) {
+ *array = [[NSMutableArray alloc] init];
+ }
+
+ [*array addObject:[NSValue valueWithRange:range]];
+}
+
+static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
+{
+ return collectRangesThatNeedMapping(string, range, context, YES);
+}
+
+static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
+{
+ return collectRangesThatNeedMapping(string, range, context, NO);
+}
+
+static NSString *mapHostNames(NSString *string, BOOL encode)
+{
+ // Generally, we want to optimize for the case where there is one host name that does not need mapping.
+
+ if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
+ return string;
+
+ // Make a list of ranges that actually need mapping.
+ NSMutableArray *hostNameRanges = nil;
+ StringRangeApplierFunction f = encode
+ ? collectRangesThatNeedEncoding
+ : collectRangesThatNeedDecoding;
+ applyHostNameFunctionToURLString(string, f, &hostNameRanges);
+ if (hostNameRanges == nil)
+ return string;
+
+ // Do the mapping.
+ NSMutableString *mutableCopy = [string mutableCopy];
+ unsigned i = [hostNameRanges count];
+ while (i-- != 0) {
+ NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
+ NSString *mappedHostName = encode
+ ? [string _web_encodeHostNameWithRange:hostNameRange]
+ : [string _web_decodeHostNameWithRange:hostNameRange];
+ [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
+ }
+ [hostNameRanges release];
+ return [mutableCopy autorelease];
+}
+
++ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
+{
+ if (string == nil) {
+ return nil;
+ }
+ string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);
+
+ NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
+ ASSERT(userTypedData);
+
+ const UInt8 *inBytes = static_cast<const UInt8 *>([userTypedData bytes]);
+ int inLength = [userTypedData length];
+ if (inLength == 0) {
+ return [NSURL URLWithString:@""];
+ }
+
+ char *outBytes = static_cast<char *>(malloc(inLength * 3)); // large enough to %-escape every character
+ char *p = outBytes;
+ int outLength = 0;
+ int i;
+ for (i = 0; i < inLength; i++) {
+ UInt8 c = inBytes[i];
+ if (c <= 0x20 || c >= 0x7f) {
+ *p++ = '%';
+ *p++ = hexDigit(c >> 4);
+ *p++ = hexDigit(c & 0xf);
+ outLength += 3;
+ }
+ else {
+ *p++ = c;
+ outLength++;
+ }
+ }
+
+ NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
+ return [self _web_URLWithData:data relativeToURL:URL];
+}
+
++ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
+{
+ return [self _web_URLWithUserTypedString:string relativeToURL:nil];
+}
+
++ (NSURL *)_web_URLWithDataAsString:(NSString *)string
+{
+ if (string == nil) {
+ return nil;
+ }
+ return [self _web_URLWithDataAsString:string relativeToURL:nil];
+}
+
++ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
+{
+ if (string == nil) {
+ return nil;
+ }
+ string = [string _webkit_stringByTrimmingWhitespace];
+ NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
+ return [self _web_URLWithData:data relativeToURL:baseURL];
+}
+
++ (NSURL *)_web_URLWithData:(NSData *)data
+{
+ return urlWithData(data);
+}
+
++ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
+{
+ return urlWithDataRelativeToURL(data, baseURL);
+}
+
+- (NSData *)_web_originalData
+{
+ return urlOriginalData(self);
+}
+
+- (NSString *)_web_originalDataAsString
+{
+ return urlOriginalDataAsString(self);
+}
+
+- (NSString *)_web_userVisibleString
+{
+ NSData *data = [self _web_originalData];
+ const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
+ int length = [data length];
+
+ bool needsHostNameDecoding = false;
+
+ const unsigned char *p = before;
+ int bufferLength = (length * 3) + 1;
+ char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
+ char *q = after;
+ int i;
+ for (i = 0; i < length; i++) {
+ unsigned char c = p[i];
+ // escape control characters, space, and delete
+ if (c <= 0x20 || c == 0x7f) {
+ *q++ = '%';
+ *q++ = hexDigit(c >> 4);
+ *q++ = hexDigit(c & 0xf);
+ }
+ // unescape escape sequences that indicate bytes greater than 0x7f
+ else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
+ unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
+ if (u > 0x7f) {
+ // unescape
+ *q++ = u;
+ }
+ else {
+ // do not unescape
+ *q++ = p[i];
+ *q++ = p[i + 1];
+ *q++ = p[i + 2];
+ }
+ i += 2;
+ }
+ else {
+ *q++ = c;
+
+ // Check for "xn--" in an efficient, non-case-sensitive, way.
+ if (c == '-' && i >= 3 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
+ needsHostNameDecoding = true;
+ }
+ }
+ *q = '\0';
+
+ // Check string to see if it can be converted to display using UTF-8
+ NSString *result = [NSString stringWithUTF8String:after];
+ if (!result) {
+ // Could not convert to UTF-8.
+ // Convert characters greater than 0x7f to escape sequences.
+ // Shift current string to the end of the buffer
+ // then we will copy back bytes to the start of the buffer
+ // as we convert.
+ int afterlength = q - after;
+ char *p = after + bufferLength - afterlength - 1;
+ memmove(p, after, afterlength + 1); // copies trailing '\0'
+ char *q = after;
+ while (*p) {
+ unsigned char c = *p;
+ if (c > 0x7f) {
+ *q++ = '%';
+ *q++ = hexDigit(c >> 4);
+ *q++ = hexDigit(c & 0xf);
+ }
+ else {
+ *q++ = *p;
+ }
+ p++;
+ }
+ *q = '\0';
+ result = [NSString stringWithUTF8String:after];
+ }
+
+ free(after);
+
+ // As an optimization, only do host name decoding if we have "xn--" somewhere.
+ return needsHostNameDecoding ? mapHostNames(result, NO) : result;
+}
+
+- (BOOL)_web_isEmpty
+{
+ return urlIsEmpty(self);
+}
+
+- (const char *)_web_URLCString
+{
+ NSMutableData *data = [NSMutableData data];
+ [data appendData:[self _web_originalData]];
+ [data appendBytes:"\0" length:1];
+ return (const char *)[data bytes];
+ }
+
+- (NSURL *)_webkit_canonicalize
+{
+ InitWebCoreSystemInterface();
+ return canonicalURL(self);
+}
+
+typedef struct {
+ NSString *scheme;
+ NSString *user;
+ NSString *password;
+ NSString *host;
+ CFIndex port; // kCFNotFound means ignore/omit
+ NSString *path;
+ NSString *query;
+ NSString *fragment;
+} WebKitURLComponents;
+
+
+
+- (NSURL *)_webkit_URLByRemovingComponent:(CFURLComponentType)component
+{
+ return urlByRemovingComponent(self, component);
+}
+
+- (NSURL *)_webkit_URLByRemovingFragment
+{
+ return urlByRemovingFragment(self);
+}
+
+- (NSURL *)_webkit_URLByRemovingResourceSpecifier
+{
+ return urlByRemovingResourceSpecifier(self);
+}
+
+- (BOOL)_webkit_isJavaScriptURL
+{
+ return [[self _web_originalDataAsString] _webkit_isJavaScriptURL];
+}
+
+- (NSString *)_webkit_scriptIfJavaScriptURL
+{
+ return [[self absoluteString] _webkit_scriptIfJavaScriptURL];
+}
+
+- (BOOL)_webkit_isFileURL
+{
+ return urlIsFileURL(self);
+}
+
+- (BOOL)_webkit_isFTPDirectoryURL
+{
+ return [[self _web_originalDataAsString] _webkit_isFTPDirectoryURL];
+}
+
+- (BOOL)_webkit_shouldLoadAsEmptyDocument
+{
+ return [[self _web_originalDataAsString] _webkit_hasCaseInsensitivePrefix:@"about:"] || [self _web_isEmpty];
+}
+
+- (NSURL *)_web_URLWithLowercasedScheme
+{
+ CFRange range;
+ CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
+ if (range.location == kCFNotFound) {
+ return self;
+ }
+
+ UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
+ UInt8 *buffer = static_buffer;
+ CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
+ if (bytesFilled == -1) {
+ CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
+ buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
+ bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
+ ASSERT(bytesFilled == bytesToAllocate);
+ }
+
+ int i;
+ BOOL changed = NO;
+ for (i = 0; i < range.length; ++i) {
+ UInt8 c = buffer[range.location + i];
+ UInt8 lower = tolower(c);
+ if (c != lower) {
+ buffer[range.location + i] = lower;
+ changed = YES;
+ }
+ }
+
+ NSURL *result = changed
+ ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
+ : (NSURL *)self;
+
+ if (buffer != static_buffer) {
+ free(buffer);
+ }
+
+ return result;
+}
+
+
+-(BOOL)_web_hasQuestionMarkOnlyQueryString
+{
+ CFRange rangeWithSeparators;
+ CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
+ if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
+ return YES;
+ }
+ return NO;
+}
+
+-(NSData *)_web_schemeSeparatorWithoutColon
+{
+ NSData *result = nil;
+ CFRange rangeWithSeparators;
+ CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);
+ if (rangeWithSeparators.location != kCFNotFound) {
+ NSString *absoluteString = [self absoluteString];
+ NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, rangeWithSeparators.length - range.length - 1);
+ if (separatorsRange.location + separatorsRange.length <= [absoluteString length]) {
+ NSString *slashes = [absoluteString substringWithRange:separatorsRange];
+ result = [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
+ }
+ }
+ return result;
+}
+
+#define completeURL (CFURLComponentType)-1
+
+-(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
+{
+ static int URLComponentTypeBufferLength = 2048;
+
+ UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
+ UInt8 *allBytesBuffer = staticAllBytesBuffer;
+
+ CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
+ if (bytesFilled == -1) {
+ CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
+ allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
+ bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
+ }
+
+ CFRange range;
+ if (componentType != completeURL) {
+ range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
+ if (range.location == kCFNotFound) {
+ return nil;
+ }
+ }
+ else {
+ range.location = 0;
+ range.length = bytesFilled;
+ }
+
+ NSData *componentData = [NSData dataWithBytes:allBytesBuffer + range.location length:range.length];
+
+ const unsigned char *bytes = static_cast<const unsigned char *>([componentData bytes]);
+ NSMutableData *resultData = [NSMutableData data];
+ // NOTE: add leading '?' to query strings non-zero length query strings.
+ // NOTE: retain question-mark only query strings.
+ if (componentType == kCFURLComponentQuery) {
+ if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
+ [resultData appendBytes:"?" length:1];
+ }
+ }
+ int i;
+ for (i = 0; i < range.length; i++) {
+ unsigned char c = bytes[i];
+ if (c <= 0x20 || c >= 0x7f) {
+ char escaped[3];
+ escaped[0] = '%';
+ escaped[1] = hexDigit(c >> 4);
+ escaped[2] = hexDigit(c & 0xf);
+ [resultData appendBytes:escaped length:3];
+ }
+ else {
+ char b[1];
+ b[0] = c;
+ [resultData appendBytes:b length:1];
+ }
+ }
+
+ if (staticAllBytesBuffer != allBytesBuffer) {
+ free(allBytesBuffer);
+ }
+
+ return resultData;
+}
+
+-(NSData *)_web_schemeData
+{
+ return [self _web_dataForURLComponentType:kCFURLComponentScheme];
+}
+
+-(NSData *)_web_hostData
+{
+ NSData *result = [self _web_dataForURLComponentType:kCFURLComponentHost];
+ NSData *scheme = [self _web_schemeData];
+ // Take off localhost for file
+ if ([scheme _web_isCaseInsensitiveEqualToCString:"file"]) {
+ return ([result _web_isCaseInsensitiveEqualToCString:"localhost"]) ? nil : result;
+ }
+ return result;
+}
+
+- (NSString *)_web_hostString
+{
+ NSData *data = [self _web_hostData];
+ if (!data) {
+ data = [NSData data];
+ }
+ return [[[NSString alloc] initWithData:[self _web_hostData] encoding:NSUTF8StringEncoding] autorelease];
+}
+
+- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
+{
+ return suggestedFilenameWithMIMEType(self, MIMEType);
+}
+
+@end
+
+@implementation NSString (WebNSURLExtras)
+
+- (BOOL)_web_isUserVisibleURL
+{
+ BOOL valid = YES;
+ // get buffer
+
+ char static_buffer[1024];
+ const char *p;
+ BOOL success = CFStringGetCString((CFStringRef)self, static_buffer, 1023, kCFStringEncodingUTF8);
+ if (success) {
+ p = static_buffer;
+ } else {
+ p = [self UTF8String];
+ }
+
+ int length = strlen(p);
+
+ // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these
+ // are the things that will lead _web_userVisibleString to actually change things.
+ int i;
+ for (i = 0; i < length; i++) {
+ unsigned char c = p[i];
+ // escape control characters, space, and delete
+ if (c <= 0x20 || c == 0x7f) {
+ valid = NO;
+ break;
+ } else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
+ unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
+ if (u > 0x7f) {
+ valid = NO;
+ break;
+ }
+ i += 2;
+ } else {
+ // Check for "xn--" in an efficient, non-case-sensitive, way.
+ if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') {
+ valid = NO;
+ break;
+ }
+ }
+ }
+
+ return valid;
+}
+
+
+- (BOOL)_webkit_isJavaScriptURL
+{
+ return [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
+}
+
+- (BOOL)_webkit_isFileURL
+{
+ return stringIsFileURL(self);
+}
+
+- (NSString *)_webkit_stringByReplacingValidPercentEscapes
+{
+ DeprecatedString s = KURL::decode_string(DeprecatedString::fromNSString(self));
+ return s.getNSString();
+}
+
+- (NSString *)_webkit_scriptIfJavaScriptURL
+{
+ if (![self _webkit_isJavaScriptURL]) {
+ return nil;
+ }
+ return [[self substringFromIndex:11] _webkit_stringByReplacingValidPercentEscapes];
+}
+
+- (BOOL)_webkit_isFTPDirectoryURL
+{
+ int length = [self length];
+ if (length < 5) { // 5 is length of "ftp:/"
+ return NO;
+ }
+ unichar lastChar = [self characterAtIndex:length - 1];
+ return lastChar == '/' && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
+}
+
+
+static BOOL readIDNScriptWhiteListFile(NSString *filename)
+{
+ if (!filename) {
+ return NO;
+ }
+ FILE *file = fopen([filename fileSystemRepresentation], "r");
+ if (file == NULL) {
+ return NO;
+ }
+
+ // Read a word at a time.
+ // Allow comments, starting with # character to the end of the line.
+ while (1) {
+ // Skip a comment if present.
+ int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
+ if (result == EOF) {
+ break;
+ }
+
+ // Read a script name if present.
+ char word[33];
+ result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
+ if (result == EOF) {
+ break;
+ }
+ if (result == 1) {
+ // Got a word, map to script code and put it into the array.
+ int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
+ if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
+ size_t index = script / 32;
+ uint32_t mask = 1 << (script % 32);
+ IDNScriptWhiteList[index] |= mask;
+ }
+ }
+ }
+ fclose(file);
+ return YES;
+}
+
+static void readIDNScriptWhiteList(void)
+{
+ // Read white list from library.
+ NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
+ int i, numDirs = [dirs count];
+ for (i = 0; i < numDirs; i++) {
+ NSString *dir = [dirs objectAtIndex:i];
+ if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
+ return;
+ }
+ }
+
+ // Fall back on white list inside bundle.
+ NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
+ readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
+}
+
+static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
+{
+ pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);
+
+ int32_t i = 0;
+ while (i < length) {
+ UChar32 c;
+ U16_NEXT(buffer, i, length, c)
+ UErrorCode error = U_ZERO_ERROR;
+ UScriptCode script = uscript_getScript(c, &error);
+ if (error != U_ZERO_ERROR) {
+ LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
+ return NO;
+ }
+ if (script < 0) {
+ LOG_ERROR("got negative number for script code from ICU: %d", script);
+ return NO;
+ }
+ if (script >= USCRIPT_CODE_LIMIT) {
+ return NO;
+ }
+ size_t index = script / 32;
+ uint32_t mask = 1 << (script % 32);
+ if (!(IDNScriptWhiteList[index] & mask)) {
+ return NO;
+ }
+
+ if (isLookalikeCharacter(c))
+ return NO;
+ }
+ return YES;
+}
+
+// Return value of nil means no mapping is necessary.
+// If makeString is NO, then return value is either nil or self to indicate mapping is necessary.
+// If makeString is YES, then return value is either nil or the mapped string.
+- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
+{
+ if (range.length > HOST_NAME_BUFFER_LENGTH) {
+ return nil;
+ }
+
+ if ([self length] == 0)
+ return nil;
+
+ UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
+ UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];
+
+ NSString *string = self;
+ if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) {
+ NSString *substring = [self substringWithRange:range];
+ substring = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)substring, CFSTR("")));
+ if (substring != nil) {
+ string = substring;
+ range = NSMakeRange(0, [string length]);
+ }
+ }
+
+ int length = range.length;
+ [string getCharacters:sourceBuffer range:range];
+
+ UErrorCode error = U_ZERO_ERROR;
+ int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode)
+ (sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error);
+ if (error != U_ZERO_ERROR) {
+ return nil;
+ }
+ if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) {
+ return nil;
+ }
+ if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted)) {
+ return nil;
+ }
+ return makeString ? (NSString *)[NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : (NSString *)self;
+}
+
+- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
+{
+ return [self _web_mapHostNameWithRange:range encode:NO makeString:NO] != nil;
+}
+
+- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
+{
+ return [self _web_mapHostNameWithRange:range encode:YES makeString:NO] != nil;
+}
+
+- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
+{
+ return [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
+}
+
+- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
+{
+ return [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
+}
+
+- (NSString *)_web_decodeHostName
+{
+ NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:NO makeString:YES];
+ return name == nil ? self : name;
+}
+
+- (NSString *)_web_encodeHostName
+{
+ NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:YES makeString:YES];
+ return name == nil ? self : name;
+}
+
+-(NSRange)_webkit_rangeOfURLScheme
+{
+ NSRange colon = [self rangeOfString:@":"];
+ if (colon.location != NSNotFound && colon.location > 0) {
+ NSRange scheme = {0, colon.location};
+ static NSCharacterSet *InverseSchemeCharacterSet = nil;
+ if (!InverseSchemeCharacterSet) {
+ /*
+ This stuff is very expensive. 10-15 msec on a 2x1.2GHz. If not cached it swamps
+ everything else when adding items to the autocomplete DB. Makes me wonder if we
+ even need to enforce the character set here.
+ */
+ NSString *acceptableCharacters = @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
+ InverseSchemeCharacterSet = [[[NSCharacterSet characterSetWithCharactersInString:acceptableCharacters] invertedSet] retain];
+ }
+ NSRange illegals = [self rangeOfCharacterFromSet:InverseSchemeCharacterSet options:0 range:scheme];
+ if (illegals.location == NSNotFound)
+ return scheme;
+ }
+ return NSMakeRange(NSNotFound, 0);
+}
+
+-(BOOL)_webkit_looksLikeAbsoluteURL
+{
+ // Trim whitespace because _web_URLWithString allows whitespace.
+ return [[self _webkit_stringByTrimmingWhitespace] _webkit_rangeOfURLScheme].location != NSNotFound;
+}
+
+- (NSString *)_webkit_URLFragment
+{
+ NSRange fragmentRange;
+
+ fragmentRange = [self rangeOfString:@"#" options:NSLiteralSearch];
+ if (fragmentRange.location == NSNotFound)
+ return nil;
+ return [self substringFromIndex:fragmentRange.location + 1];
+}
+
+@end