webengine/osswebengine/WebKit/Misc/WebNSURLExtras.mm
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2  * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
       
     3  * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
       
     4  *
       
     5  * Redistribution and use in source and binary forms, with or without
       
     6  * modification, are permitted provided that the following conditions
       
     7  * are met:
       
     8  *
       
     9  * 1.  Redistributions of source code must retain the above copyright
       
    10  *     notice, this list of conditions and the following disclaimer. 
       
    11  * 2.  Redistributions in binary form must reproduce the above copyright
       
    12  *     notice, this list of conditions and the following disclaimer in the
       
    13  *     documentation and/or other materials provided with the distribution. 
       
    14  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
       
    15  *     its contributors may be used to endorse or promote products derived
       
    16  *     from this software without specific prior written permission. 
       
    17  *
       
    18  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
       
    19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
       
    21  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
       
    22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
       
    24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
       
    25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       
    27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    28  */
       
    29 
       
    30 #import <WebKit/WebNSURLExtras.h>
       
    31 
       
    32 #import <JavaScriptCore/Assertions.h>
       
    33 #import <WebKit/WebKitNSStringExtras.h>
       
    34 #import <WebKit/WebNSDataExtras.h>
       
    35 #import <WebKit/WebNSObjectExtras.h>
       
    36 #import <WebKit/WebLocalizableStrings.h>
       
    37 #import <WebCore/KURL.h>
       
    38 #import <WebCore/LoaderNSURLExtras.h>
       
    39 
       
    40 #import <WebKitSystemInterface.h>
       
    41 #import "WebSystemInterface.h"
       
    42 
       
    43 #import <Foundation/NSURLRequest.h>
       
    44 
       
    45 #import <unicode/uchar.h>
       
    46 #import <unicode/uidna.h>
       
    47 #import <unicode/uscript.h>
       
    48 
       
    49 using namespace WebCore;
       
    50 
       
    51 typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);
       
    52 
       
    53 // Needs to be big enough to hold an IDN-encoded name.
       
    54 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
       
    55 #define HOST_NAME_BUFFER_LENGTH 2048
       
    56 
       
    57 #define URL_BYTES_BUFFER_LENGTH 2048
       
    58 
       
    59 static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
       
    60 static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
       
    61 
       
    62 static inline BOOL isLookalikeCharacter(int charCode)
       
    63 {
       
    64 // FIXME: Move this code down into WebCore so it can be shared with other platforms.
       
    65 
       
    66 // This function treats the following as unsafe, lookalike characters:
       
    67 // any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU, 
       
    68 // and any ignorable character.
       
    69 
       
    70 // We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars), 
       
    71 // and included all of these characters that ICU can encode.
       
    72 
       
    73     if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
       
    74         return YES;
       
    75 
       
    76     switch (charCode) {
       
    77         case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
       
    78         case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
       
    79         case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
       
    80         case 0x05B4: /* HEBREW POINT HIRIQ */
       
    81         case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
       
    82         case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
       
    83         case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
       
    84         case 0x0660: /* ARABIC INDIC DIGIT ZERO */
       
    85         case 0x06D4: /* ARABIC FULL STOP */
       
    86         case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
       
    87         case 0x2027: /* HYPHENATION POINT */
       
    88         case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
       
    89         case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
       
    90         case 0x2044: /* FRACTION SLASH */
       
    91         case 0x2215: /* DIVISION SLASH */
       
    92         case 0x23ae: /* INTEGRAL EXTENSION */
       
    93         case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
       
    94         case 0x29F8: /* BIG SOLIDUS */
       
    95         case 0x29f6: /* SOLIDUS WITH OVERBAR */
       
    96         case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
       
    97         case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
       
    98         case 0x3008: /* LEFT ANGLE BRACKET */
       
    99         case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
       
   100         case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
       
   101         case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
       
   102         case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
       
   103         case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
       
   104         case 0x33DF: /* SQUARE A OVER M */
       
   105         case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
       
   106         case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
       
   107         case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
       
   108         case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
       
   109         case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
       
   110             return YES;
       
   111         default:
       
   112             return NO;
       
   113     }
       
   114 }
       
   115 
       
   116 static char hexDigit(int i)
       
   117 {
       
   118     if (i < 0 || i > 16) {
       
   119         LOG_ERROR("illegal hex digit");
       
   120         return '0';
       
   121     }
       
   122     int h = i;
       
   123     if (h >= 10) {
       
   124         h = h - 10 + 'A'; 
       
   125     }
       
   126     else {
       
   127         h += '0';
       
   128     }
       
   129     return h;
       
   130 }
       
   131 
       
   132 static BOOL isHexDigit(char c)
       
   133 {
       
   134     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
       
   135 }
       
   136 
       
   137 static int hexDigitValue(char c)
       
   138 {
       
   139     if (c >= '0' && c <= '9') {
       
   140         return c - '0';
       
   141     }
       
   142     if (c >= 'A' && c <= 'F') {
       
   143         return c - 'A' + 10;
       
   144     }
       
   145     if (c >= 'a' && c <= 'f') {
       
   146         return c - 'a' + 10;
       
   147     }
       
   148     LOG_ERROR("illegal hex digit");
       
   149     return 0;
       
   150 }
       
   151 
       
   152 static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
       
   153 {
       
   154     // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
       
   155     // Skip quoted strings so that characters in them don't confuse us.
       
   156     // When we find a '?' character, we are past the part of the URL that contains host names.
       
   157 
       
   158     static NSCharacterSet *hostNameOrStringStartCharacters;
       
   159     if (hostNameOrStringStartCharacters == nil) {
       
   160         hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
       
   161         CFRetain(hostNameOrStringStartCharacters);
       
   162     }
       
   163     static NSCharacterSet *hostNameEndCharacters;
       
   164     if (hostNameEndCharacters == nil) {
       
   165         hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
       
   166         CFRetain(hostNameEndCharacters);
       
   167     }
       
   168     static NSCharacterSet *quotedStringCharacters;
       
   169     if (quotedStringCharacters == nil) {
       
   170         quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
       
   171         CFRetain(quotedStringCharacters);
       
   172     }
       
   173 
       
   174     unsigned stringLength = [string length];
       
   175     NSRange remaining = NSMakeRange(0, stringLength);
       
   176     
       
   177     while (1) {
       
   178         // Find start of host name or of quoted string.
       
   179         NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
       
   180         if (hostNameOrStringStart.location == NSNotFound) {
       
   181             return;
       
   182         }
       
   183         unichar c = [string characterAtIndex:hostNameOrStringStart.location];
       
   184         remaining.location = NSMaxRange(hostNameOrStringStart);
       
   185         remaining.length = stringLength - remaining.location;
       
   186 
       
   187         if (c == '?') {
       
   188             return;
       
   189         }
       
   190         
       
   191         if (c == '@') {
       
   192             // Find end of host name.
       
   193             unsigned hostNameStart = remaining.location;
       
   194             NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
       
   195             BOOL done;
       
   196             if (hostNameEnd.location == NSNotFound) {
       
   197                 hostNameEnd.location = stringLength;
       
   198                 done = YES;
       
   199             } else {
       
   200                 remaining.location = hostNameEnd.location;
       
   201                 remaining.length = stringLength - remaining.location;
       
   202                 done = NO;
       
   203             }
       
   204 
       
   205             // Process host name range.
       
   206             f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
       
   207 
       
   208             if (done) {
       
   209                 return;
       
   210             }
       
   211         } else {
       
   212             // Skip quoted string.
       
   213             ASSERT(c == '"');
       
   214             while (1) {
       
   215                 NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
       
   216                 if (escapedCharacterOrStringEnd.location == NSNotFound) {
       
   217                     return;
       
   218                 }
       
   219                 c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
       
   220                 remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
       
   221                 remaining.length = stringLength - remaining.location;
       
   222                 
       
   223                 // If we are the end of the string, then break from the string loop back to the host name loop.
       
   224                 if (c == '"') {
       
   225                     break;
       
   226                 }
       
   227                 
       
   228                 // Skip escaped character.
       
   229                 ASSERT(c == '\\');
       
   230                 if (remaining.length == 0) {
       
   231                     return;
       
   232                 }                
       
   233                 remaining.location += 1;
       
   234                 remaining.length -= 1;
       
   235             }
       
   236         }
       
   237     }
       
   238 }
       
   239 
       
   240 static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
       
   241 {
       
   242     // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
       
   243     // but we have to do it before doing all the %-escaping, and this is the only
       
   244     // code we have that parses mailto URLs anyway.
       
   245 
       
   246     // Maybe we should implement this using a character buffer instead?
       
   247 
       
   248     if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
       
   249         applyHostNameFunctionToMailToURLString(string, f, context);
       
   250         return;
       
   251     }
       
   252 
       
   253     // Find the host name in a hierarchical URL.
       
   254     // It comes after a "://" sequence, with scheme characters preceding.
       
   255     // If ends with the end of the string or a ":", "/", or a "?".
       
   256     // If there is a "@" character, the host part is just the part after the "@".
       
   257     NSRange separatorRange = [string rangeOfString:@"://"];
       
   258     if (separatorRange.location == NSNotFound) {
       
   259         return;
       
   260     }
       
   261 
       
   262     // Check that all characters before the :// are valid scheme characters.
       
   263     static NSCharacterSet *nonSchemeCharacters;
       
   264     if (nonSchemeCharacters == nil) {
       
   265         nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
       
   266         CFRetain(nonSchemeCharacters);
       
   267     }
       
   268     if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
       
   269         return;
       
   270     }
       
   271 
       
   272     unsigned stringLength = [string length];
       
   273 
       
   274     static NSCharacterSet *hostTerminators;
       
   275     if (hostTerminators == nil) {
       
   276         hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
       
   277         CFRetain(hostTerminators);
       
   278     }
       
   279 
       
   280     // Start after the separator.
       
   281     unsigned authorityStart = NSMaxRange(separatorRange);
       
   282 
       
   283     // Find terminating character.
       
   284     NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
       
   285     unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
       
   286 
       
   287     // Find "@" for the start of the host name.
       
   288     NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
       
   289     unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
       
   290 
       
   291     f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
       
   292 }
       
   293 
       
   294 @implementation NSURL (WebNSURLExtras)
       
   295 
       
   296 static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
       
   297 {
       
   298     BOOL needsMapping = encode
       
   299         ? [string _web_hostNameNeedsEncodingWithRange:range]
       
   300         : [string _web_hostNameNeedsDecodingWithRange:range];
       
   301     if (!needsMapping) {
       
   302         return;
       
   303     }
       
   304 
       
   305     NSMutableArray **array = (NSMutableArray **)context;
       
   306     if (*array == nil) {
       
   307         *array = [[NSMutableArray alloc] init];
       
   308     }
       
   309 
       
   310     [*array addObject:[NSValue valueWithRange:range]];
       
   311 }
       
   312 
       
   313 static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
       
   314 {
       
   315     return collectRangesThatNeedMapping(string, range, context, YES);
       
   316 }
       
   317 
       
   318 static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
       
   319 {
       
   320     return collectRangesThatNeedMapping(string, range, context, NO);
       
   321 }
       
   322 
       
   323 static NSString *mapHostNames(NSString *string, BOOL encode)
       
   324 {
       
   325     // Generally, we want to optimize for the case where there is one host name that does not need mapping.
       
   326     
       
   327     if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
       
   328         return string;
       
   329 
       
   330     // Make a list of ranges that actually need mapping.
       
   331     NSMutableArray *hostNameRanges = nil;
       
   332     StringRangeApplierFunction f = encode
       
   333         ? collectRangesThatNeedEncoding
       
   334         : collectRangesThatNeedDecoding;
       
   335     applyHostNameFunctionToURLString(string, f, &hostNameRanges);
       
   336     if (hostNameRanges == nil)
       
   337         return string;
       
   338 
       
   339     // Do the mapping.
       
   340     NSMutableString *mutableCopy = [string mutableCopy];
       
   341     unsigned i = [hostNameRanges count];
       
   342     while (i-- != 0) {
       
   343         NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
       
   344         NSString *mappedHostName = encode
       
   345             ? [string _web_encodeHostNameWithRange:hostNameRange]
       
   346             : [string _web_decodeHostNameWithRange:hostNameRange];
       
   347         [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
       
   348     }
       
   349     [hostNameRanges release];
       
   350     return [mutableCopy autorelease];
       
   351 }
       
   352 
       
   353 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
       
   354 {
       
   355     if (string == nil) {
       
   356         return nil;
       
   357     }
       
   358     string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);
       
   359 
       
   360     NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
       
   361     ASSERT(userTypedData);
       
   362 
       
   363     const UInt8 *inBytes = static_cast<const UInt8 *>([userTypedData bytes]);
       
   364     int inLength = [userTypedData length];
       
   365     if (inLength == 0) {
       
   366         return [NSURL URLWithString:@""];
       
   367     }
       
   368     
       
   369     char *outBytes = static_cast<char *>(malloc(inLength * 3)); // large enough to %-escape every character
       
   370     char *p = outBytes;
       
   371     int outLength = 0;
       
   372     int i;
       
   373     for (i = 0; i < inLength; i++) {
       
   374         UInt8 c = inBytes[i];
       
   375         if (c <= 0x20 || c >= 0x7f) {
       
   376             *p++ = '%';
       
   377             *p++ = hexDigit(c >> 4);
       
   378             *p++ = hexDigit(c & 0xf);
       
   379             outLength += 3;
       
   380         }
       
   381         else {
       
   382             *p++ = c;
       
   383             outLength++;
       
   384         }
       
   385     }
       
   386  
       
   387     NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
       
   388     return [self _web_URLWithData:data relativeToURL:URL];
       
   389 }
       
   390 
       
   391 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string
       
   392 {
       
   393     return [self _web_URLWithUserTypedString:string relativeToURL:nil];
       
   394 }
       
   395 
       
   396 + (NSURL *)_web_URLWithDataAsString:(NSString *)string
       
   397 {
       
   398     if (string == nil) {
       
   399         return nil;
       
   400     }
       
   401     return [self _web_URLWithDataAsString:string relativeToURL:nil];
       
   402 }
       
   403 
       
   404 + (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
       
   405 {
       
   406     if (string == nil) {
       
   407         return nil;
       
   408     }
       
   409     string = [string _webkit_stringByTrimmingWhitespace];
       
   410     NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
       
   411     return [self _web_URLWithData:data relativeToURL:baseURL];
       
   412 }
       
   413 
       
   414 + (NSURL *)_web_URLWithData:(NSData *)data
       
   415 {
       
   416     return urlWithData(data);
       
   417 }      
       
   418 
       
   419 + (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
       
   420 {
       
   421     return urlWithDataRelativeToURL(data, baseURL);
       
   422 }
       
   423 
       
   424 - (NSData *)_web_originalData
       
   425 {
       
   426     return urlOriginalData(self);
       
   427 }
       
   428 
       
   429 - (NSString *)_web_originalDataAsString
       
   430 {
       
   431     return urlOriginalDataAsString(self);
       
   432 }
       
   433 
       
   434 - (NSString *)_web_userVisibleString
       
   435 {
       
   436     NSData *data = [self _web_originalData];
       
   437     const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
       
   438     int length = [data length];
       
   439 
       
   440     bool needsHostNameDecoding = false;
       
   441 
       
   442     const unsigned char *p = before;
       
   443     int bufferLength = (length * 3) + 1;
       
   444     char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
       
   445     char *q = after;
       
   446     int i;
       
   447     for (i = 0; i < length; i++) {
       
   448         unsigned char c = p[i];
       
   449         // escape control characters, space, and delete
       
   450         if (c <= 0x20 || c == 0x7f) {
       
   451             *q++ = '%';
       
   452             *q++ = hexDigit(c >> 4);
       
   453             *q++ = hexDigit(c & 0xf);
       
   454         }
       
   455         // unescape escape sequences that indicate bytes greater than 0x7f
       
   456         else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
       
   457             unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
       
   458             if (u > 0x7f) {
       
   459                 // unescape
       
   460                 *q++ = u;
       
   461             }
       
   462             else {
       
   463                 // do not unescape
       
   464                 *q++ = p[i];
       
   465                 *q++ = p[i + 1];
       
   466                 *q++ = p[i + 2];
       
   467             }
       
   468             i += 2;
       
   469         } 
       
   470         else {
       
   471             *q++ = c;
       
   472 
       
   473             // Check for "xn--" in an efficient, non-case-sensitive, way.
       
   474             if (c == '-' && i >= 3 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
       
   475                 needsHostNameDecoding = true;
       
   476         }
       
   477     }
       
   478     *q = '\0';
       
   479     
       
   480     // Check string to see if it can be converted to display using UTF-8  
       
   481     NSString *result = [NSString stringWithUTF8String:after];
       
   482     if (!result) {
       
   483         // Could not convert to UTF-8.
       
   484         // Convert characters greater than 0x7f to escape sequences.
       
   485         // Shift current string to the end of the buffer
       
   486         // then we will copy back bytes to the start of the buffer 
       
   487         // as we convert.
       
   488         int afterlength = q - after;
       
   489         char *p = after + bufferLength - afterlength - 1;
       
   490         memmove(p, after, afterlength + 1); // copies trailing '\0'
       
   491         char *q = after;
       
   492         while (*p) {
       
   493             unsigned char c = *p;
       
   494             if (c > 0x7f) {
       
   495                 *q++ = '%';
       
   496                 *q++ = hexDigit(c >> 4);
       
   497                 *q++ = hexDigit(c & 0xf);
       
   498             }
       
   499             else {
       
   500                 *q++ = *p;
       
   501             }
       
   502             p++;
       
   503         }
       
   504         *q = '\0';
       
   505         result = [NSString stringWithUTF8String:after];
       
   506     }
       
   507 
       
   508     free(after);
       
   509     
       
   510     // As an optimization, only do host name decoding if we have "xn--" somewhere.
       
   511     return needsHostNameDecoding ? mapHostNames(result, NO) : result;
       
   512 }
       
   513 
       
   514 - (BOOL)_web_isEmpty
       
   515 {
       
   516     return urlIsEmpty(self);
       
   517 }
       
   518 
       
   519 - (const char *)_web_URLCString
       
   520 {
       
   521     NSMutableData *data = [NSMutableData data];
       
   522     [data appendData:[self _web_originalData]];
       
   523     [data appendBytes:"\0" length:1];
       
   524     return (const char *)[data bytes];
       
   525  }
       
   526 
       
   527 - (NSURL *)_webkit_canonicalize
       
   528 {
       
   529     InitWebCoreSystemInterface();
       
   530     return canonicalURL(self);
       
   531 }
       
   532 
       
   533 typedef struct {
       
   534     NSString *scheme;
       
   535     NSString *user;
       
   536     NSString *password;
       
   537     NSString *host;
       
   538     CFIndex port; // kCFNotFound means ignore/omit
       
   539     NSString *path;
       
   540     NSString *query;
       
   541     NSString *fragment;
       
   542 } WebKitURLComponents;
       
   543 
       
   544 
       
   545 
       
   546 - (NSURL *)_webkit_URLByRemovingComponent:(CFURLComponentType)component
       
   547 {
       
   548     return urlByRemovingComponent(self, component);
       
   549 }
       
   550 
       
   551 - (NSURL *)_webkit_URLByRemovingFragment
       
   552 {
       
   553     return urlByRemovingFragment(self);
       
   554 }
       
   555 
       
   556 - (NSURL *)_webkit_URLByRemovingResourceSpecifier
       
   557 {
       
   558     return urlByRemovingResourceSpecifier(self);
       
   559 }
       
   560 
       
   561 - (BOOL)_webkit_isJavaScriptURL
       
   562 {
       
   563     return [[self _web_originalDataAsString] _webkit_isJavaScriptURL];
       
   564 }
       
   565 
       
   566 - (NSString *)_webkit_scriptIfJavaScriptURL
       
   567 {
       
   568     return [[self absoluteString] _webkit_scriptIfJavaScriptURL];
       
   569 }
       
   570 
       
   571 - (BOOL)_webkit_isFileURL
       
   572 {
       
   573     return urlIsFileURL(self);
       
   574 }
       
   575 
       
   576 - (BOOL)_webkit_isFTPDirectoryURL
       
   577 {
       
   578     return [[self _web_originalDataAsString] _webkit_isFTPDirectoryURL];
       
   579 }
       
   580 
       
   581 - (BOOL)_webkit_shouldLoadAsEmptyDocument
       
   582 {
       
   583     return [[self _web_originalDataAsString] _webkit_hasCaseInsensitivePrefix:@"about:"] || [self _web_isEmpty];
       
   584 }
       
   585 
       
   586 - (NSURL *)_web_URLWithLowercasedScheme
       
   587 {
       
   588     CFRange range;
       
   589     CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
       
   590     if (range.location == kCFNotFound) {
       
   591         return self;
       
   592     }
       
   593     
       
   594     UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
       
   595     UInt8 *buffer = static_buffer;
       
   596     CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
       
   597     if (bytesFilled == -1) {
       
   598         CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
       
   599         buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
       
   600         bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
       
   601         ASSERT(bytesFilled == bytesToAllocate);
       
   602     }
       
   603     
       
   604     int i;
       
   605     BOOL changed = NO;
       
   606     for (i = 0; i < range.length; ++i) {
       
   607         UInt8 c = buffer[range.location + i];
       
   608         UInt8 lower = tolower(c);
       
   609         if (c != lower) {
       
   610             buffer[range.location + i] = lower;
       
   611             changed = YES;
       
   612         }
       
   613     }
       
   614     
       
   615     NSURL *result = changed
       
   616         ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
       
   617         : (NSURL *)self;
       
   618 
       
   619     if (buffer != static_buffer) {
       
   620         free(buffer);
       
   621     }
       
   622     
       
   623     return result;
       
   624 }
       
   625 
       
   626 
       
   627 -(BOOL)_web_hasQuestionMarkOnlyQueryString
       
   628 {
       
   629     CFRange rangeWithSeparators;
       
   630     CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
       
   631     if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
       
   632         return YES;
       
   633     }
       
   634     return NO;
       
   635 }
       
   636 
       
   // Returns, encoded as ISO Latin-1, the characters of the absolute string that lie
   // after the scheme and the one character following it, up to the end of the scheme's
   // separator-inclusive range (presumably the "//" of "http://" -- confirm against callers).
   // Returns nil when the URL has no scheme or the computed range does not fit the string.
   -(NSData *)_web_schemeSeparatorWithoutColon
   {
       CFRange rangeWithSeparators;
       CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);

       // A missing component is reported through the returned range, so test it
       // explicitly instead of relying on unsigned wrap-around in the bounds check below.
       if (range.location == kCFNotFound || rangeWithSeparators.location == kCFNotFound) {
           return nil;
       }

       // Skip the scheme itself plus the single character (the ':') that follows it.
       CFIndex separatorsLocation = range.location + range.length + 1;
       CFIndex separatorsLength = rangeWithSeparators.length - range.length - 1;
       if (separatorsLocation < 0 || separatorsLength < 0) {
           return nil;
       }

       NSString *absoluteString = [self absoluteString];
       CFIndex stringLength = [absoluteString length];
       // Written so that the comparison cannot overflow.
       if (separatorsLength > stringLength || separatorsLocation > stringLength - separatorsLength) {
           return nil;
       }

       NSString *slashes = [absoluteString substringWithRange:NSMakeRange(separatorsLocation, separatorsLength)];
       return [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
   }
       
   652 
       
   // Sentinel component type: the method below treats it as "the whole URL".
   #define completeURL ((CFURLComponentType)-1)

   // Returns the bytes of the requested URL component (or of the entire URL when
   // componentType is completeURL) with every byte <= 0x20 or >= 0x7f replaced by a
   // %XX escape. For kCFURLComponentQuery a leading '?' is prepended when the query
   // is non-empty or when the URL has a question-mark-only query string.
   // Returns nil when the component is not present, when the fallback heap buffer
   // cannot be allocated, or when the component range does not fit the fetched bytes.
   -(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
   {
       // A compile-time constant, so the stack buffer below is an ordinary array.
       static const CFIndex URLComponentTypeBufferLength = 2048;

       // Locate the component before fetching any bytes, so the "not found" exit
       // cannot leak a heap buffer.
       CFRange range;
       range.location = 0;
       range.length = 0;
       if (componentType != completeURL) {
           range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
           if (range.location == kCFNotFound) {
               return nil;
           }
       }

       UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
       UInt8 *allBytesBuffer = staticAllBytesBuffer;

       CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
       if (bytesFilled == -1) {
           // The URL did not fit in the stack buffer: query the required size and use the heap.
           CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
           allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
           if (!allBytesBuffer) {
               return nil;
           }
           bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
       }

       if (componentType == completeURL) {
           range.length = bytesFilled;
       }

       NSMutableData *resultData = nil;
       // Defensive: only read the component if it lies inside the bytes actually fetched.
       if (bytesFilled >= 0 && range.length >= 0 && range.location <= bytesFilled - range.length) {
           const UInt8 *bytes = allBytesBuffer + range.location;
           resultData = [NSMutableData data];

           // NOTE: add a leading '?' to non-zero-length query strings.
           // NOTE: retain question-mark-only query strings.
           if (componentType == kCFURLComponentQuery) {
               if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
                   [resultData appendBytes:"?" length:1];
               }
           }

           CFIndex i;
           for (i = 0; i < range.length; i++) {
               unsigned char c = bytes[i];
               if (c <= 0x20 || c >= 0x7f) {
                   // Control characters, space, DEL and non-ASCII bytes become %XX.
                   char escaped[3];
                   escaped[0] = '%';
                   escaped[1] = hexDigit(c >> 4);
                   escaped[2] = hexDigit(c & 0xf);
                   [resultData appendBytes:escaped length:3];
               } else {
                   [resultData appendBytes:&c length:1];
               }
           }
       }

       if (staticAllBytesBuffer != allBytesBuffer) {
           free(allBytesBuffer);
       }

       return resultData;
   }
       
   715 
       
   // Returns the escaped bytes of the URL's scheme component, as produced by
   // _web_dataForURLComponentType:.
   -(NSData *)_web_schemeData
   {
       NSData *schemeData = [self _web_dataForURLComponentType:kCFURLComponentScheme];
       return schemeData;
   }
       
   720 
       
   // Returns the escaped bytes of the URL's host component. For a URL whose scheme
   // compares case-insensitively equal to "file", a host of "localhost" is reported as nil.
   -(NSData *)_web_hostData
   {
       NSData *hostData = [self _web_dataForURLComponentType:kCFURLComponentHost];
       NSData *schemeData = [self _web_schemeData];

       // Take off localhost for file
       BOOL isFileScheme = [schemeData _web_isCaseInsensitiveEqualToCString:"file"];
       if (isFileScheme && [hostData _web_isCaseInsensitiveEqualToCString:"localhost"]) {
           return nil;
       }
       return hostData;
   }
       
   731 
       
   // Returns the URL's host data (see _web_hostData) decoded as UTF-8.
   // When there is no host data, an empty NSData is decoded instead of nil.
   - (NSString *)_web_hostString
   {
       NSData *data = [self _web_hostData];
       if (!data) {
           data = [NSData data];
       }
       // Decode the data prepared above; fetching the host data a second time here
       // would discard the nil fallback and repeat the work.
       return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
   }
       
   740 
       
   // Forwards the receiver and MIMEType to suggestedFilenameWithMIMEType() and
   // returns its result.
   - (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
   {
       NSString *suggestedFilename = suggestedFilenameWithMIMEType(self, MIMEType);
       return suggestedFilename;
   }
       
   745 
       
   746 @end
       
   747 
       
   748 @implementation NSString (WebNSURLExtras)
       
   749 
       
   // Scans the receiver's UTF-8 bytes and returns NO as soon as it finds any of:
   //   - a byte <= 0x20, or the byte 0x7f;
   //   - a %XX escape (two hex digits) whose value is greater than 0x7f;
   //   - the sequence "xn--" ('x' and 'n' matched in either case).
   // Returns YES if none of these occur. Per the original note, these are the things
   // that will lead _web_userVisibleString to actually change things.
   - (BOOL)_web_isUserVisibleURL
   {
       // Try a stack buffer first; fall back to -UTF8String if the conversion fails.
       char stackBuffer[1024];
       const char *bytes = stackBuffer;
       if (!CFStringGetCString((CFStringRef)self, stackBuffer, 1023, kCFStringEncodingUTF8)) {
           bytes = [self UTF8String];
       }

       int byteCount = strlen(bytes);

       int index;
       for (index = 0; index < byteCount; index++) {
           unsigned char c = bytes[index];

           // Control characters, space, and delete.
           if (c <= 0x20 || c == 0x7f) {
               return NO;
           }

           if (c == '%' && (index + 1 < byteCount && isHexDigit(bytes[index + 1])) && index + 2 < byteCount && isHexDigit(bytes[index + 2])) {
               unsigned char unescaped = (hexDigitValue(bytes[index + 1]) << 4) | hexDigitValue(bytes[index + 2]);
               if (unescaped > 0x7f) {
                   return NO;
               }
               // Step over the two hex digits that were just examined.
               index += 2;
               continue;
           }

           // Check for "xn--" in an efficient, non-case-sensitive, way: the current byte
           // is the second '-', and the three bytes before it are 'x', 'n', '-'.
           if (c == '-' && index >= 3 && (bytes[index - 3] | 0x20) == 'x' && (bytes[index - 2] | 0x20) == 'n' && bytes[index - 1] == '-') {
               return NO;
           }
       }

       return YES;
   }
       
   793 
       
   794 
       
   // Returns whether the receiver starts with "javascript:", compared case-insensitively
   // by _webkit_hasCaseInsensitivePrefix:.
   - (BOOL)_webkit_isJavaScriptURL
   {
       BOOL hasJavaScriptPrefix = [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
       return hasJavaScriptPrefix;
   }
       
   799 
       
   // Returns the result of stringIsFileURL() applied to the receiver.
   - (BOOL)_webkit_isFileURL
   {
       BOOL isFileURL = stringIsFileURL(self);
       return isFileURL;
   }
       
   804 
       
   // Converts the receiver to a DeprecatedString, runs it through KURL::decode_string,
   // and returns the result as an NSString.
   - (NSString *)_webkit_stringByReplacingValidPercentEscapes
   {
       DeprecatedString encoded = DeprecatedString::fromNSString(self);
       DeprecatedString decoded = KURL::decode_string(encoded);
       return decoded.getNSString();
   }
       
   810 
       
   // For a JavaScript URL (see _webkit_isJavaScriptURL), returns everything after the
   // first 11 characters with valid percent escapes replaced; returns nil for any other string.
   - (NSString *)_webkit_scriptIfJavaScriptURL
   {
       if ([self _webkit_isJavaScriptURL]) {
           // 11 is the length of the "javascript:" prefix.
           NSString *escapedScript = [self substringFromIndex:11];
           return [escapedScript _webkit_stringByReplacingValidPercentEscapes];
       }
       return nil;
   }
       
   818 
       
   // Returns YES when the receiver is at least 5 characters long, ends with '/', and
   // starts with "ftp:" (case-insensitive prefix check); returns NO otherwise.
   - (BOOL)_webkit_isFTPDirectoryURL
   {
       int characterCount = [self length];
       if (characterCount < 5) {  // 5 is length of "ftp:/"
           return NO;
       }

       unichar finalCharacter = [self characterAtIndex:characterCount - 1];
       BOOL endsWithSlash = finalCharacter == '/';
       return endsWithSlash && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
   }
       
   828 
       
   829 
       
   // Reads a white list file and sets, in IDNScriptWhiteList, one bit per script named in it.
   // The file is read a word at a time; a '#' starts a comment that runs to the end of the
   // line. At most the first 32 characters of a word are kept; the rest of the word is
   // discarded. Each word is looked up with u_getPropertyValueEnum(UCHAR_SCRIPT, ...), and
   // values outside [0, USCRIPT_CODE_LIMIT) are ignored.
   // Returns NO when filename is nil or the file cannot be opened, YES otherwise.
   static BOOL readIDNScriptWhiteListFile(NSString *filename)
   {
       if (!filename) {
           return NO;
       }
       FILE *file = fopen([filename fileSystemRepresentation], "r");
       if (file == NULL) {
           return NO;
       }

       // Read a word at a time.
       // Allow comments, starting with # character to the end of the line.
       while (1) {
           // Skip a comment if present.
           int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
           if (result == EOF) {
               break;
           }

           // Read a script name if present.
           char word[33];
           result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
           if (result == EOF) {
               break;
           }
           if (result == 1) {
               // Got a word, map to script code and put it into the array.
               int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
               if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
                   size_t index = script / 32;
                   // Shift an unsigned value: shifting a signed 1 by 31 would move a bit
                   // into the sign position, which is not well defined.
                   uint32_t mask = (uint32_t)1 << (script % 32);
                   IDNScriptWhiteList[index] |= mask;
               }
           }
       }
       fclose(file);
       return YES;
   }
       
   868 
       
   // Loads the IDN script white list. Each Library directory (all domains) is tried in
   // order for "IDNScriptWhiteList.txt"; the first file that can be read wins. If none
   // can be read, the "IDNScriptWhiteList.txt" resource of the com.apple.WebKit bundle is used.
   static void readIDNScriptWhiteList(void)
   {
       // Read white list from library.
       NSArray *libraryDirectories = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
       NSEnumerator *directoryEnumerator = [libraryDirectories objectEnumerator];
       NSString *directory;
       while ((directory = [directoryEnumerator nextObject])) {
           NSString *candidatePath = [directory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"];
           if (readIDNScriptWhiteListFile(candidatePath)) {
               return;
           }
       }

       // Fall back on white list inside bundle.
       NSBundle *webKitBundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
       NSString *bundledPath = [webKitBundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"];
       readIDNScriptWhiteListFile(bundledPath);
   }
       
   885 
       
   // Returns YES only if every code point in the UTF-16 buffer belongs to a script whose
   // bit is set in IDNScriptWhiteList and isLookalikeCharacter() is false for it.
   // The white list is loaded on first use through pthread_once.
   // Returns NO (after logging) when ICU reports a non-zero status or a negative script
   // code, and NO when the script code is at or beyond USCRIPT_CODE_LIMIT.
   static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
   {
       pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

       int32_t i = 0;
       while (i < length) {
           UChar32 c;
           // Reads one code point and advances i past it. The trailing semicolon keeps
           // this valid whether the macro expands to a block or to a do/while.
           U16_NEXT(buffer, i, length, c);
           UErrorCode error = U_ZERO_ERROR;
           UScriptCode script = uscript_getScript(c, &error);
           if (error != U_ZERO_ERROR) {
               LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
               return NO;
           }
           if (script < 0) {
               LOG_ERROR("got negative number for script code from ICU: %d", script);
               return NO;
           }
           if (script >= USCRIPT_CODE_LIMIT) {
               return NO;
           }
           size_t index = script / 32;
           // Shift an unsigned value: shifting a signed 1 by 31 would move a bit into
           // the sign position, which is not well defined.
           uint32_t mask = (uint32_t)1 << (script % 32);
           if (!(IDNScriptWhiteList[index] & mask)) {
               return NO;
           }

           if (isLookalikeCharacter(c))
               return NO;
       }
       return YES;
   }
       
   918 
       
   // Maps the host name found at `range` of the receiver through ICU's IDNA conversion:
   // uidna_IDNToASCII when encode is YES, uidna_IDNToUnicode when it is NO.
   //
   // A nil result means no mapping is necessary (or none could be performed).
   // With makeString == NO the result is nil or self; self signals that mapping is necessary.
   // With makeString == YES the result is nil or the mapped string.
   //
   // nil is returned when: range is longer than HOST_NAME_BUFFER_LENGTH, the receiver is
   // empty, ICU reports any status other than U_ZERO_ERROR, the converted characters equal
   // the source characters, or (decoding only) the converted characters do not pass
   // allCharactersInIDNScriptWhiteList().
   - (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
   {
       if (range.length > HOST_NAME_BUFFER_LENGTH || [self length] == 0) {
           return nil;
       }

       UChar source[HOST_NAME_BUFFER_LENGTH];
       UChar destination[HOST_NAME_BUFFER_LENGTH];

       // When encoding a host that contains '%', convert the percent-unescaped form of
       // the host instead (if unescaping succeeds).
       NSString *hostSource = self;
       NSRange hostRange = range;
       if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) {
           NSString *escapedHost = [self substringWithRange:range];
           NSString *unescapedHost = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)escapedHost, CFSTR("")));
           if (unescapedHost != nil) {
               hostSource = unescapedHost;
               hostRange = NSMakeRange(0, [unescapedHost length]);
           }
       }

       int sourceLength = hostRange.length;
       [hostSource getCharacters:source range:hostRange];

       UErrorCode status = U_ZERO_ERROR;
       int32_t convertedLength;
       if (encode) {
           convertedLength = uidna_IDNToASCII(source, sourceLength, destination, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
       } else {
           convertedLength = uidna_IDNToUnicode(source, sourceLength, destination, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
       }
       if (status != U_ZERO_ERROR) {
           return nil;
       }

       // Identical output means there is nothing to map.
       BOOL unchanged = convertedLength == sourceLength && memcmp(source, destination, sourceLength * sizeof(UChar)) == 0;
       if (unchanged) {
           return nil;
       }

       // Decoded names are only accepted when every character is white-listed.
       if (!encode && !allCharactersInIDNScriptWhiteList(destination, convertedLength)) {
           return nil;
       }

       if (!makeString) {
           return self;
       }
       return [NSString stringWithCharacters:destination length:convertedLength];
   }
       
   961 
       
   // Returns YES when mapping the host name at `range` in the decode direction
   // (without building a string) yields a non-nil result.
   - (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
   {
       NSString *mappingIndicator = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
       return mappingIndicator != nil;
   }
       
   966 
       
   // Returns YES when mapping the host name at `range` in the encode direction
   // (without building a string) yields a non-nil result.
   - (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
   {
       NSString *mappingIndicator = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
       return mappingIndicator != nil;
   }
       
   971 
       
   // Returns the decoded form of the host name at `range`, or nil when
   // _web_mapHostNameWithRange:encode:makeString: produces no mapping.
   - (NSString *)_web_decodeHostNameWithRange:(NSRange)range
   {
       NSString *decodedHostName = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
       return decodedHostName;
   }
       
   976 
       
   // Returns the encoded form of the host name at `range`, or nil when
   // _web_mapHostNameWithRange:encode:makeString: produces no mapping.
   - (NSString *)_web_encodeHostNameWithRange:(NSRange)range
   {
       NSString *encodedHostName = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
       return encodedHostName;
   }
       
   981 
       
   // Treats the whole receiver as a host name and returns its decoded form,
   // or the receiver itself when no mapping is produced.
   - (NSString *)_web_decodeHostName
   {
       NSRange wholeString = NSMakeRange(0, [self length]);
       NSString *decodedHostName = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
       if (decodedHostName != nil) {
           return decodedHostName;
       }
       return self;
   }
       
   987 
       
   // Treats the whole receiver as a host name and returns its encoded form,
   // or the receiver itself when no mapping is produced.
   - (NSString *)_web_encodeHostName
   {
       NSRange wholeString = NSMakeRange(0, [self length]);
       NSString *encodedHostName = [self _web_mapHostNameWithRange:wholeString encode:YES makeString:YES];
       if (encodedHostName != nil) {
           return encodedHostName;
       }
       return self;
   }
       
   993 
       
   // Returns the range {0, index of the first ':'} when the receiver has a ':' that is
   // not its first character and every character before it is a letter, a digit, '+',
   // '.' or '-'. Otherwise returns {NSNotFound, 0}.
   -(NSRange)_webkit_rangeOfURLScheme
   {
       NSRange colonRange = [self rangeOfString:@":"];
       if (colonRange.location == NSNotFound || colonRange.location == 0) {
           return NSMakeRange(NSNotFound, 0);
       }

       static NSCharacterSet *disallowedSchemeCharacters = nil;
       if (!disallowedSchemeCharacters) {
           /*
            This stuff is very expensive.  10-15 msec on a 2x1.2GHz.  If not cached it swamps
            everything else when adding items to the autocomplete DB.  Makes me wonder if we
            even need to enforce the character set here.
           */
           NSString *schemeCharacters = @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
           NSCharacterSet *allowedSet = [NSCharacterSet characterSetWithCharactersInString:schemeCharacters];
           disallowedSchemeCharacters = [[allowedSet invertedSet] retain];
       }

       NSRange candidateScheme = NSMakeRange(0, colonRange.location);
       NSRange firstIllegal = [self rangeOfCharacterFromSet:disallowedSchemeCharacters options:0 range:candidateScheme];
       if (firstIllegal.location != NSNotFound) {
           return NSMakeRange(NSNotFound, 0);
       }
       return candidateScheme;
   }
       
  1015 
       
  // Returns YES when the receiver, after whitespace trimming, has a URL scheme
  // according to _webkit_rangeOfURLScheme.
  -(BOOL)_webkit_looksLikeAbsoluteURL
  {
      // Trim whitespace because _web_URLWithString allows whitespace.
      NSString *trimmed = [self _webkit_stringByTrimmingWhitespace];
      NSRange schemeRange = [trimmed _webkit_rangeOfURLScheme];
      return schemeRange.location != NSNotFound;
  }
       
  1021 
       
  // Returns everything after the first '#' in the receiver, or nil when the
  // receiver contains no '#'.
  - (NSString *)_webkit_URLFragment
  {
      NSRange hashRange = [self rangeOfString:@"#" options:NSLiteralSearch];
      if (hashRange.location != NSNotFound) {
          return [self substringFromIndex:hashRange.location + 1];
      }
      return nil;
  }
       
  1031 
       
  1032 @end