|
1 /* |
|
2 * Copyright (C) 2005 Apple Computer, Inc. All rights reserved. |
|
3 * Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
|
4 * |
|
5 * Redistribution and use in source and binary forms, with or without |
|
6 * modification, are permitted provided that the following conditions |
|
7 * are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * 2. Redistributions in binary form must reproduce the above copyright |
|
12 * notice, this list of conditions and the following disclaimer in the |
|
13 * documentation and/or other materials provided with the distribution. |
|
14 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
|
15 * its contributors may be used to endorse or promote products derived |
|
16 * from this software without specific prior written permission. |
|
17 * |
|
18 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
|
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
21 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
|
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
28 */ |
|
29 |
|
30 #import <WebKit/WebNSURLExtras.h> |
|
31 |
|
32 #import <JavaScriptCore/Assertions.h> |
|
33 #import <WebKit/WebKitNSStringExtras.h> |
|
34 #import <WebKit/WebNSDataExtras.h> |
|
35 #import <WebKit/WebNSObjectExtras.h> |
|
36 #import <WebKit/WebLocalizableStrings.h> |
|
37 #import <WebCore/KURL.h> |
|
38 #import <WebCore/LoaderNSURLExtras.h> |
|
39 |
|
40 #import <WebKitSystemInterface.h> |
|
41 #import "WebSystemInterface.h" |
|
42 |
|
43 #import <Foundation/NSURLRequest.h> |
|
44 |
|
45 #import <unicode/uchar.h> |
|
46 #import <unicode/uidna.h> |
|
47 #import <unicode/uscript.h> |
|
48 |
|
using namespace WebCore;

// Callback invoked with the string being scanned, the range of one host name inside it,
// and the caller-supplied context pointer.
typedef void (*StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// Upper bound on the host name length considered for IDN mapping. It needs to be big
// enough to hold an IDN-encoded name; longer host names are simply not IDN-mapped,
// which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048

// Size of the on-stack buffer that is tried first when copying out a URL's bytes.
#define URL_BYTES_BUFFER_LENGTH 2048

// pthread_once control guarding the one-time load of the IDN script white list.
static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;

// Bit set holding one bit per ICU script code: bit (script % 32) of word (script / 32).
// A set bit means the script is white-listed.
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
|
61 |
|
// Returns YES for characters that this file treats as unsafe "lookalikes".
// FIXME: Move this code down into WebCore so it can be shared with other platforms.
static inline BOOL isLookalikeCharacter(int charCode)
{
    // Individually rejected characters. We considered the characters in Mozilla's blacklist
    // (http://kb.mozillazine.org/Network.IDN.blacklist_chars) and included all of those
    // that ICU can encode.
    static const int blacklistedCharacters[] = {
        0x01C3, /* LATIN LETTER RETROFLEX CLICK */
        0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */
        0x0338, /* COMBINING LONG SOLIDUS OVERLAY */
        0x05B4, /* HEBREW POINT HIRIQ */
        0x05BC, /* HEBREW POINT DAGESH OR MAPIQ */
        0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
        0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */
        0x0660, /* ARABIC INDIC DIGIT ZERO */
        0x06D4, /* ARABIC FULL STOP */
        0x06F0, /* EXTENDED ARABIC INDIC DIGIT ZERO */
        0x2027, /* HYPHENATION POINT */
        0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
        0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
        0x2044, /* FRACTION SLASH */
        0x2215, /* DIVISION SLASH */
        0x23ae, /* INTEGRAL EXTENSION */
        0x2571, /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
        0x29F8, /* BIG SOLIDUS */
        0x29f6, /* SOLIDUS WITH OVERBAR */
        0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */
        0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
        0x3008, /* LEFT ANGLE BRACKET */
        0x3014, /* LEFT TORTOISE SHELL BRACKET */
        0x3015, /* RIGHT TORTOISE SHELL BRACKET */
        0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */
        0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */
        0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */
        0x33DF, /* SQUARE A OVER M */
        0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
        0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
        0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
        0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */
        0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */
    };

    // Any non-printable character, any character considered whitespace that ICU has not
    // already converted to a space, and any default-ignorable character is unsafe.
    if (!u_isprint(charCode))
        return YES;
    if (u_isUWhiteSpace(charCode))
        return YES;
    if (u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        return YES;

    const size_t blacklistLength = sizeof(blacklistedCharacters) / sizeof(blacklistedCharacters[0]);
    for (size_t index = 0; index < blacklistLength; ++index) {
        if (blacklistedCharacters[index] == charCode)
            return YES;
    }
    return NO;
}
|
115 |
|
// Returns the uppercase ASCII hexadecimal digit ('0'-'9', 'A'-'F') for a value in 0..15.
// Any other value is a caller error: it is logged and '0' is returned.
static char hexDigit(int i)
{
    // Only 0..15 map to a single hex digit; 16 must be rejected too, otherwise the
    // arithmetic below would yield 'G'.
    if (i < 0 || i > 15) {
        LOG_ERROR("illegal hex digit");
        return '0';
    }
    return static_cast<char>(i < 10 ? '0' + i : 'A' + (i - 10));
}
|
131 |
|
// Returns YES if c is an ASCII hexadecimal digit, in either letter case.
static BOOL isHexDigit(char c)
{
    if (c >= '0' && c <= '9')
        return YES;
    if (c >= 'A' && c <= 'F')
        return YES;
    return c >= 'a' && c <= 'f';
}
|
136 |
|
// Returns the numeric value (0..15) of an ASCII hexadecimal digit of either case.
// A character that is not a hex digit is logged as an error and yields 0.
static int hexDigitValue(char c)
{
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    if (c >= '0' && c <= '9')
        return c - '0';

    LOG_ERROR("illegal hex digit");
    return 0;
}
|
151 |
|
// Calls f once for every host name range found in a mailto: URL string.
//
// In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?'
// character (or the end of the string). Quoted strings are skipped so that characters in
// them don't confuse us. Once a '?' is found we are past the part of the URL that contains
// host names, and scanning stops.
static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // The character sets are created lazily on first use and kept for the process lifetime.
    static NSCharacterSet *markerSet;
    if (!markerSet) {
        markerSet = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
        CFRetain(markerSet);
    }
    static NSCharacterSet *hostEndSet;
    if (!hostEndSet) {
        hostEndSet = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
        CFRetain(hostEndSet);
    }
    static NSCharacterSet *quoteOrEscapeSet;
    if (!quoteOrEscapeSet) {
        quoteOrEscapeSet = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
        CFRetain(quoteOrEscapeSet);
    }

    unsigned length = [string length];
    NSRange unscanned = NSMakeRange(0, length);

    for (;;) {
        // Find the next '"', '@' or '?'.
        NSRange marker = [string rangeOfCharacterFromSet:markerSet options:0 range:unscanned];
        if (marker.location == NSNotFound)
            return;

        unichar markerCharacter = [string characterAtIndex:marker.location];
        unscanned.location = NSMaxRange(marker);
        unscanned.length = length - unscanned.location;

        if (markerCharacter == '?')
            return;

        if (markerCharacter != '@') {
            // Skip a quoted string, honoring backslash escapes inside it.
            ASSERT(markerCharacter == '"');
            BOOL quoteClosed = NO;
            while (!quoteClosed) {
                NSRange special = [string rangeOfCharacterFromSet:quoteOrEscapeSet options:0 range:unscanned];
                if (special.location == NSNotFound)
                    return;

                unichar specialCharacter = [string characterAtIndex:special.location];
                unscanned.location = NSMaxRange(special);
                unscanned.length = length - unscanned.location;

                if (specialCharacter == '"') {
                    // Closing quote: resume looking for host names.
                    quoteClosed = YES;
                    continue;
                }

                // A backslash escapes the following character; step over it.
                ASSERT(specialCharacter == '\\');
                if (unscanned.length == 0)
                    return;
                unscanned.location += 1;
                unscanned.length -= 1;
            }
            continue;
        }

        // We are just past an '@': the host name runs up to the next terminator.
        unsigned hostStart = unscanned.location;
        NSRange terminator = [string rangeOfCharacterFromSet:hostEndSet options:0 range:unscanned];
        if (terminator.location == NSNotFound) {
            // No terminator: the host name extends to the end of the string and is the last one.
            f(string, NSMakeRange(hostStart, length - hostStart), context);
            return;
        }

        // Leave the terminator itself unscanned so that a '?' still ends the outer loop.
        unscanned.location = terminator.location;
        unscanned.length = length - unscanned.location;
        f(string, NSMakeRange(hostStart, terminator.location - hostStart), context);
    }
}
|
239 |
|
// Calls f for each host name range found in a URL string: every host of a mailto: URL,
// or the single host of a hierarchical ("scheme://...") URL. Strings that are neither
// are left alone and f is not called.
static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
    // but we have to do it before doing all the %-escaping, and this is the only
    // code we have that parses mailto URLs anyway.

    // Maybe we should implement this using a character buffer instead?

    if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
        applyHostNameFunctionToMailToURLString(string, f, context);
        return;
    }

    // Find the host name in a hierarchical URL.
    // It comes after a "://" sequence, with scheme characters preceding.
    // The authority ("userinfo@host:port") ends at the end of the string or at a "/", "?" or "#".
    // Within the authority, the host starts after the last "@" (if any) and ends at the
    // next ":" (the port separator) or at the end of the authority.
    NSRange separatorRange = [string rangeOfString:@"://"];
    if (separatorRange.location == NSNotFound)
        return;

    // Check that all characters before the :// are valid scheme characters.
    static NSCharacterSet *nonSchemeCharacters;
    if (nonSchemeCharacters == nil) {
        nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
        CFRetain(nonSchemeCharacters);
    }
    if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound)
        return;

    unsigned stringLength = [string length];

    static NSCharacterSet *authorityTerminators;
    if (authorityTerminators == nil) {
        authorityTerminators = [NSCharacterSet characterSetWithCharactersInString:@"/?#"];
        CFRetain(authorityTerminators);
    }

    // Start after the separator.
    unsigned authorityStart = NSMaxRange(separatorRange);

    // Find the end of the authority. ':' must not be treated as a terminator at this point,
    // because it may be the "user:password" separator rather than the port separator.
    NSRange authorityTerminator = [string rangeOfCharacterFromSet:authorityTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
    unsigned authorityEnd = authorityTerminator.location == NSNotFound ? stringLength : authorityTerminator.location;

    // The host name starts after the last "@" in the authority, if there is one.
    NSRange userInfoTerminator = [string rangeOfString:@"@" options:NSBackwardsSearch range:NSMakeRange(authorityStart, authorityEnd - authorityStart)];
    unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);

    // The host name ends at the port separator, or at the end of the authority.
    NSRange portSeparator = [string rangeOfString:@":" options:0 range:NSMakeRange(hostNameStart, authorityEnd - hostNameStart)];
    unsigned hostNameEnd = portSeparator.location == NSNotFound ? authorityEnd : portSeparator.location;

    f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
}
|
293 |
|
294 @implementation NSURL (WebNSURLExtras) |
|
295 |
|
// StringRangeApplierFunction helper. If the host name at range needs IDN encoding
// (encode == YES) or decoding (encode == NO), the range is boxed in an NSValue and appended
// to the array that context points at. context is an NSMutableArray ** whose array is
// created on first use; the caller owns (and must release) that array.
static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
{
    BOOL rangeNeedsMapping;
    if (encode)
        rangeNeedsMapping = [string _web_hostNameNeedsEncodingWithRange:range];
    else
        rangeNeedsMapping = [string _web_hostNameNeedsDecodingWithRange:range];

    if (rangeNeedsMapping) {
        NSMutableArray **rangesOut = static_cast<NSMutableArray **>(context);
        if (!*rangesOut)
            *rangesOut = [[NSMutableArray alloc] init];
        [*rangesOut addObject:[NSValue valueWithRange:range]];
    }
}
|
312 |
|
// StringRangeApplierFunction that records host name ranges needing IDN encoding.
static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = YES;
    collectRangesThatNeedMapping(string, range, context, encode);
}
|
317 |
|
// StringRangeApplierFunction that records host name ranges needing IDN decoding.
static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    const BOOL encode = NO;
    collectRangesThatNeedMapping(string, range, context, encode);
}
|
322 |
|
// Returns string with every host name that needs it IDN-encoded (encode == YES) or
// IDN-decoded (encode == NO). When nothing needs mapping the original string object is
// returned unchanged; otherwise an autoreleased copy is returned.
static NSString *mapHostNames(NSString *string, BOOL encode)
{
    // Generally, we want to optimize for the case where there is one host name that does
    // not need mapping. A string that is entirely ASCII is returned as-is when encoding.
    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
        return string;

    // Collect the host name ranges that actually need mapping; the array is only
    // allocated by the collector if at least one such range exists.
    NSMutableArray *rangesToMap = nil;
    applyHostNameFunctionToURLString(string, encode ? collectRangesThatNeedEncoding : collectRangesThatNeedDecoding, &rangesToMap);
    if (!rangesToMap)
        return string;

    // Replace from the last range to the first so that earlier ranges stay valid
    // even when a replacement changes the length of the string.
    NSMutableString *mappedString = [[string mutableCopy] autorelease];
    NSEnumerator *lastToFirst = [rangesToMap reverseObjectEnumerator];
    NSValue *boxedRange;
    while ((boxedRange = [lastToFirst nextObject])) {
        NSRange hostNameRange = [boxedRange rangeValue];
        NSString *replacement;
        if (encode)
            replacement = [string _web_encodeHostNameWithRange:hostNameRange];
        else
            replacement = [string _web_decodeHostNameWithRange:hostNameRange];
        [mappedString replaceCharactersInRange:hostNameRange withString:replacement];
    }
    [rangesToMap release];
    return mappedString;
}
|
352 |
|
// Builds a URL from text typed by the user: surrounding whitespace is trimmed, host names
// are IDN-encoded, and every UTF-8 byte that is <= 0x20 or >= 0x7f is %-escaped before the
// bytes are handed to +_web_URLWithData:relativeToURL:. Returns nil for a nil string.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
{
    if (!string)
        return nil;

    NSString *mappedString = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);

    NSData *utf8Data = [mappedString dataUsingEncoding:NSUTF8StringEncoding];
    ASSERT(utf8Data);

    const UInt8 *source = static_cast<const UInt8 *>([utf8Data bytes]);
    int sourceLength = [utf8Data length];
    if (!sourceLength)
        return [NSURL URLWithString:@""];

    // Large enough to %-escape every byte.
    char *escaped = static_cast<char *>(malloc(sourceLength * 3));
    char *cursor = escaped;
    const UInt8 *sourceEnd = source + sourceLength;
    for (const UInt8 *next = source; next != sourceEnd; ++next) {
        UInt8 byte = *next;
        if (byte > 0x20 && byte < 0x7f) {
            // Printable ASCII passes through untouched.
            *cursor++ = byte;
            continue;
        }
        *cursor++ = '%';
        *cursor++ = hexDigit(byte >> 4);
        *cursor++ = hexDigit(byte & 0xf);
    }

    // The NSData adopts the malloc'ed buffer, so it is not freed here.
    NSData *escapedData = [NSData dataWithBytesNoCopy:escaped length:cursor - escaped];
    return [self _web_URLWithData:escapedData relativeToURL:URL];
}
|
390 |
|
// Convenience form of +_web_URLWithUserTypedString:relativeToURL: with no base URL.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
{
    NSURL *noBaseURL = nil;
    return [self _web_URLWithUserTypedString:string relativeToURL:noBaseURL];
}
|
395 |
|
// Convenience form of +_web_URLWithDataAsString:relativeToURL: with no base URL.
// Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string
{
    return string ? [self _web_URLWithDataAsString:string relativeToURL:nil] : nil;
}
|
403 |
|
// Builds a URL from a string whose characters stand for raw URL bytes: the string is
// trimmed of surrounding whitespace, converted to bytes as ISO Latin-1, and passed to
// +_web_URLWithData:relativeToURL:. Returns nil for a nil string.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
{
    if (!string)
        return nil;

    NSString *trimmed = [string _webkit_stringByTrimmingWhitespace];
    NSData *latin1Bytes = [trimmed dataUsingEncoding:NSISOLatin1StringEncoding];
    return [self _web_URLWithData:latin1Bytes relativeToURL:baseURL];
}
|
413 |
|
// Thin wrapper over urlWithData(), which is not defined in the visible part of this file.
+ (NSURL *)_web_URLWithData:(NSData *)data
{
    NSURL *URL = urlWithData(data);
    return URL;
}
|
418 |
|
// Thin wrapper over urlWithDataRelativeToURL(), which is not defined in the visible part
// of this file.
+ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
{
    NSURL *URL = urlWithDataRelativeToURL(data, baseURL);
    return URL;
}
|
423 |
|
// Thin wrapper over urlOriginalData(), which is not defined in the visible part of this file.
- (NSData *)_web_originalData
{
    NSData *originalData = urlOriginalData(self);
    return originalData;
}
|
428 |
|
// Thin wrapper over urlOriginalDataAsString(), which is not defined in the visible part
// of this file.
- (NSString *)_web_originalDataAsString
{
    NSString *originalString = urlOriginalDataAsString(self);
    return originalString;
}
|
433 |
|
// Returns a string form of the URL suitable for showing to the user:
//  - control characters, space and DEL are %-escaped;
//  - %XX escapes that stand for bytes above 0x7f are unescaped, so that UTF-8 text shows
//    up as characters (other %XX escapes are kept as they are);
//  - if the unescaped bytes are not valid UTF-8, every byte above 0x7f is %-escaped instead;
//  - host names are IDN-decoded when the text contains "xn--".
- (NSString *)_web_userVisibleString
{
    NSData *data = [self _web_originalData];
    const unsigned char *before = static_cast<const unsigned char *>([data bytes]);
    int length = [data length];

    bool needsHostNameDecoding = false;

    // Large enough to %-escape every byte, plus the terminating NUL.
    int bufferLength = (length * 3) + 1;
    char *after = static_cast<char *>(malloc(bufferLength));
    char *q = after;
    for (int i = 0; i < length; i++) {
        unsigned char c = before[i];
        if (c <= 0x20 || c == 0x7f) {
            // Escape control characters, space, and delete.
            *q++ = '%';
            *q++ = hexDigit(c >> 4);
            *q++ = hexDigit(c & 0xf);
        } else if (c == '%' && i + 2 < length && isHexDigit(before[i + 1]) && isHexDigit(before[i + 2])) {
            // Unescape escape sequences that indicate bytes greater than 0x7f.
            unsigned char u = (hexDigitValue(before[i + 1]) << 4) | hexDigitValue(before[i + 2]);
            if (u > 0x7f) {
                *q++ = u;
            } else {
                // Do not unescape; copy the three characters through.
                *q++ = before[i];
                *q++ = before[i + 1];
                *q++ = before[i + 2];
            }
            i += 2;
        } else {
            *q++ = c;

            // Check for "xn--" in an efficient, non-case-sensitive, way. The check looks at
            // the bytes already written to the output, so it must be guarded by the output
            // length: unescaping shrinks the output (three input bytes become one), which
            // means the input index alone does not guarantee four output bytes exist.
            if (c == '-' && !needsHostNameDecoding && q - after >= 4
                && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
                needsHostNameDecoding = true;
        }
    }
    *q = '\0';

    // Check string to see if it can be converted to display using UTF-8.
    NSString *result = [NSString stringWithUTF8String:after];
    if (!result) {
        // Could not convert to UTF-8.
        // Convert characters greater than 0x7f to escape sequences.
        // Shift current string to the end of the buffer, then copy bytes back
        // to the start of the buffer as we convert.
        int afterLength = q - after;
        char *source = after + bufferLength - afterLength - 1;
        memmove(source, after, afterLength + 1); // copies trailing '\0'
        char *destination = after;
        for (; *source; source++) {
            unsigned char c = *source;
            if (c > 0x7f) {
                *destination++ = '%';
                *destination++ = hexDigit(c >> 4);
                *destination++ = hexDigit(c & 0xf);
            } else
                *destination++ = *source;
        }
        *destination = '\0';
        result = [NSString stringWithUTF8String:after];
    }

    free(after);

    // As an optimization, only do host name decoding if we have "xn--" somewhere.
    return needsHostNameDecoding ? mapHostNames(result, NO) : result;
}
|
513 |
|
// Thin wrapper over urlIsEmpty(), which is not defined in the visible part of this file.
- (BOOL)_web_isEmpty
{
    BOOL isEmpty = urlIsEmpty(self);
    return isEmpty;
}
|
518 |
|
// Returns the URL's original bytes as a NUL-terminated C string. The pointer refers to the
// storage of a temporary (autoreleased) NSMutableData created here, so it is only usable
// for as long as that object is alive.
- (const char *)_web_URLCString
{
    static const char terminator = '\0';

    NSMutableData *terminatedBytes = [NSMutableData data];
    [terminatedBytes appendData:[self _web_originalData]];
    [terminatedBytes appendBytes:&terminator length:sizeof(terminator)];
    return static_cast<const char *>([terminatedBytes bytes]);
}
|
526 |
|
// Calls InitWebCoreSystemInterface() and then returns canonicalURL(self). Neither helper
// is defined in the visible part of this file.
- (NSURL *)_webkit_canonicalize
{
    InitWebCoreSystemInterface();
    NSURL *canonical = canonicalURL(self);
    return canonical;
}
|
532 |
|
// Plain record holding the individual pieces of a URL.
// NOTE(review): nothing in the visible part of this file refers to this type.
typedef struct {
    NSString *scheme;
    NSString *user;
    NSString *password;
    NSString *host;
    CFIndex port; // kCFNotFound means ignore/omit
    NSString *path;
    NSString *query;
    NSString *fragment;
} WebKitURLComponents;
|
543 |
|
544 |
|
545 |
|
// Thin wrapper over urlByRemovingComponent(), which is not defined in the visible part
// of this file.
- (NSURL *)_webkit_URLByRemovingComponent:(CFURLComponentType)component
{
    NSURL *strippedURL = urlByRemovingComponent(self, component);
    return strippedURL;
}
|
550 |
|
// Thin wrapper over urlByRemovingFragment(), which is not defined in the visible part
// of this file.
- (NSURL *)_webkit_URLByRemovingFragment
{
    NSURL *strippedURL = urlByRemovingFragment(self);
    return strippedURL;
}
|
555 |
|
// Thin wrapper over urlByRemovingResourceSpecifier(), which is not defined in the visible
// part of this file.
- (NSURL *)_webkit_URLByRemovingResourceSpecifier
{
    NSURL *strippedURL = urlByRemovingResourceSpecifier(self);
    return strippedURL;
}
|
560 |
|
// Returns whether the URL's original data, taken as a string, is a javascript: URL
// according to -[NSString _webkit_isJavaScriptURL].
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *originalString = [self _web_originalDataAsString];
    return [originalString _webkit_isJavaScriptURL];
}
|
565 |
|
// Applies -[NSString _webkit_scriptIfJavaScriptURL] to the URL's absolute string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    NSString *absoluteString = [self absoluteString];
    return [absoluteString _webkit_scriptIfJavaScriptURL];
}
|
570 |
|
// Thin wrapper over urlIsFileURL(), which is not defined in the visible part of this file.
- (BOOL)_webkit_isFileURL
{
    BOOL isFileURL = urlIsFileURL(self);
    return isFileURL;
}
|
575 |
|
// Applies -[NSString _webkit_isFTPDirectoryURL] to the URL's original data as a string.
- (BOOL)_webkit_isFTPDirectoryURL
{
    NSString *originalString = [self _web_originalDataAsString];
    return [originalString _webkit_isFTPDirectoryURL];
}
|
580 |
|
// Returns YES for URLs whose original string starts with "about:" (case-insensitively)
// and for empty URLs.
- (BOOL)_webkit_shouldLoadAsEmptyDocument
{
    NSString *originalString = [self _web_originalDataAsString];
    if ([originalString _webkit_hasCaseInsensitivePrefix:@"about:"])
        return YES;
    return [self _web_isEmpty] ? YES : NO;
}
|
585 |
|
// Returns a URL equal to the receiver but with its scheme lowercased. The receiver itself
// is returned when it has no scheme or when the scheme is already all lowercase.
- (NSURL *)_web_URLWithLowercasedScheme
{
    CFURLRef url = (CFURLRef)self;

    // Use the function's return value: that is the range of the scheme itself, and its
    // location is kCFNotFound when the URL has no scheme. The optional out-parameter
    // (the range including separators) is not a reliable way to detect absence.
    CFRange schemeRange = CFURLGetByteRangeForComponent(url, kCFURLComponentScheme, NULL);
    if (schemeRange.location == kCFNotFound)
        return self;

    // Try the stack buffer first and fall back to the heap for long URLs.
    UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
    UInt8 *buffer = static_buffer;
    CFIndex bytesFilled = CFURLGetBytes(url, buffer, URL_BYTES_BUFFER_LENGTH);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes(url, NULL, 0);
        buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes(url, buffer, bytesToAllocate);
        ASSERT(bytesFilled == bytesToAllocate);
    }

    // Lowercase the scheme bytes in place, remembering whether anything changed.
    BOOL changed = NO;
    for (CFIndex i = 0; i < schemeRange.length; ++i) {
        UInt8 c = buffer[schemeRange.location + i];
        UInt8 lower = tolower(c);
        if (c != lower) {
            buffer[schemeRange.location + i] = lower;
            changed = YES;
        }
    }

    // Only build a new URL when the bytes actually differ.
    NSURL *result = changed
        ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
        : (NSURL *)self;

    if (buffer != static_buffer)
        free(buffer);

    return result;
}
|
625 |
|
626 |
|
// Returns YES when the query component's byte range, including its separators, is found
// and is exactly one byte long, i.e. the query consists of a lone '?'.
-(BOOL)_web_hasQuestionMarkOnlyQueryString
{
    CFRange queryRangeWithSeparators;
    CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &queryRangeWithSeparators);
    return queryRangeWithSeparators.location != kCFNotFound && queryRangeWithSeparators.length == 1;
}
|
636 |
|
// Returns, as ISO Latin-1 bytes, the separator characters that follow the scheme's ':'
// (for example "//"). Returns nil when the URL has no scheme or the computed range does
// not fit inside the absolute string.
-(NSData *)_web_schemeSeparatorWithoutColon
{
    CFRange rangeWithSeparators;
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);

    // A missing scheme is reported through the returned range; check it explicitly rather
    // than relying on the arithmetic below wrapping around to an out-of-bounds range.
    if (range.location == kCFNotFound || rangeWithSeparators.location == kCFNotFound)
        return nil;

    // Everything in the separators-included range after the scheme and its ':'.
    CFIndex separatorsLength = rangeWithSeparators.length - range.length - 1;
    if (separatorsLength < 0)
        return nil;

    NSString *absoluteString = [self absoluteString];
    NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, separatorsLength);
    if (NSMaxRange(separatorsRange) > [absoluteString length])
        return nil;

    NSString *slashes = [absoluteString substringWithRange:separatorsRange];
    return [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
}
|
652 |
|
// Pseudo component type meaning "the whole URL" for -_web_dataForURLComponentType:.
#define completeURL (CFURLComponentType)-1

// Returns the bytes of one URL component (or of the whole URL for completeURL) with every
// byte <= 0x20 or >= 0x7f %-escaped. For the query component a leading '?' is added when
// the query is non-empty or consists of a lone '?'. Returns nil when the URL does not
// have the requested component.
-(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
{
    static const int URLComponentTypeBufferLength = 2048;

    CFURLRef url = (CFURLRef)self;

    // Look the component up before touching any buffer, so that the "not found" early
    // return cannot leak a heap allocation.
    CFRange range = CFRangeMake(0, 0);
    if (componentType != completeURL) {
        range = CFURLGetByteRangeForComponent(url, componentType, NULL);
        if (range.location == kCFNotFound)
            return nil;
    }

    // Try the stack buffer first and fall back to the heap for long URLs.
    UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
    UInt8 *allBytesBuffer = staticAllBytesBuffer;
    CFIndex bytesFilled = CFURLGetBytes(url, allBytesBuffer, URLComponentTypeBufferLength);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes(url, NULL, 0);
        allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes(url, allBytesBuffer, bytesToAllocate);
    }

    if (componentType == completeURL)
        range = CFRangeMake(0, bytesFilled);

    const UInt8 *bytes = allBytesBuffer + range.location;
    NSMutableData *resultData = [NSMutableData data];

    // NOTE: add a leading '?' to non-zero length query strings.
    // NOTE: retain question-mark only query strings.
    if (componentType == kCFURLComponentQuery) {
        if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString])
            [resultData appendBytes:"?" length:1];
    }

    for (CFIndex i = 0; i < range.length; i++) {
        UInt8 c = bytes[i];
        if (c <= 0x20 || c >= 0x7f) {
            char escaped[3] = { '%', hexDigit(c >> 4), hexDigit(c & 0xf) };
            [resultData appendBytes:escaped length:sizeof(escaped)];
        } else
            [resultData appendBytes:&c length:1];
    }

    if (allBytesBuffer != staticAllBytesBuffer)
        free(allBytesBuffer);

    return resultData;
}
|
715 |
|
// Returns the escaped bytes of the URL's scheme component, or nil if there is none.
-(NSData *)_web_schemeData
{
    NSData *schemeData = [self _web_dataForURLComponentType:kCFURLComponentScheme];
    return schemeData;
}
|
720 |
|
// Returns the escaped bytes of the URL's host component. For file: URLs a host of
// "localhost" (compared case-insensitively) is dropped and nil is returned instead.
-(NSData *)_web_hostData
{
    NSData *host = [self _web_dataForURLComponentType:kCFURLComponentHost];
    NSData *scheme = [self _web_schemeData];

    // Take off localhost for file.
    BOOL isLocalFileHost = [scheme _web_isCaseInsensitiveEqualToCString:"file"]
        && [host _web_isCaseInsensitiveEqualToCString:"localhost"];
    return isLocalFileHost ? nil : host;
}
|
731 |
|
// Returns the URL's host (see -_web_hostData) decoded as UTF-8. When the URL has no host
// the conversion is done on empty data rather than on nil.
- (NSString *)_web_hostString
{
    NSData *data = [self _web_hostData];
    if (!data)
        data = [NSData data];

    // Convert the nil-checked data; do not fetch the host data a second time, which
    // would both repeat the work and bypass the nil substitution above.
    return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
}
|
740 |
|
// Thin wrapper over suggestedFilenameWithMIMEType(), which is not defined in the visible
// part of this file.
- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
{
    NSString *filename = suggestedFilenameWithMIMEType(self, MIMEType);
    return filename;
}
|
745 |
|
746 @end |
|
747 |
|
748 @implementation NSString (WebNSURLExtras) |
|
749 |
|
// Returns NO if the string contains anything that -[NSURL _web_userVisibleString] would
// rewrite: a byte <= 0x20 or equal to 0x7f, a %XX escape standing for a byte above 0x7f,
// or the sequence "xn--" (case-insensitive in the first two letters). Returns YES otherwise.
- (BOOL)_web_isUserVisibleURL
{
    // Get the UTF-8 bytes, preferring a stack buffer and falling back to -UTF8String
    // when the conversion into the stack buffer fails.
    char stackBuffer[1024];
    const char *utf8;
    if (CFStringGetCString((CFStringRef)self, stackBuffer, 1023, kCFStringEncodingUTF8))
        utf8 = stackBuffer;
    else
        utf8 = [self UTF8String];

    int length = strlen(utf8);
    for (int i = 0; i < length; i++) {
        unsigned char c = utf8[i];

        // Control characters, space, and delete would get escaped.
        if (c <= 0x20 || c == 0x7f)
            return NO;

        if (c == '%' && i + 2 < length && isHexDigit(utf8[i + 1]) && isHexDigit(utf8[i + 2])) {
            // An escape for a byte above 0x7f would get unescaped.
            unsigned char u = (hexDigitValue(utf8[i + 1]) << 4) | hexDigitValue(utf8[i + 2]);
            if (u > 0x7f)
                return NO;
            i += 2;
            continue;
        }

        // Check for "xn--" in an efficient, non-case-sensitive, way.
        if (c == '-' && i >= 3 && (utf8[i - 3] | 0x20) == 'x' && (utf8[i - 2] | 0x20) == 'n' && utf8[i - 1] == '-')
            return NO;
    }

    return YES;
}
|
793 |
|
794 |
|
// Returns whether the string starts with "javascript:", ignoring case.
- (BOOL)_webkit_isJavaScriptURL
{
    NSString *javaScriptPrefix = @"javascript:";
    return [self _webkit_hasCaseInsensitivePrefix:javaScriptPrefix];
}
|
799 |
|
// Thin wrapper over stringIsFileURL(), which is not defined in the visible part of this file.
- (BOOL)_webkit_isFileURL
{
    BOOL isFileURL = stringIsFileURL(self);
    return isFileURL;
}
|
804 |
|
// Returns the result of running the string through KURL::decode_string(), converted back
// to an NSString.
- (NSString *)_webkit_stringByReplacingValidPercentEscapes
{
    return KURL::decode_string(DeprecatedString::fromNSString(self)).getNSString();
}
|
810 |
|
// For a javascript: URL string, returns the part after the scheme with valid percent
// escapes replaced; returns nil for any other string.
- (NSString *)_webkit_scriptIfJavaScriptURL
{
    // Number of characters in "javascript:".
    static const unsigned javaScriptPrefixLength = 11;

    if ([self _webkit_isJavaScriptURL])
        return [[self substringFromIndex:javaScriptPrefixLength] _webkit_stringByReplacingValidPercentEscapes];
    return nil;
}
|
818 |
|
// Returns YES when the string is at least 5 characters long, ends with '/', and starts
// with "ftp:" (ignoring case).
- (BOOL)_webkit_isFTPDirectoryURL
{
    int length = [self length];
    if (length < 5) // 5 is length of "ftp:/"
        return NO;
    if ([self characterAtIndex:length - 1] != '/')
        return NO;
    return [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
}
|
828 |
|
829 |
|
// Reads a white list file and sets the bit in IDNScriptWhiteList for every script name in
// it that ICU recognizes. The file is read a word at a time; a '#' starts a comment that
// runs to the end of the line. Returns NO if filename is nil or the file cannot be opened,
// YES once the file has been read.
static BOOL readIDNScriptWhiteListFile(NSString *filename)
{
    if (!filename)
        return NO;

    FILE *file = fopen([filename fileSystemRepresentation], "r");
    if (!file)
        return NO;

    for (;;) {
        // Skip a comment if present.
        int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
        if (result == EOF)
            break;

        // Read a script name if present. At most 32 characters are stored; the rest of
        // an over-long word is consumed and discarded.
        char word[33];
        result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
        if (result == EOF)
            break;
        if (result != 1)
            continue;

        // Got a word, map to script code and put it into the array.
        int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
        if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
            size_t index = script / 32;
            // Shift an unsigned one: shifting a signed int by 31 would overflow into
            // the sign bit.
            uint32_t mask = static_cast<uint32_t>(1) << (script % 32);
            IDNScriptWhiteList[index] |= mask;
        }
    }
    fclose(file);
    return YES;
}
|
868 |
|
// Loads the IDN script white list. Every Library directory in every domain is tried for an
// "IDNScriptWhiteList.txt" file and the first one that can be read wins; if none can, the
// copy inside the com.apple.WebKit bundle is used.
static void readIDNScriptWhiteList(void)
{
    // Read white list from library.
    NSEnumerator *libraryDirectories = [NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES) objectEnumerator];
    NSString *libraryDirectory;
    while ((libraryDirectory = [libraryDirectories nextObject])) {
        NSString *candidatePath = [libraryDirectory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"];
        if (readIDNScriptWhiteListFile(candidatePath))
            return;
    }

    // Fall back on white list inside bundle.
    NSString *bundledPath = [[NSBundle bundleWithIdentifier:@"com.apple.WebKit"] pathForResource:@"IDNScriptWhiteList" ofType:@"txt"];
    readIDNScriptWhiteListFile(bundledPath);
}
|
885 |
|
// Returns YES only if every code point in the UTF-16 |buffer| of |length| code
// units belongs to a script whose bit is set in IDNScriptWhiteList and is not
// reported by isLookalikeCharacter(). Returns NO on the first code point that
// fails either test, or if ICU reports a problem while looking up a script.
//
// The white list is loaded on first use via pthread_once.
static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
{
    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

    int32_t i = 0;
    while (i < length) {
        UChar32 c;
        // Decode the next code point and advance i past it. The trailing semicolon
        // keeps this valid whether the macro expands to a bare block or a do/while.
        U16_NEXT(buffer, i, length, c);
        UErrorCode error = U_ZERO_ERROR;
        UScriptCode script = uscript_getScript(c, &error);
        if (error != U_ZERO_ERROR) {
            LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
            return NO;
        }
        if (script < 0) {
            LOG_ERROR("got negative number for script code from ICU: %d", script);
            return NO;
        }
        if (script >= USCRIPT_CODE_LIMIT) {
            return NO;
        }
        size_t index = script / 32;
        // Shift an unsigned 1: when script % 32 == 31, shifting a signed int
        // into the sign bit is undefined behavior in C.
        uint32_t mask = (uint32_t)1 << (script % 32);
        if (!(IDNScriptWhiteList[index] & mask)) {
            return NO;
        }

        if (isLookalikeCharacter(c))
            return NO;
    }
    return YES;
}
|
918 |
|
// Runs the host name located at |range| through ICU IDNA conversion: to ASCII
// when |encode| is YES, to Unicode when |encode| is NO.
//
// A nil result means no mapping is necessary. nil is also returned when the
// range does not fit the fixed-size buffers, when the receiver is empty, when
// ICU reports anything other than U_ZERO_ERROR, or (decoding only) when the
// converted text fails allCharactersInIDNScriptWhiteList().
// If makeString is NO, a non-nil result is self, indicating mapping is necessary.
// If makeString is YES, a non-nil result is the mapped string.
- (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
{
    if (range.length > HOST_NAME_BUFFER_LENGTH || [self length] == 0) {
        return nil;
    }

    UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
    UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];

    // When encoding, percent escapes inside the host are resolved first; if that
    // succeeds the unescaped substring becomes the conversion input.
    NSString *hostSource = self;
    NSRange hostRange = range;
    if (encode) {
        NSRange percentRange = [self rangeOfString:@"%" options:NSLiteralSearch range:range];
        if (percentRange.location != NSNotFound) {
            NSString *escapedHost = [self substringWithRange:range];
            NSString *unescapedHost = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)escapedHost, CFSTR("")));
            if (unescapedHost != nil) {
                hostSource = unescapedHost;
                hostRange = NSMakeRange(0, [unescapedHost length]);
            }
        }
    }

    int sourceLength = hostRange.length;
    [hostSource getCharacters:sourceBuffer range:hostRange];

    UErrorCode status = U_ZERO_ERROR;
    int32_t convertedLength;
    if (encode) {
        convertedLength = uidna_IDNToASCII(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    } else {
        convertedLength = uidna_IDNToUnicode(sourceBuffer, sourceLength, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    }
    if (status != U_ZERO_ERROR) {
        return nil;
    }

    // Conversion produced exactly the input: nothing to map.
    BOOL unchanged = convertedLength == sourceLength
        && memcmp(sourceBuffer, destinationBuffer, sourceLength * sizeof(UChar)) == 0;
    if (unchanged) {
        return nil;
    }

    // Decoded names are only accepted when every character passes the script white list.
    if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, convertedLength)) {
        return nil;
    }

    if (!makeString) {
        return self;
    }
    return [NSString stringWithCharacters:destinationBuffer length:convertedLength];
}
|
961 |
|
// Returns YES when -_web_mapHostNameWithRange:encode:makeString: yields a
// non-nil result for decoding the host name in |range|.
- (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
{
    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:NO makeString:NO];
    return mappingMarker != nil;
}
|
966 |
|
// Returns YES when -_web_mapHostNameWithRange:encode:makeString: yields a
// non-nil result for encoding the host name in |range|.
- (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
{
    NSString *mappingMarker = [self _web_mapHostNameWithRange:range encode:YES makeString:NO];
    return mappingMarker != nil;
}
|
971 |
|
// Returns the decoded form of the host name in |range|, or nil when
// -_web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_decodeHostNameWithRange:(NSRange)range
{
    NSString *decodedHost = [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
    return decodedHost;
}
|
976 |
|
// Returns the encoded form of the host name in |range|, or nil when
// -_web_mapHostNameWithRange:encode:makeString: produces no mapping.
- (NSString *)_web_encodeHostNameWithRange:(NSRange)range
{
    NSString *encodedHost = [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
    return encodedHost;
}
|
981 |
|
// Treats the whole receiver as a host name and returns its decoded form,
// or the receiver itself when no mapping is produced.
- (NSString *)_web_decodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *decodedHost = [self _web_mapHostNameWithRange:wholeString encode:NO makeString:YES];
    if (decodedHost != nil) {
        return decodedHost;
    }
    return self;
}
|
987 |
|
// Treats the whole receiver as a host name and returns its encoded form,
// or the receiver itself when no mapping is produced.
- (NSString *)_web_encodeHostName
{
    NSRange wholeString = NSMakeRange(0, [self length]);
    NSString *encodedHost = [self _web_mapHostNameWithRange:wholeString encode:YES makeString:YES];
    if (encodedHost != nil) {
        return encodedHost;
    }
    return self;
}
|
993 |
|
// Returns the range of the URL scheme: everything before the first ':' provided
// that span is non-empty and consists solely of ASCII letters, digits, '+', '.'
// and '-'. Returns {NSNotFound, 0} otherwise.
-(NSRange)_webkit_rangeOfURLScheme
{
    NSRange noScheme = NSMakeRange(NSNotFound, 0);

    NSRange colonRange = [self rangeOfString:@":"];
    if (colonRange.location == NSNotFound || colonRange.location == 0) {
        return noScheme;
    }

    static NSCharacterSet *nonSchemeCharacters = nil;
    if (nonSchemeCharacters == nil) {
        /*
         This stuff is very expensive. 10-15 msec on a 2x1.2GHz. If not cached it swamps
         everything else when adding items to the autocomplete DB. Makes me wonder if we
         even need to enforce the character set here.
        */
        NSCharacterSet *schemeCharacters = [NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"];
        nonSchemeCharacters = [[schemeCharacters invertedSet] retain];
    }

    NSRange candidate = NSMakeRange(0, colonRange.location);
    NSRange offending = [self rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:candidate];
    return offending.location == NSNotFound ? candidate : noScheme;
}
|
1015 |
|
// Returns YES when the receiver, after whitespace trimming, has a URL scheme
// according to -_webkit_rangeOfURLScheme.
-(BOOL)_webkit_looksLikeAbsoluteURL
{
    // Trim whitespace because _web_URLWithString allows whitespace.
    NSString *trimmed = [self _webkit_stringByTrimmingWhitespace];
    NSRange schemeRange = [trimmed _webkit_rangeOfURLScheme];
    return schemeRange.location != NSNotFound;
}
|
1021 |
|
// Returns everything after the first '#' in the receiver (possibly an empty
// string), or nil when the receiver contains no '#'.
- (NSString *)_webkit_URLFragment
{
    NSRange hashRange = [self rangeOfString:@"#" options:NSLiteralSearch];
    if (hashRange.location != NSNotFound) {
        // The fragment starts right after the one-character "#" match.
        return [self substringFromIndex:NSMaxRange(hashRange)];
    }
    return nil;
}
|
1031 |
|
1032 @end |