|
/*
 * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
28 |
|
29 #import <WebKit/WebNSDataExtras.h> |
|
30 #import <WebKit/WebNSDataExtrasPrivate.h> |
|
31 |
|
32 #import <wtf/Assertions.h> |
|
33 |
|
@interface NSString (WebNSDataExtrasInternal)
// Returns the receiver with header-field capitalization applied: the first
// ASCII letter of the string and of every segment following a '-' is
// upper-cased, every other ASCII letter is lower-cased.
- (NSString *)_web_capitalizeRFC822HeaderFieldName;
@end

@implementation NSString (WebNSDataExtrasInternal)

// Walks the receiver one UTF-16 unit at a time. No buffer is allocated until
// the first character that actually needs its case changed; a string that is
// already in canonical form is handed back as-is (retained + autoreleased).
// Only the ASCII ranges 'a'-'z' and 'A'-'Z' are ever rewritten.
- (NSString *)_web_capitalizeRFC822HeaderFieldName
{
    CFStringRef fieldName = (CFStringRef)self;
    CFIndex count = CFStringGetLength(fieldName);

    char *latin1Buffer = NULL;      // working copy when the name fits in ISO Latin-1
    UniChar *unicodeBuffer = NULL;  // working copy otherwise
    Boolean wide = FALSE;           // TRUE once unicodeBuffer is the active copy
    Boolean modified = FALSE;       // TRUE once a working copy exists
    Boolean atSegmentStart = TRUE;  // next letter begins a '-'-delimited segment

    CFIndex index;
    for (index = 0; index < count; index++) {
        UniChar current = CFStringGetCharacterAtIndex(fieldName, index);
        UniChar adjusted = current;

        if (atSegmentStart) {
            if (current >= 'a' && current <= 'z')
                adjusted = current + 'A' - 'a';
        } else {
            if (current >= 'A' && current <= 'Z')
                adjusted = current + 'a' - 'A';
        }

        if (adjusted != current) {
            if (!modified) {
                modified = TRUE;
                // A conversion pass with no output buffer just counts how many
                // characters ISO Latin-1 can represent.
                CFIndex convertible = CFStringGetBytes(fieldName, CFRangeMake(0, count), kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL);
                if (convertible == count) {
                    // Can be encoded in ISOLatin1
                    wide = FALSE;
                    latin1Buffer = CFAllocatorAllocate(NULL, count + 1, 0);
                    CFStringGetCString(fieldName, latin1Buffer, count + 1, kCFStringEncodingISOLatin1);
                } else {
                    wide = TRUE;
                    unicodeBuffer = CFAllocatorAllocate(NULL, count * sizeof(UniChar), 0);
                    CFStringGetCharacters(fieldName, CFRangeMake(0, count), unicodeBuffer);
                }
            }

            if (wide)
                unicodeBuffer[index] = adjusted;
            else
                latin1Buffer[index] = adjusted;
        }

        // A rewritten character is always a letter, so testing the adjusted
        // value is the same as testing the original one for '-'.
        atSegmentStart = (adjusted == '-');
    }

    if (!modified)
        return [[self retain] autorelease];

    // The NoCopy constructors take over the working buffer; passing NULL as
    // the contents deallocator selects the default allocator, which is the
    // one the buffer was allocated from.
    CFStringRef rebuilt;
    if (wide)
        rebuilt = CFStringCreateWithCharactersNoCopy(NULL, unicodeBuffer, count, NULL);
    else
        rebuilt = CFStringCreateWithCStringNoCopy(NULL, latin1Buffer, kCFStringEncodingISOLatin1, NULL);

    return [(NSString *)CFMakeCollectable(rebuilt) autorelease];
}

@end
|
109 |
|
@implementation NSData (WebKitExtras)

// Sniffs the start of the receiver for an RSS or Atom root element.
// Returns @"application/rss+xml", @"application/atom+xml", or nil when no
// feed root element is recognised. At most WEB_GUESS_MIME_TYPE_PEEK_LENGTH
// bytes are searched (that constant is defined outside this file).
- (NSString *)_webkit_guessedMIMETypeForXML
{
    int length = [self length];
    const UInt8 *bytes = [self bytes];

// Number of bytes kept in reserve after the last '<' position searched, so
// that the tag comparisons below stay inside the data.
#define CHANNEL_TAG_LENGTH 7

    const char *p = (const char *)bytes;
    // One past the last byte of the receiver; used to bound the one tag
    // comparison that is longer than the reserve above.
    const char *end = p + [self length];
    int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (CHANNEL_TAG_LENGTH - 1);

    BOOL foundRDF = NO;

    while (remaining > 0) {
        // Look for a "<".
        const char *hit = memchr(p, '<', remaining);
        if (!hit) {
            break;
        }

        // We are trying to identify RSS or Atom. RSS has a top-level
        // element of either <rss> or <rdf>. However, there are
        // non-RSS RDF files, so in the case of <rdf> we further look
        // for a <channel> element. In the case of an Atom file, a
        // top-level <feed> element is all we need to see. Only tags
        // starting with <? or <! can precede the root element. We
        // bail if we don't find an <rss>, <feed> or <rdf> element
        // right after those.

        if (foundRDF) {
            // "<channel" is 8 bytes, one more than the reserve guarantees,
            // so make sure all 8 bytes exist before comparing them.
            if ((size_t)(end - hit) >= strlen("<channel") &&
                strncasecmp(hit, "<channel", strlen("<channel")) == 0) {
                return @"application/rss+xml";
            }
        } else if (strncasecmp(hit, "<rdf", strlen("<rdf")) == 0) {
            foundRDF = YES;
        } else if (strncasecmp(hit, "<rss", strlen("<rss")) == 0) {
            return @"application/rss+xml";
        } else if (strncasecmp(hit, "<feed", strlen("<feed")) == 0) {
            return @"application/atom+xml";
        } else if (strncasecmp(hit, "<?", strlen("<?")) != 0 && strncasecmp(hit, "<!", strlen("<!")) != 0) {
            return nil;
        }

        // Skip the "<" and continue.
        remaining -= (hit + 1) - p;
        p = hit + 1;
    }

#undef CHANNEL_TAG_LENGTH

    return nil;
}

// Guesses a MIME type from the receiver's content. Checks, in order: an
// RSS/Atom root element, HTML-looking tags, a literal "text/html" in the
// content, vCard / vCalendar headers, all-printable-ASCII content
// (text/plain) and finally the JPEG magic number. Returns nil when nothing
// matches.
- (NSString *)_webkit_guessedMIMEType
{
#define JPEG_MAGIC_NUMBER_LENGTH 4
#define SCRIPT_TAG_LENGTH 7
#define TEXT_HTML_LENGTH 9
#define VCARD_HEADER_LENGTH 11
#define VCAL_HEADER_LENGTH 15

    NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
    if ([MIMEType length])
        return MIMEType;

    int length = [self length];
    const char *bytes = [self bytes];

    // The search window stops SCRIPT_TAG_LENGTH - 1 bytes short of the end so
    // the longest tag compared below (7 bytes) always fits inside the data.
    const char *p = bytes;
    int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (SCRIPT_TAG_LENGTH - 1);
    while (remaining > 0) {
        // Look for a "<".
        const char *hit = memchr(p, '<', remaining);
        if (!hit) {
            break;
        }

        // If we found a "<", look for "<html>", "<a ", "<script" or "<title>".
        if (strncasecmp(hit, "<html>", strlen("<html>")) == 0 ||
            strncasecmp(hit, "<a ", strlen("<a ")) == 0 ||
            strncasecmp(hit, "<script", strlen("<script")) == 0 ||
            strncasecmp(hit, "<title>", strlen("<title>")) == 0) {
            return @"text/html";
        }

        // Skip the "<" and continue.
        remaining -= (hit + 1) - p;
        p = hit + 1;
    }

    // Test for a broken server which has sent the content type as part of the content.
    // This code could be improved to look for other mime types.
    p = bytes;
    remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (TEXT_HTML_LENGTH - 1);
    while (remaining > 0) {
        // Look for a "t" or "T", whichever comes first.
        const char *hit = NULL;
        const char *lowerhit = memchr(p, 't', remaining);
        const char *upperhit = memchr(p, 'T', remaining);
        if (!lowerhit && !upperhit) {
            break;
        }
        if (!lowerhit) {
            hit = upperhit;
        } else if (!upperhit) {
            hit = lowerhit;
        } else {
            hit = MIN(lowerhit, upperhit);
        }

        // If we found a "t/T", look for "text/html".
        if (strncasecmp(hit, "text/html", TEXT_HTML_LENGTH) == 0) {
            return @"text/html";
        }

        // Skip the "t/T" and continue.
        remaining -= (hit + 1) - p;
        p = hit + 1;
    }

    if ((length >= VCARD_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCARD", VCARD_HEADER_LENGTH) == 0) {
        return @"text/vcard";
    }
    if ((length >= VCAL_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCALENDAR", VCAL_HEADER_LENGTH) == 0) {
        return @"text/calendar";
    }

    // Test for plain text: every byte must be printable ASCII, tab, CR or LF.
    // Unlike the searches above, this scans the whole receiver.
    int i;
    for (i = 0; i < length; i++) {
        char c = bytes[i];
        if ((c < 0x20 || c > 0x7E) && (c != '\t' && c != '\r' && c != '\n')) {
            break;
        }
    }
    if (i == length) {
        // Didn't encounter any bad characters, looks like plain text.
        return @"text/plain";
    }

    // Looks like this is a binary file.

    // Sniff for the JPEG magic number.
    if ((length >= JPEG_MAGIC_NUMBER_LENGTH) && strncmp(bytes, "\xFF\xD8\xFF\xE0", JPEG_MAGIC_NUMBER_LENGTH) == 0) {
        return @"image/jpeg";
    }

#undef JPEG_MAGIC_NUMBER_LENGTH
#undef SCRIPT_TAG_LENGTH
#undef TEXT_HTML_LENGTH
#undef VCARD_HEADER_LENGTH
#undef VCAL_HEADER_LENGTH

    return nil;
}

@end
|
268 |
|
@implementation NSData (WebNSDataExtras)

// Returns YES when the receiver's bytes are exactly the characters of the
// NUL-terminated `string`, ignoring ASCII case. The lengths must match, so
// empty data or data that is only a prefix of `string` does not compare equal.
- (BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
{
    ASSERT(string);

    size_t stringLength = strlen(string);
    if ([self length] != stringLength)
        return NO;
    // Both are empty; avoid handing a possibly-NULL bytes pointer to strncasecmp.
    if (stringLength == 0)
        return YES;

    const char *bytes = [self bytes];
    return strncasecmp(bytes, string, stringLength) == 0;
}

static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len) {

    // According to the HTTP specification EOL is defined as
    // a CRLF pair.  Unfortunately, some servers will use LF
    // instead.  Worse yet, some servers will use a combination
    // of both (e.g. <header>CRLFLF<body>), so findEOL needs
    // to be more forgiving.  It will now accept CRLF, LF, or
    // CR.
    //
    // It returns NULL if EOL is not found or it will return
    // a pointer to the first terminating character.
    CFIndex i;
    for (i = 0; i < len; i++)
    {
        UInt8 c = bytes[i];
        if ('\n' == c) return bytes + i;
        if ('\r' == c)
        {
            // Check to see if spanning buffer bounds
            // (CRLF is across reads).  If so, wait for
            // next read.
            if (i + 1 == len) break;

            return bytes + i;
        }
    }

    return NULL;
}

// Parses the receiver as a block of RFC 822 style header lines and returns
// them as a dictionary keyed by capitalized field name. Parsing stops at the
// first blank line or when no further line terminator is found. Lines that
// start with a space or tab are appended to the previous field's value; a
// field name that repeats has its values joined with ", ". Lines without a
// colon are skipped.
- (NSMutableDictionary *)_webkit_parseRFC822HeaderFields
{
    NSMutableDictionary *headerFields = [NSMutableDictionary dictionary];

    const UInt8 *bytes = [self bytes];
    NSUInteger length = [self length];
    NSString *lastKey = nil;
    const UInt8 *eol;

    // Loop over lines until we're past the header, or we can't find any more end-of-lines
    while ((eol = _findEOL(bytes, length))) {
        const UInt8 *line = bytes;
        CFIndex lineLength = eol - bytes;

        // Move bytes to the character after the terminator as returned by _findEOL.
        bytes = eol + 1;
        if (('\r' == *eol) && ('\n' == *bytes)) {
            bytes++; // Safe since _findEOL won't return a spanning CRLF.
        }

        length -= (bytes - line);
        if (lineLength == 0) {
            // Blank line; we're at the end of the header
            break;
        }
        else if (*line == ' ' || *line == '\t') {
            // Continuation of the previous header
            if (!lastKey) {
                // malformed header; ignore it and continue
                continue;
            }
            else {
                // Merge the continuation of the previous header
                NSString *currentValue = [headerFields objectForKey:lastKey];
                NSString *newValue = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, lineLength, kCFStringEncodingISOLatin1, FALSE));
                ASSERT(currentValue);
                ASSERT(newValue);
                NSString *mergedValue = [[NSString alloc] initWithFormat:@"%@%@", currentValue, newValue];
                [headerFields setObject:(NSString *)mergedValue forKey:lastKey];
                [newValue release];
                [mergedValue release];
                // Note: currentValue is autoreleased
            }
        }
        else {
            // Brand new header
            const UInt8 *colon;
            for (colon = line; *colon != ':' && colon != eol; colon ++) {
                // empty loop
            }
            if (colon == eol) {
                // malformed header; ignore it and continue
                continue;
            }
            else {
                // The key is everything before the colon, in canonical capitalization.
                lastKey = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, colon - line, kCFStringEncodingISOLatin1, FALSE));
                [lastKey autorelease];
                NSString *value = [lastKey _web_capitalizeRFC822HeaderFieldName];
                lastKey = value;
                // Skip the colon and any spaces or tabs that follow it.
                for (colon++; colon != eol; colon++) {
                    if (*colon != ' ' && *colon != '\t') {
                        break;
                    }
                }
                if (colon == eol) {
                    value = [[NSString alloc] initWithString:@""];
                    [value autorelease];
                }
                else {
                    value = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, colon, eol-colon, kCFStringEncodingISOLatin1, FALSE));
                    [value autorelease];
                }
                // A repeated field name is folded into one comma-separated value.
                NSString *oldValue = [headerFields objectForKey:lastKey];
                if (oldValue) {
                    NSString *newValue = [[NSString alloc] initWithFormat:@"%@, %@", oldValue, value];
                    value = newValue;
                    [newValue autorelease];
                }
                [headerFields setObject:(NSString *)value forKey:lastKey];
            }
        }
    }

    return headerFields;
}

// Returns YES when the receiver is non-empty and its first byte is a line feed.
- (BOOL)_web_startsWithBlankLine
{
    return [self length] > 0 && ((const char *)[self bytes])[0] == '\n';
}

// Returns the offset of the first byte after the first blank line, or
// NSNotFound. Recognises "\n\n", "\r\n\n", "\r\n\r\n", and a CRLF at the very
// start of the data. Only start positions with more than four bytes after
// them are examined, so a receiver of four bytes or fewer always yields
// NSNotFound.
- (NSInteger)_web_locationAfterFirstBlankLine
{
    const char *bytes = (const char *)[self bytes];
    NSUInteger length = [self length];

    NSUInteger i;
    // Written as "i + 4 < length" rather than "i < length - 4" so that data
    // shorter than four bytes cannot wrap the unsigned bound around.
    for (i = 0; i + 4 < length; i++) {

        //  Support for Acrobat. It sends "\n\n".
        if (bytes[i] == '\n' && bytes[i+1] == '\n') {
            return i+2;
        }

        // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
        if (bytes[i] == '\r' && bytes[i+1] == '\n') {
            i += 2;
            if (i == 2) {
                return i;
            } else if (bytes[i] == '\n') {
                // Support for Director. It sends "\r\n\n" (3880387).
                return i+1;
            } else if (bytes[i] == '\r' && bytes[i+1] == '\n') {
                // Support for Flash. It sends "\r\n\r\n" (3758113).
                return i+2;
            }
        }
    }
    return NSNotFound;
}

@end