WebKit/mac/Misc/WebNSDataExtras.m
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  *
       
     8  * 1.  Redistributions of source code must retain the above copyright
       
     9  *     notice, this list of conditions and the following disclaimer. 
       
    10  * 2.  Redistributions in binary form must reproduce the above copyright
       
    11  *     notice, this list of conditions and the following disclaimer in the
       
    12  *     documentation and/or other materials provided with the distribution. 
       
    13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
       
    14  *     its contributors may be used to endorse or promote products derived
       
    15  *     from this software without specific prior written permission. 
       
    16  *
       
    17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
       
    18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
       
    20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
       
    21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
       
    23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
       
    24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       
    26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    27  */
       
    28 
       
    29 #import <WebKit/WebNSDataExtras.h>
       
    30 #import <WebKit/WebNSDataExtrasPrivate.h>
       
    31 
       
    32 #import <wtf/Assertions.h>
       
    33 
       
    34 @interface NSString (WebNSDataExtrasInternal)
       
    35 - (NSString *)_web_capitalizeRFC822HeaderFieldName;
       
    36 @end
       
    37 
       
    38 @implementation NSString (WebNSDataExtrasInternal)
       
    39 
       
    40 -(NSString *)_web_capitalizeRFC822HeaderFieldName
       
    41 {
       
    42     CFStringRef name = (CFStringRef)self;
       
    43     NSString *result = nil;
       
    44 
       
    45     CFIndex i; 
       
    46     CFIndex len = CFStringGetLength(name);
       
    47     char *charPtr = NULL;
       
    48     UniChar *uniCharPtr = NULL;
       
    49     Boolean useUniCharPtr = FALSE;
       
    50     Boolean shouldCapitalize = TRUE;
       
    51     Boolean somethingChanged = FALSE;
       
    52     
       
    53     for (i = 0; i < len; i ++) {
       
    54         UniChar ch = CFStringGetCharacterAtIndex(name, i);
       
    55         Boolean replace = FALSE;
       
    56         if (shouldCapitalize && ch >= 'a' && ch <= 'z') {
       
    57             ch = ch + 'A' - 'a';
       
    58             replace = TRUE;
       
    59         } 
       
    60         else if (!shouldCapitalize && ch >= 'A' && ch <= 'Z') {
       
    61             ch = ch + 'a' - 'A';
       
    62             replace = TRUE;
       
    63         }
       
    64         if (replace) {
       
    65             if (!somethingChanged) {
       
    66                 somethingChanged = TRUE;
       
    67                 if (CFStringGetBytes(name, CFRangeMake(0, len), kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL) == len) {
       
    68                     // Can be encoded in ISOLatin1
       
    69                     useUniCharPtr = FALSE;
       
    70                     charPtr = CFAllocatorAllocate(NULL, len + 1, 0);
       
    71                     CFStringGetCString(name, charPtr, len+1, kCFStringEncodingISOLatin1);
       
    72                 } 
       
    73                 else {
       
    74                     useUniCharPtr = TRUE;
       
    75                     uniCharPtr = CFAllocatorAllocate(NULL, len * sizeof(UniChar), 0);
       
    76                     CFStringGetCharacters(name, CFRangeMake(0, len), uniCharPtr);
       
    77                 }
       
    78             }
       
    79             if (useUniCharPtr) {
       
    80                 uniCharPtr[i] = ch;
       
    81             } 
       
    82             else {
       
    83                 charPtr[i] = ch;
       
    84             }
       
    85         }
       
    86         if (ch == '-') {
       
    87             shouldCapitalize = TRUE;
       
    88         } 
       
    89         else {
       
    90             shouldCapitalize = FALSE;
       
    91         }
       
    92     }
       
    93     if (somethingChanged) {
       
    94         if (useUniCharPtr) {
       
    95             result = (NSString *)CFMakeCollectable(CFStringCreateWithCharactersNoCopy(NULL, uniCharPtr, len, NULL));
       
    96         } 
       
    97         else {
       
    98             result = (NSString *)CFMakeCollectable(CFStringCreateWithCStringNoCopy(NULL, charPtr, kCFStringEncodingISOLatin1, NULL));
       
    99         }
       
   100     } 
       
   101     else {
       
   102         result = [self retain];
       
   103     }
       
   104     
       
   105     return [result autorelease];
       
   106 }
       
   107 
       
   108 @end
       
   109 
       
   110 @implementation NSData (WebKitExtras)
       
   111 
       
   112 -(NSString *)_webkit_guessedMIMETypeForXML
       
   113 {
       
   114     int length = [self length];
       
   115     const UInt8 *bytes = [self bytes];
       
   116     
       
   117 #define CHANNEL_TAG_LENGTH 7
       
   118     
       
   119     const char *p = (const char *)bytes;
       
   120     int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (CHANNEL_TAG_LENGTH - 1);
       
   121     
       
   122     BOOL foundRDF = false;
       
   123     
       
   124     while (remaining > 0) {
       
   125         // Look for a "<".
       
   126         const char *hit = memchr(p, '<', remaining);
       
   127         if (!hit) {
       
   128             break;
       
   129         }
       
   130         
       
   131         // We are trying to identify RSS or Atom. RSS has a top-level
       
   132         // element of either <rss> or <rdf>. However, there are
       
   133         // non-RSS RDF files, so in the case of <rdf> we further look
       
   134         // for a <channel> element. In the case of an Atom file, a
       
   135         // top-level <feed> element is all we need to see. Only tags
       
   136         // starting with <? or <! can precede the root element. We
       
   137         // bail if we don't find an <rss>, <feed> or <rdf> element
       
   138         // right after those.
       
   139         
       
   140         if (foundRDF) {
       
   141             if (strncasecmp(hit, "<channel", strlen("<channel")) == 0) {
       
   142                 return @"application/rss+xml";
       
   143             }
       
   144         } else if (strncasecmp(hit, "<rdf", strlen("<rdf")) == 0) {
       
   145             foundRDF = TRUE;
       
   146         } else if (strncasecmp(hit, "<rss", strlen("<rss")) == 0) {
       
   147             return @"application/rss+xml";
       
   148         } else if (strncasecmp(hit, "<feed", strlen("<feed")) == 0) {
       
   149             return @"application/atom+xml";
       
   150         } else if (strncasecmp(hit, "<?", strlen("<?")) != 0 && strncasecmp(hit, "<!", strlen("<!")) != 0) {
       
   151             return nil;
       
   152         }
       
   153         
       
   154         // Skip the "<" and continue.
       
   155         remaining -= (hit + 1) - p;
       
   156         p = hit + 1;
       
   157     }
       
   158     
       
   159     return nil;
       
   160 }
       
   161 
       
   162 -(NSString *)_webkit_guessedMIMEType
       
   163 {
       
   164 #define JPEG_MAGIC_NUMBER_LENGTH 4
       
   165 #define SCRIPT_TAG_LENGTH 7
       
   166 #define TEXT_HTML_LENGTH 9
       
   167 #define VCARD_HEADER_LENGTH 11
       
   168 #define VCAL_HEADER_LENGTH 15
       
   169     
       
   170     NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
       
   171     if ([MIMEType length])
       
   172         return MIMEType;
       
   173     
       
   174     int length = [self length];
       
   175     const char *bytes = [self bytes];
       
   176     
       
   177     const char *p = bytes;
       
   178     int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (SCRIPT_TAG_LENGTH - 1);
       
   179     while (remaining > 0) {
       
   180         // Look for a "<".
       
   181         const char *hit = memchr(p, '<', remaining);
       
   182         if (!hit) {
       
   183             break;
       
   184         }
       
   185         
       
   186         // If we found a "<", look for "<html>" or "<a " or "<script".
       
   187         if (strncasecmp(hit, "<html>",  strlen("<html>")) == 0 ||
       
   188             strncasecmp(hit, "<a ",     strlen("<a ")) == 0 ||
       
   189             strncasecmp(hit, "<script", strlen("<script")) == 0 ||
       
   190             strncasecmp(hit, "<title>", strlen("<title>")) == 0) {
       
   191             return @"text/html";
       
   192         }
       
   193         
       
   194         // Skip the "<" and continue.
       
   195         remaining -= (hit + 1) - p;
       
   196         p = hit + 1;
       
   197     }
       
   198     
       
   199     // Test for a broken server which has sent the content type as part of the content.
       
   200     // This code could be improved to look for other mime types.
       
   201     p = bytes;
       
   202     remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (TEXT_HTML_LENGTH - 1);
       
   203     while (remaining > 0) {
       
   204         // Look for a "t" or "T".
       
   205         const char *hit = NULL;
       
   206         const char *lowerhit = memchr(p, 't', remaining);
       
   207         const char *upperhit = memchr(p, 'T', remaining);
       
   208         if (!lowerhit && !upperhit) {
       
   209             break;
       
   210         }
       
   211         if (!lowerhit) {
       
   212             hit = upperhit;
       
   213         }
       
   214         else if (!upperhit) {
       
   215             hit = lowerhit;
       
   216         }
       
   217         else {
       
   218             hit = MIN(lowerhit, upperhit);
       
   219         }
       
   220         
       
   221         // If we found a "t/T", look for "text/html".
       
   222         if (strncasecmp(hit, "text/html", TEXT_HTML_LENGTH) == 0) {
       
   223             return @"text/html";
       
   224         }
       
   225         
       
   226         // Skip the "t/T" and continue.
       
   227         remaining -= (hit + 1) - p;
       
   228         p = hit + 1;
       
   229     }
       
   230     
       
   231     if ((length >= VCARD_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCARD", VCARD_HEADER_LENGTH) == 0) {
       
   232         return @"text/vcard";
       
   233     }
       
   234     if ((length >= VCAL_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCALENDAR", VCAL_HEADER_LENGTH) == 0) {
       
   235         return @"text/calendar";
       
   236     }
       
   237     
       
   238     // Test for plain text.
       
   239     int i;
       
   240     for(i=0; i<length; i++){
       
   241         char c = bytes[i];
       
   242         if ((c < 0x20 || c > 0x7E) && (c != '\t' && c != '\r' && c != '\n')) {
       
   243             break;
       
   244         }
       
   245     }
       
   246     if (i == length) {
       
   247         // Didn't encounter any bad characters, looks like plain text.
       
   248         return @"text/plain";
       
   249     }
       
   250     
       
   251     // Looks like this is a binary file.
       
   252     
       
   253     // Sniff for the JPEG magic number.
       
   254     if ((length >= JPEG_MAGIC_NUMBER_LENGTH) && strncmp(bytes, "\xFF\xD8\xFF\xE0", JPEG_MAGIC_NUMBER_LENGTH) == 0) {
       
   255         return @"image/jpeg";
       
   256     }
       
   257     
       
   258 #undef JPEG_MAGIC_NUMBER_LENGTH
       
   259 #undef SCRIPT_TAG_LENGTH
       
   260 #undef TEXT_HTML_LENGTH
       
   261 #undef VCARD_HEADER_LENGTH
       
   262 #undef VCAL_HEADER_LENGTH
       
   263     
       
   264     return nil;
       
   265 }
       
   266 
       
   267 @end
       
   268 
       
   269 @implementation NSData (WebNSDataExtras)
       
   270 
       
   271 -(BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
       
   272 {
       
   273     ASSERT(string);
       
   274     
       
   275     const char *bytes = [self bytes];
       
   276     return strncasecmp(bytes, string, [self length]) == 0;
       
   277 }
       
   278 
       
   279 static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len) {
       
   280     
       
   281     // According to the HTTP specification EOL is defined as
       
   282     // a CRLF pair.  Unfortunately, some servers will use LF
       
   283     // instead.  Worse yet, some servers will use a combination
       
   284     // of both (e.g. <header>CRLFLF<body>), so findEOL needs
       
   285     // to be more forgiving.  It will now accept CRLF, LF, or
       
   286     // CR.
       
   287     //
       
   288     // It returns NULL if EOL is not found or it will return
       
   289     // a pointer to the first terminating character.
       
   290     CFIndex i;
       
   291     for (i = 0;  i < len; i++)
       
   292     {
       
   293         UInt8 c = bytes[i];
       
   294         if ('\n' == c) return bytes + i;
       
   295         if ('\r' == c)
       
   296         {
       
   297             // Check to see if spanning buffer bounds
       
   298             // (CRLF is across reads).  If so, wait for
       
   299             // next read.
       
   300             if (i + 1 == len) break;
       
   301                 
       
   302             return bytes + i;
       
   303         }
       
   304     }
       
   305     
       
   306     return NULL;
       
   307 }
       
   308 
       
   309 -(NSMutableDictionary *)_webkit_parseRFC822HeaderFields
       
   310 {
       
   311     NSMutableDictionary *headerFields = [NSMutableDictionary dictionary];
       
   312 
       
   313     const UInt8 *bytes = [self bytes];
       
   314     unsigned length = [self length];
       
   315     NSString *lastKey = nil;
       
   316     const UInt8 *eol;
       
   317 
       
   318     // Loop over lines until we're past the header, or we can't find any more end-of-lines
       
   319     while ((eol = _findEOL(bytes, length))) {
       
   320         const UInt8 *line = bytes;
       
   321         SInt32 lineLength = eol - bytes;
       
   322 
       
   323         // Move bytes to the character after the terminator as returned by _findEOL.
       
   324         bytes = eol + 1;
       
   325         if (('\r' == *eol) && ('\n' == *bytes)) {
       
   326             bytes++; // Safe since _findEOL won't return a spanning CRLF.
       
   327         }
       
   328 
       
   329         length -= (bytes - line);
       
   330         if (lineLength == 0) {
       
   331             // Blank line; we're at the end of the header
       
   332             break;
       
   333         }
       
   334         else if (*line == ' ' || *line == '\t') {
       
   335             // Continuation of the previous header
       
   336             if (!lastKey) {
       
   337                 // malformed header; ignore it and continue
       
   338                 continue;
       
   339             }
       
   340             else {
       
   341                 // Merge the continuation of the previous header
       
   342                 NSString *currentValue = [headerFields objectForKey:lastKey];
       
   343                 NSString *newValue = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, lineLength, kCFStringEncodingISOLatin1, FALSE));
       
   344                 ASSERT(currentValue);
       
   345                 ASSERT(newValue);
       
   346                 NSString *mergedValue = [[NSString alloc] initWithFormat:@"%@%@", currentValue, newValue];
       
   347                 [headerFields setObject:(NSString *)mergedValue forKey:lastKey];
       
   348                 [newValue release];
       
   349                 [mergedValue release];
       
   350                 // Note: currentValue is autoreleased
       
   351             }
       
   352         }
       
   353         else {
       
   354             // Brand new header
       
   355             const UInt8 *colon;
       
   356             for (colon = line; *colon != ':' && colon != eol; colon ++) {
       
   357                 // empty loop
       
   358             }
       
   359             if (colon == eol) {
       
   360                 // malformed header; ignore it and continue
       
   361                 continue;
       
   362             }
       
   363             else {
       
   364                 lastKey = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, colon - line, kCFStringEncodingISOLatin1, FALSE));
       
   365                 [lastKey autorelease];
       
   366                 NSString *value = [lastKey _web_capitalizeRFC822HeaderFieldName];
       
   367                 lastKey = value;
       
   368                 for (colon++; colon != eol; colon++) {
       
   369                     if (*colon != ' ' && *colon != '\t') {
       
   370                         break;
       
   371                     }
       
   372                 }
       
   373                 if (colon == eol) {
       
   374                     value = [[NSString alloc] initWithString:@""];
       
   375                     [value autorelease];
       
   376                 }
       
   377                 else {
       
   378                     value = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, colon, eol-colon, kCFStringEncodingISOLatin1, FALSE));
       
   379                     [value autorelease];
       
   380                 }
       
   381                 NSString *oldValue = [headerFields objectForKey:lastKey];
       
   382                 if (oldValue) {
       
   383                     NSString *newValue = [[NSString alloc] initWithFormat:@"%@, %@", oldValue, value];
       
   384                     value = newValue;
       
   385                     [newValue autorelease];
       
   386                 }
       
   387                 [headerFields setObject:(NSString *)value forKey:lastKey];
       
   388             }
       
   389         }
       
   390     }
       
   391 
       
   392     return headerFields;
       
   393 }
       
   394 
       
   395 - (BOOL)_web_startsWithBlankLine
       
   396 {
       
   397     return [self length] > 0 && ((const char *)[self bytes])[0] == '\n';
       
   398 }
       
   399 
       
   400 - (NSInteger)_web_locationAfterFirstBlankLine
       
   401 {
       
   402     const char *bytes = (const char *)[self bytes];
       
   403     unsigned length = [self length];
       
   404     
       
   405     unsigned i;
       
   406     for (i = 0; i < length - 4; i++) {
       
   407         
       
   408         //  Support for Acrobat. It sends "\n\n".
       
   409         if (bytes[i] == '\n' && bytes[i+1] == '\n') {
       
   410             return i+2;
       
   411         }
       
   412         
       
   413         // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
       
   414         if (bytes[i] == '\r' && bytes[i+1] == '\n') {
       
   415             i += 2;
       
   416             if (i == 2) {
       
   417                 return i;
       
   418             } else if (bytes[i] == '\n') {
       
   419                 // Support for Director. It sends "\r\n\n" (3880387).
       
   420                 return i+1;
       
   421             } else if (bytes[i] == '\r' && bytes[i+1] == '\n') {
       
   422                 // Support for Flash. It sends "\r\n\r\n" (3758113).
       
   423                 return i+2;
       
   424             }
       
   425         }
       
   426     }
       
   427     return NSNotFound;
       
   428 }
       
   429 
       
   430 @end