webengine/osswebengine/WebCore/platform/TextEncodingRegistry.cpp
changeset 0 dd21522fd290
child 13 10e98eab6f85
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebCore/platform/TextEncodingRegistry.cpp	Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextEncodingRegistry.h"
+
+#include "PlatformString.h"
+#include "TextCodecLatin1.h"
+#include "TextCodecUTF16.h"
+#include <ctype.h>
+#include <wtf/Assertions.h>
+#include <wtf/HashMap.h>
+
+#if USE(ICU_UNICODE)
+#include "TextCodecICU.h"
+#endif
+#if PLATFORM(MAC)
+#include "TextCodecMac.h"
+#endif
+#if PLATFORM(QT)
+#include "qt/TextCodecQt.h"
+#endif
+#if PLATFORM(SYMBIAN)
+#include "StreamingTextCodecSymbian.h"
+#endif
+
+namespace WebCore {
+
+// Maximum length of an encoding name or alias, not counting the terminating
+// NUL. Registered aliases are asserted to fit within it, and it sizes the
+// buffer used when a name is looked up from a UChar string.
+const size_t maxEncodingNameLength = 63;
+
+// Hash traits for encoding names, which are all-ASCII strings. Both the hash
+// and the equality test fold ASCII case and skip every character that is not
+// an ASCII letter or digit, so "UTF-8", "utf8" and "Utf_8" are the same key.
+// Classification is done with explicit ASCII range checks rather than
+// isalnum()/tolower(): passing a negative char to those functions is undefined
+// behavior, and their results vary with the C locale. Bytes outside ASCII are
+// therefore always skipped.
+struct TextEncodingNameHash {
+
+    // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
+    // or anything like that.
+    static const unsigned PHI = 0x9e3779b9U;
+
+    // Returns the next ASCII letter or digit found at or after s, folded to
+    // lower case, and advances s just past it. Returns 0, leaving s on the
+    // terminator, once the end of the string is reached.
+    static char nextFoldedAlphanumeric(const char*& s)
+    {
+        for (; *s; ++s) {
+            const char c = *s;
+            if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z')) {
+                ++s;
+                return c;
+            }
+            if (c >= 'A' && c <= 'Z') {
+                ++s;
+                return static_cast<char>(c - 'A' + 'a');
+            }
+        }
+        return 0;
+    }
+
+    // Returns true if s1 and s2 contain the same sequence of ASCII letters
+    // and digits, ignoring case and every other character.
+    static bool equal(const char* s1, const char* s2)
+    {
+        for (;;) {
+            const char c1 = nextFoldedAlphanumeric(s1);
+            const char c2 = nextFoldedAlphanumeric(s2);
+            if (c1 != c2)
+                return false;
+            // Both strings ended at the same point.
+            if (!c1)
+                return true;
+        }
+    }
+
+    // This algorithm is the one-at-a-time hash from:
+    // http://burtleburtle.net/bob/hash/hashfaq.html
+    // http://burtleburtle.net/bob/hash/doobs.html
+    // Only the folded letters and digits of s are mixed in, so any two
+    // strings for which equal() returns true hash to the same value.
+    static unsigned hash(const char* s)
+    {
+        unsigned h = PHI;
+        while (const char c = nextFoldedAlphanumeric(s)) {
+            h += static_cast<unsigned>(c);
+            h += (h << 10);
+            h ^= (h >> 6);
+        }
+        // Final avalanche step of the one-at-a-time hash.
+        h += (h << 3);
+        h ^= (h >> 11);
+        h += (h << 15);
+        return h;
+    }
+
+};
+
+// Pairs the function that creates a codec with the opaque data pointer that is
+// handed back to that function when a codec is created. A default-constructed
+// factory holds null for both members.
+struct TextCodecFactory {
+    NewTextCodecFunction function;
+    const void* additionalData;
+
+    TextCodecFactory(NewTextCodecFunction newCodec = 0, const void* data = 0)
+        : function(newCodec)
+        , additionalData(data)
+    {
+    }
+};
+
+// Maps every registered alias to its atomic canonical name. Keys are hashed
+// and compared with TextEncodingNameHash.
+typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
+// Maps an encoding name to the factory that creates its codec. No hash is
+// given, so HashMap's default for const char* keys applies (presumably
+// pointer identity, relying on the keys being atomic names -- TODO confirm).
+typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
+
+// Both maps are null until buildBaseTextCodecMaps() allocates them.
+static TextEncodingNameMap* textEncodingNameMap;
+static TextCodecMap* textCodecMap;
+// Set to true after extendTextCodecMaps() has been run.
+static bool didExtendTextCodecMaps;
+
+#if ERROR_DISABLED
+
+// No-op variant used when ERROR_DISABLED is set (presumably LOG_ERROR does
+// nothing in that configuration, so there would be nothing to report).
+static inline void checkExistingName(const char*, const char*) { }
+
+#else
+
+// Reports, through LOG_ERROR, an attempt to register alias under atomicName
+// when the name map already maps alias to a different atomic name. The map
+// itself is not modified here.
+static void checkExistingName(const char* alias, const char* atomicName)
+{
+    const char* const previousName = textEncodingNameMap->get(alias);
+
+    // Not registered yet, or already registered to this very atomic name
+    // (compared by pointer): nothing to complain about.
+    if (!previousName || previousName == atomicName)
+        return;
+
+    // Keep the warning silent about one case where we know this will happen.
+    const bool isKnownConflict = strcmp(alias, "ISO-8859-8-I") == 0
+        && strcmp(previousName, "ISO-8859-8-I") == 0
+        && strcmp(atomicName, "ISO_8859-8:1988") == 0;
+    if (isKnownConflict)
+        return;
+
+    LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
+        alias, previousName, atomicName);
+}
+
+#endif
+
+// Registration callback passed to the codecs' registerEncodingNames()
+// functions. Adds alias to the name map, pointing at the atomic name that
+// name already resolves to, or at name itself when name is not in the map yet.
+static void addToTextEncodingNameMap(const char* alias, const char* name)
+{
+    ASSERT(strlen(alias) <= maxEncodingNameLength);
+
+    const char* atomicName = textEncodingNameMap->get(name);
+    // A name that is not registered yet may only be introduced as an alias
+    // of itself.
+    ASSERT(strcmp(alias, name) == 0 || atomicName);
+
+    const char* const target = atomicName ? atomicName : name;
+    checkExistingName(alias, target);
+    textEncodingNameMap->add(alias, target);
+}
+
+// Registration callback passed to the codecs' registerCodecs() functions.
+// Resolves name through TextEncoding and adds a factory built from function
+// and additionalData to the codec map under the resolved encoding's name().
+static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
+{
+    TextEncoding encoding(name);
+    // The name is expected to have been registered as an encoding name first.
+    ASSERT(encoding.isValid());
+
+    const TextCodecFactory factory(function, additionalData);
+    textCodecMap->add(encoding.name(), factory);
+}
+
+// Allocates both maps and registers the base set of encodings: Latin-1 and
+// UTF-16 always, then the ICU base set and/or the Symbian codecs depending on
+// the build configuration. Called by atomicCanonicalTextEncodingName() the
+// first time a name is looked up.
+static void buildBaseTextCodecMaps()
+{
+    textCodecMap = new TextCodecMap;
+    textEncodingNameMap = new TextEncodingNameMap;
+
+    TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecLatin1::registerCodecs(addToTextCodecMap);
+
+    TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecUTF16::registerCodecs(addToTextCodecMap);
+
+#if USE(ICU_UNICODE)&& !USE(SYMBIAN_UNICODE)
+    TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
+    TextCodecICU::registerBaseCodecs(addToTextCodecMap);
+#endif
+#if PLATFORM(SYMBIAN)
+    // The ICU codecs use too much memory on Symbian, so we fall back to the
+    // codecs provided by Symbian OS.
+    TextCodecSymbian::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecSymbian::registerCodecs(addToTextCodecMap);
+#endif
+
+}
+
+// Registers the additional, platform-dependent encodings (ICU extended set,
+// Qt, Mac, Symbian) on top of the base set. Called by
+// atomicCanonicalTextEncodingName() the first time a name is not found in the
+// base maps.
+static void extendTextCodecMaps()
+{
+#if USE(ICU_UNICODE) && !USE(SYMBIAN_UNICODE)
+    TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
+    TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
+#if USE(QT4_UNICODE)
+    TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecQt::registerCodecs(addToTextCodecMap);
+#endif
+
+#if PLATFORM(MAC)
+    TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecMac::registerCodecs(addToTextCodecMap);
+#endif
+#if PLATFORM(SYMBIAN)
+    // Use the Symbian OS codecs.
+    // NOTE(review): buildBaseTextCodecMaps() already makes these same two
+    // calls on Symbian; this looks like a repeated registration -- confirm
+    // whether it is intentional.
+    TextCodecSymbian::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecSymbian::registerCodecs(addToTextCodecMap);
+#endif
+}
+
+// Creates a codec for encoding by calling the factory function stored in the
+// codec map under encoding.name(), passing it the encoding and the factory's
+// additional data. The codec map must already exist and must hold a factory
+// for the encoding; both conditions are only asserted.
+std::auto_ptr<TextCodec> newTextCodec(const TextEncoding& encoding)
+{
+    ASSERT(textCodecMap);
+
+    const char* const codecName = encoding.name();
+    TextCodecFactory factory = textCodecMap->get(codecName);
+    ASSERT(factory.function);
+
+    const void* const extraData = factory.additionalData;
+    return factory.function(encoding, extraData);
+}
+
+// Returns the atomic canonical name registered for name, or 0 if name is
+// null, empty, or unknown. The base maps are built on first use; the extended
+// maps are loaded only when a name is not found in the base maps, and at most
+// once.
+const char* atomicCanonicalTextEncodingName(const char* name)
+{
+    if (!name || !*name)
+        return 0;
+
+    if (!textEncodingNameMap)
+        buildBaseTextCodecMaps();
+
+    const char* const knownName = textEncodingNameMap->get(name);
+    // Either the name was found, or the extended maps have already been
+    // loaded and the miss (a null knownName) is final.
+    if (knownName || didExtendTextCodecMaps)
+        return knownName;
+
+    // First miss: load the extended encodings and try once more.
+    extendTextCodecMaps();
+    didExtendTextCodecMaps = true;
+    return textEncodingNameMap->get(name);
+}
+
+// Overload for names given as UChar strings. Copies the ASCII letters and
+// digits of characters[0..length) into a NUL-terminated stack buffer and
+// looks that up with the const char* overload. Dropping the other characters
+// does not change which entry is found, because the name map's hash and
+// comparison skip non-alphanumeric characters as well.
+//
+// Returns 0 if the input contains more than maxEncodingNameLength letters and
+// digits, or if the const char* overload returns 0 for the filtered name.
+const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
+{
+    char buffer[maxEncodingNameLength + 1];
+    size_t j = 0;
+    for (size_t i = 0; i < length; ++i) {
+        const UChar c = characters[i];
+        // Explicit ASCII range checks instead of isalnum(): isalnum() is
+        // undefined for values that do not fit in an unsigned char, which a
+        // UChar may hold, and its answer for 0x80-0xFF depends on the C locale.
+        const bool isNameCharacter = (c >= '0' && c <= '9')
+            || (c >= 'A' && c <= 'Z')
+            || (c >= 'a' && c <= 'z');
+        if (!isNameCharacter)
+            continue;
+        // Too long to be a registered name.
+        if (j == maxEncodingNameLength)
+            return 0;
+        // c is known to be ASCII here, so the narrowing is lossless.
+        buffer[j++] = static_cast<char>(c);
+    }
+    buffer[j] = 0;
+    return atomicCanonicalTextEncodingName(buffer);
+}
+
+// Returns true as long as the extended codec maps have not been loaded, i.e.
+// no lookup so far has missed in the base maps.
+bool noExtendedTextEncodingNameUsed()
+{
+    const bool extendedMapsLoaded = didExtendTextCodecMaps;
+    return !extendedMapsLoaded;
+}
+
+} // namespace WebCore