webengine/osswebengine/WebCore/platform/RegularExpression.cpp
changeset 0 dd21522fd290
child 5 10e98eab6f85
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebCore/platform/RegularExpression.cpp	Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2004 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "Logging.h"
+#include "Shared.h"
+#include <pcre/pcre.h>
+#include <sys/types.h>
+
+namespace WebCore {
+
+// Upper bound on the number of substrings whose offsets are kept for a single match.
+const size_t maxSubstrings = 10;
+// Size of the offsets array handed to pcre_exec: each substring takes a start and an
+// end offset, plus a third entry that pcre uses internally.
+const size_t maxOffsets = 3 * maxSubstrings;
+
+// Shared state behind a RegularExpression: the pattern, its compiled pcre form, and
+// the bookkeeping left behind by the most recent match attempt.
+class RegularExpression::Private : public Shared<RegularExpression::Private>
+{
+public:
+    Private();
+    Private(DeprecatedString pattern, bool caseSensitive, bool glob);
+    ~Private();
+
+    // Compiles `pattern` into `regex`; when glob is true the pattern is first
+    // translated from glob syntax into a regular expression.
+    void compile(bool caseSensitive, bool glob);
+
+    // Pattern text as supplied by the caller (before any glob translation).
+    DeprecatedString pattern;
+    // Result of pcre_compile; NULL when compilation failed.
+    pcre *regex;
+
+    // Subject string of the last match attempt; reset to an empty string when it failed.
+    DeprecatedString lastMatchString;
+    // Offsets vector filled in by pcre_exec; entries 0 and 1 bound the overall match.
+    int lastMatchOffsets[maxOffsets];
+    // Raw return value of the last pcre_exec call (negative on failure).
+    int lastMatchCount;
+    // Start offset of the last match, or -1 when there was none.
+    int lastMatchPos;
+    // Length of the last match, or -1 when there was none.
+    int lastMatchLength;
+};
+
+// Builds the state for an empty, case-sensitive, non-glob pattern.
+// The match bookkeeping starts in the same "no match" state that a failed match()
+// leaves behind, so pos(), matchedLength() and cap() never read indeterminate values
+// when they are called before the first match.
+RegularExpression::Private::Private()
+    : pattern("")
+    , regex(NULL)
+    , lastMatchCount(PCRE_ERROR_NOMATCH)
+    , lastMatchPos(-1)
+    , lastMatchLength(-1)
+{
+    compile(true, false);
+}
+
+// Builds the state for pattern p and compiles it immediately.
+// caseSensitive selects case-sensitive matching; glob requests glob-to-regexp
+// translation before compiling. lastMatchCount starts out as "no match" so that
+// cap() called before any match hands pcre_get_substring a well-defined count.
+RegularExpression::Private::Private(DeprecatedString p, bool caseSensitive, bool glob)
+    : pattern(p)
+    , regex(NULL)
+    , lastMatchCount(PCRE_ERROR_NOMATCH)
+    , lastMatchPos(-1)
+    , lastMatchLength(-1)
+{
+    compile(caseSensitive, glob);
+}
+
+// Translates a glob pattern into an anchored regular expression.
+// Regexp metacharacters that have no meaning in a glob are escaped so they match
+// literally; the glob wildcards '*' and '?' become ".*" and "."; the result is
+// wrapped in ^...$ so it has to match the whole string.
+static DeprecatedString RegExpFromGlob(DeprecatedString glob)
+{
+    DeprecatedString result = glob;
+
+    // escape regexp metacharacters which are NOT glob metacharacters
+
+    // Backslash must be handled first, otherwise the backslashes inserted by the
+    // escapes below would themselves be doubled.
+    result.replace(RegularExpression("\\\\"), "\\\\");
+    result.replace(RegularExpression("\\."), "\\.");
+    result.replace(RegularExpression("\\+"), "\\+");
+    result.replace(RegularExpression("\\$"), "\\$");
+    // FIXME: incorrect for ^ inside bracket group
+    result.replace(RegularExpression("\\^"), "\\^");
+    // Grouping, alternation and counted-repeat characters are plain text in a glob;
+    // left unescaped they would change the meaning of the pattern or make it fail
+    // to compile.
+    result.replace(RegularExpression("\\("), "\\(");
+    result.replace(RegularExpression("\\)"), "\\)");
+    result.replace(RegularExpression("\\|"), "\\|");
+    result.replace(RegularExpression("\\{"), "\\{");
+    result.replace(RegularExpression("\\}"), "\\}");
+
+    // translate glob metacharacters into regexp metacharacters
+    result.replace(RegularExpression("\\*"), ".*");
+    result.replace(RegularExpression("\\?"), ".");
+
+    // Require the glob to match the whole string
+    result = "^" + result + "$";
+
+    return result;
+}
+
+// Compiles `pattern` into `regex`, translating it from glob syntax first when
+// glob is set. A compile failure is logged and leaves `regex` NULL.
+void RegularExpression::Private::compile(bool caseSensitive, bool glob)
+{
+    DeprecatedString source = glob ? RegExpFromGlob(pattern) : pattern;
+
+    // Note we don't honor the Qt syntax for various character classes.  If we convert
+    // to a different underlying engine, we may need to change client code that relies
+    // on the regex syntax (see FrameMac.mm for a couple examples).
+
+    const int options = caseSensitive ? 0 : PCRE_CASELESS;
+    const char *errorMessage;
+    int errorOffset;
+    regex = pcre_compile(reinterpret_cast<const uint16_t *>(source.unicode()), source.length(), options, &errorMessage, &errorOffset, NULL);
+    if (regex != NULL)
+        return;
+
+    LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
+}
+
+// Releases the compiled pattern produced by compile(). Note that regex is NULL
+// here when compilation failed, so pcre_free is handed NULL in that case.
+RegularExpression::Private::~Private()
+{
+    pcre_free(regex);
+}
+
+
+// Creates a regular expression backed by a default-constructed Private
+// (empty pattern, case sensitive, not a glob).
+RegularExpression::RegularExpression()
+    : d(new RegularExpression::Private())
+{
+}
+
+// Creates a regular expression from pattern; caseSensitive and glob are passed
+// straight through to the Private, which compiles the pattern.
+RegularExpression::RegularExpression(const DeprecatedString &pattern, bool caseSensitive, bool glob)
+    : d(new RegularExpression::Private(pattern, caseSensitive, glob))
+{
+}
+
+// Convenience constructor for C string patterns: always case sensitive and
+// never treated as a glob.
+RegularExpression::RegularExpression(const char *cpattern)
+    : d(new RegularExpression::Private(cpattern, true, false))
+{
+}
+
+
+// Copy constructor: the new object refers to the same Private as re instead of
+// compiling the pattern again.
+RegularExpression::RegularExpression(const RegularExpression &re)
+    : d(re.d)
+{
+}
+
+// Nothing to release explicitly here; presumably d gives up its reference to the
+// shared Private in its own destructor.
+RegularExpression::~RegularExpression()
+{
+}
+
+// Makes this object refer to the same Private as re.
+// The incoming state is referenced before the current one is let go, and the
+// current one is held until the end of the function, so self-assignment is safe.
+RegularExpression &RegularExpression::operator=(const RegularExpression &re)
+{
+    RefPtr<RegularExpression::Private> incoming = re.d;
+    RefPtr<RegularExpression::Private> outgoing = d;
+
+    d = incoming;
+
+    return *this;
+}
+
+// Returns the pattern text this expression was created with (for a glob, the
+// original glob text rather than its regexp translation).
+DeprecatedString RegularExpression::pattern() const
+{
+    return d->pattern;
+}
+
+// Runs the compiled pattern against str, starting at offset startFrom.
+// Returns the offset of the match, or -1 when nothing matched or pcre reported an
+// error. On success the match length is stored through matchLength when it is
+// non-NULL; on failure matchLength is left untouched. The outcome is also recorded
+// in the shared state read by pos(), matchedLength() and cap().
+int RegularExpression::match(const DeprecatedString &str, int startFrom, int *matchLength) const
+{
+    d->lastMatchString = str;
+    // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre
+    d->lastMatchCount = pcre_exec(d->regex, NULL, reinterpret_cast<const uint16_t *>(d->lastMatchString.unicode()), d->lastMatchString.length(), startFrom, startFrom == 0 ? 0 : PCRE_NOTBOL, d->lastMatchOffsets, maxOffsets);
+    if (d->lastMatchCount < 0) {
+        if (d->lastMatchCount != PCRE_ERROR_NOMATCH)
+            LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", d->lastMatchCount);
+        d->lastMatchPos = -1;
+        d->lastMatchLength = -1;
+        d->lastMatchString = DeprecatedString();
+        return -1;
+    }
+
+    // A result of 0 is still a successful match: it means the pattern captured more
+    // substrings than the offsets array can describe, and pcre filled in as many as
+    // fit. Record that number so cap() can still reach the substrings that were stored.
+    if (d->lastMatchCount == 0)
+        d->lastMatchCount = static_cast<int>(maxSubstrings);
+
+    d->lastMatchPos = d->lastMatchOffsets[0];
+    d->lastMatchLength = d->lastMatchOffsets[1] - d->lastMatchOffsets[0];
+    if (matchLength != NULL) {
+        *matchLength = d->lastMatchLength;
+    }
+    return d->lastMatchPos;
+}
+
+// Searches str for the pattern starting at startFrom and returns the offset of the
+// match, or -1 if there is none.
+// A negative startFrom counts back from the end of the string (-1 is the last
+// character). An offset that would land before the start of the string is clamped
+// to 0 so that pcre is never handed a negative start offset.
+int RegularExpression::search(const DeprecatedString &str, int startFrom) const
+{
+    if (startFrom < 0) {
+        startFrom += static_cast<int>(str.length());
+        if (startFrom < 0)
+            startFrom = 0;
+    }
+    return match(str, startFrom, NULL);
+}
+
+// Finds the last match of the pattern in str and returns its offset, or -1 when
+// the pattern does not match at all. The result is also stored as the last match
+// position and length.
+int RegularExpression::searchRev(const DeprecatedString &str) const
+{
+    // FIXME: Total hack for now.  Search forward, return the last, greedy match
+    int bestPos = -1;
+    int bestLength = -1;
+    int from = 0;
+    int found = 0;
+    while (found != -1) {
+        int length;
+        found = match(str, from, &length);
+        if (found < 0)
+            continue;
+
+        // Only replace the remembered match when this one ends later, i.e. it is
+        // not a subset of the match we already have.
+        if ((found + length) > (bestPos + bestLength)) {
+            bestPos = found;
+            bestLength = length;
+        }
+        // Resume one character past the start of this match.
+        from = found + 1;
+    }
+    d->lastMatchPos = bestPos;
+    d->lastMatchLength = bestLength;
+    return bestPos;
+}
+
+// Returns the start offset recorded by the last match (-1 when it failed).
+// Only n == 0, the overall match, is supported; other values trip the assertion.
+int RegularExpression::pos(int n)
+{
+    ASSERT(n == 0);
+    return d->lastMatchPos;
+}
+
+// Returns the length recorded by the last match (-1 when it failed).
+int RegularExpression::matchedLength() const
+{
+    return d->lastMatchLength;
+}
+
+// Returns the text of substring n from the last match (0 is the whole match).
+// An empty DeprecatedString is returned when the substring is empty or cannot be
+// extracted.
+DeprecatedString RegularExpression::cap(int n) const
+{
+    const pcre_char *substring = NULL;
+    int substringLength = pcre_get_substring(reinterpret_cast<const uint16_t *>(d->lastMatchString.unicode()), d->lastMatchOffsets, d->lastMatchCount, n, &substring);
+    if (substringLength < 0)
+        return DeprecatedString();
+
+    DeprecatedString capture;
+    if (substringLength > 0)
+        capture = DeprecatedString(reinterpret_cast<const DeprecatedChar *>(substring), substringLength);
+
+    // pcre_get_substring hands back a buffer for every successful extraction,
+    // including an empty one, so release it even when there was nothing to copy.
+    if (substring != NULL)
+        pcre_free_substring(substring);
+    return capture;
+}
+
+}