--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebCore/platform/RegularExpression.cpp Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "Logging.h"
+#include "Shared.h"
+#include <pcre/pcre.h>
+#include <sys/types.h>
+
+namespace WebCore {
+
+const size_t maxSubstrings = 10; // number of substrings (whole match plus captures) the offset vector has room for
+const size_t maxOffsets = 3 * maxSubstrings; // 3 ints per substring: start, end, and one used internally by pcre
+// Implementation state behind a RegularExpression: the pattern text, its compiled pcre form, and the outcome of the most recent match.
+class RegularExpression::Private : public Shared<RegularExpression::Private>
+{
+public:
+ Private(); // empty pattern, compiled case sensitively and without glob translation
+ Private(DeprecatedString pattern, bool caseSensitive, bool glob);
+ ~Private();
+ // Compiles pattern into regex; when glob is true the pattern is first converted by RegExpFromGlob.
+ void compile(bool caseSensitive, bool glob);
+ // The pattern as given to the constructor, and the object pcre_compile produced for it (NULL if it failed).
+ DeprecatedString pattern;
+ pcre *regex;
+ // Outcome of the most recent RegularExpression::match() call.
+ DeprecatedString lastMatchString; // subject string; replaced by an empty DeprecatedString when the match fails
+ int lastMatchOffsets[maxOffsets]; // offset vector handed to pcre_exec
+ int lastMatchCount; // value returned by pcre_exec
+ int lastMatchPos; // offset where the match starts, -1 after a failed match
+ int lastMatchLength; // length of the match, -1 after a failed match
+};
+// Creates the state for an empty, case-sensitive, non-glob pattern. The lastMatch* fields get "no match yet" values so pos(), matchedLength() and cap() never read indeterminate data.
+RegularExpression::Private::Private() : pattern(""), lastMatchCount(0), lastMatchPos(-1), lastMatchLength(-1)
+{
+    compile(true, false);
+}
+// Creates the state for pattern p and compiles it. lastMatchCount starts at 0 so that cap() called before any match() hands pcre_get_substring a defined substring count.
+RegularExpression::Private::Private(DeprecatedString p, bool caseSensitive, bool glob) : pattern(p), lastMatchCount(0), lastMatchPos(-1), lastMatchLength(-1)
+{
+    compile(caseSensitive, glob);
+}
+
+// Translates a glob pattern into an anchored regular expression. Characters that are regexp
+// metacharacters but plain text in a glob are backslash-escaped, "*" becomes ".*", "?" becomes ".",
+// and the result is wrapped in "^...$". Square brackets are passed through untouched.
+static DeprecatedString RegExpFromGlob(DeprecatedString glob)
+{
+    // Each entry is the escaped form of one such character; it serves both as the regexp that finds
+    // the character and as the text that replaces it. Backslash has to come first so the
+    // backslashes inserted for the later entries are not escaped a second time.
+    // FIXME: incorrect for ^ inside bracket group
+    static const char* const literalInGlob[] = { "\\\\", "\\.", "\\+", "\\$", "\\^", "\\(", "\\)", "\\|", "\\{", "\\}" };
+
+    DeprecatedString result = glob;
+    for (size_t i = 0; i < sizeof(literalInGlob) / sizeof(literalInGlob[0]); ++i)
+        result.replace(RegularExpression(literalInGlob[i]), literalInGlob[i]);
+
+    // translate glob metacharacters into regexp metacharacters
+    result.replace(RegularExpression("\\*"), ".*");
+    result.replace(RegularExpression("\\?"), ".");
+
+    // Require the glob to match the whole string
+    return "^" + result + "$";
+}
+
+// Compiles the stored pattern with pcre and keeps the result in regex. When glob is true the pattern
+// is first rewritten by RegExpFromGlob. A failed compile is logged and leaves regex NULL.
+void RegularExpression::Private::compile(bool caseSensitive, bool glob)
+{
+    // Note we don't honor the Qt syntax for various character classes. If we convert
+    // to a different underlying engine, we may need to change client code that relies
+    // on the regex syntax (see FrameMac.mm for a couple examples).
+
+    DeprecatedString source = glob ? RegExpFromGlob(pattern) : pattern;
+    const int options = caseSensitive ? 0 : PCRE_CASELESS;
+
+    const char* failureText;
+    int failureOffset;
+    regex = pcre_compile(reinterpret_cast<const uint16_t*>(source.unicode()), source.length(),
+                         options, &failureText, &failureOffset, NULL);
+    if (regex)
+        return;
+
+    LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", failureText);
+}
+// Releases the compiled pattern; regex is NULL here if compile() failed.
+RegularExpression::Private::~Private()
+{
+ pcre_free(regex);
+}
+
+
+// Creates a regular expression for the empty pattern.
+RegularExpression::RegularExpression()
+    : d(new RegularExpression::Private()) { }
+
+// Creates a regular expression from pattern; glob selects glob syntax instead of regexp syntax.
+RegularExpression::RegularExpression(const DeprecatedString &pattern, bool caseSensitive, bool glob)
+    : d(new RegularExpression::Private(pattern, caseSensitive, glob)) { }
+
+// Creates a case-sensitive, non-glob regular expression from a C string pattern.
+RegularExpression::RegularExpression(const char *cpattern)
+    : d(new RegularExpression::Private(cpattern, true, false)) { }
+
+
+// Copies share the source's Private rather than cloning it, so recorded match results are shared as well.
+RegularExpression::RegularExpression(const RegularExpression &re)
+    : d(re.d) { }
+
+// Intentionally empty: no explicit cleanup is written here. NOTE(review): d looks like a RefPtr
+// (operator= copies it into one), which would release the Private automatically - confirm in the header.
+RegularExpression::~RegularExpression() { }
+
+// Makes this object share re's Private. A local RefPtr holds a reference to the incoming
+// Private for the duration of the assignment, so assigning an object to itself is harmless.
+// The Private previously held by d loses this object's reference.
+RegularExpression &RegularExpression::operator=(const RegularExpression &re)
+{
+    RefPtr<RegularExpression::Private> incoming = re.d;
+    d = incoming;
+
+    return *this;
+}
+// Returns the pattern text this expression was constructed with (before any glob translation).
+DeprecatedString RegularExpression::pattern() const
+{
+ return d->pattern;
+}
+// Matches the pattern against str from offset startFrom and records the result in d. Returns the offset of the
+// match (storing its length through matchLength when that is non-NULL), or -1 on no match or a pcre error.
+int RegularExpression::match(const DeprecatedString &str, int startFrom, int *matchLength) const
+{
+    d->lastMatchString = str;
+    d->lastMatchCount = pcre_exec(d->regex, NULL, reinterpret_cast<const uint16_t *>(d->lastMatchString.unicode()), d->lastMatchString.length(), startFrom, startFrom == 0 ? 0 : PCRE_NOTBOL, d->lastMatchOffsets, maxOffsets);
+    if (d->lastMatchCount < 0) {
+        if (d->lastMatchCount != PCRE_ERROR_NOMATCH)
+            LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", d->lastMatchCount);
+        d->lastMatchPos = -1;
+        d->lastMatchLength = -1;
+        d->lastMatchString = DeprecatedString();
+        return -1;
+    }
+    // 0 means the pattern matched but had more substrings than the offset vector holds; pcre still
+    // fills in the first maxSubstrings, so record that count for cap() instead of 0.
+    if (!d->lastMatchCount)
+        d->lastMatchCount = static_cast<int>(maxSubstrings);
+    d->lastMatchPos = d->lastMatchOffsets[0];
+    d->lastMatchLength = d->lastMatchOffsets[1] - d->lastMatchOffsets[0];
+    if (matchLength)
+        *matchLength = d->lastMatchLength;
+    return d->lastMatchPos;
+}
+// Searches str for the pattern starting at startFrom and returns the match offset, or -1 if there is none.
+// A negative startFrom counts back from the end of str (-1 is the last character).
+int RegularExpression::search(const DeprecatedString &str, int startFrom) const
+{
+    if (startFrom < 0)
+        startFrom += static_cast<int>(str.length());
+    return match(str, startFrom, NULL);
+}
+// Returns the offset of the last match of the pattern in str (-1 if none) and records its position and length in d.
+// FIXME: Total hack for now. Search forward, return the last, greedy match
+int RegularExpression::searchRev(const DeprecatedString &str) const
+{
+    const int length = static_cast<int>(str.length());
+    int lastPos = -1;
+    int lastMatchLength = -1;
+    // No match can begin past the end of str; without this bound an empty match at the end would send the next probe out of range.
+    for (int start = 0; start <= length; ) {
+        int matchLength;
+        const int pos = match(str, start, &matchLength);
+        if (pos < 0)
+            break;
+        // replace last match if this one is later and not a subset of the last match
+        if (pos + matchLength > lastPos + lastMatchLength) {
+            lastPos = pos;
+            lastMatchLength = matchLength;
+        }
+        start = pos + 1;
+    }
+    d->lastMatchPos = lastPos;
+    d->lastMatchLength = lastMatchLength;
+    return lastPos;
+}
+// Returns the start offset recorded by the most recent match, or -1 if it failed. Only n == 0 is supported.
+int RegularExpression::pos(int n)
+{
+ ASSERT(n == 0);
+ return d->lastMatchPos;
+}
+// Returns the length recorded by the most recent match, or -1 if it failed.
+int RegularExpression::matchedLength() const
+{
+ return d->lastMatchLength;
+}
+// Returns capture group n of the most recent match (0 is the whole match), or an empty DeprecatedString
+// if that group is empty or pcre_get_substring reports an error (for example, no such group).
+DeprecatedString RegularExpression::cap(int n) const
+{
+    const pcre_char *substring = NULL;
+    int substringLength = pcre_get_substring(reinterpret_cast<const uint16_t *>(d->lastMatchString.unicode()), d->lastMatchOffsets, d->lastMatchCount, n, &substring);
+    DeprecatedString capture = substringLength > 0 ? DeprecatedString(reinterpret_cast<const DeprecatedChar *>(substring), substringLength) : DeprecatedString();
+    // pcre_get_substring allocates a copy on every success, including a zero-length capture, so free it for 0 as well.
+    if (substringLength >= 0)
+        pcre_free_substring(substring);
+    return capture;
+}
+
+}