diff -r 000000000000 -r dd21522fd290 webengine/osswebengine/JavaScriptCore/kjs/regexp.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/webengine/osswebengine/JavaScriptCore/kjs/regexp.cpp Mon Mar 30 12:54:55 2009 +0300 @@ -0,0 +1,218 @@ +// -*- c-basic-offset: 2 -*- +/* + * This file is part of the KDE libraries + * Copyright (C) 1999-2001,2004 Harri Porten (porten@kde.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "config.h" +#include "regexp.h" + +#include "lexer.h" + +#include +#include +#include +#include + +namespace KJS { + +RegExp::RegExp(const UString &p, int flags) + : m_flags(flags), m_constructionError(0), m_numSubPatterns(0) +{ +#if HAVE(PCREPOSIX) + + int options = PCRE_UTF8; + // Note: the Global flag is already handled by RegExpProtoFunc::execute. + // FIXME: That last comment is dubious. Not all RegExps get run through RegExpProtoFunc::execute. + if (flags & IgnoreCase) + options |= PCRE_CASELESS; + if (flags & Multiline) + options |= PCRE_MULTILINE; + + const char *errorMessage; + int errorOffset; + + m_regex = pcre_compile(reinterpret_cast(p.data()), p.size(), + options, &errorMessage, &errorOffset, NULL); + if (!m_regex) { + m_constructionError = strdup(errorMessage); + return; + } + +#ifdef PCRE_INFO_CAPTURECOUNT + // Get number of subpatterns that will be returned. + pcre_fullinfo(m_regex, NULL, PCRE_INFO_CAPTURECOUNT, &m_numSubPatterns); +#endif + +#else /* HAVE(PCREPOSIX) */ + + int regflags = 0; +#ifdef REG_EXTENDED + regflags |= REG_EXTENDED; +#endif +#ifdef REG_ICASE + if ( f & IgnoreCase ) + regflags |= REG_ICASE; +#endif + + //NOTE: Multiline is not feasible with POSIX regex. + //if ( f & Multiline ) + // ; + // Note: the Global flag is already handled by RegExpProtoFunc::execute + + // FIXME: support \u Unicode escapes. + + int errorCode = regcomp(&m_regex, intern.ascii(), regflags); + if (errorCode != 0) { + char errorMessage[80]; + regerror(errorCode, &m_regex, errorMessage, sizeof errorMessage); + m_constructionError = strdup(errorMessage); + } + +#endif +} + +RegExp::~RegExp() +{ +#if HAVE(PCREPOSIX) + pcre_free(m_regex); +#else + /* TODO: is this really okay after an error ? */ + regfree(&m_regex); +#endif + free(m_constructionError); +} + +UString RegExp::match(const UString &s, int i, int *pos, int **ovector) +{ + if (i < 0) + i = 0; + int dummyPos; + if (!pos) + pos = &dummyPos; + *pos = -1; + if (ovector) + *ovector = 0; + + if (i > s.size() || s.isNull()) + return UString::null(); + +#if HAVE(PCREPOSIX) + + if (!m_regex) + return UString::null(); + + // Set up the offset vector for the result. + // First 2/3 used for result, the last third used by PCRE. + int *offsetVector; + int offsetVectorSize; + int fixedSizeOffsetVector[3]; + if (!ovector) { + offsetVectorSize = 3; + offsetVector = fixedSizeOffsetVector; + } else { + offsetVectorSize = (m_numSubPatterns + 1) * 3; + offsetVector = new int [offsetVectorSize]; + } + + const int numMatches = pcre_exec(m_regex, NULL, reinterpret_cast(s.data()), s.size(), i, 0, offsetVector, offsetVectorSize); + + if (numMatches < 0) { +#ifndef NDEBUG + if (numMatches != PCRE_ERROR_NOMATCH) + fprintf(stderr, "KJS: pcre_exec() failed with result %d\n", numMatches); +#endif + if (offsetVector != fixedSizeOffsetVector) + delete [] offsetVector; + return UString::null(); + } + + *pos = offsetVector[0]; + if (ovector) + *ovector = offsetVector; + return s.substr(offsetVector[0], offsetVector[1] - offsetVector[0]); + +#else + + const unsigned maxMatch = 10; + regmatch_t rmatch[maxMatch]; + + char *str = strdup(s.ascii()); // TODO: why ??? + if (regexec(&m_regex, str + i, maxMatch, rmatch, 0)) { + free(str); + return UString::null(); + } + free(str); + + if (!ovector) { + *pos = rmatch[0].rm_so + i; + return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so); + } + + // map rmatch array to ovector used in PCRE case + m_numSubPatterns = 0; + for(unsigned j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) + m_numSubPatterns++; + int ovecsize = (m_numSubPatterns+1)*3; // see above + *ovector = new int[ovecsize]; + for (unsigned j = 0; j < m_numSubPatterns + 1; j++) { + if (j>maxMatch) + break; + (*ovector)[2*j] = rmatch[j].rm_so + i; + (*ovector)[2*j+1] = rmatch[j].rm_eo + i; + } + + *pos = (*ovector)[0]; + return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]); + +#endif +} + +bool RegExp::isHexDigit(UChar uc) +{ + int c = uc.unicode(); + return (c >= '0' && c <= '9' || + c >= 'a' && c <= 'f' || + c >= 'A' && c <= 'F'); +} + +unsigned char RegExp::convertHex(int c) +{ + if (c >= '0' && c <= '9') + return static_cast(c - '0'); + if (c >= 'a' && c <= 'f') + return static_cast(c - 'a' + 10); + return static_cast(c - 'A' + 10); +} + +unsigned char RegExp::convertHex(int c1, int c2) +{ + return ((convertHex(c1) << 4) + convertHex(c2)); +} + +UChar RegExp::convertUnicode(UChar uc1, UChar uc2, UChar uc3, UChar uc4) +{ + int c1 = uc1.unicode(); + int c2 = uc2.unicode(); + int c3 = uc3.unicode(); + int c4 = uc4.unicode(); + return UChar((convertHex(c1) << 4) + convertHex(c2), + (convertHex(c3) << 4) + convertHex(c4)); +} + +} // namespace KJS