author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Tue, 06 Jul 2010 15:10:48 +0300 | |
changeset 30 | 5dc02b23752f |
parent 0 | 1918ee327afb |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) |
|
3 |
* Copyright (c) 2007, 2008 Apple Inc. All rights reserved. |
|
4 |
* Copyright (C) 2009 Torch Mobile, Inc. |
|
5 |
* |
|
6 |
* This library is free software; you can redistribute it and/or |
|
7 |
* modify it under the terms of the GNU Lesser General Public |
|
8 |
* License as published by the Free Software Foundation; either |
|
9 |
* version 2 of the License, or (at your option) any later version. |
|
10 |
* |
|
11 |
* This library is distributed in the hope that it will be useful, |
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
* Lesser General Public License for more details. |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU Lesser General Public |
|
17 |
* License along with this library; if not, write to the Free Software |
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
* |
|
20 |
*/ |
|
21 |
||
22 |
#include "config.h" |
|
23 |
#include "RegExp.h" |
|
24 |
#include "Lexer.h" |
|
25 |
#include <stdio.h> |
|
26 |
#include <stdlib.h> |
|
27 |
#include <string.h> |
|
28 |
#include <wtf/Assertions.h> |
|
29 |
#include <wtf/OwnArrayPtr.h> |
|
30 |
||
31 |
||
32 |
#if ENABLE(YARR) |
|
33 |
||
34 |
#include "yarr/RegexCompiler.h" |
|
35 |
#if ENABLE(YARR_JIT) |
|
36 |
#include "yarr/RegexJIT.h" |
|
37 |
#else |
|
38 |
#include "yarr/RegexInterpreter.h" |
|
39 |
#endif |
|
40 |
||
41 |
#else |
|
42 |
||
43 |
#include <pcre/pcre.h> |
|
44 |
||
45 |
#endif |
|
46 |
||
47 |
namespace JSC { |
|
48 |
||
49 |
// Constructs a RegExp for |pattern| with no flag bits set.
// The error pointer and subpattern count start at zero and compile() is
// invoked immediately with |globalData|.
inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
    : m_pattern(pattern),
      m_flagBits(0),
      m_constructionError(0),
      m_numSubpatterns(0)
{
    compile(globalData);
}
|
57 |
||
58 |
// Constructs a RegExp for |pattern|, turning on the Global, IgnoreCase and
// Multiline bits when |flags| contains 'g', 'i' and 'm' respectively, then
// compiles the pattern with |globalData|.
inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
    : m_pattern(pattern),
      m_flagBits(0),
      m_constructionError(0),
      m_numSubpatterns(0)
{
    // NOTE: The global flag is only recorded here; it is handled on a
    // case-by-case basis by functions like String::match and RegExpObject::match.
    const bool hasGlobalFlag = flags.find('g') != UString::NotFound;
    const bool hasIgnoreCaseFlag = flags.find('i') != UString::NotFound;
    const bool hasMultilineFlag = flags.find('m') != UString::NotFound;

    if (hasGlobalFlag)
        m_flagBits |= Global;
    if (hasIgnoreCaseFlag)
        m_flagBits |= IgnoreCase;
    if (hasMultilineFlag)
        m_flagBits |= Multiline;

    // The flag bits must be in place before compiling: compile() reads
    // ignoreCase() and multiline().
    compile(globalData);
}
|
75 |
||
76 |
#if !ENABLE(YARR) |
|
77 |
// Non-YARR build only: hands the pattern stored in m_regExp (produced by
// jsRegExpCompile in compile()) back to jsRegExpFree.
RegExp::~RegExp()
{
    jsRegExpFree(m_regExp);
}
|
81 |
#endif |
|
82 |
||
83 |
// Factory: allocates a RegExp for |pattern| (no flags) and passes the new
// object to adoptRef for the returned PassRefPtr.
PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
{
    RegExp* freshRegExp = new RegExp(globalData, pattern);
    return adoptRef(freshRegExp);
}
|
87 |
||
88 |
// Factory: allocates a RegExp for |pattern| with the given |flags| string and
// passes the new object to adoptRef for the returned PassRefPtr.
PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
{
    RegExp* freshRegExp = new RegExp(globalData, pattern, flags);
    return adoptRef(freshRegExp);
}
|
92 |
||
93 |
#if ENABLE(YARR) |
|
94 |
||
95 |
// YARR build: compiles m_pattern using the current ignoreCase()/multiline()
// settings. m_numSubpatterns and m_constructionError are handed to the
// compiler as in/out arguments.
//  - Without the JIT, the result of byteCompileRegex is stored in
//    m_regExpBytecode and |globalData| is unused.
//  - With the JIT, jitCompileRegex fills m_regExpJITCode.
void RegExp::compile(JSGlobalData* globalData)
{
#if !ENABLE(YARR_JIT)
    UNUSED_PARAM(globalData);
    m_regExpBytecode.set(
        Yarr::byteCompileRegex(
            m_pattern,
            m_numSubpatterns,
            m_constructionError,
            ignoreCase(),
            multiline()));
#else
    Yarr::jitCompileRegex(
        globalData,
        m_regExpJITCode,
        m_pattern,
        m_numSubpatterns,
        m_constructionError,
        ignoreCase(),
        multiline());
#endif
}
|
104 |
||
105 |
int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
|
106 |
{ |
|
107 |
if (startOffset < 0) |
|
108 |
startOffset = 0; |
|
109 |
if (ovector) |
|
110 |
ovector->clear(); |
|
111 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
112 |
if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) |
0 | 113 |
return -1; |
114 |
||
115 |
#if ENABLE(YARR_JIT) |
|
116 |
if (!!m_regExpJITCode) { |
|
117 |
#else |
|
118 |
if (m_regExpBytecode) { |
|
119 |
#endif |
|
120 |
int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre. |
|
121 |
int* offsetVector; |
|
122 |
Vector<int, 32> nonReturnedOvector; |
|
123 |
if (ovector) { |
|
124 |
ovector->resize(offsetVectorSize); |
|
125 |
offsetVector = ovector->data(); |
|
126 |
} else { |
|
127 |
nonReturnedOvector.resize(offsetVectorSize); |
|
128 |
offsetVector = nonReturnedOvector.data(); |
|
129 |
} |
|
130 |
||
131 |
ASSERT(offsetVector); |
|
132 |
for (int j = 0; j < offsetVectorSize; ++j) |
|
133 |
offsetVector[j] = -1; |
|
134 |
||
135 |
||
136 |
#if ENABLE(YARR_JIT) |
|
137 |
int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize); |
|
138 |
#else |
|
139 |
int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector); |
|
140 |
#endif |
|
141 |
||
142 |
if (result < 0) { |
|
143 |
#ifndef NDEBUG |
|
144 |
// TODO: define up a symbol, rather than magic -1 |
|
145 |
if (result != -1) |
|
146 |
fprintf(stderr, "jsRegExpExecute failed with result %d\n", result); |
|
147 |
#endif |
|
148 |
if (ovector) |
|
149 |
ovector->clear(); |
|
150 |
} |
|
151 |
return result; |
|
152 |
} |
|
153 |
||
154 |
return -1; |
|
155 |
} |
|
156 |
||
157 |
#else |
|
158 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
159 |
// Non-YARR build: compiles m_pattern with jsRegExpCompile and stores the
// result in m_regExp. The JSGlobalData argument is not used.
// m_numSubpatterns and m_constructionError are passed by address to
// jsRegExpCompile.
void RegExp::compile(JSGlobalData*)
{
    m_regExp = 0;

    // Translate the flag bits into the option enums jsRegExpCompile takes.
    JSRegExpIgnoreCaseOption caseMode = JSRegExpDoNotIgnoreCase;
    if (ignoreCase())
        caseMode = JSRegExpIgnoreCase;

    JSRegExpMultilineOption lineMode = JSRegExpSingleLine;
    if (multiline())
        lineMode = JSRegExpMultiline;

    const UChar* patternChars = reinterpret_cast<const UChar*>(m_pattern.data());
    m_regExp = jsRegExpCompile(
        patternChars,
        m_pattern.size(),
        caseMode,
        lineMode,
        &m_numSubpatterns,
        &m_constructionError);
}
|
166 |
||
167 |
int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
|
168 |
{ |
|
169 |
if (startOffset < 0) |
|
170 |
startOffset = 0; |
|
171 |
if (ovector) |
|
172 |
ovector->clear(); |
|
173 |
||
30
5dc02b23752f
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
174 |
if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) |
0 | 175 |
return -1; |
176 |
||
177 |
if (m_regExp) { |
|
178 |
// Set up the offset vector for the result. |
|
179 |
// First 2/3 used for result, the last third used by PCRE. |
|
180 |
int* offsetVector; |
|
181 |
int offsetVectorSize; |
|
182 |
int fixedSizeOffsetVector[3]; |
|
183 |
if (!ovector) { |
|
184 |
offsetVectorSize = 3; |
|
185 |
offsetVector = fixedSizeOffsetVector; |
|
186 |
} else { |
|
187 |
offsetVectorSize = (m_numSubpatterns + 1) * 3; |
|
188 |
ovector->resize(offsetVectorSize); |
|
189 |
offsetVector = ovector->data(); |
|
190 |
} |
|
191 |
||
192 |
int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize); |
|
193 |
||
194 |
if (numMatches < 0) { |
|
195 |
#ifndef NDEBUG |
|
196 |
if (numMatches != JSRegExpErrorNoMatch) |
|
197 |
fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches); |
|
198 |
#endif |
|
199 |
if (ovector) |
|
200 |
ovector->clear(); |
|
201 |
return -1; |
|
202 |
} |
|
203 |
||
204 |
return offsetVector[0]; |
|
205 |
} |
|
206 |
||
207 |
return -1; |
|
208 |
} |
|
209 |
||
210 |
#endif |
|
211 |
||
212 |
} // namespace JSC |