|
1 /* |
|
2 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #ifndef RegexPattern_h |
|
27 #define RegexPattern_h |
|
28 |
|
29 |
|
30 #if ENABLE(YARR) |
|
31 |
|
32 #include <wtf/Vector.h> |
|
33 #include <wtf/unicode/Unicode.h> |
|
34 |
|
35 |
|
36 namespace JSC { namespace Yarr { |
|
37 |
|
38 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. |
|
39 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. |
|
40 #define RegexStackSpaceForBackTrackInfoBackReference 2 |
|
41 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. |
|
42 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 |
|
43 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. |
|
44 #define RegexStackSpaceForBackTrackInfoParentheses 4 |
|
45 |
|
46 struct PatternDisjunction; |
|
47 |
|
48 struct CharacterRange { |
|
49 UChar begin; |
|
50 UChar end; |
|
51 |
|
52 CharacterRange(UChar begin, UChar end) |
|
53 : begin(begin) |
|
54 , end(end) |
|
55 { |
|
56 } |
|
57 }; |
|
58 |
|
59 struct CharacterClassTable : RefCounted<CharacterClassTable> { |
|
60 const char* m_table; |
|
61 bool m_inverted; |
|
62 static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) |
|
63 { |
|
64 return adoptRef(new CharacterClassTable(table, inverted)); |
|
65 } |
|
66 |
|
67 private: |
|
68 CharacterClassTable(const char* table, bool inverted) |
|
69 : m_table(table) |
|
70 , m_inverted(inverted) |
|
71 { |
|
72 } |
|
73 }; |
|
74 |
|
75 struct CharacterClass : FastAllocBase { |
|
76 // All CharacterClass instances have to have the full set of matches and ranges, |
|
77 // they may have an optional table for faster lookups (which must match the |
|
78 // specified matches and ranges) |
|
79 CharacterClass(PassRefPtr<CharacterClassTable> table) |
|
80 : m_table(table) |
|
81 { |
|
82 } |
|
83 Vector<UChar> m_matches; |
|
84 Vector<CharacterRange> m_ranges; |
|
85 Vector<UChar> m_matchesUnicode; |
|
86 Vector<CharacterRange> m_rangesUnicode; |
|
87 RefPtr<CharacterClassTable> m_table; |
|
88 }; |
|
89 |
|
90 enum QuantifierType { |
|
91 QuantifierFixedCount, |
|
92 QuantifierGreedy, |
|
93 QuantifierNonGreedy, |
|
94 }; |
|
95 |
|
96 struct PatternTerm { |
|
97 enum Type { |
|
98 TypeAssertionBOL, |
|
99 TypeAssertionEOL, |
|
100 TypeAssertionWordBoundary, |
|
101 TypePatternCharacter, |
|
102 TypeCharacterClass, |
|
103 TypeBackReference, |
|
104 TypeForwardReference, |
|
105 TypeParenthesesSubpattern, |
|
106 TypeParentheticalAssertion, |
|
107 } type; |
|
108 bool invertOrCapture; |
|
109 union { |
|
110 UChar patternCharacter; |
|
111 CharacterClass* characterClass; |
|
112 unsigned subpatternId; |
|
113 struct { |
|
114 PatternDisjunction* disjunction; |
|
115 unsigned subpatternId; |
|
116 unsigned lastSubpatternId; |
|
117 bool isCopy; |
|
118 } parentheses; |
|
119 }; |
|
120 QuantifierType quantityType; |
|
121 unsigned quantityCount; |
|
122 int inputPosition; |
|
123 unsigned frameLocation; |
|
124 |
|
125 PatternTerm(UChar ch) |
|
126 : type(PatternTerm::TypePatternCharacter) |
|
127 { |
|
128 patternCharacter = ch; |
|
129 quantityType = QuantifierFixedCount; |
|
130 quantityCount = 1; |
|
131 } |
|
132 |
|
133 PatternTerm(CharacterClass* charClass, bool invert) |
|
134 : type(PatternTerm::TypeCharacterClass) |
|
135 , invertOrCapture(invert) |
|
136 { |
|
137 characterClass = charClass; |
|
138 quantityType = QuantifierFixedCount; |
|
139 quantityCount = 1; |
|
140 } |
|
141 |
|
142 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) |
|
143 : type(type) |
|
144 , invertOrCapture(invertOrCapture) |
|
145 { |
|
146 parentheses.disjunction = disjunction; |
|
147 parentheses.subpatternId = subpatternId; |
|
148 parentheses.isCopy = false; |
|
149 quantityType = QuantifierFixedCount; |
|
150 quantityCount = 1; |
|
151 } |
|
152 |
|
153 PatternTerm(Type type, bool invert = false) |
|
154 : type(type) |
|
155 , invertOrCapture(invert) |
|
156 { |
|
157 quantityType = QuantifierFixedCount; |
|
158 quantityCount = 1; |
|
159 } |
|
160 |
|
161 PatternTerm(unsigned spatternId) |
|
162 : type(TypeBackReference) |
|
163 , invertOrCapture(false) |
|
164 { |
|
165 subpatternId = spatternId; |
|
166 quantityType = QuantifierFixedCount; |
|
167 quantityCount = 1; |
|
168 } |
|
169 |
|
170 static PatternTerm ForwardReference() |
|
171 { |
|
172 return PatternTerm(TypeForwardReference); |
|
173 } |
|
174 |
|
175 static PatternTerm BOL() |
|
176 { |
|
177 return PatternTerm(TypeAssertionBOL); |
|
178 } |
|
179 |
|
180 static PatternTerm EOL() |
|
181 { |
|
182 return PatternTerm(TypeAssertionEOL); |
|
183 } |
|
184 |
|
185 static PatternTerm WordBoundary(bool invert) |
|
186 { |
|
187 return PatternTerm(TypeAssertionWordBoundary, invert); |
|
188 } |
|
189 |
|
190 bool invert() |
|
191 { |
|
192 return invertOrCapture; |
|
193 } |
|
194 |
|
195 bool capture() |
|
196 { |
|
197 return invertOrCapture; |
|
198 } |
|
199 |
|
200 void quantify(unsigned count, QuantifierType type) |
|
201 { |
|
202 quantityCount = count; |
|
203 quantityType = type; |
|
204 } |
|
205 }; |
|
206 |
|
207 struct PatternAlternative : FastAllocBase { |
|
208 PatternAlternative(PatternDisjunction* disjunction) |
|
209 : m_parent(disjunction) |
|
210 { |
|
211 } |
|
212 |
|
213 PatternTerm& lastTerm() |
|
214 { |
|
215 ASSERT(m_terms.size()); |
|
216 return m_terms[m_terms.size() - 1]; |
|
217 } |
|
218 |
|
219 void removeLastTerm() |
|
220 { |
|
221 ASSERT(m_terms.size()); |
|
222 m_terms.shrink(m_terms.size() - 1); |
|
223 } |
|
224 |
|
225 Vector<PatternTerm> m_terms; |
|
226 PatternDisjunction* m_parent; |
|
227 unsigned m_minimumSize; |
|
228 bool m_hasFixedSize; |
|
229 }; |
|
230 |
|
231 struct PatternDisjunction : FastAllocBase { |
|
232 PatternDisjunction(PatternAlternative* parent = 0) |
|
233 : m_parent(parent) |
|
234 { |
|
235 } |
|
236 |
|
237 ~PatternDisjunction() |
|
238 { |
|
239 deleteAllValues(m_alternatives); |
|
240 } |
|
241 |
|
242 PatternAlternative* addNewAlternative() |
|
243 { |
|
244 PatternAlternative* alternative = new PatternAlternative(this); |
|
245 m_alternatives.append(alternative); |
|
246 return alternative; |
|
247 } |
|
248 |
|
249 Vector<PatternAlternative*> m_alternatives; |
|
250 PatternAlternative* m_parent; |
|
251 unsigned m_minimumSize; |
|
252 unsigned m_callFrameSize; |
|
253 bool m_hasFixedSize; |
|
254 }; |
|
255 |
|
256 // You probably don't want to be calling these functions directly |
|
257 // (please to be calling newlineCharacterClass() et al on your |
|
258 // friendly neighborhood RegexPattern instance to get nicely |
|
259 // cached copies). |
|
260 CharacterClass* newlineCreate(); |
|
261 CharacterClass* digitsCreate(); |
|
262 CharacterClass* spacesCreate(); |
|
263 CharacterClass* wordcharCreate(); |
|
264 CharacterClass* nondigitsCreate(); |
|
265 CharacterClass* nonspacesCreate(); |
|
266 CharacterClass* nonwordcharCreate(); |
|
267 |
|
268 struct RegexPattern { |
|
269 RegexPattern(bool ignoreCase, bool multiline) |
|
270 : m_ignoreCase(ignoreCase) |
|
271 , m_multiline(multiline) |
|
272 , m_numSubpatterns(0) |
|
273 , m_maxBackReference(0) |
|
274 , m_containsBackreferences(false) |
|
275 , newlineCached(0) |
|
276 , digitsCached(0) |
|
277 , spacesCached(0) |
|
278 , wordcharCached(0) |
|
279 , nondigitsCached(0) |
|
280 , nonspacesCached(0) |
|
281 , nonwordcharCached(0) |
|
282 { |
|
283 } |
|
284 |
|
285 ~RegexPattern() |
|
286 { |
|
287 deleteAllValues(m_disjunctions); |
|
288 deleteAllValues(m_userCharacterClasses); |
|
289 } |
|
290 |
|
291 void reset() |
|
292 { |
|
293 m_numSubpatterns = 0; |
|
294 m_maxBackReference = 0; |
|
295 |
|
296 m_containsBackreferences = false; |
|
297 |
|
298 newlineCached = 0; |
|
299 digitsCached = 0; |
|
300 spacesCached = 0; |
|
301 wordcharCached = 0; |
|
302 nondigitsCached = 0; |
|
303 nonspacesCached = 0; |
|
304 nonwordcharCached = 0; |
|
305 |
|
306 deleteAllValues(m_disjunctions); |
|
307 m_disjunctions.clear(); |
|
308 deleteAllValues(m_userCharacterClasses); |
|
309 m_userCharacterClasses.clear(); |
|
310 } |
|
311 |
|
312 bool containsIllegalBackReference() |
|
313 { |
|
314 return m_maxBackReference > m_numSubpatterns; |
|
315 } |
|
316 |
|
317 CharacterClass* newlineCharacterClass() |
|
318 { |
|
319 if (!newlineCached) |
|
320 m_userCharacterClasses.append(newlineCached = newlineCreate()); |
|
321 return newlineCached; |
|
322 } |
|
323 CharacterClass* digitsCharacterClass() |
|
324 { |
|
325 if (!digitsCached) |
|
326 m_userCharacterClasses.append(digitsCached = digitsCreate()); |
|
327 return digitsCached; |
|
328 } |
|
329 CharacterClass* spacesCharacterClass() |
|
330 { |
|
331 if (!spacesCached) |
|
332 m_userCharacterClasses.append(spacesCached = spacesCreate()); |
|
333 return spacesCached; |
|
334 } |
|
335 CharacterClass* wordcharCharacterClass() |
|
336 { |
|
337 if (!wordcharCached) |
|
338 m_userCharacterClasses.append(wordcharCached = wordcharCreate()); |
|
339 return wordcharCached; |
|
340 } |
|
341 CharacterClass* nondigitsCharacterClass() |
|
342 { |
|
343 if (!nondigitsCached) |
|
344 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); |
|
345 return nondigitsCached; |
|
346 } |
|
347 CharacterClass* nonspacesCharacterClass() |
|
348 { |
|
349 if (!nonspacesCached) |
|
350 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); |
|
351 return nonspacesCached; |
|
352 } |
|
353 CharacterClass* nonwordcharCharacterClass() |
|
354 { |
|
355 if (!nonwordcharCached) |
|
356 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); |
|
357 return nonwordcharCached; |
|
358 } |
|
359 |
|
360 bool m_ignoreCase; |
|
361 bool m_multiline; |
|
362 unsigned m_numSubpatterns; |
|
363 unsigned m_maxBackReference; |
|
364 bool m_containsBackreferences; |
|
365 PatternDisjunction* m_body; |
|
366 Vector<PatternDisjunction*, 4> m_disjunctions; |
|
367 Vector<CharacterClass*> m_userCharacterClasses; |
|
368 |
|
369 private: |
|
370 CharacterClass* newlineCached; |
|
371 CharacterClass* digitsCached; |
|
372 CharacterClass* spacesCached; |
|
373 CharacterClass* wordcharCached; |
|
374 CharacterClass* nondigitsCached; |
|
375 CharacterClass* nonspacesCached; |
|
376 CharacterClass* nonwordcharCached; |
|
377 }; |
|
378 |
|
379 } } // namespace JSC::Yarr |
|
380 |
|
381 #endif |
|
382 |
|
383 #endif // RegexPattern_h |