|
1 /* |
|
2 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #include "config.h" |
|
27 #include "RegexJIT.h" |
|
28 |
|
29 #include "ASCIICType.h" |
|
30 #include "JSGlobalData.h" |
|
31 #include "LinkBuffer.h" |
|
32 #include "MacroAssembler.h" |
|
33 #include "RegexCompiler.h" |
|
34 |
|
35 #include "pcre.h" // temporary, remove when fallback is removed. |
|
36 |
|
37 #if ENABLE(YARR_JIT) |
|
38 |
|
39 using namespace WTF; |
|
40 |
|
41 namespace JSC { namespace Yarr { |
|
42 |
|
43 class RegexGenerator : private MacroAssembler { |
|
44 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); |
|
45 |
|
46 #if CPU(ARM) |
|
47 static const RegisterID input = ARMRegisters::r0; |
|
48 static const RegisterID index = ARMRegisters::r1; |
|
49 static const RegisterID length = ARMRegisters::r2; |
|
50 static const RegisterID output = ARMRegisters::r4; |
|
51 |
|
52 static const RegisterID regT0 = ARMRegisters::r5; |
|
53 static const RegisterID regT1 = ARMRegisters::r6; |
|
54 |
|
55 static const RegisterID returnRegister = ARMRegisters::r0; |
|
56 #elif CPU(MIPS) |
|
57 static const RegisterID input = MIPSRegisters::a0; |
|
58 static const RegisterID index = MIPSRegisters::a1; |
|
59 static const RegisterID length = MIPSRegisters::a2; |
|
60 static const RegisterID output = MIPSRegisters::a3; |
|
61 |
|
62 static const RegisterID regT0 = MIPSRegisters::t4; |
|
63 static const RegisterID regT1 = MIPSRegisters::t5; |
|
64 |
|
65 static const RegisterID returnRegister = MIPSRegisters::v0; |
|
66 #elif CPU(X86) |
|
67 static const RegisterID input = X86Registers::eax; |
|
68 static const RegisterID index = X86Registers::edx; |
|
69 static const RegisterID length = X86Registers::ecx; |
|
70 static const RegisterID output = X86Registers::edi; |
|
71 |
|
72 static const RegisterID regT0 = X86Registers::ebx; |
|
73 static const RegisterID regT1 = X86Registers::esi; |
|
74 |
|
75 static const RegisterID returnRegister = X86Registers::eax; |
|
76 #elif CPU(X86_64) |
|
77 static const RegisterID input = X86Registers::edi; |
|
78 static const RegisterID index = X86Registers::esi; |
|
79 static const RegisterID length = X86Registers::edx; |
|
80 static const RegisterID output = X86Registers::ecx; |
|
81 |
|
82 static const RegisterID regT0 = X86Registers::eax; |
|
83 static const RegisterID regT1 = X86Registers::ebx; |
|
84 |
|
85 static const RegisterID returnRegister = X86Registers::eax; |
|
86 #endif |
|
87 |
|
88 void optimizeAlternative(PatternAlternative* alternative) |
|
89 { |
|
90 if (!alternative->m_terms.size()) |
|
91 return; |
|
92 |
|
93 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { |
|
94 PatternTerm& term = alternative->m_terms[i]; |
|
95 PatternTerm& nextTerm = alternative->m_terms[i + 1]; |
|
96 |
|
97 if ((term.type == PatternTerm::TypeCharacterClass) |
|
98 && (term.quantityType == QuantifierFixedCount) |
|
99 && (nextTerm.type == PatternTerm::TypePatternCharacter) |
|
100 && (nextTerm.quantityType == QuantifierFixedCount)) { |
|
101 PatternTerm termCopy = term; |
|
102 alternative->m_terms[i] = nextTerm; |
|
103 alternative->m_terms[i + 1] = termCopy; |
|
104 } |
|
105 } |
|
106 } |
|
107 |
|
108 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) |
|
109 { |
|
110 do { |
|
111 // pick which range we're going to generate |
|
112 int which = count >> 1; |
|
113 char lo = ranges[which].begin; |
|
114 char hi = ranges[which].end; |
|
115 |
|
116 // check if there are any ranges or matches below lo. If not, just jl to failure - |
|
117 // if there is anything else to check, check that first, if it falls through jmp to failure. |
|
118 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
|
119 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
|
120 |
|
121 // generate code for all ranges before this one |
|
122 if (which) |
|
123 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
|
124 |
|
125 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
|
126 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); |
|
127 ++*matchIndex; |
|
128 } |
|
129 failures.append(jump()); |
|
130 |
|
131 loOrAbove.link(this); |
|
132 } else if (which) { |
|
133 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
|
134 |
|
135 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
|
136 failures.append(jump()); |
|
137 |
|
138 loOrAbove.link(this); |
|
139 } else |
|
140 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); |
|
141 |
|
142 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) |
|
143 ++*matchIndex; |
|
144 |
|
145 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); |
|
146 // fall through to here, the value is above hi. |
|
147 |
|
148 // shuffle along & loop around if there are any more matches to handle. |
|
149 unsigned next = which + 1; |
|
150 ranges += next; |
|
151 count -= next; |
|
152 } while (count); |
|
153 } |
|
154 |
|
155 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) |
|
156 { |
|
157 if (charClass->m_table) { |
|
158 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); |
|
159 matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); |
|
160 return; |
|
161 } |
|
162 Jump unicodeFail; |
|
163 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { |
|
164 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); |
|
165 |
|
166 if (charClass->m_matchesUnicode.size()) { |
|
167 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { |
|
168 UChar ch = charClass->m_matchesUnicode[i]; |
|
169 matchDest.append(branch32(Equal, character, Imm32(ch))); |
|
170 } |
|
171 } |
|
172 |
|
173 if (charClass->m_rangesUnicode.size()) { |
|
174 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { |
|
175 UChar lo = charClass->m_rangesUnicode[i].begin; |
|
176 UChar hi = charClass->m_rangesUnicode[i].end; |
|
177 |
|
178 Jump below = branch32(LessThan, character, Imm32(lo)); |
|
179 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); |
|
180 below.link(this); |
|
181 } |
|
182 } |
|
183 |
|
184 unicodeFail = jump(); |
|
185 isAscii.link(this); |
|
186 } |
|
187 |
|
188 if (charClass->m_ranges.size()) { |
|
189 unsigned matchIndex = 0; |
|
190 JumpList failures; |
|
191 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); |
|
192 while (matchIndex < charClass->m_matches.size()) |
|
193 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); |
|
194 |
|
195 failures.link(this); |
|
196 } else if (charClass->m_matches.size()) { |
|
197 // optimization: gather 'a','A' etc back together, can mask & test once. |
|
198 Vector<char> matchesAZaz; |
|
199 |
|
200 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { |
|
201 char ch = charClass->m_matches[i]; |
|
202 if (m_pattern.m_ignoreCase) { |
|
203 if (isASCIILower(ch)) { |
|
204 matchesAZaz.append(ch); |
|
205 continue; |
|
206 } |
|
207 if (isASCIIUpper(ch)) |
|
208 continue; |
|
209 } |
|
210 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); |
|
211 } |
|
212 |
|
213 if (unsigned countAZaz = matchesAZaz.size()) { |
|
214 or32(Imm32(32), character); |
|
215 for (unsigned i = 0; i < countAZaz; ++i) |
|
216 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); |
|
217 } |
|
218 } |
|
219 |
|
220 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) |
|
221 unicodeFail.link(this); |
|
222 } |
|
223 |
|
224 // Jumps if input not available; will have (incorrectly) incremented already! |
|
225 Jump jumpIfNoAvailableInput(unsigned countToCheck) |
|
226 { |
|
227 add32(Imm32(countToCheck), index); |
|
228 return branch32(Above, index, length); |
|
229 } |
|
230 |
|
231 Jump jumpIfAvailableInput(unsigned countToCheck) |
|
232 { |
|
233 add32(Imm32(countToCheck), index); |
|
234 return branch32(BelowOrEqual, index, length); |
|
235 } |
|
236 |
|
237 Jump checkInput() |
|
238 { |
|
239 return branch32(BelowOrEqual, index, length); |
|
240 } |
|
241 |
|
242 Jump atEndOfInput() |
|
243 { |
|
244 return branch32(Equal, index, length); |
|
245 } |
|
246 |
|
247 Jump notAtEndOfInput() |
|
248 { |
|
249 return branch32(NotEqual, index, length); |
|
250 } |
|
251 |
|
252 Jump jumpIfCharEquals(UChar ch, int inputPosition) |
|
253 { |
|
254 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); |
|
255 } |
|
256 |
|
257 Jump jumpIfCharNotEquals(UChar ch, int inputPosition) |
|
258 { |
|
259 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); |
|
260 } |
|
261 |
|
262 void readCharacter(int inputPosition, RegisterID reg) |
|
263 { |
|
264 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); |
|
265 } |
|
266 |
|
267 void storeToFrame(RegisterID reg, unsigned frameLocation) |
|
268 { |
|
269 poke(reg, frameLocation); |
|
270 } |
|
271 |
|
272 void storeToFrame(Imm32 imm, unsigned frameLocation) |
|
273 { |
|
274 poke(imm, frameLocation); |
|
275 } |
|
276 |
|
277 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) |
|
278 { |
|
279 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); |
|
280 } |
|
281 |
|
282 void loadFromFrame(unsigned frameLocation, RegisterID reg) |
|
283 { |
|
284 peek(reg, frameLocation); |
|
285 } |
|
286 |
|
287 void loadFromFrameAndJump(unsigned frameLocation) |
|
288 { |
|
289 jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); |
|
290 } |
|
291 |
|
292 struct AlternativeBacktrackRecord { |
|
293 DataLabelPtr dataLabel; |
|
294 Label backtrackLocation; |
|
295 |
|
296 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) |
|
297 : dataLabel(dataLabel) |
|
298 , backtrackLocation(backtrackLocation) |
|
299 { |
|
300 } |
|
301 }; |
|
302 |
|
303 struct TermGenerationState { |
|
304 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) |
|
305 : disjunction(disjunction) |
|
306 , checkedTotal(checkedTotal) |
|
307 { |
|
308 } |
|
309 |
|
310 void resetAlternative() |
|
311 { |
|
312 isBackTrackGenerated = false; |
|
313 alt = 0; |
|
314 } |
|
315 bool alternativeValid() |
|
316 { |
|
317 return alt < disjunction->m_alternatives.size(); |
|
318 } |
|
319 void nextAlternative() |
|
320 { |
|
321 ++alt; |
|
322 } |
|
323 PatternAlternative* alternative() |
|
324 { |
|
325 return disjunction->m_alternatives[alt]; |
|
326 } |
|
327 |
|
328 void resetTerm() |
|
329 { |
|
330 ASSERT(alternativeValid()); |
|
331 t = 0; |
|
332 } |
|
333 bool termValid() |
|
334 { |
|
335 ASSERT(alternativeValid()); |
|
336 return t < alternative()->m_terms.size(); |
|
337 } |
|
338 void nextTerm() |
|
339 { |
|
340 ASSERT(alternativeValid()); |
|
341 ++t; |
|
342 } |
|
343 PatternTerm& term() |
|
344 { |
|
345 ASSERT(alternativeValid()); |
|
346 return alternative()->m_terms[t]; |
|
347 } |
|
348 bool isLastTerm() |
|
349 { |
|
350 ASSERT(alternativeValid()); |
|
351 return (t + 1) == alternative()->m_terms.size(); |
|
352 } |
|
353 bool isMainDisjunction() |
|
354 { |
|
355 return !disjunction->m_parent; |
|
356 } |
|
357 |
|
358 PatternTerm& lookaheadTerm() |
|
359 { |
|
360 ASSERT(alternativeValid()); |
|
361 ASSERT((t + 1) < alternative()->m_terms.size()); |
|
362 return alternative()->m_terms[t + 1]; |
|
363 } |
|
364 bool isSinglePatternCharacterLookaheadTerm() |
|
365 { |
|
366 ASSERT(alternativeValid()); |
|
367 return ((t + 1) < alternative()->m_terms.size()) |
|
368 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) |
|
369 && (lookaheadTerm().quantityType == QuantifierFixedCount) |
|
370 && (lookaheadTerm().quantityCount == 1); |
|
371 } |
|
372 |
|
373 int inputOffset() |
|
374 { |
|
375 return term().inputPosition - checkedTotal; |
|
376 } |
|
377 |
|
378 void jumpToBacktrack(Jump jump, MacroAssembler* masm) |
|
379 { |
|
380 if (isBackTrackGenerated) |
|
381 jump.linkTo(backtrackLabel, masm); |
|
382 else |
|
383 backTrackJumps.append(jump); |
|
384 } |
|
385 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm) |
|
386 { |
|
387 if (isBackTrackGenerated) |
|
388 jumps.linkTo(backtrackLabel, masm); |
|
389 else |
|
390 backTrackJumps.append(jumps); |
|
391 } |
|
392 bool plantJumpToBacktrackIfExists(MacroAssembler* masm) |
|
393 { |
|
394 if (isBackTrackGenerated) { |
|
395 masm->jump(backtrackLabel); |
|
396 return true; |
|
397 } |
|
398 return false; |
|
399 } |
|
400 void addBacktrackJump(Jump jump) |
|
401 { |
|
402 backTrackJumps.append(jump); |
|
403 } |
|
404 void setBacktrackGenerated(Label label) |
|
405 { |
|
406 isBackTrackGenerated = true; |
|
407 backtrackLabel = label; |
|
408 } |
|
409 void linkAlternativeBacktracks(MacroAssembler* masm) |
|
410 { |
|
411 isBackTrackGenerated = false; |
|
412 backTrackJumps.link(masm); |
|
413 } |
|
414 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm) |
|
415 { |
|
416 isBackTrackGenerated = false; |
|
417 backTrackJumps.linkTo(label, masm); |
|
418 } |
|
419 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm) |
|
420 { |
|
421 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm); |
|
422 if (nestedParenthesesState.isBackTrackGenerated) |
|
423 setBacktrackGenerated(nestedParenthesesState.backtrackLabel); |
|
424 } |
|
425 |
|
426 PatternDisjunction* disjunction; |
|
427 int checkedTotal; |
|
428 private: |
|
429 unsigned alt; |
|
430 unsigned t; |
|
431 JumpList backTrackJumps; |
|
432 Label backtrackLabel; |
|
433 bool isBackTrackGenerated; |
|
434 }; |
|
435 |
|
436 void generateAssertionBOL(TermGenerationState& state) |
|
437 { |
|
438 PatternTerm& term = state.term(); |
|
439 |
|
440 if (m_pattern.m_multiline) { |
|
441 const RegisterID character = regT0; |
|
442 |
|
443 JumpList matchDest; |
|
444 if (!term.inputPosition) |
|
445 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); |
|
446 |
|
447 readCharacter(state.inputOffset() - 1, character); |
|
448 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
|
449 state.jumpToBacktrack(jump(), this); |
|
450 |
|
451 matchDest.link(this); |
|
452 } else { |
|
453 // Erk, really should poison out these alternatives early. :-/ |
|
454 if (term.inputPosition) |
|
455 state.jumpToBacktrack(jump(), this); |
|
456 else |
|
457 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this); |
|
458 } |
|
459 } |
|
460 |
|
461 void generateAssertionEOL(TermGenerationState& state) |
|
462 { |
|
463 PatternTerm& term = state.term(); |
|
464 |
|
465 if (m_pattern.m_multiline) { |
|
466 const RegisterID character = regT0; |
|
467 |
|
468 JumpList matchDest; |
|
469 if (term.inputPosition == state.checkedTotal) |
|
470 matchDest.append(atEndOfInput()); |
|
471 |
|
472 readCharacter(state.inputOffset(), character); |
|
473 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
|
474 state.jumpToBacktrack(jump(), this); |
|
475 |
|
476 matchDest.link(this); |
|
477 } else { |
|
478 if (term.inputPosition == state.checkedTotal) |
|
479 state.jumpToBacktrack(notAtEndOfInput(), this); |
|
480 // Erk, really should poison out these alternatives early. :-/ |
|
481 else |
|
482 state.jumpToBacktrack(jump(), this); |
|
483 } |
|
484 } |
|
485 |
|
486 // Also falls though on nextIsNotWordChar. |
|
487 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) |
|
488 { |
|
489 const RegisterID character = regT0; |
|
490 PatternTerm& term = state.term(); |
|
491 |
|
492 if (term.inputPosition == state.checkedTotal) |
|
493 nextIsNotWordChar.append(atEndOfInput()); |
|
494 |
|
495 readCharacter(state.inputOffset(), character); |
|
496 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); |
|
497 } |
|
498 |
|
499 void generateAssertionWordBoundary(TermGenerationState& state) |
|
500 { |
|
501 const RegisterID character = regT0; |
|
502 PatternTerm& term = state.term(); |
|
503 |
|
504 Jump atBegin; |
|
505 JumpList matchDest; |
|
506 if (!term.inputPosition) |
|
507 atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); |
|
508 readCharacter(state.inputOffset() - 1, character); |
|
509 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); |
|
510 if (!term.inputPosition) |
|
511 atBegin.link(this); |
|
512 |
|
513 // We fall through to here if the last character was not a wordchar. |
|
514 JumpList nonWordCharThenWordChar; |
|
515 JumpList nonWordCharThenNonWordChar; |
|
516 if (term.invertOrCapture) { |
|
517 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); |
|
518 nonWordCharThenWordChar.append(jump()); |
|
519 } else { |
|
520 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); |
|
521 nonWordCharThenNonWordChar.append(jump()); |
|
522 } |
|
523 state.jumpToBacktrack(nonWordCharThenNonWordChar, this); |
|
524 |
|
525 // We jump here if the last character was a wordchar. |
|
526 matchDest.link(this); |
|
527 JumpList wordCharThenWordChar; |
|
528 JumpList wordCharThenNonWordChar; |
|
529 if (term.invertOrCapture) { |
|
530 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); |
|
531 wordCharThenWordChar.append(jump()); |
|
532 } else { |
|
533 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); |
|
534 // This can fall-though! |
|
535 } |
|
536 |
|
537 state.jumpToBacktrack(wordCharThenWordChar, this); |
|
538 |
|
539 nonWordCharThenWordChar.link(this); |
|
540 wordCharThenNonWordChar.link(this); |
|
541 } |
|
542 |
|
543 void generatePatternCharacterSingle(TermGenerationState& state) |
|
544 { |
|
545 const RegisterID character = regT0; |
|
546 UChar ch = state.term().patternCharacter; |
|
547 |
|
548 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
549 readCharacter(state.inputOffset(), character); |
|
550 or32(Imm32(32), character); |
|
551 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); |
|
552 } else { |
|
553 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
|
554 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this); |
|
555 } |
|
556 } |
|
557 |
|
558 void generatePatternCharacterPair(TermGenerationState& state) |
|
559 { |
|
560 const RegisterID character = regT0; |
|
561 UChar ch1 = state.term().patternCharacter; |
|
562 UChar ch2 = state.lookaheadTerm().patternCharacter; |
|
563 |
|
564 int mask = 0; |
|
565 int chPair = ch1 | (ch2 << 16); |
|
566 |
|
567 if (m_pattern.m_ignoreCase) { |
|
568 if (isASCIIAlpha(ch1)) |
|
569 mask |= 32; |
|
570 if (isASCIIAlpha(ch2)) |
|
571 mask |= 32 << 16; |
|
572 } |
|
573 |
|
574 if (mask) { |
|
575 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); |
|
576 or32(Imm32(mask), character); |
|
577 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this); |
|
578 } else |
|
579 state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this); |
|
580 } |
|
581 |
|
582 void generatePatternCharacterFixed(TermGenerationState& state) |
|
583 { |
|
584 const RegisterID character = regT0; |
|
585 const RegisterID countRegister = regT1; |
|
586 PatternTerm& term = state.term(); |
|
587 UChar ch = term.patternCharacter; |
|
588 |
|
589 move(index, countRegister); |
|
590 sub32(Imm32(term.quantityCount), countRegister); |
|
591 |
|
592 Label loop(this); |
|
593 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
594 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); |
|
595 or32(Imm32(32), character); |
|
596 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); |
|
597 } else { |
|
598 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
|
599 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this); |
|
600 } |
|
601 add32(Imm32(1), countRegister); |
|
602 branch32(NotEqual, countRegister, index).linkTo(loop, this); |
|
603 } |
|
604 |
|
605 void generatePatternCharacterGreedy(TermGenerationState& state) |
|
606 { |
|
607 const RegisterID character = regT0; |
|
608 const RegisterID countRegister = regT1; |
|
609 PatternTerm& term = state.term(); |
|
610 UChar ch = term.patternCharacter; |
|
611 |
|
612 move(Imm32(0), countRegister); |
|
613 |
|
614 JumpList failures; |
|
615 Label loop(this); |
|
616 failures.append(atEndOfInput()); |
|
617 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
618 readCharacter(state.inputOffset(), character); |
|
619 or32(Imm32(32), character); |
|
620 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); |
|
621 } else { |
|
622 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
|
623 failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); |
|
624 } |
|
625 |
|
626 add32(Imm32(1), countRegister); |
|
627 add32(Imm32(1), index); |
|
628 if (term.quantityCount != 0xffffffff) { |
|
629 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); |
|
630 failures.append(jump()); |
|
631 } else |
|
632 jump(loop); |
|
633 |
|
634 Label backtrackBegin(this); |
|
635 loadFromFrame(term.frameLocation, countRegister); |
|
636 state.jumpToBacktrack(branchTest32(Zero, countRegister), this); |
|
637 sub32(Imm32(1), countRegister); |
|
638 sub32(Imm32(1), index); |
|
639 |
|
640 failures.link(this); |
|
641 |
|
642 storeToFrame(countRegister, term.frameLocation); |
|
643 |
|
644 state.setBacktrackGenerated(backtrackBegin); |
|
645 } |
|
646 |
|
647 void generatePatternCharacterNonGreedy(TermGenerationState& state) |
|
648 { |
|
649 const RegisterID character = regT0; |
|
650 const RegisterID countRegister = regT1; |
|
651 PatternTerm& term = state.term(); |
|
652 UChar ch = term.patternCharacter; |
|
653 |
|
654 move(Imm32(0), countRegister); |
|
655 |
|
656 Jump firstTimeDoNothing = jump(); |
|
657 |
|
658 Label hardFail(this); |
|
659 sub32(countRegister, index); |
|
660 state.jumpToBacktrack(jump(), this); |
|
661 |
|
662 Label backtrackBegin(this); |
|
663 loadFromFrame(term.frameLocation, countRegister); |
|
664 |
|
665 atEndOfInput().linkTo(hardFail, this); |
|
666 if (term.quantityCount != 0xffffffff) |
|
667 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); |
|
668 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
669 readCharacter(state.inputOffset(), character); |
|
670 or32(Imm32(32), character); |
|
671 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); |
|
672 } else { |
|
673 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
|
674 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); |
|
675 } |
|
676 |
|
677 add32(Imm32(1), countRegister); |
|
678 add32(Imm32(1), index); |
|
679 |
|
680 firstTimeDoNothing.link(this); |
|
681 storeToFrame(countRegister, term.frameLocation); |
|
682 |
|
683 state.setBacktrackGenerated(backtrackBegin); |
|
684 } |
|
685 |
|
686 void generateCharacterClassSingle(TermGenerationState& state) |
|
687 { |
|
688 const RegisterID character = regT0; |
|
689 PatternTerm& term = state.term(); |
|
690 |
|
691 JumpList matchDest; |
|
692 readCharacter(state.inputOffset(), character); |
|
693 matchCharacterClass(character, matchDest, term.characterClass); |
|
694 |
|
695 if (term.invertOrCapture) |
|
696 state.jumpToBacktrack(matchDest, this); |
|
697 else { |
|
698 state.jumpToBacktrack(jump(), this); |
|
699 matchDest.link(this); |
|
700 } |
|
701 } |
|
702 |
|
703 void generateCharacterClassFixed(TermGenerationState& state) |
|
704 { |
|
705 const RegisterID character = regT0; |
|
706 const RegisterID countRegister = regT1; |
|
707 PatternTerm& term = state.term(); |
|
708 |
|
709 move(index, countRegister); |
|
710 sub32(Imm32(term.quantityCount), countRegister); |
|
711 |
|
712 Label loop(this); |
|
713 JumpList matchDest; |
|
714 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); |
|
715 matchCharacterClass(character, matchDest, term.characterClass); |
|
716 |
|
717 if (term.invertOrCapture) |
|
718 state.jumpToBacktrack(matchDest, this); |
|
719 else { |
|
720 state.jumpToBacktrack(jump(), this); |
|
721 matchDest.link(this); |
|
722 } |
|
723 |
|
724 add32(Imm32(1), countRegister); |
|
725 branch32(NotEqual, countRegister, index).linkTo(loop, this); |
|
726 } |
|
727 |
|
728 void generateCharacterClassGreedy(TermGenerationState& state) |
|
729 { |
|
730 const RegisterID character = regT0; |
|
731 const RegisterID countRegister = regT1; |
|
732 PatternTerm& term = state.term(); |
|
733 |
|
734 move(Imm32(0), countRegister); |
|
735 |
|
736 JumpList failures; |
|
737 Label loop(this); |
|
738 failures.append(atEndOfInput()); |
|
739 |
|
740 if (term.invertOrCapture) { |
|
741 readCharacter(state.inputOffset(), character); |
|
742 matchCharacterClass(character, failures, term.characterClass); |
|
743 } else { |
|
744 JumpList matchDest; |
|
745 readCharacter(state.inputOffset(), character); |
|
746 matchCharacterClass(character, matchDest, term.characterClass); |
|
747 failures.append(jump()); |
|
748 matchDest.link(this); |
|
749 } |
|
750 |
|
751 add32(Imm32(1), countRegister); |
|
752 add32(Imm32(1), index); |
|
753 if (term.quantityCount != 0xffffffff) { |
|
754 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); |
|
755 failures.append(jump()); |
|
756 } else |
|
757 jump(loop); |
|
758 |
|
759 Label backtrackBegin(this); |
|
760 loadFromFrame(term.frameLocation, countRegister); |
|
761 state.jumpToBacktrack(branchTest32(Zero, countRegister), this); |
|
762 sub32(Imm32(1), countRegister); |
|
763 sub32(Imm32(1), index); |
|
764 |
|
765 failures.link(this); |
|
766 |
|
767 storeToFrame(countRegister, term.frameLocation); |
|
768 |
|
769 state.setBacktrackGenerated(backtrackBegin); |
|
770 } |
|
771 |
|
772 void generateCharacterClassNonGreedy(TermGenerationState& state) |
|
773 { |
|
774 const RegisterID character = regT0; |
|
775 const RegisterID countRegister = regT1; |
|
776 PatternTerm& term = state.term(); |
|
777 |
|
778 move(Imm32(0), countRegister); |
|
779 |
|
780 Jump firstTimeDoNothing = jump(); |
|
781 |
|
782 Label hardFail(this); |
|
783 sub32(countRegister, index); |
|
784 state.jumpToBacktrack(jump(), this); |
|
785 |
|
786 Label backtrackBegin(this); |
|
787 loadFromFrame(term.frameLocation, countRegister); |
|
788 |
|
789 atEndOfInput().linkTo(hardFail, this); |
|
790 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); |
|
791 |
|
792 JumpList matchDest; |
|
793 readCharacter(state.inputOffset(), character); |
|
794 matchCharacterClass(character, matchDest, term.characterClass); |
|
795 |
|
796 if (term.invertOrCapture) |
|
797 matchDest.linkTo(hardFail, this); |
|
798 else { |
|
799 jump(hardFail); |
|
800 matchDest.link(this); |
|
801 } |
|
802 |
|
803 add32(Imm32(1), countRegister); |
|
804 add32(Imm32(1), index); |
|
805 |
|
806 firstTimeDoNothing.link(this); |
|
807 storeToFrame(countRegister, term.frameLocation); |
|
808 |
|
809 state.setBacktrackGenerated(backtrackBegin); |
|
810 } |
|
811 |
|
812 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) |
|
813 { |
|
814 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); |
|
815 ASSERT(parenthesesTerm.quantityCount == 1); |
|
816 |
|
817 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; |
|
818 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; |
|
819 |
|
820 if (disjunction->m_alternatives.size() == 1) { |
|
821 state.resetAlternative(); |
|
822 ASSERT(state.alternativeValid()); |
|
823 PatternAlternative* alternative = state.alternative(); |
|
824 optimizeAlternative(alternative); |
|
825 |
|
826 int countToCheck = alternative->m_minimumSize - preCheckedCount; |
|
827 if (countToCheck) { |
|
828 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); |
|
829 |
|
830 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' |
|
831 // will be forced to always trampoline into here, just to decrement the index. |
|
832 // Ick. |
|
833 Jump skip = jump(); |
|
834 |
|
835 Label backtrackBegin(this); |
|
836 sub32(Imm32(countToCheck), index); |
|
837 state.addBacktrackJump(jump()); |
|
838 |
|
839 skip.link(this); |
|
840 |
|
841 state.setBacktrackGenerated(backtrackBegin); |
|
842 |
|
843 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this); |
|
844 state.checkedTotal += countToCheck; |
|
845 } |
|
846 |
|
847 for (state.resetTerm(); state.termValid(); state.nextTerm()) |
|
848 generateTerm(state); |
|
849 |
|
850 state.checkedTotal -= countToCheck; |
|
851 } else { |
|
852 JumpList successes; |
|
853 |
|
854 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) { |
|
855 |
|
856 PatternAlternative* alternative = state.alternative(); |
|
857 optimizeAlternative(alternative); |
|
858 |
|
859 ASSERT(alternative->m_minimumSize >= preCheckedCount); |
|
860 int countToCheck = alternative->m_minimumSize - preCheckedCount; |
|
861 if (countToCheck) { |
|
862 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); |
|
863 state.checkedTotal += countToCheck; |
|
864 } |
|
865 |
|
866 for (state.resetTerm(); state.termValid(); state.nextTerm()) |
|
867 generateTerm(state); |
|
868 |
|
869 // Matched an alternative. |
|
870 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); |
|
871 successes.append(jump()); |
|
872 |
|
873 // Alternative did not match. |
|
874 Label backtrackLocation(this); |
|
875 |
|
876 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one. |
|
877 state.plantJumpToBacktrackIfExists(this); |
|
878 |
|
879 state.linkAlternativeBacktracks(this); |
|
880 |
|
881 if (countToCheck) { |
|
882 sub32(Imm32(countToCheck), index); |
|
883 state.checkedTotal -= countToCheck; |
|
884 } |
|
885 |
|
886 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation)); |
|
887 } |
|
888 // We fall through to here when the last alternative fails. |
|
889 // Add a backtrack out of here for the parenthese handling code to link up. |
|
890 state.addBacktrackJump(jump()); |
|
891 |
|
892 // Generate a trampoline for the parens code to backtrack to, to retry the |
|
893 // next alternative. |
|
894 state.setBacktrackGenerated(label()); |
|
895 loadFromFrameAndJump(alternativeFrameLocation); |
|
896 |
|
897 // FIXME: both of the above hooks are a little inefficient, in that you |
|
898 // may end up trampolining here, just to trampoline back out to the |
|
899 // parentheses code, or vice versa. We can probably eliminate a jump |
|
900 // by restructuring, but coding this way for now for simplicity during |
|
901 // development. |
|
902 |
|
903 successes.link(this); |
|
904 } |
|
905 } |
|
906 |
|
907 void generateParenthesesSingle(TermGenerationState& state) |
|
908 { |
|
909 const RegisterID indexTemporary = regT0; |
|
910 PatternTerm& term = state.term(); |
|
911 PatternDisjunction* disjunction = term.parentheses.disjunction; |
|
912 ASSERT(term.quantityCount == 1); |
|
913 |
|
914 if (term.parentheses.isCopy) { |
|
915 m_shouldFallBack = true; |
|
916 return; |
|
917 } |
|
918 |
|
919 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0; |
|
920 |
|
921 unsigned parenthesesFrameLocation = term.frameLocation; |
|
922 unsigned alternativeFrameLocation = parenthesesFrameLocation; |
|
923 if (term.quantityType != QuantifierFixedCount) |
|
924 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; |
|
925 |
|
926 // optimized case - no capture & no quantifier can be handled in a light-weight manner. |
|
927 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) { |
|
928 TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
|
929 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
|
930 // this expects that any backtracks back out of the parentheses will be in the |
|
931 // parenthesesState's backTrackJumps vector, and that if they need backtracking |
|
932 // they will have set an entry point on the parenthesesState's backtrackLabel. |
|
933 state.propagateBacktrackingFrom(parenthesesState, this); |
|
934 } else { |
|
935 Jump nonGreedySkipParentheses; |
|
936 Label nonGreedyTryParentheses; |
|
937 if (term.quantityType == QuantifierGreedy) |
|
938 storeToFrame(Imm32(1), parenthesesFrameLocation); |
|
939 else if (term.quantityType == QuantifierNonGreedy) { |
|
940 storeToFrame(Imm32(0), parenthesesFrameLocation); |
|
941 nonGreedySkipParentheses = jump(); |
|
942 nonGreedyTryParentheses = label(); |
|
943 storeToFrame(Imm32(1), parenthesesFrameLocation); |
|
944 } |
|
945 |
|
946 // store the match start index |
|
947 if (term.invertOrCapture) { |
|
948 int inputOffset = state.inputOffset() - preCheckedCount; |
|
949 if (inputOffset) { |
|
950 move(index, indexTemporary); |
|
951 add32(Imm32(inputOffset), indexTemporary); |
|
952 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
|
953 } else |
|
954 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
|
955 } |
|
956 |
|
957 // generate the body of the parentheses |
|
958 TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
|
959 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
|
960 |
|
961 // store the match end index |
|
962 if (term.invertOrCapture) { |
|
963 int inputOffset = state.inputOffset(); |
|
964 if (inputOffset) { |
|
965 move(index, indexTemporary); |
|
966 add32(Imm32(state.inputOffset()), indexTemporary); |
|
967 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
|
968 } else |
|
969 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
|
970 } |
|
971 Jump success = jump(); |
|
972 |
|
973 // A failure AFTER the parens jumps here |
|
974 Label backtrackFromAfterParens(this); |
|
975 |
|
976 if (term.quantityType == QuantifierGreedy) { |
|
977 // If this is zero we have now tested with both with and without the parens. |
|
978 loadFromFrame(parenthesesFrameLocation, indexTemporary); |
|
979 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this); |
|
980 } else if (term.quantityType == QuantifierNonGreedy) { |
|
981 // If this is zero we have now tested with both with and without the parens. |
|
982 loadFromFrame(parenthesesFrameLocation, indexTemporary); |
|
983 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this); |
|
984 } |
|
985 |
|
986 parenthesesState.plantJumpToBacktrackIfExists(this); |
|
987 // A failure WITHIN the parens jumps here |
|
988 parenthesesState.linkAlternativeBacktracks(this); |
|
989 if (term.invertOrCapture) { |
|
990 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
|
991 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
|
992 } |
|
993 |
|
994 if (term.quantityType == QuantifierGreedy) |
|
995 storeToFrame(Imm32(0), parenthesesFrameLocation); |
|
996 else |
|
997 state.jumpToBacktrack(jump(), this); |
|
998 |
|
999 state.setBacktrackGenerated(backtrackFromAfterParens); |
|
1000 if (term.quantityType == QuantifierNonGreedy) |
|
1001 nonGreedySkipParentheses.link(this); |
|
1002 success.link(this); |
|
1003 } |
|
1004 } |
|
1005 |
|
1006 void generateParenthesesGreedyNoBacktrack(TermGenerationState& state) |
|
1007 { |
|
1008 PatternTerm& parenthesesTerm = state.term(); |
|
1009 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; |
|
1010 ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); |
|
1011 ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. |
|
1012 |
|
1013 // Capturing not yet implemented! |
|
1014 if (parenthesesTerm.invertOrCapture) { |
|
1015 m_shouldFallBack = true; |
|
1016 return; |
|
1017 } |
|
1018 |
|
1019 // Quantification limit not yet implemented! |
|
1020 if (parenthesesTerm.quantityCount != 0xffffffff) { |
|
1021 m_shouldFallBack = true; |
|
1022 return; |
|
1023 } |
|
1024 |
|
1025 // Need to reset nested subpatterns between iterations... |
|
1026 // for the minute this crude check rejects all patterns with any subpatterns! |
|
1027 if (m_pattern.m_numSubpatterns) { |
|
1028 m_shouldFallBack = true; |
|
1029 return; |
|
1030 } |
|
1031 |
|
1032 TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
|
1033 |
|
1034 Label matchAgain(this); |
|
1035 for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) { |
|
1036 |
|
1037 PatternAlternative* alternative = parenthesesState.alternative(); |
|
1038 optimizeAlternative(alternative); |
|
1039 |
|
1040 int countToCheck = alternative->m_minimumSize; |
|
1041 if (countToCheck) { |
|
1042 parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); |
|
1043 parenthesesState.checkedTotal += countToCheck; |
|
1044 } |
|
1045 |
|
1046 for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm()) |
|
1047 generateTerm(parenthesesState); |
|
1048 |
|
1049 // If we get here, we matched! Limit not yet supported, so just try to match more! |
|
1050 jump(matchAgain); |
|
1051 |
|
1052 parenthesesState.linkAlternativeBacktracks(this); |
|
1053 // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. |
|
1054 |
|
1055 if (countToCheck) { |
|
1056 sub32(Imm32(countToCheck), index); |
|
1057 parenthesesState.checkedTotal -= countToCheck; |
|
1058 } |
|
1059 } |
|
1060 |
|
1061 // If the last alternative falls through to here, we have a failed match... |
|
1062 // Which means that we match whatever we have matched up to this point (even if nothing). |
|
1063 } |
|
1064 |
|
1065 void generateParentheticalAssertion(TermGenerationState& state) |
|
1066 { |
|
1067 PatternTerm& term = state.term(); |
|
1068 PatternDisjunction* disjunction = term.parentheses.disjunction; |
|
1069 ASSERT(term.quantityCount == 1); |
|
1070 ASSERT(term.quantityType == QuantifierFixedCount); |
|
1071 |
|
1072 unsigned parenthesesFrameLocation = term.frameLocation; |
|
1073 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; |
|
1074 |
|
1075 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; |
|
1076 |
|
1077 if (term.invertOrCapture) { |
|
1078 // Inverted case |
|
1079 storeToFrame(index, parenthesesFrameLocation); |
|
1080 |
|
1081 state.checkedTotal -= countCheckedAfterAssertion; |
|
1082 if (countCheckedAfterAssertion) |
|
1083 sub32(Imm32(countCheckedAfterAssertion), index); |
|
1084 |
|
1085 TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
|
1086 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
|
1087 // Success! - which means - Fail! |
|
1088 loadFromFrame(parenthesesFrameLocation, index); |
|
1089 state.jumpToBacktrack(jump(), this); |
|
1090 |
|
1091 // And fail means success. |
|
1092 parenthesesState.linkAlternativeBacktracks(this); |
|
1093 loadFromFrame(parenthesesFrameLocation, index); |
|
1094 |
|
1095 state.checkedTotal += countCheckedAfterAssertion; |
|
1096 } else { |
|
1097 // Normal case |
|
1098 storeToFrame(index, parenthesesFrameLocation); |
|
1099 |
|
1100 state.checkedTotal -= countCheckedAfterAssertion; |
|
1101 if (countCheckedAfterAssertion) |
|
1102 sub32(Imm32(countCheckedAfterAssertion), index); |
|
1103 |
|
1104 TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
|
1105 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
|
1106 // Success! - which means - Success! |
|
1107 loadFromFrame(parenthesesFrameLocation, index); |
|
1108 Jump success = jump(); |
|
1109 |
|
1110 parenthesesState.linkAlternativeBacktracks(this); |
|
1111 loadFromFrame(parenthesesFrameLocation, index); |
|
1112 state.jumpToBacktrack(jump(), this); |
|
1113 |
|
1114 success.link(this); |
|
1115 |
|
1116 state.checkedTotal += countCheckedAfterAssertion; |
|
1117 } |
|
1118 } |
|
1119 |
|
1120 void generateTerm(TermGenerationState& state) |
|
1121 { |
|
1122 PatternTerm& term = state.term(); |
|
1123 |
|
1124 switch (term.type) { |
|
1125 case PatternTerm::TypeAssertionBOL: |
|
1126 generateAssertionBOL(state); |
|
1127 break; |
|
1128 |
|
1129 case PatternTerm::TypeAssertionEOL: |
|
1130 generateAssertionEOL(state); |
|
1131 break; |
|
1132 |
|
1133 case PatternTerm::TypeAssertionWordBoundary: |
|
1134 generateAssertionWordBoundary(state); |
|
1135 break; |
|
1136 |
|
1137 case PatternTerm::TypePatternCharacter: |
|
1138 switch (term.quantityType) { |
|
1139 case QuantifierFixedCount: |
|
1140 if (term.quantityCount == 1) { |
|
1141 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { |
|
1142 generatePatternCharacterPair(state); |
|
1143 state.nextTerm(); |
|
1144 } else |
|
1145 generatePatternCharacterSingle(state); |
|
1146 } else |
|
1147 generatePatternCharacterFixed(state); |
|
1148 break; |
|
1149 case QuantifierGreedy: |
|
1150 generatePatternCharacterGreedy(state); |
|
1151 break; |
|
1152 case QuantifierNonGreedy: |
|
1153 generatePatternCharacterNonGreedy(state); |
|
1154 break; |
|
1155 } |
|
1156 break; |
|
1157 |
|
1158 case PatternTerm::TypeCharacterClass: |
|
1159 switch (term.quantityType) { |
|
1160 case QuantifierFixedCount: |
|
1161 if (term.quantityCount == 1) |
|
1162 generateCharacterClassSingle(state); |
|
1163 else |
|
1164 generateCharacterClassFixed(state); |
|
1165 break; |
|
1166 case QuantifierGreedy: |
|
1167 generateCharacterClassGreedy(state); |
|
1168 break; |
|
1169 case QuantifierNonGreedy: |
|
1170 generateCharacterClassNonGreedy(state); |
|
1171 break; |
|
1172 } |
|
1173 break; |
|
1174 |
|
1175 case PatternTerm::TypeBackReference: |
|
1176 m_shouldFallBack = true; |
|
1177 break; |
|
1178 |
|
1179 case PatternTerm::TypeForwardReference: |
|
1180 break; |
|
1181 |
|
1182 case PatternTerm::TypeParenthesesSubpattern: |
|
1183 if (term.quantityCount == 1) { |
|
1184 generateParenthesesSingle(state); |
|
1185 break; |
|
1186 } else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction? |
|
1187 // If this has a greedy quantifier, then it will never need to backtrack! |
|
1188 if (term.quantityType == QuantifierGreedy) { |
|
1189 generateParenthesesGreedyNoBacktrack(state); |
|
1190 break; |
|
1191 } |
|
1192 } |
|
1193 m_shouldFallBack = true; |
|
1194 break; |
|
1195 |
|
1196 case PatternTerm::TypeParentheticalAssertion: |
|
1197 generateParentheticalAssertion(state); |
|
1198 break; |
|
1199 } |
|
1200 } |
|
1201 |
|
1202 void generateDisjunction(PatternDisjunction* disjunction) |
|
1203 { |
|
1204 TermGenerationState state(disjunction, 0); |
|
1205 state.resetAlternative(); |
|
1206 |
|
1207 // Plant a check to see if there is sufficient input available to run the first alternative. |
|
1208 // Jumping back to the label 'firstAlternative' will get to this check, jumping to |
|
1209 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having |
|
1210 // skipped this check. |
|
1211 |
|
1212 Label firstAlternative(this); |
|
1213 |
|
1214 // check availability for the next alternative |
|
1215 int countCheckedForCurrentAlternative = 0; |
|
1216 int countToCheckForFirstAlternative = 0; |
|
1217 bool hasShorterAlternatives = false; |
|
1218 JumpList notEnoughInputForPreviousAlternative; |
|
1219 |
|
1220 if (state.alternativeValid()) { |
|
1221 PatternAlternative* alternative = state.alternative(); |
|
1222 countToCheckForFirstAlternative = alternative->m_minimumSize; |
|
1223 state.checkedTotal += countToCheckForFirstAlternative; |
|
1224 if (countToCheckForFirstAlternative) |
|
1225 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); |
|
1226 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; |
|
1227 } |
|
1228 |
|
1229 Label firstAlternativeInputChecked(this); |
|
1230 |
|
1231 while (state.alternativeValid()) { |
|
1232 // Track whether any alternatives are shorter than the first one. |
|
1233 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); |
|
1234 |
|
1235 PatternAlternative* alternative = state.alternative(); |
|
1236 optimizeAlternative(alternative); |
|
1237 |
|
1238 for (state.resetTerm(); state.termValid(); state.nextTerm()) |
|
1239 generateTerm(state); |
|
1240 |
|
1241 // If we get here, the alternative matched. |
|
1242 if (m_pattern.m_body->m_callFrameSize) |
|
1243 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); |
|
1244 |
|
1245 ASSERT(index != returnRegister); |
|
1246 if (m_pattern.m_body->m_hasFixedSize) { |
|
1247 move(index, returnRegister); |
|
1248 if (alternative->m_minimumSize) |
|
1249 sub32(Imm32(alternative->m_minimumSize), returnRegister); |
|
1250 |
|
1251 store32(returnRegister, output); |
|
1252 } else |
|
1253 load32(Address(output), returnRegister); |
|
1254 |
|
1255 store32(index, Address(output, 4)); |
|
1256 |
|
1257 generateReturn(); |
|
1258 |
|
1259 state.nextAlternative(); |
|
1260 |
|
1261 // if there are any more alternatives, plant the check for input before looping. |
|
1262 if (state.alternativeValid()) { |
|
1263 PatternAlternative* nextAlternative = state.alternative(); |
|
1264 int countToCheckForNextAlternative = nextAlternative->m_minimumSize; |
|
1265 |
|
1266 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. |
|
1267 // If we get here, there the last input checked failed. |
|
1268 notEnoughInputForPreviousAlternative.link(this); |
|
1269 |
|
1270 // Check if sufficent input available to run the next alternative |
|
1271 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); |
|
1272 // We are now in the correct state to enter the next alternative; this add is only required |
|
1273 // to mirror and revert operation of the sub32, just below. |
|
1274 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); |
|
1275 |
|
1276 // If we get here, there the last input checked passed. |
|
1277 state.linkAlternativeBacktracks(this); |
|
1278 // No need to check if we can run the next alternative, since it is shorter - |
|
1279 // just update index. |
|
1280 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); |
|
1281 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. |
|
1282 // If we get here, there the last input checked failed. |
|
1283 // If there is insufficient input to run the current alternative, and the next alternative is longer, |
|
1284 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if |
|
1285 // we had checked. |
|
1286 notEnoughInputForPreviousAlternative.link(this); |
|
1287 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); |
|
1288 notEnoughInputForPreviousAlternative.append(jump()); |
|
1289 |
|
1290 // The next alternative is longer than the current one; check the difference. |
|
1291 state.linkAlternativeBacktracks(this); |
|
1292 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); |
|
1293 } else { // CASE 3: Both alternatives are the same length. |
|
1294 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); |
|
1295 |
|
1296 // If the next alterative is the same length as this one, then no need to check the input - |
|
1297 // if there was sufficent input to run the current alternative then there is sufficient |
|
1298 // input to run the next one; if not, there isn't. |
|
1299 state.linkAlternativeBacktracks(this); |
|
1300 } |
|
1301 |
|
1302 state.checkedTotal -= countCheckedForCurrentAlternative; |
|
1303 countCheckedForCurrentAlternative = countToCheckForNextAlternative; |
|
1304 state.checkedTotal += countCheckedForCurrentAlternative; |
|
1305 } |
|
1306 } |
|
1307 |
|
1308 // If we get here, all Alternatives failed... |
|
1309 |
|
1310 state.checkedTotal -= countCheckedForCurrentAlternative; |
|
1311 |
|
1312 // How much more input need there be to be able to retry from the first alternative? |
|
1313 // examples: |
|
1314 // /yarr_jit/ or /wrec|pcre/ |
|
1315 // In these examples we need check for one more input before looping. |
|
1316 // /yarr_jit|pcre/ |
|
1317 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative |
|
1318 // being four longer than the last alternative checked, and another +1 to effectively move |
|
1319 // the start position along by one). |
|
1320 // /yarr|rules/ or /wrec|notsomuch/ |
|
1321 // In these examples, provided that there was sufficient input to have just been matching for |
|
1322 // the second alternative we can loop without checking for available input (since the second |
|
1323 // alternative is longer than the first). In the latter example we need to decrement index |
|
1324 // (by 4) so the start position is only progressed by 1 from the last iteration. |
|
1325 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; |
|
1326 |
|
1327 // First, deal with the cases where there was sufficient input to try the last alternative. |
|
1328 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. |
|
1329 state.linkAlternativeBacktracks(this); |
|
1330 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! |
|
1331 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this); |
|
1332 else { // no need to check the input, but we do have some bookkeeping to do first. |
|
1333 state.linkAlternativeBacktracks(this); |
|
1334 |
|
1335 // Where necessary update our preserved start position. |
|
1336 if (!m_pattern.m_body->m_hasFixedSize) { |
|
1337 move(index, regT0); |
|
1338 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); |
|
1339 store32(regT0, Address(output)); |
|
1340 } |
|
1341 |
|
1342 // Update index if necessary, and loop (without checking). |
|
1343 if (incrementForNextIter) |
|
1344 add32(Imm32(incrementForNextIter), index); |
|
1345 jump().linkTo(firstAlternativeInputChecked, this); |
|
1346 } |
|
1347 |
|
1348 notEnoughInputForPreviousAlternative.link(this); |
|
1349 // Update our idea of the start position, if we're tracking this. |
|
1350 if (!m_pattern.m_body->m_hasFixedSize) { |
|
1351 if (countCheckedForCurrentAlternative - 1) { |
|
1352 move(index, regT0); |
|
1353 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); |
|
1354 store32(regT0, Address(output)); |
|
1355 } else |
|
1356 store32(index, Address(output)); |
|
1357 } |
|
1358 // Check if there is sufficent input to run the first alternative again. |
|
1359 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); |
|
1360 // No - insufficent input to run the first alteranative, are there any other alternatives we |
|
1361 // might need to check? If so, the last check will have left the index incremented by |
|
1362 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative |
|
1363 // LESS input is available, to have the effect of just progressing the start position by 1 |
|
1364 // from the last iteration. If this check passes we can just jump up to the check associated |
|
1365 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the |
|
1366 // first alternative again, and this check will fail (otherwise the check planted just above |
|
1367 // here would have passed). This is a bit sad, however it saves trying to do something more |
|
1368 // complex here in compilation, and in the common case we should end up coallescing the checks. |
|
1369 // |
|
1370 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least |
|
1371 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, |
|
1372 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there |
|
1373 // is sufficient input to run either alternative (constantly failing). If there had been only |
|
1374 // one alternative, or if the shorter alternative had come first, we would have terminated |
|
1375 // immediately. :-/ |
|
1376 if (hasShorterAlternatives) |
|
1377 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); |
|
1378 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, |
|
1379 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... |
|
1380 // but since we're about to return a failure this doesn't really matter!) |
|
1381 |
|
1382 if (m_pattern.m_body->m_callFrameSize) |
|
1383 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); |
|
1384 |
|
1385 move(Imm32(-1), returnRegister); |
|
1386 |
|
1387 generateReturn(); |
|
1388 } |
|
1389 |
|
1390 void generateEnter() |
|
1391 { |
|
1392 #if CPU(X86_64) |
|
1393 push(X86Registers::ebp); |
|
1394 move(stackPointerRegister, X86Registers::ebp); |
|
1395 push(X86Registers::ebx); |
|
1396 #elif CPU(X86) |
|
1397 push(X86Registers::ebp); |
|
1398 move(stackPointerRegister, X86Registers::ebp); |
|
1399 // TODO: do we need spill registers to fill the output pointer if there are no sub captures? |
|
1400 push(X86Registers::ebx); |
|
1401 push(X86Registers::edi); |
|
1402 push(X86Registers::esi); |
|
1403 // load output into edi (2 = saved ebp + return address). |
|
1404 #if COMPILER(MSVC) |
|
1405 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); |
|
1406 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); |
|
1407 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); |
|
1408 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); |
|
1409 #else |
|
1410 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); |
|
1411 #endif |
|
1412 #elif CPU(ARM) |
|
1413 push(ARMRegisters::r4); |
|
1414 push(ARMRegisters::r5); |
|
1415 push(ARMRegisters::r6); |
|
1416 move(ARMRegisters::r3, output); |
|
1417 #elif CPU(MIPS) |
|
1418 // Do nothing. |
|
1419 #endif |
|
1420 } |
|
1421 |
|
1422 void generateReturn() |
|
1423 { |
|
1424 #if CPU(X86_64) |
|
1425 pop(X86Registers::ebx); |
|
1426 pop(X86Registers::ebp); |
|
1427 #elif CPU(X86) |
|
1428 pop(X86Registers::esi); |
|
1429 pop(X86Registers::edi); |
|
1430 pop(X86Registers::ebx); |
|
1431 pop(X86Registers::ebp); |
|
1432 #elif CPU(ARM) |
|
1433 pop(ARMRegisters::r6); |
|
1434 pop(ARMRegisters::r5); |
|
1435 pop(ARMRegisters::r4); |
|
1436 #elif CPU(MIPS) |
|
1437 // Do nothing |
|
1438 #endif |
|
1439 ret(); |
|
1440 } |
|
1441 |
|
1442 public: |
|
1443 RegexGenerator(RegexPattern& pattern) |
|
1444 : m_pattern(pattern) |
|
1445 , m_shouldFallBack(false) |
|
1446 { |
|
1447 } |
|
1448 |
|
1449 void generate() |
|
1450 { |
|
1451 generateEnter(); |
|
1452 |
|
1453 if (!m_pattern.m_body->m_hasFixedSize) |
|
1454 store32(index, Address(output)); |
|
1455 |
|
1456 if (m_pattern.m_body->m_callFrameSize) |
|
1457 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); |
|
1458 |
|
1459 generateDisjunction(m_pattern.m_body); |
|
1460 } |
|
1461 |
|
1462 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject) |
|
1463 { |
|
1464 generate(); |
|
1465 |
|
1466 LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size())); |
|
1467 |
|
1468 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) |
|
1469 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation)); |
|
1470 |
|
1471 jitObject.set(patchBuffer.finalizeCode()); |
|
1472 } |
|
1473 |
|
1474 bool shouldFallBack() |
|
1475 { |
|
1476 return m_shouldFallBack; |
|
1477 } |
|
1478 |
|
1479 private: |
|
1480 RegexPattern& m_pattern; |
|
1481 bool m_shouldFallBack; |
|
1482 Vector<AlternativeBacktrackRecord> m_backtrackRecords; |
|
1483 }; |
|
1484 |
|
1485 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) |
|
1486 { |
|
1487 RegexPattern pattern(ignoreCase, multiline); |
|
1488 if ((error = compileRegex(patternString, pattern))) |
|
1489 return; |
|
1490 numSubpatterns = pattern.m_numSubpatterns; |
|
1491 |
|
1492 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { |
|
1493 RegexGenerator generator(pattern); |
|
1494 generator.compile(globalData, jitObject); |
|
1495 if (!generator.shouldFallBack()) |
|
1496 return; |
|
1497 } |
|
1498 |
|
1499 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; |
|
1500 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine; |
|
1501 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error)); |
|
1502 } |
|
1503 |
|
1504 }} |
|
1505 |
|
1506 #endif |
|
1507 |
|
1508 |
|
1509 |
|
1510 |
|
1511 |