JavaScriptCore/yarr/RegexJIT.cpp
changeset 0 4f2f89ce4247
child 2 303757a437d3
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2009 Apple Inc. All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "RegexJIT.h"
       
    28 
       
    29 #include "ASCIICType.h"
       
    30 #include "JSGlobalData.h"
       
    31 #include "LinkBuffer.h"
       
    32 #include "MacroAssembler.h"
       
    33 #include "RegexCompiler.h"
       
    34 
       
    35 #include "pcre.h" // temporary, remove when fallback is removed.
       
    36 
       
    37 #if ENABLE(YARR_JIT)
       
    38 
       
    39 using namespace WTF;
       
    40 
       
    41 namespace JSC { namespace Yarr {
       
    42 
       
    43 class RegexGenerator : private MacroAssembler {
       
    44     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
       
    45 
       
    46 #if CPU(ARM)
       
    47     static const RegisterID input = ARMRegisters::r0;
       
    48     static const RegisterID index = ARMRegisters::r1;
       
    49     static const RegisterID length = ARMRegisters::r2;
       
    50     static const RegisterID output = ARMRegisters::r4;
       
    51 
       
    52     static const RegisterID regT0 = ARMRegisters::r5;
       
    53     static const RegisterID regT1 = ARMRegisters::r6;
       
    54 
       
    55     static const RegisterID returnRegister = ARMRegisters::r0;
       
    56 #elif CPU(MIPS)
       
    57     static const RegisterID input = MIPSRegisters::a0;
       
    58     static const RegisterID index = MIPSRegisters::a1;
       
    59     static const RegisterID length = MIPSRegisters::a2;
       
    60     static const RegisterID output = MIPSRegisters::a3;
       
    61 
       
    62     static const RegisterID regT0 = MIPSRegisters::t4;
       
    63     static const RegisterID regT1 = MIPSRegisters::t5;
       
    64 
       
    65     static const RegisterID returnRegister = MIPSRegisters::v0;
       
    66 #elif CPU(X86)
       
    67     static const RegisterID input = X86Registers::eax;
       
    68     static const RegisterID index = X86Registers::edx;
       
    69     static const RegisterID length = X86Registers::ecx;
       
    70     static const RegisterID output = X86Registers::edi;
       
    71 
       
    72     static const RegisterID regT0 = X86Registers::ebx;
       
    73     static const RegisterID regT1 = X86Registers::esi;
       
    74 
       
    75     static const RegisterID returnRegister = X86Registers::eax;
       
    76 #elif CPU(X86_64)
       
    77     static const RegisterID input = X86Registers::edi;
       
    78     static const RegisterID index = X86Registers::esi;
       
    79     static const RegisterID length = X86Registers::edx;
       
    80     static const RegisterID output = X86Registers::ecx;
       
    81 
       
    82     static const RegisterID regT0 = X86Registers::eax;
       
    83     static const RegisterID regT1 = X86Registers::ebx;
       
    84 
       
    85     static const RegisterID returnRegister = X86Registers::eax;
       
    86 #endif
       
    87 
       
    88     void optimizeAlternative(PatternAlternative* alternative)
       
    89     {
       
    90         if (!alternative->m_terms.size())
       
    91             return;
       
    92 
       
    93         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
       
    94             PatternTerm& term = alternative->m_terms[i];
       
    95             PatternTerm& nextTerm = alternative->m_terms[i + 1];
       
    96 
       
    97             if ((term.type == PatternTerm::TypeCharacterClass)
       
    98                 && (term.quantityType == QuantifierFixedCount)
       
    99                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
       
   100                 && (nextTerm.quantityType == QuantifierFixedCount)) {
       
   101                 PatternTerm termCopy = term;
       
   102                 alternative->m_terms[i] = nextTerm;
       
   103                 alternative->m_terms[i + 1] = termCopy;
       
   104             }
       
   105         }
       
   106     }
       
   107 
       
   108     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
       
   109     {
       
   110         do {
       
   111             // pick which range we're going to generate
       
   112             int which = count >> 1;
       
   113             char lo = ranges[which].begin;
       
   114             char hi = ranges[which].end;
       
   115             
       
   116             // check if there are any ranges or matches below lo.  If not, just jl to failure -
       
   117             // if there is anything else to check, check that first, if it falls through jmp to failure.
       
   118             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
       
   119                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
       
   120                 
       
   121                 // generate code for all ranges before this one
       
   122                 if (which)
       
   123                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
       
   124                 
       
   125                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
       
   126                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
       
   127                     ++*matchIndex;
       
   128                 }
       
   129                 failures.append(jump());
       
   130 
       
   131                 loOrAbove.link(this);
       
   132             } else if (which) {
       
   133                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
       
   134 
       
   135                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
       
   136                 failures.append(jump());
       
   137 
       
   138                 loOrAbove.link(this);
       
   139             } else
       
   140                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
       
   141 
       
   142             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
       
   143                 ++*matchIndex;
       
   144 
       
   145             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
       
   146             // fall through to here, the value is above hi.
       
   147 
       
   148             // shuffle along & loop around if there are any more matches to handle.
       
   149             unsigned next = which + 1;
       
   150             ranges += next;
       
   151             count -= next;
       
   152         } while (count);
       
   153     }
       
   154 
       
   155     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
       
   156     {
       
   157         if (charClass->m_table) {
       
   158             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
       
   159             matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
       
   160             return;
       
   161         }
       
   162         Jump unicodeFail;
       
   163         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
       
   164             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
       
   165         
       
   166             if (charClass->m_matchesUnicode.size()) {
       
   167                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
       
   168                     UChar ch = charClass->m_matchesUnicode[i];
       
   169                     matchDest.append(branch32(Equal, character, Imm32(ch)));
       
   170                 }
       
   171             }
       
   172             
       
   173             if (charClass->m_rangesUnicode.size()) {
       
   174                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
       
   175                     UChar lo = charClass->m_rangesUnicode[i].begin;
       
   176                     UChar hi = charClass->m_rangesUnicode[i].end;
       
   177                     
       
   178                     Jump below = branch32(LessThan, character, Imm32(lo));
       
   179                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
       
   180                     below.link(this);
       
   181                 }
       
   182             }
       
   183 
       
   184             unicodeFail = jump();
       
   185             isAscii.link(this);
       
   186         }
       
   187 
       
   188         if (charClass->m_ranges.size()) {
       
   189             unsigned matchIndex = 0;
       
   190             JumpList failures; 
       
   191             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
       
   192             while (matchIndex < charClass->m_matches.size())
       
   193                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
       
   194 
       
   195             failures.link(this);
       
   196         } else if (charClass->m_matches.size()) {
       
   197             // optimization: gather 'a','A' etc back together, can mask & test once.
       
   198             Vector<char> matchesAZaz;
       
   199 
       
   200             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
       
   201                 char ch = charClass->m_matches[i];
       
   202                 if (m_pattern.m_ignoreCase) {
       
   203                     if (isASCIILower(ch)) {
       
   204                         matchesAZaz.append(ch);
       
   205                         continue;
       
   206                     }
       
   207                     if (isASCIIUpper(ch))
       
   208                         continue;
       
   209                 }
       
   210                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
       
   211             }
       
   212 
       
   213             if (unsigned countAZaz = matchesAZaz.size()) {
       
   214                 or32(Imm32(32), character);
       
   215                 for (unsigned i = 0; i < countAZaz; ++i)
       
   216                     matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
       
   217             }
       
   218         }
       
   219 
       
   220         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
       
   221             unicodeFail.link(this);
       
   222     }
       
   223 
       
   224     // Jumps if input not available; will have (incorrectly) incremented already!
       
   225     Jump jumpIfNoAvailableInput(unsigned countToCheck)
       
   226     {
       
   227         add32(Imm32(countToCheck), index);
       
   228         return branch32(Above, index, length);
       
   229     }
       
   230 
       
   231     Jump jumpIfAvailableInput(unsigned countToCheck)
       
   232     {
       
   233         add32(Imm32(countToCheck), index);
       
   234         return branch32(BelowOrEqual, index, length);
       
   235     }
       
   236 
       
   237     Jump checkInput()
       
   238     {
       
   239         return branch32(BelowOrEqual, index, length);
       
   240     }
       
   241 
       
   242     Jump atEndOfInput()
       
   243     {
       
   244         return branch32(Equal, index, length);
       
   245     }
       
   246 
       
   247     Jump notAtEndOfInput()
       
   248     {
       
   249         return branch32(NotEqual, index, length);
       
   250     }
       
   251 
       
   252     Jump jumpIfCharEquals(UChar ch, int inputPosition)
       
   253     {
       
   254         return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
       
   255     }
       
   256 
       
   257     Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
       
   258     {
       
   259         return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
       
   260     }
       
   261 
       
   262     void readCharacter(int inputPosition, RegisterID reg)
       
   263     {
       
   264         load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
       
   265     }
       
   266 
       
   267     void storeToFrame(RegisterID reg, unsigned frameLocation)
       
   268     {
       
   269         poke(reg, frameLocation);
       
   270     }
       
   271 
       
   272     void storeToFrame(Imm32 imm, unsigned frameLocation)
       
   273     {
       
   274         poke(imm, frameLocation);
       
   275     }
       
   276 
       
   277     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
       
   278     {
       
   279         return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
       
   280     }
       
   281 
       
   282     void loadFromFrame(unsigned frameLocation, RegisterID reg)
       
   283     {
       
   284         peek(reg, frameLocation);
       
   285     }
       
   286 
       
   287     void loadFromFrameAndJump(unsigned frameLocation)
       
   288     {
       
   289         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
       
   290     }
       
   291 
       
   292     struct AlternativeBacktrackRecord {
       
   293         DataLabelPtr dataLabel;
       
   294         Label backtrackLocation;
       
   295 
       
   296         AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
       
   297             : dataLabel(dataLabel)
       
   298             , backtrackLocation(backtrackLocation)
       
   299         {
       
   300         }
       
   301     };
       
   302 
       
   303     struct TermGenerationState {
       
   304         TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
       
   305             : disjunction(disjunction)
       
   306             , checkedTotal(checkedTotal)
       
   307         {
       
   308         }
       
   309 
       
   310         void resetAlternative()
       
   311         {
       
   312             isBackTrackGenerated = false;
       
   313             alt = 0;
       
   314         }
       
   315         bool alternativeValid()
       
   316         {
       
   317             return alt < disjunction->m_alternatives.size();
       
   318         }
       
   319         void nextAlternative()
       
   320         {
       
   321             ++alt;
       
   322         }
       
   323         PatternAlternative* alternative()
       
   324         {
       
   325             return disjunction->m_alternatives[alt];
       
   326         }
       
   327 
       
   328         void resetTerm()
       
   329         {
       
   330             ASSERT(alternativeValid());
       
   331             t = 0;
       
   332         }
       
   333         bool termValid()
       
   334         {
       
   335             ASSERT(alternativeValid());
       
   336             return t < alternative()->m_terms.size();
       
   337         }
       
   338         void nextTerm()
       
   339         {
       
   340             ASSERT(alternativeValid());
       
   341             ++t;
       
   342         }
       
   343         PatternTerm& term()
       
   344         {
       
   345             ASSERT(alternativeValid());
       
   346             return alternative()->m_terms[t];
       
   347         }
       
   348         bool isLastTerm()
       
   349         {
       
   350             ASSERT(alternativeValid());
       
   351             return (t + 1) == alternative()->m_terms.size();
       
   352         }
       
   353         bool isMainDisjunction()
       
   354         {
       
   355             return !disjunction->m_parent;
       
   356         }
       
   357 
       
   358         PatternTerm& lookaheadTerm()
       
   359         {
       
   360             ASSERT(alternativeValid());
       
   361             ASSERT((t + 1) < alternative()->m_terms.size());
       
   362             return alternative()->m_terms[t + 1];
       
   363         }
       
   364         bool isSinglePatternCharacterLookaheadTerm()
       
   365         {
       
   366             ASSERT(alternativeValid());
       
   367             return ((t + 1) < alternative()->m_terms.size())
       
   368                 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
       
   369                 && (lookaheadTerm().quantityType == QuantifierFixedCount)
       
   370                 && (lookaheadTerm().quantityCount == 1);
       
   371         }
       
   372 
       
   373         int inputOffset()
       
   374         {
       
   375             return term().inputPosition - checkedTotal;
       
   376         }
       
   377 
       
   378         void jumpToBacktrack(Jump jump, MacroAssembler* masm)
       
   379         {
       
   380             if (isBackTrackGenerated)
       
   381                 jump.linkTo(backtrackLabel, masm);
       
   382             else
       
   383                 backTrackJumps.append(jump);
       
   384         }
       
   385         void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
       
   386         {
       
   387             if (isBackTrackGenerated)
       
   388                 jumps.linkTo(backtrackLabel, masm);
       
   389             else
       
   390                 backTrackJumps.append(jumps);
       
   391         }
       
   392         bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
       
   393         {
       
   394             if (isBackTrackGenerated) {
       
   395                 masm->jump(backtrackLabel);
       
   396                 return true;
       
   397             }
       
   398             return false;
       
   399         }
       
   400         void addBacktrackJump(Jump jump)
       
   401         {
       
   402             backTrackJumps.append(jump);
       
   403         }
       
   404         void setBacktrackGenerated(Label label)
       
   405         {
       
   406             isBackTrackGenerated = true;
       
   407             backtrackLabel = label;
       
   408         }
       
   409         void linkAlternativeBacktracks(MacroAssembler* masm)
       
   410         {
       
   411             isBackTrackGenerated = false;
       
   412             backTrackJumps.link(masm);
       
   413         }
       
   414         void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
       
   415         {
       
   416             isBackTrackGenerated = false;
       
   417             backTrackJumps.linkTo(label, masm);
       
   418         }
       
   419         void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
       
   420         {
       
   421             jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
       
   422             if (nestedParenthesesState.isBackTrackGenerated)
       
   423                 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
       
   424         }
       
   425 
       
   426         PatternDisjunction* disjunction;
       
   427         int checkedTotal;
       
   428     private:
       
   429         unsigned alt;
       
   430         unsigned t;
       
   431         JumpList backTrackJumps;
       
   432         Label backtrackLabel;
       
   433         bool isBackTrackGenerated;
       
   434     };
       
   435 
       
   436     void generateAssertionBOL(TermGenerationState& state)
       
   437     {
       
   438         PatternTerm& term = state.term();
       
   439 
       
   440         if (m_pattern.m_multiline) {
       
   441             const RegisterID character = regT0;
       
   442 
       
   443             JumpList matchDest;
       
   444             if (!term.inputPosition)
       
   445                 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
       
   446 
       
   447             readCharacter(state.inputOffset() - 1, character);
       
   448             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
       
   449             state.jumpToBacktrack(jump(), this);
       
   450 
       
   451             matchDest.link(this);
       
   452         } else {
       
   453             // Erk, really should poison out these alternatives early. :-/
       
   454             if (term.inputPosition)
       
   455                 state.jumpToBacktrack(jump(), this);
       
   456             else
       
   457                 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
       
   458         }
       
   459     }
       
   460 
       
   461     void generateAssertionEOL(TermGenerationState& state)
       
   462     {
       
   463         PatternTerm& term = state.term();
       
   464 
       
   465         if (m_pattern.m_multiline) {
       
   466             const RegisterID character = regT0;
       
   467 
       
   468             JumpList matchDest;
       
   469             if (term.inputPosition == state.checkedTotal)
       
   470                 matchDest.append(atEndOfInput());
       
   471 
       
   472             readCharacter(state.inputOffset(), character);
       
   473             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
       
   474             state.jumpToBacktrack(jump(), this);
       
   475 
       
   476             matchDest.link(this);
       
   477         } else {
       
   478             if (term.inputPosition == state.checkedTotal)
       
   479                 state.jumpToBacktrack(notAtEndOfInput(), this);
       
   480             // Erk, really should poison out these alternatives early. :-/
       
   481             else
       
   482                 state.jumpToBacktrack(jump(), this);
       
   483         }
       
   484     }
       
   485 
       
   486     // Also falls though on nextIsNotWordChar.
       
   487     void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
       
   488     {
       
   489         const RegisterID character = regT0;
       
   490         PatternTerm& term = state.term();
       
   491 
       
   492         if (term.inputPosition == state.checkedTotal)
       
   493             nextIsNotWordChar.append(atEndOfInput());
       
   494 
       
   495         readCharacter(state.inputOffset(), character);
       
   496         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
       
   497     }
       
   498 
       
   499     void generateAssertionWordBoundary(TermGenerationState& state)
       
   500     {
       
   501         const RegisterID character = regT0;
       
   502         PatternTerm& term = state.term();
       
   503 
       
   504         Jump atBegin;
       
   505         JumpList matchDest;
       
   506         if (!term.inputPosition)
       
   507             atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
       
   508         readCharacter(state.inputOffset() - 1, character);
       
   509         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
       
   510         if (!term.inputPosition)
       
   511             atBegin.link(this);
       
   512 
       
   513         // We fall through to here if the last character was not a wordchar.
       
   514         JumpList nonWordCharThenWordChar;
       
   515         JumpList nonWordCharThenNonWordChar;
       
   516         if (term.invertOrCapture) {
       
   517             matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
       
   518             nonWordCharThenWordChar.append(jump());
       
   519         } else {
       
   520             matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
       
   521             nonWordCharThenNonWordChar.append(jump());
       
   522         }
       
   523         state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
       
   524 
       
   525         // We jump here if the last character was a wordchar.
       
   526         matchDest.link(this);
       
   527         JumpList wordCharThenWordChar;
       
   528         JumpList wordCharThenNonWordChar;
       
   529         if (term.invertOrCapture) {
       
   530             matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
       
   531             wordCharThenWordChar.append(jump());
       
   532         } else {
       
   533             matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
       
   534             // This can fall-though!
       
   535         }
       
   536 
       
   537         state.jumpToBacktrack(wordCharThenWordChar, this);
       
   538         
       
   539         nonWordCharThenWordChar.link(this);
       
   540         wordCharThenNonWordChar.link(this);
       
   541     }
       
   542 
       
   543     void generatePatternCharacterSingle(TermGenerationState& state)
       
   544     {
       
   545         const RegisterID character = regT0;
       
   546         UChar ch = state.term().patternCharacter;
       
   547 
       
   548         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
       
   549             readCharacter(state.inputOffset(), character);
       
   550             or32(Imm32(32), character);
       
   551             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
       
   552         } else {
       
   553             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
       
   554             state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
       
   555         }
       
   556     }
       
   557 
       
   558     void generatePatternCharacterPair(TermGenerationState& state)
       
   559     {
       
   560         const RegisterID character = regT0;
       
   561         UChar ch1 = state.term().patternCharacter;
       
   562         UChar ch2 = state.lookaheadTerm().patternCharacter;
       
   563 
       
   564         int mask = 0;
       
   565         int chPair = ch1 | (ch2 << 16);
       
   566         
       
   567         if (m_pattern.m_ignoreCase) {
       
   568             if (isASCIIAlpha(ch1))
       
   569                 mask |= 32;
       
   570             if (isASCIIAlpha(ch2))
       
   571                 mask |= 32 << 16;
       
   572         }
       
   573 
       
   574         if (mask) {
       
   575             load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
       
   576             or32(Imm32(mask), character);
       
   577             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
       
   578         } else
       
   579             state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
       
   580     }
       
   581 
       
   582     void generatePatternCharacterFixed(TermGenerationState& state)
       
   583     {
       
   584         const RegisterID character = regT0;
       
   585         const RegisterID countRegister = regT1;
       
   586         PatternTerm& term = state.term();
       
   587         UChar ch = term.patternCharacter;
       
   588 
       
   589         move(index, countRegister);
       
   590         sub32(Imm32(term.quantityCount), countRegister);
       
   591 
       
   592         Label loop(this);
       
   593         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
       
   594             load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
       
   595             or32(Imm32(32), character);
       
   596             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
       
   597         } else {
       
   598             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
       
   599             state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
       
   600         }
       
   601         add32(Imm32(1), countRegister);
       
   602         branch32(NotEqual, countRegister, index).linkTo(loop, this);
       
   603     }
       
   604 
       
   605     void generatePatternCharacterGreedy(TermGenerationState& state)
       
   606     {
       
   607         const RegisterID character = regT0;
       
   608         const RegisterID countRegister = regT1;
       
   609         PatternTerm& term = state.term();
       
   610         UChar ch = term.patternCharacter;
       
   611     
       
   612         move(Imm32(0), countRegister);
       
   613 
       
   614         JumpList failures;
       
   615         Label loop(this);
       
   616         failures.append(atEndOfInput());
       
   617         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
       
   618             readCharacter(state.inputOffset(), character);
       
   619             or32(Imm32(32), character);
       
   620             failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
       
   621         } else {
       
   622             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
       
   623             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
       
   624         }
       
   625 
       
   626         add32(Imm32(1), countRegister);
       
   627         add32(Imm32(1), index);
       
   628         if (term.quantityCount != 0xffffffff) {
       
   629             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
       
   630             failures.append(jump());
       
   631         } else
       
   632             jump(loop);
       
   633 
       
   634         Label backtrackBegin(this);
       
   635         loadFromFrame(term.frameLocation, countRegister);
       
   636         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
       
   637         sub32(Imm32(1), countRegister);
       
   638         sub32(Imm32(1), index);
       
   639 
       
   640         failures.link(this);
       
   641 
       
   642         storeToFrame(countRegister, term.frameLocation);
       
   643 
       
   644         state.setBacktrackGenerated(backtrackBegin);
       
   645     }
       
   646 
       
   647     void generatePatternCharacterNonGreedy(TermGenerationState& state)
       
   648     {
       
   649         const RegisterID character = regT0;
       
   650         const RegisterID countRegister = regT1;
       
   651         PatternTerm& term = state.term();
       
   652         UChar ch = term.patternCharacter;
       
   653     
       
   654         move(Imm32(0), countRegister);
       
   655 
       
   656         Jump firstTimeDoNothing = jump();
       
   657 
       
   658         Label hardFail(this);
       
   659         sub32(countRegister, index);
       
   660         state.jumpToBacktrack(jump(), this);
       
   661 
       
   662         Label backtrackBegin(this);
       
   663         loadFromFrame(term.frameLocation, countRegister);
       
   664 
       
   665         atEndOfInput().linkTo(hardFail, this);
       
   666         if (term.quantityCount != 0xffffffff)
       
   667             branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
       
   668         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
       
   669             readCharacter(state.inputOffset(), character);
       
   670             or32(Imm32(32), character);
       
   671             branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
       
   672         } else {
       
   673             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
       
   674             jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
       
   675         }
       
   676 
       
   677         add32(Imm32(1), countRegister);
       
   678         add32(Imm32(1), index);
       
   679 
       
   680         firstTimeDoNothing.link(this);
       
   681         storeToFrame(countRegister, term.frameLocation);
       
   682 
       
   683         state.setBacktrackGenerated(backtrackBegin);
       
   684     }
       
   685 
       
   686     void generateCharacterClassSingle(TermGenerationState& state)
       
   687     {
       
   688         const RegisterID character = regT0;
       
   689         PatternTerm& term = state.term();
       
   690 
       
   691         JumpList matchDest;
       
   692         readCharacter(state.inputOffset(), character);
       
   693         matchCharacterClass(character, matchDest, term.characterClass);
       
   694 
       
   695         if (term.invertOrCapture)
       
   696             state.jumpToBacktrack(matchDest, this);
       
   697         else {
       
   698             state.jumpToBacktrack(jump(), this);
       
   699             matchDest.link(this);
       
   700         }
       
   701     }
       
   702 
       
   703     void generateCharacterClassFixed(TermGenerationState& state)
       
   704     {
       
   705         const RegisterID character = regT0;
       
   706         const RegisterID countRegister = regT1;
       
   707         PatternTerm& term = state.term();
       
   708 
       
   709         move(index, countRegister);
       
   710         sub32(Imm32(term.quantityCount), countRegister);
       
   711 
       
   712         Label loop(this);
       
   713         JumpList matchDest;
       
   714         load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
       
   715         matchCharacterClass(character, matchDest, term.characterClass);
       
   716 
       
   717         if (term.invertOrCapture)
       
   718             state.jumpToBacktrack(matchDest, this);
       
   719         else {
       
   720             state.jumpToBacktrack(jump(), this);
       
   721             matchDest.link(this);
       
   722         }
       
   723 
       
   724         add32(Imm32(1), countRegister);
       
   725         branch32(NotEqual, countRegister, index).linkTo(loop, this);
       
   726     }
       
   727 
       
   728     void generateCharacterClassGreedy(TermGenerationState& state)
       
   729     {
       
   730         const RegisterID character = regT0;
       
   731         const RegisterID countRegister = regT1;
       
   732         PatternTerm& term = state.term();
       
   733     
       
   734         move(Imm32(0), countRegister);
       
   735 
       
   736         JumpList failures;
       
   737         Label loop(this);
       
   738         failures.append(atEndOfInput());
       
   739 
       
   740         if (term.invertOrCapture) {
       
   741             readCharacter(state.inputOffset(), character);
       
   742             matchCharacterClass(character, failures, term.characterClass);
       
   743         } else {
       
   744             JumpList matchDest;
       
   745             readCharacter(state.inputOffset(), character);
       
   746             matchCharacterClass(character, matchDest, term.characterClass);
       
   747             failures.append(jump());
       
   748             matchDest.link(this);
       
   749         }
       
   750 
       
   751         add32(Imm32(1), countRegister);
       
   752         add32(Imm32(1), index);
       
   753         if (term.quantityCount != 0xffffffff) {
       
   754             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
       
   755             failures.append(jump());
       
   756         } else
       
   757             jump(loop);
       
   758 
       
   759         Label backtrackBegin(this);
       
   760         loadFromFrame(term.frameLocation, countRegister);
       
   761         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
       
   762         sub32(Imm32(1), countRegister);
       
   763         sub32(Imm32(1), index);
       
   764 
       
   765         failures.link(this);
       
   766 
       
   767         storeToFrame(countRegister, term.frameLocation);
       
   768 
       
   769         state.setBacktrackGenerated(backtrackBegin);
       
   770     }
       
   771 
       
   772     void generateCharacterClassNonGreedy(TermGenerationState& state)
       
   773     {
       
   774         const RegisterID character = regT0;
       
   775         const RegisterID countRegister = regT1;
       
   776         PatternTerm& term = state.term();
       
   777     
       
   778         move(Imm32(0), countRegister);
       
   779 
       
   780         Jump firstTimeDoNothing = jump();
       
   781 
       
   782         Label hardFail(this);
       
   783         sub32(countRegister, index);
       
   784         state.jumpToBacktrack(jump(), this);
       
   785 
       
   786         Label backtrackBegin(this);
       
   787         loadFromFrame(term.frameLocation, countRegister);
       
   788 
       
   789         atEndOfInput().linkTo(hardFail, this);
       
   790         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
       
   791 
       
   792         JumpList matchDest;
       
   793         readCharacter(state.inputOffset(), character);
       
   794         matchCharacterClass(character, matchDest, term.characterClass);
       
   795 
       
   796         if (term.invertOrCapture)
       
   797             matchDest.linkTo(hardFail, this);
       
   798         else {
       
   799             jump(hardFail);
       
   800             matchDest.link(this);
       
   801         }
       
   802 
       
   803         add32(Imm32(1), countRegister);
       
   804         add32(Imm32(1), index);
       
   805 
       
   806         firstTimeDoNothing.link(this);
       
   807         storeToFrame(countRegister, term.frameLocation);
       
   808 
       
   809         state.setBacktrackGenerated(backtrackBegin);
       
   810     }
       
   811 
       
   812     void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
       
   813     {
       
   814         ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
       
   815         ASSERT(parenthesesTerm.quantityCount == 1);
       
   816     
       
   817         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
       
   818         unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
       
   819 
       
   820         if (disjunction->m_alternatives.size() == 1) {
       
   821             state.resetAlternative();
       
   822             ASSERT(state.alternativeValid());
       
   823             PatternAlternative* alternative = state.alternative();
       
   824             optimizeAlternative(alternative);
       
   825 
       
   826             int countToCheck = alternative->m_minimumSize - preCheckedCount;
       
   827             if (countToCheck) {
       
   828                 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
       
   829 
       
   830                 // FIXME: This is quite horrible.  The call to 'plantJumpToBacktrackIfExists'
       
   831                 // will be forced to always trampoline into here, just to decrement the index.
       
   832                 // Ick. 
       
   833                 Jump skip = jump();
       
   834 
       
   835                 Label backtrackBegin(this);
       
   836                 sub32(Imm32(countToCheck), index);
       
   837                 state.addBacktrackJump(jump());
       
   838                 
       
   839                 skip.link(this);
       
   840 
       
   841                 state.setBacktrackGenerated(backtrackBegin);
       
   842 
       
   843                 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
       
   844                 state.checkedTotal += countToCheck;
       
   845             }
       
   846 
       
   847             for (state.resetTerm(); state.termValid(); state.nextTerm())
       
   848                 generateTerm(state);
       
   849 
       
   850             state.checkedTotal -= countToCheck;
       
   851         } else {
       
   852             JumpList successes;
       
   853 
       
   854             for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
       
   855 
       
   856                 PatternAlternative* alternative = state.alternative();
       
   857                 optimizeAlternative(alternative);
       
   858 
       
   859                 ASSERT(alternative->m_minimumSize >= preCheckedCount);
       
   860                 int countToCheck = alternative->m_minimumSize - preCheckedCount;
       
   861                 if (countToCheck) {
       
   862                     state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
       
   863                     state.checkedTotal += countToCheck;
       
   864                 }
       
   865 
       
   866                 for (state.resetTerm(); state.termValid(); state.nextTerm())
       
   867                     generateTerm(state);
       
   868 
       
   869                 // Matched an alternative.
       
   870                 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
       
   871                 successes.append(jump());
       
   872 
       
   873                 // Alternative did not match.
       
   874                 Label backtrackLocation(this);
       
   875                 
       
   876                 // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
       
   877                 state.plantJumpToBacktrackIfExists(this);
       
   878                 
       
   879                 state.linkAlternativeBacktracks(this);
       
   880 
       
   881                 if (countToCheck) {
       
   882                     sub32(Imm32(countToCheck), index);
       
   883                     state.checkedTotal -= countToCheck;
       
   884                 }
       
   885 
       
   886                 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
       
   887             }
       
   888             // We fall through to here when the last alternative fails.
       
   889             // Add a backtrack out of here for the parenthese handling code to link up.
       
   890             state.addBacktrackJump(jump());
       
   891 
       
   892             // Generate a trampoline for the parens code to backtrack to, to retry the
       
   893             // next alternative.
       
   894             state.setBacktrackGenerated(label());
       
   895             loadFromFrameAndJump(alternativeFrameLocation);
       
   896 
       
   897             // FIXME: both of the above hooks are a little inefficient, in that you
       
   898             // may end up trampolining here, just to trampoline back out to the
       
   899             // parentheses code, or vice versa.  We can probably eliminate a jump
       
   900             // by restructuring, but coding this way for now for simplicity during
       
   901             // development.
       
   902 
       
   903             successes.link(this);
       
   904         }
       
   905     }
       
   906 
       
   907     void generateParenthesesSingle(TermGenerationState& state)
       
   908     {
       
   909         const RegisterID indexTemporary = regT0;
       
   910         PatternTerm& term = state.term();
       
   911         PatternDisjunction* disjunction = term.parentheses.disjunction;
       
   912         ASSERT(term.quantityCount == 1);
       
   913 
       
   914         if (term.parentheses.isCopy) {
       
   915             m_shouldFallBack = true;
       
   916             return;
       
   917         }
       
   918 
       
   919         unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
       
   920 
       
   921         unsigned parenthesesFrameLocation = term.frameLocation;
       
   922         unsigned alternativeFrameLocation = parenthesesFrameLocation;
       
   923         if (term.quantityType != QuantifierFixedCount)
       
   924             alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
       
   925 
       
   926         // optimized case - no capture & no quantifier can be handled in a light-weight manner.
       
   927         if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
       
   928             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
       
   929             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
       
   930             // this expects that any backtracks back out of the parentheses will be in the
       
   931             // parenthesesState's backTrackJumps vector, and that if they need backtracking
       
   932             // they will have set an entry point on the parenthesesState's backtrackLabel.
       
   933             state.propagateBacktrackingFrom(parenthesesState, this);
       
   934         } else {
       
   935             Jump nonGreedySkipParentheses;
       
   936             Label nonGreedyTryParentheses;
       
   937             if (term.quantityType == QuantifierGreedy)
       
   938                 storeToFrame(Imm32(1), parenthesesFrameLocation);
       
   939             else if (term.quantityType == QuantifierNonGreedy) {
       
   940                 storeToFrame(Imm32(0), parenthesesFrameLocation);
       
   941                 nonGreedySkipParentheses = jump();
       
   942                 nonGreedyTryParentheses = label();
       
   943                 storeToFrame(Imm32(1), parenthesesFrameLocation);
       
   944             }
       
   945 
       
   946             // store the match start index
       
   947             if (term.invertOrCapture) {
       
   948                 int inputOffset = state.inputOffset() - preCheckedCount;
       
   949                 if (inputOffset) {
       
   950                     move(index, indexTemporary);
       
   951                     add32(Imm32(inputOffset), indexTemporary);
       
   952                     store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
       
   953                 } else
       
   954                     store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
       
   955             }
       
   956 
       
   957             // generate the body of the parentheses
       
   958             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
       
   959             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
       
   960 
       
   961             // store the match end index
       
   962             if (term.invertOrCapture) {
       
   963                 int inputOffset = state.inputOffset();
       
   964                 if (inputOffset) {
       
   965                     move(index, indexTemporary);
       
   966                     add32(Imm32(state.inputOffset()), indexTemporary);
       
   967                     store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
       
   968                 } else
       
   969                     store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
       
   970             }
       
   971             Jump success = jump();
       
   972 
       
   973             // A failure AFTER the parens jumps here
       
   974             Label backtrackFromAfterParens(this);
       
   975 
       
   976             if (term.quantityType == QuantifierGreedy) {
       
   977                 // If this is zero we have now tested with both with and without the parens.
       
   978                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
       
   979                 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
       
   980             } else if (term.quantityType == QuantifierNonGreedy) {
       
   981                 // If this is zero we have now tested with both with and without the parens.
       
   982                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
       
   983                 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
       
   984             }
       
   985 
       
   986             parenthesesState.plantJumpToBacktrackIfExists(this);
       
   987             // A failure WITHIN the parens jumps here
       
   988             parenthesesState.linkAlternativeBacktracks(this);
       
   989             if (term.invertOrCapture) {
       
   990                 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
       
   991                 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
       
   992             }
       
   993 
       
   994             if (term.quantityType == QuantifierGreedy)
       
   995                 storeToFrame(Imm32(0), parenthesesFrameLocation);
       
   996             else
       
   997                 state.jumpToBacktrack(jump(), this);
       
   998 
       
   999             state.setBacktrackGenerated(backtrackFromAfterParens);
       
  1000             if (term.quantityType == QuantifierNonGreedy)
       
  1001                 nonGreedySkipParentheses.link(this);
       
  1002             success.link(this);
       
  1003         }
       
  1004     }
       
  1005 
       
  1006     void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
       
  1007     {
       
  1008         PatternTerm& parenthesesTerm = state.term();
       
  1009         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
       
  1010         ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
       
  1011         ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
       
  1012 
       
  1013         // Capturing not yet implemented!
       
  1014         if (parenthesesTerm.invertOrCapture) {
       
  1015             m_shouldFallBack = true;
       
  1016             return;
       
  1017         }
       
  1018 
       
  1019         // Quantification limit not yet implemented!
       
  1020         if (parenthesesTerm.quantityCount != 0xffffffff) {
       
  1021             m_shouldFallBack = true;
       
  1022             return;
       
  1023         }
       
  1024 
       
  1025         // Need to reset nested subpatterns between iterations...
       
  1026         // for the minute this crude check rejects all patterns with any subpatterns!
       
  1027         if (m_pattern.m_numSubpatterns) {
       
  1028             m_shouldFallBack = true;
       
  1029             return;
       
  1030         }
       
  1031 
       
  1032         TermGenerationState parenthesesState(disjunction, state.checkedTotal);
       
  1033 
       
  1034         Label matchAgain(this);
       
  1035         for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
       
  1036 
       
  1037             PatternAlternative* alternative = parenthesesState.alternative();
       
  1038             optimizeAlternative(alternative);
       
  1039 
       
  1040             int countToCheck = alternative->m_minimumSize;
       
  1041             if (countToCheck) {
       
  1042                 parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
       
  1043                 parenthesesState.checkedTotal += countToCheck;
       
  1044             }
       
  1045 
       
  1046             for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
       
  1047                 generateTerm(parenthesesState);
       
  1048 
       
  1049             // If we get here, we matched! Limit not yet supported, so just try to match more!
       
  1050             jump(matchAgain);
       
  1051             
       
  1052             parenthesesState.linkAlternativeBacktracks(this);
       
  1053             // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
       
  1054 
       
  1055             if (countToCheck) {
       
  1056                 sub32(Imm32(countToCheck), index);
       
  1057                 parenthesesState.checkedTotal -= countToCheck;
       
  1058             }
       
  1059         }
       
  1060 
       
  1061         // If the last alternative falls through to here, we have a failed match...
       
  1062         // Which means that we match whatever we have matched up to this point (even if nothing).
       
  1063     }
       
  1064 
       
  1065     void generateParentheticalAssertion(TermGenerationState& state)
       
  1066     {
       
  1067         PatternTerm& term = state.term();
       
  1068         PatternDisjunction* disjunction = term.parentheses.disjunction;
       
  1069         ASSERT(term.quantityCount == 1);
       
  1070         ASSERT(term.quantityType == QuantifierFixedCount);
       
  1071 
       
  1072         unsigned parenthesesFrameLocation = term.frameLocation;
       
  1073         unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
       
  1074 
       
  1075         int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
       
  1076 
       
  1077         if (term.invertOrCapture) {
       
  1078             // Inverted case
       
  1079             storeToFrame(index, parenthesesFrameLocation);
       
  1080 
       
  1081             state.checkedTotal -= countCheckedAfterAssertion;
       
  1082             if (countCheckedAfterAssertion)
       
  1083                 sub32(Imm32(countCheckedAfterAssertion), index);
       
  1084 
       
  1085             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
       
  1086             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
       
  1087             // Success! - which means - Fail!
       
  1088             loadFromFrame(parenthesesFrameLocation, index);
       
  1089             state.jumpToBacktrack(jump(), this);
       
  1090 
       
  1091             // And fail means success.
       
  1092             parenthesesState.linkAlternativeBacktracks(this);
       
  1093             loadFromFrame(parenthesesFrameLocation, index);
       
  1094 
       
  1095             state.checkedTotal += countCheckedAfterAssertion;
       
  1096         } else {
       
  1097             // Normal case
       
  1098             storeToFrame(index, parenthesesFrameLocation);
       
  1099 
       
  1100             state.checkedTotal -= countCheckedAfterAssertion;
       
  1101             if (countCheckedAfterAssertion)
       
  1102                 sub32(Imm32(countCheckedAfterAssertion), index);
       
  1103 
       
  1104             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
       
  1105             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
       
  1106             // Success! - which means - Success!
       
  1107             loadFromFrame(parenthesesFrameLocation, index);
       
  1108             Jump success = jump();
       
  1109 
       
  1110             parenthesesState.linkAlternativeBacktracks(this);
       
  1111             loadFromFrame(parenthesesFrameLocation, index);
       
  1112             state.jumpToBacktrack(jump(), this);
       
  1113 
       
  1114             success.link(this);
       
  1115 
       
  1116             state.checkedTotal += countCheckedAfterAssertion;
       
  1117         }
       
  1118     }
       
  1119 
       
  1120     void generateTerm(TermGenerationState& state)
       
  1121     {
       
  1122         PatternTerm& term = state.term();
       
  1123 
       
  1124         switch (term.type) {
       
  1125         case PatternTerm::TypeAssertionBOL:
       
  1126             generateAssertionBOL(state);
       
  1127             break;
       
  1128         
       
  1129         case PatternTerm::TypeAssertionEOL:
       
  1130             generateAssertionEOL(state);
       
  1131             break;
       
  1132         
       
  1133         case PatternTerm::TypeAssertionWordBoundary:
       
  1134             generateAssertionWordBoundary(state);
       
  1135             break;
       
  1136         
       
  1137         case PatternTerm::TypePatternCharacter:
       
  1138             switch (term.quantityType) {
       
  1139             case QuantifierFixedCount:
       
  1140                 if (term.quantityCount == 1) {
       
  1141                     if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
       
  1142                         generatePatternCharacterPair(state);
       
  1143                         state.nextTerm();
       
  1144                     } else
       
  1145                         generatePatternCharacterSingle(state);
       
  1146                 } else
       
  1147                     generatePatternCharacterFixed(state);
       
  1148                 break;
       
  1149             case QuantifierGreedy:
       
  1150                 generatePatternCharacterGreedy(state);
       
  1151                 break;
       
  1152             case QuantifierNonGreedy:
       
  1153                 generatePatternCharacterNonGreedy(state);
       
  1154                 break;
       
  1155             }
       
  1156             break;
       
  1157 
       
  1158         case PatternTerm::TypeCharacterClass:
       
  1159             switch (term.quantityType) {
       
  1160             case QuantifierFixedCount:
       
  1161                 if (term.quantityCount == 1)
       
  1162                     generateCharacterClassSingle(state);
       
  1163                 else
       
  1164                     generateCharacterClassFixed(state);
       
  1165                 break;
       
  1166             case QuantifierGreedy:
       
  1167                 generateCharacterClassGreedy(state);
       
  1168                 break;
       
  1169             case QuantifierNonGreedy:
       
  1170                 generateCharacterClassNonGreedy(state);
       
  1171                 break;
       
  1172             }
       
  1173             break;
       
  1174 
       
  1175         case PatternTerm::TypeBackReference:
       
  1176             m_shouldFallBack = true;
       
  1177             break;
       
  1178 
       
  1179         case PatternTerm::TypeForwardReference:
       
  1180             break;
       
  1181 
       
  1182         case PatternTerm::TypeParenthesesSubpattern:
       
  1183             if (term.quantityCount == 1) {
       
  1184                 generateParenthesesSingle(state);
       
  1185                 break;
       
  1186             } else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction?
       
  1187                 // If this has a greedy quantifier, then it will never need to backtrack!
       
  1188                 if (term.quantityType == QuantifierGreedy) {
       
  1189                     generateParenthesesGreedyNoBacktrack(state);
       
  1190                     break;
       
  1191                 }
       
  1192             }
       
  1193             m_shouldFallBack = true;
       
  1194             break;
       
  1195 
       
  1196         case PatternTerm::TypeParentheticalAssertion:
       
  1197             generateParentheticalAssertion(state);
       
  1198             break;
       
  1199         }
       
  1200     }
       
  1201 
       
  1202     void generateDisjunction(PatternDisjunction* disjunction)
       
  1203     {
       
  1204         TermGenerationState state(disjunction, 0);
       
  1205         state.resetAlternative();
       
  1206 
       
  1207         // Plant a check to see if there is sufficient input available to run the first alternative.
       
  1208         // Jumping back to the label 'firstAlternative' will get to this check, jumping to
       
  1209         // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
       
  1210         // skipped this check.
       
  1211 
       
  1212         Label firstAlternative(this);
       
  1213 
       
  1214         // check availability for the next alternative
       
  1215         int countCheckedForCurrentAlternative = 0;
       
  1216         int countToCheckForFirstAlternative = 0;
       
  1217         bool hasShorterAlternatives = false;
       
  1218         JumpList notEnoughInputForPreviousAlternative;
       
  1219 
       
  1220         if (state.alternativeValid()) {
       
  1221             PatternAlternative* alternative = state.alternative();
       
  1222             countToCheckForFirstAlternative = alternative->m_minimumSize;
       
  1223             state.checkedTotal += countToCheckForFirstAlternative;
       
  1224             if (countToCheckForFirstAlternative)
       
  1225                 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
       
  1226             countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
       
  1227         }
       
  1228 
       
  1229         Label firstAlternativeInputChecked(this);
       
  1230 
       
  1231         while (state.alternativeValid()) {
       
  1232             // Track whether any alternatives are shorter than the first one.
       
  1233             hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
       
  1234 
       
  1235             PatternAlternative* alternative = state.alternative();
       
  1236             optimizeAlternative(alternative);
       
  1237 
       
  1238             for (state.resetTerm(); state.termValid(); state.nextTerm())
       
  1239                 generateTerm(state);
       
  1240 
       
  1241             // If we get here, the alternative matched.
       
  1242             if (m_pattern.m_body->m_callFrameSize)
       
  1243                 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
       
  1244 
       
  1245             ASSERT(index != returnRegister);
       
  1246             if (m_pattern.m_body->m_hasFixedSize) {
       
  1247                 move(index, returnRegister);
       
  1248                 if (alternative->m_minimumSize)
       
  1249                     sub32(Imm32(alternative->m_minimumSize), returnRegister);
       
  1250 
       
  1251                 store32(returnRegister, output);
       
  1252             } else
       
  1253                 load32(Address(output), returnRegister);
       
  1254 
       
  1255             store32(index, Address(output, 4));
       
  1256 
       
  1257             generateReturn();
       
  1258 
       
  1259             state.nextAlternative();
       
  1260 
       
  1261             // if there are any more alternatives, plant the check for input before looping.
       
  1262             if (state.alternativeValid()) {
       
  1263                 PatternAlternative* nextAlternative = state.alternative();
       
  1264                 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
       
  1265 
       
  1266                 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
       
  1267                     // If we get here, there the last input checked failed.
       
  1268                     notEnoughInputForPreviousAlternative.link(this);
       
  1269 
       
  1270                     // Check if sufficent input available to run the next alternative 
       
  1271                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
       
  1272                     // We are now in the correct state to enter the next alternative; this add is only required
       
  1273                     // to mirror and revert operation of the sub32, just below.
       
  1274                     add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
       
  1275 
       
  1276                     // If we get here, there the last input checked passed.
       
  1277                     state.linkAlternativeBacktracks(this);
       
  1278                     // No need to check if we can run the next alternative, since it is shorter -
       
  1279                     // just update index.
       
  1280                     sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
       
  1281                 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
       
  1282                     // If we get here, there the last input checked failed.
       
  1283                     // If there is insufficient input to run the current alternative, and the next alternative is longer,
       
  1284                     // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
       
  1285                     // we had checked.
       
  1286                     notEnoughInputForPreviousAlternative.link(this);
       
  1287                     add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
       
  1288                     notEnoughInputForPreviousAlternative.append(jump());
       
  1289 
       
  1290                     // The next alternative is longer than the current one; check the difference.
       
  1291                     state.linkAlternativeBacktracks(this);
       
  1292                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
       
  1293                 } else { // CASE 3: Both alternatives are the same length.
       
  1294                     ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
       
  1295 
       
  1296                     // If the next alterative is the same length as this one, then no need to check the input -
       
  1297                     // if there was sufficent input to run the current alternative then there is sufficient
       
  1298                     // input to run the next one; if not, there isn't.
       
  1299                     state.linkAlternativeBacktracks(this);
       
  1300                 }
       
  1301 
       
  1302                 state.checkedTotal -= countCheckedForCurrentAlternative;
       
  1303                 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
       
  1304                 state.checkedTotal += countCheckedForCurrentAlternative;
       
  1305             }
       
  1306         }
       
  1307         
       
  1308         // If we get here, all Alternatives failed...
       
  1309 
       
  1310         state.checkedTotal -= countCheckedForCurrentAlternative;
       
  1311 
       
  1312         // How much more input need there be to be able to retry from the first alternative?
       
  1313         // examples:
       
  1314         //   /yarr_jit/ or /wrec|pcre/
       
  1315         //     In these examples we need check for one more input before looping.
       
  1316         //   /yarr_jit|pcre/
       
  1317         //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
       
  1318         //     being four longer than the last alternative checked, and another +1 to effectively move
       
  1319         //     the start position along by one).
       
  1320         //   /yarr|rules/ or /wrec|notsomuch/
       
  1321         //     In these examples, provided that there was sufficient input to have just been matching for
       
  1322         //     the second alternative we can loop without checking for available input (since the second
       
  1323         //     alternative is longer than the first).  In the latter example we need to decrement index
       
  1324         //     (by 4) so the start position is only progressed by 1 from the last iteration.
       
  1325         int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
       
  1326 
       
  1327         // First, deal with the cases where there was sufficient input to try the last alternative.
       
  1328         if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
       
  1329             state.linkAlternativeBacktracks(this);
       
  1330         else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
       
  1331             state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
       
  1332         else { // no need to check the input, but we do have some bookkeeping to do first.
       
  1333             state.linkAlternativeBacktracks(this);
       
  1334 
       
  1335             // Where necessary update our preserved start position.
       
  1336             if (!m_pattern.m_body->m_hasFixedSize) {
       
  1337                 move(index, regT0);
       
  1338                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
       
  1339                 store32(regT0, Address(output));
       
  1340             }
       
  1341 
       
  1342             // Update index if necessary, and loop (without checking).
       
  1343             if (incrementForNextIter)
       
  1344                 add32(Imm32(incrementForNextIter), index);
       
  1345             jump().linkTo(firstAlternativeInputChecked, this);
       
  1346         }
       
  1347 
       
  1348         notEnoughInputForPreviousAlternative.link(this);
       
  1349         // Update our idea of the start position, if we're tracking this.
       
  1350         if (!m_pattern.m_body->m_hasFixedSize) {
       
  1351             if (countCheckedForCurrentAlternative - 1) {
       
  1352                 move(index, regT0);
       
  1353                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
       
  1354                 store32(regT0, Address(output));
       
  1355             } else
       
  1356                 store32(index, Address(output));
       
  1357         }
       
  1358         // Check if there is sufficent input to run the first alternative again.
       
  1359         jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
       
  1360         // No - insufficent input to run the first alteranative, are there any other alternatives we
       
  1361         // might need to check?  If so, the last check will have left the index incremented by
       
  1362         // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
       
  1363         // LESS input is available, to have the effect of just progressing the start position by 1
       
  1364         // from the last iteration.  If this check passes we can just jump up to the check associated
       
  1365         // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
       
  1366         // first alternative again, and this check will fail (otherwise the check planted just above
       
  1367         // here would have passed).  This is a bit sad, however it saves trying to do something more
       
  1368         // complex here in compilation, and in the common case we should end up coallescing the checks.
       
  1369         //
       
  1370         // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
       
  1371         // of the minimum-alternative-lengths.  E.g. if I have two alternatives of length 200 and 150,
       
  1372         // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
       
  1373         // is sufficient input to run either alternative (constantly failing).  If there had been only
       
  1374         // one alternative, or if the shorter alternative had come first, we would have terminated
       
  1375         // immediately. :-/
       
  1376         if (hasShorterAlternatives)
       
  1377             jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
       
  1378         // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
       
  1379         // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
       
  1380         // but since we're about to return a failure this doesn't really matter!)
       
  1381 
       
  1382         if (m_pattern.m_body->m_callFrameSize)
       
  1383             addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
       
  1384 
       
  1385         move(Imm32(-1), returnRegister);
       
  1386 
       
  1387         generateReturn();
       
  1388     }
       
  1389 
       
  1390     void generateEnter()
       
  1391     {
       
  1392 #if CPU(X86_64)
       
  1393         push(X86Registers::ebp);
       
  1394         move(stackPointerRegister, X86Registers::ebp);
       
  1395         push(X86Registers::ebx);
       
  1396 #elif CPU(X86)
       
  1397         push(X86Registers::ebp);
       
  1398         move(stackPointerRegister, X86Registers::ebp);
       
  1399         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
       
  1400         push(X86Registers::ebx);
       
  1401         push(X86Registers::edi);
       
  1402         push(X86Registers::esi);
       
  1403         // load output into edi (2 = saved ebp + return address).
       
  1404     #if COMPILER(MSVC)
       
  1405         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
       
  1406         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
       
  1407         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
       
  1408         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
       
  1409     #else
       
  1410         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
       
  1411     #endif
       
  1412 #elif CPU(ARM)
       
  1413         push(ARMRegisters::r4);
       
  1414         push(ARMRegisters::r5);
       
  1415         push(ARMRegisters::r6);
       
  1416         move(ARMRegisters::r3, output);
       
  1417 #elif CPU(MIPS)
       
  1418         // Do nothing.
       
  1419 #endif
       
  1420     }
       
  1421 
       
  1422     void generateReturn()
       
  1423     {
       
  1424 #if CPU(X86_64)
       
  1425         pop(X86Registers::ebx);
       
  1426         pop(X86Registers::ebp);
       
  1427 #elif CPU(X86)
       
  1428         pop(X86Registers::esi);
       
  1429         pop(X86Registers::edi);
       
  1430         pop(X86Registers::ebx);
       
  1431         pop(X86Registers::ebp);
       
  1432 #elif CPU(ARM)
       
  1433         pop(ARMRegisters::r6);
       
  1434         pop(ARMRegisters::r5);
       
  1435         pop(ARMRegisters::r4);
       
  1436 #elif CPU(MIPS)
       
  1437         // Do nothing
       
  1438 #endif
       
  1439         ret();
       
  1440     }
       
  1441 
       
  1442 public:
       
  1443     RegexGenerator(RegexPattern& pattern)
       
  1444         : m_pattern(pattern)
       
  1445         , m_shouldFallBack(false)
       
  1446     {
       
  1447     }
       
  1448 
       
  1449     void generate()
       
  1450     {
       
  1451         generateEnter();
       
  1452 
       
  1453         if (!m_pattern.m_body->m_hasFixedSize)
       
  1454             store32(index, Address(output));
       
  1455 
       
  1456         if (m_pattern.m_body->m_callFrameSize)
       
  1457             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
       
  1458 
       
  1459         generateDisjunction(m_pattern.m_body);
       
  1460     }
       
  1461 
       
  1462     void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
       
  1463     {
       
  1464         generate();
       
  1465 
       
  1466         LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
       
  1467 
       
  1468         for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
       
  1469             patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
       
  1470 
       
  1471         jitObject.set(patchBuffer.finalizeCode());
       
  1472     }
       
  1473 
       
  1474     bool shouldFallBack()
       
  1475     {
       
  1476         return m_shouldFallBack;
       
  1477     }
       
  1478 
       
  1479 private:
       
  1480     RegexPattern& m_pattern;
       
  1481     bool m_shouldFallBack;
       
  1482     Vector<AlternativeBacktrackRecord> m_backtrackRecords;
       
  1483 };
       
  1484 
       
  1485 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
       
  1486 {
       
  1487     RegexPattern pattern(ignoreCase, multiline);
       
  1488     if ((error = compileRegex(patternString, pattern)))
       
  1489         return;
       
  1490     numSubpatterns = pattern.m_numSubpatterns;
       
  1491 
       
  1492     if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
       
  1493         RegexGenerator generator(pattern);
       
  1494         generator.compile(globalData, jitObject);
       
  1495         if (!generator.shouldFallBack())
       
  1496             return;
       
  1497     }
       
  1498 
       
  1499     JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
       
  1500     JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
       
  1501     jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
       
  1502 }
       
  1503 
       
  1504 }}
       
  1505 
       
  1506 #endif
       
  1507 
       
  1508 
       
  1509 
       
  1510 
       
  1511