imgtools/imglib/boostlibrary/boost/regex/v4/perl_matcher.hpp
changeset 600 6d08f4a05d93
equal deleted inserted replaced
599:fa7a3cc6effd 600:6d08f4a05d93
       
     1 /*
       
     2  *
       
     3  * Copyright (c) 2002
       
     4  * John Maddock
       
     5  *
       
     6  * Use, modification and distribution are subject to the 
       
     7  * Boost Software License, Version 1.0. (See accompanying file 
       
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
       
     9  *
       
    10  */
       
    11 
       
    12 #ifndef BOOST_REGEX_MATCHER_HPP
       
    13 #define BOOST_REGEX_MATCHER_HPP
       
    14 
       
    15 #include <boost/regex/v4/iterator_category.hpp>
       
    16 
       
    17 #ifdef BOOST_MSVC
       
    18 #pragma warning(push)
       
    19 #pragma warning(disable: 4103)
       
    20 #endif
       
    21 #ifdef BOOST_HAS_ABI_HEADERS
       
    22 #  include BOOST_ABI_PREFIX
       
    23 #endif
       
    24 #ifdef BOOST_MSVC
       
    25 #pragma warning(pop)
       
    26 #endif
       
    27 
       
    28 #ifdef BOOST_MSVC
       
    29 #  pragma warning(push)
       
    30 #  pragma warning(disable: 4800)
       
    31 #endif
       
    32 
       
    33 namespace boost{
       
    34 namespace re_detail{
       
    35 
       
    36 //
       
    37 // error checking API:
       
    38 //
       
    39 BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf);
       
    40 //
       
    41 // function can_start:
       
    42 //
       
    43 template <class charT>
       
    44 inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
       
    45 {
       
    46    return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask));
       
    47 }
       
    48 inline bool can_start(char c, const unsigned char* map, unsigned char mask)
       
    49 {
       
    50    return map[(unsigned char)c] & mask;
       
    51 }
       
    52 inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
       
    53 {
       
    54    return map[(unsigned char)c] & mask;
       
    55 }
       
    56 inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
       
    57 {
       
    58    return map[c] & mask;
       
    59 }
       
    60 inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
       
    61 {
       
    62    return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask);
       
    63 }
       
    64 #if !defined(__hpux) // WCHAR_MIN not usable in pp-directives.
       
    65 #if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
       
    66 inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
       
    67 {
       
    68    return ((c >= static_cast<wchar_t>(1u << CHAR_BIT)) ? true : map[c] & mask);
       
    69 }
       
    70 #endif
       
    71 #endif
       
    72 #if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
       
    73 inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
       
    74 {
       
    75    return (((c >= static_cast<unsigned int>(1u << CHAR_BIT)) ? true : map[c] & mask));
       
    76 }
       
    77 #endif
       
    78 
       
    79 
       
    80 //
       
    81 // Unfortunately Rogue Waves standard library appears to have a bug
       
    82 // in std::basic_string::compare that results in eroneous answers
       
    83 // in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
       
    84 // 0x020101) the test case was:
       
    85 // {39135,0} < {0xff,0}
       
    86 // which succeeds when it should not.
       
    87 //
       
    88 #ifndef _RWSTD_VER
       
    89 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1310)
       
    90 template <class C, class T, class A>
       
    91 inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
       
    92 { 
       
    93    if(0 == *p)
       
    94    {
       
    95       if(s.empty() || ((s.size() == 1) && (s[0] == 0)))
       
    96          return 0;
       
    97    }
       
    98    return s.compare(p); 
       
    99 }
       
   100 #endif
       
   101 #else
       
   102 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1310)
       
   103 template <class C, class T, class A>
       
   104 inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
       
   105 { 
       
   106    if(0 == *p)
       
   107    {
       
   108       if(s.empty() || ((s.size() == 1) && (s[0] == 0)))
       
   109          return 0;
       
   110    }
       
   111    return s.compare(p); 
       
   112 }
       
   113 #endif
       
   114 inline int string_compare(const std::string& s, const char* p)
       
   115 { return std::strcmp(s.c_str(), p); }
       
   116 # ifndef BOOST_NO_WREGEX
       
   117 inline int string_compare(const std::wstring& s, const wchar_t* p)
       
   118 { return std::wcscmp(s.c_str(), p); }
       
   119 #endif
       
   120 #endif
       
   121 template <class Seq, class C>
       
   122 inline int string_compare(const Seq& s, const C* p)
       
   123 {
       
   124    std::size_t i = 0;
       
   125    while((i < s.size()) && (p[i] == s[i]))
       
   126    {
       
   127       ++i;
       
   128    }
       
   129    return (i == s.size()) ? -p[i] : s[i] - p[i];
       
   130 }
       
   131 # define STR_COMP(s,p) string_compare(s,p)
       
   132 
       
   133 template<class charT>
       
   134 inline const charT* re_skip_past_null(const charT* p)
       
   135 {
       
   136   while (*p != static_cast<charT>(0)) ++p;
       
   137   return ++p;
       
   138 }
       
   139 
       
   140 template <class iterator, class charT, class traits_type, class char_classT>
       
   141 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
       
   142                           iterator last, 
       
   143                           const re_set_long<char_classT>* set_, 
       
   144                           const regex_data<charT, traits_type>& e, bool icase)
       
   145 {   
       
   146    const charT* p = reinterpret_cast<const charT*>(set_+1);
       
   147    iterator ptr;
       
   148    unsigned int i;
       
   149    //bool icase = e.m_flags & regex_constants::icase;
       
   150 
       
   151    if(next == last) return next;
       
   152 
       
   153    typedef typename traits_type::string_type traits_string_type;
       
   154    const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
       
   155    
       
   156    // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
       
   157    // referenced
       
   158    (void)traits_inst;
       
   159 
       
   160    // try and match a single character, could be a multi-character
       
   161    // collating element...
       
   162    for(i = 0; i < set_->csingles; ++i)
       
   163    {
       
   164       ptr = next;
       
   165       if(*p == static_cast<charT>(0))
       
   166       {
       
   167          // treat null string as special case:
       
   168          if(traits_inst.translate(*ptr, icase) != *p)
       
   169          {
       
   170             while(*p == static_cast<charT>(0))++p;
       
   171             continue;
       
   172          }
       
   173          return set_->isnot ? next : (ptr == next) ? ++next : ptr;
       
   174       }
       
   175       else
       
   176       {
       
   177          while(*p && (ptr != last))
       
   178          {
       
   179             if(traits_inst.translate(*ptr, icase) != *p)
       
   180                break;
       
   181             ++p;
       
   182             ++ptr;
       
   183          }
       
   184 
       
   185          if(*p == static_cast<charT>(0)) // if null we've matched
       
   186             return set_->isnot ? next : (ptr == next) ? ++next : ptr;
       
   187 
       
   188          p = re_skip_past_null(p);     // skip null
       
   189       }
       
   190    }
       
   191 
       
   192    charT col = traits_inst.translate(*next, icase);
       
   193 
       
   194 
       
   195    if(set_->cranges || set_->cequivalents)
       
   196    {
       
   197       traits_string_type s1;
       
   198       //
       
   199       // try and match a range, NB only a single character can match
       
   200       if(set_->cranges)
       
   201       {
       
   202          if((e.m_flags & regex_constants::collate) == 0)
       
   203             s1.assign(1, col);
       
   204          else
       
   205          {
       
   206             charT a[2] = { col, charT(0), };
       
   207             s1 = traits_inst.transform(a, a + 1);
       
   208          }
       
   209          for(i = 0; i < set_->cranges; ++i)
       
   210          {
       
   211             if(STR_COMP(s1, p) >= 0)
       
   212             {
       
   213                do{ ++p; }while(*p);
       
   214                ++p;
       
   215                if(STR_COMP(s1, p) <= 0)
       
   216                   return set_->isnot ? next : ++next;
       
   217             }
       
   218             else
       
   219             {
       
   220                // skip first string
       
   221                do{ ++p; }while(*p);
       
   222                ++p;
       
   223             }
       
   224             // skip second string
       
   225             do{ ++p; }while(*p);
       
   226             ++p;
       
   227          }
       
   228       }
       
   229       //
       
   230       // try and match an equivalence class, NB only a single character can match
       
   231       if(set_->cequivalents)
       
   232       {
       
   233          charT a[2] = { col, charT(0), };
       
   234          s1 = traits_inst.transform_primary(a, a +1);
       
   235          for(i = 0; i < set_->cequivalents; ++i)
       
   236          {
       
   237             if(STR_COMP(s1, p) == 0)
       
   238                return set_->isnot ? next : ++next;
       
   239             // skip string
       
   240             do{ ++p; }while(*p);
       
   241             ++p;
       
   242          }
       
   243       }
       
   244    }
       
   245    if(traits_inst.isctype(col, set_->cclasses) == true)
       
   246       return set_->isnot ? next : ++next;
       
   247    if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
       
   248       return set_->isnot ? next : ++next;
       
   249    return set_->isnot ? ++next : next;
       
   250 }
       
   251 
       
   252 template <class BidiIterator>
       
   253 class repeater_count
       
   254 {
       
   255    repeater_count** stack;
       
   256    repeater_count* next;
       
   257    int state_id;
       
   258    std::size_t count;        // the number of iterations so far
       
   259    BidiIterator start_pos;   // where the last repeat started
       
   260 public:
       
   261    repeater_count(repeater_count** s)
       
   262    {
       
   263       stack = s;
       
   264       next = 0;
       
   265       state_id = -1;
       
   266       count = 0;
       
   267    }
       
   268    repeater_count(int i, repeater_count** s, BidiIterator start)
       
   269       : start_pos(start)
       
   270    {
       
   271       state_id = i;
       
   272       stack = s;
       
   273       next = *stack;
       
   274       *stack = this;
       
   275       if(state_id > next->state_id)
       
   276          count = 0;
       
   277       else
       
   278       {
       
   279          repeater_count* p = next;
       
   280          while(p->state_id != state_id)
       
   281             p = p->next;
       
   282          count = p->count;
       
   283          start_pos = p->start_pos;
       
   284       }
       
   285    }
       
   286    ~repeater_count()
       
   287    {
       
   288       *stack = next;
       
   289    }
       
   290    std::size_t get_count() { return count; }
       
   291    int get_id() { return state_id; }
       
   292    std::size_t operator++() { return ++count; }
       
   293    bool check_null_repeat(const BidiIterator& pos, std::size_t max)
       
   294    {
       
   295       // this is called when we are about to start a new repeat,
       
   296       // if the last one was NULL move our count to max,
       
   297       // otherwise save the current position.
       
   298       bool result = (count == 0) ? false : (pos == start_pos);
       
   299       if(result)
       
   300          count = max;
       
   301       else
       
   302          start_pos = pos;
       
   303       return result;
       
   304    }
       
   305 };
       
   306 
       
   307 struct saved_state;
       
   308 
       
   309 enum saved_state_type
       
   310 {
       
   311    saved_type_end = 0,
       
   312    saved_type_paren = 1,
       
   313    saved_type_recurse = 2,
       
   314    saved_type_assertion = 3,
       
   315    saved_state_alt = 4,
       
   316    saved_state_repeater_count = 5,
       
   317    saved_state_extra_block = 6,
       
   318    saved_state_greedy_single_repeat = 7,
       
   319    saved_state_rep_slow_dot = 8,
       
   320    saved_state_rep_fast_dot = 9,
       
   321    saved_state_rep_char = 10,
       
   322    saved_state_rep_short_set = 11,
       
   323    saved_state_rep_long_set = 12,
       
   324    saved_state_non_greedy_long_repeat = 13, 
       
   325    saved_state_count = 14
       
   326 };
       
   327 
       
   328 #ifdef BOOST_MSVC
       
   329 #pragma warning(push)
       
   330 #pragma warning(disable : 4251 4231 4660)
       
   331 #endif
       
   332 
       
   333 template <class BidiIterator, class Allocator, class traits>
       
   334 class perl_matcher
       
   335 {
       
   336 public:
       
   337    typedef typename traits::char_type char_type;
       
   338    typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
       
   339    typedef bool (self_type::*matcher_proc_type)(void);
       
   340    typedef std::size_t traits_size_type;
       
   341    typedef typename is_byte<char_type>::width_type width_type;
       
   342    typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
       
   343 
       
   344    perl_matcher(BidiIterator first, BidiIterator end, 
       
   345       match_results<BidiIterator, Allocator>& what, 
       
   346       const basic_regex<char_type, traits>& e,
       
   347       match_flag_type f,
       
   348       BidiIterator l_base)
       
   349       :  m_result(what), base(first), last(end), 
       
   350          position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), 
       
   351          m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
       
   352    {
       
   353       construct_init(e, f);
       
   354    }
       
   355 
       
   356    bool match();
       
   357    bool find();
       
   358 
       
   359    void setf(match_flag_type f)
       
   360    { m_match_flags |= f; }
       
   361    void unsetf(match_flag_type f)
       
   362    { m_match_flags &= ~f; }
       
   363 
       
   364 private:
       
   365    void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
       
   366 
       
   367    bool find_imp();
       
   368    bool match_imp();
       
   369 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
       
   370    typedef bool (perl_matcher::*protected_proc_type)();
       
   371    bool protected_call(protected_proc_type);
       
   372 #endif
       
   373    void estimate_max_state_count(std::random_access_iterator_tag*);
       
   374    void estimate_max_state_count(void*);
       
   375    bool match_prefix();
       
   376    bool match_all_states();
       
   377 
       
   378    // match procs, stored in s_match_vtable:
       
   379    bool match_startmark();
       
   380    bool match_endmark();
       
   381    bool match_literal();
       
   382    bool match_start_line();
       
   383    bool match_end_line();
       
   384    bool match_wild();
       
   385    bool match_match();
       
   386    bool match_word_boundary();
       
   387    bool match_within_word();
       
   388    bool match_word_start();
       
   389    bool match_word_end();
       
   390    bool match_buffer_start();
       
   391    bool match_buffer_end();
       
   392    bool match_backref();
       
   393    bool match_long_set();
       
   394    bool match_set();
       
   395    bool match_jump();
       
   396    bool match_alt();
       
   397    bool match_rep();
       
   398    bool match_combining();
       
   399    bool match_soft_buffer_end();
       
   400    bool match_restart_continue();
       
   401    bool match_long_set_repeat();
       
   402    bool match_set_repeat();
       
   403    bool match_char_repeat();
       
   404    bool match_dot_repeat_fast();
       
   405    bool match_dot_repeat_slow();
       
   406    bool match_backstep();
       
   407    bool match_assert_backref();
       
   408    bool match_toggle_case();
       
   409 #ifdef BOOST_REGEX_RECURSIVE
       
   410    bool backtrack_till_match(std::size_t count);
       
   411 #endif
       
   412 
       
   413    // find procs stored in s_find_vtable:
       
   414    bool find_restart_any();
       
   415    bool find_restart_word();
       
   416    bool find_restart_line();
       
   417    bool find_restart_buf();
       
   418    bool find_restart_lit();
       
   419 
       
   420 private:
       
   421    // final result structure to be filled in:
       
   422    match_results<BidiIterator, Allocator>& m_result;
       
   423    // temporary result for POSIX matches:
       
   424    scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
       
   425    // pointer to actual result structure to fill in:
       
   426    match_results<BidiIterator, Allocator>* m_presult;
       
   427    // start of sequence being searched:
       
   428    BidiIterator base;
       
   429    // end of sequence being searched:
       
   430    BidiIterator last; 
       
   431    // current character being examined:
       
   432    BidiIterator position;
       
   433    // where to restart next search after failed match attempt:
       
   434    BidiIterator restart;
       
   435    // where the current search started from, acts as base for $` during grep:
       
   436    BidiIterator search_base;
       
   437    // how far we can go back when matching lookbehind:
       
   438    BidiIterator backstop;
       
   439    // the expression being examined:
       
   440    const basic_regex<char_type, traits>& re;
       
   441    // the expression's traits class:
       
   442    const ::boost::regex_traits_wrapper<traits>& traits_inst;
       
   443    // the next state in the machine being matched:
       
   444    const re_syntax_base* pstate;
       
   445    // matching flags in use:
       
   446    match_flag_type m_match_flags;
       
   447    // how many states we have examined so far:
       
   448    boost::uintmax_t state_count;
       
   449    // max number of states to examine before giving up:
       
   450    boost::uintmax_t max_state_count;
       
   451    // whether we should ignore case or not:
       
   452    bool icase;
       
   453    // set to true when (position == last), indicates that we may have a partial match:
       
   454    bool m_has_partial_match;
       
   455    // set to true whenever we get a match:
       
   456    bool m_has_found_match;
       
   457    // set to true whenever we're inside an independent sub-expression:
       
   458    bool m_independent;
       
   459    // the current repeat being examined:
       
   460    repeater_count<BidiIterator>* next_count;
       
   461    // the first repeat being examined (top of linked list):
       
   462    repeater_count<BidiIterator> rep_obj;
       
   463    // the mask to pass when matching word boundaries:
       
   464    typename traits::char_class_type m_word_mask;
       
   465    // the bitmask to use when determining whether a match_any matches a newline or not:
       
   466    unsigned char match_any_mask;
       
   467 
       
   468 #ifdef BOOST_REGEX_NON_RECURSIVE
       
   469    //
       
   470    // additional members for non-recursive version:
       
   471    //
       
   472    typedef bool (self_type::*unwind_proc_type)(bool);
       
   473 
       
   474    void extend_stack();
       
   475    bool unwind(bool);
       
   476    bool unwind_end(bool);
       
   477    bool unwind_paren(bool);
       
   478    bool unwind_recursion_stopper(bool);
       
   479    bool unwind_assertion(bool);
       
   480    bool unwind_alt(bool);
       
   481    bool unwind_repeater_counter(bool);
       
   482    bool unwind_extra_block(bool);
       
   483    bool unwind_greedy_single_repeat(bool);
       
   484    bool unwind_slow_dot_repeat(bool);
       
   485    bool unwind_fast_dot_repeat(bool);
       
   486    bool unwind_char_repeat(bool);
       
   487    bool unwind_short_set_repeat(bool);
       
   488    bool unwind_long_set_repeat(bool);
       
   489    bool unwind_non_greedy_repeat(bool);
       
   490    void destroy_single_repeat();
       
   491    void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
       
   492    void push_recursion_stopper();
       
   493    void push_assertion(const re_syntax_base* ps, bool positive);
       
   494    void push_alt(const re_syntax_base* ps);
       
   495    void push_repeater_count(int i, repeater_count<BidiIterator>** s);
       
   496    void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
       
   497    void push_non_greedy_repeat(const re_syntax_base* ps);
       
   498 
       
   499 
       
   500    // pointer to base of stack:
       
   501    saved_state* m_stack_base;
       
   502    // pointer to current stack position:
       
   503    saved_state* m_backup_state;
       
   504    // determines what value to return when unwinding from recursion,
       
   505    // allows for mixed recursive/non-recursive algorithm:
       
   506    bool m_recursive_result;
       
   507    // how many memory blocks have we used up?:
       
   508    unsigned used_block_count;
       
   509 #endif
       
   510 
       
   511    // these operations aren't allowed, so are declared private,
       
   512    // bodies are provided to keep explicit-instantiation requests happy:
       
   513    perl_matcher& operator=(const perl_matcher&)
       
   514    {
       
   515       return *this;
       
   516    }
       
   517    perl_matcher(const perl_matcher& that)
       
   518       : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
       
   519 };
       
   520 
       
   521 #ifdef BOOST_MSVC
       
   522 #pragma warning(pop)
       
   523 #endif
       
   524 
       
   525 } // namespace re_detail
       
   526 
       
   527 #ifdef BOOST_MSVC
       
   528 #pragma warning(push)
       
   529 #pragma warning(disable: 4103)
       
   530 #endif
       
   531 #ifdef BOOST_HAS_ABI_HEADERS
       
   532 #  include BOOST_ABI_SUFFIX
       
   533 #endif
       
   534 #ifdef BOOST_MSVC
       
   535 #pragma warning(pop)
       
   536 #endif
       
   537 
       
   538 } // namespace boost
       
   539 
       
   540 #ifdef BOOST_MSVC
       
   541 #  pragma warning(pop)
       
   542 #endif
       
   543 
       
   544 //
       
   545 // include the implementation of perl_matcher:
       
   546 //
       
   547 #ifdef BOOST_REGEX_RECURSIVE
       
   548 #include <boost/regex/v4/perl_matcher_recursive.hpp>
       
   549 #else
       
   550 #include <boost/regex/v4/perl_matcher_non_recursive.hpp>
       
   551 #endif
       
   552 // this one has to be last:
       
   553 #include <boost/regex/v4/perl_matcher_common.hpp>
       
   554 
       
   555 #endif
       
   556