imgtools/imglib/boostlibrary/boost/regex/v4/basic_regex_creator.hpp
changeset 600 6d08f4a05d93
equal deleted inserted replaced
599:fa7a3cc6effd 600:6d08f4a05d93
       
     1 /*
       
     2  *
       
     3  * Copyright (c) 2004
       
     4  * John Maddock
       
     5  *
       
     6  * Use, modification and distribution are subject to the 
       
     7  * Boost Software License, Version 1.0. (See accompanying file 
       
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
       
     9  *
       
    10  */
       
    11 
       
    12  /*
       
    13   *   LOCATION:    see http://www.boost.org for most recent version.
       
    14   *   FILE         basic_regex_creator.cpp
       
    15   *   VERSION      see <boost/version.hpp>
       
    16   *   DESCRIPTION: Declares template class basic_regex_creator which fills in
       
    17   *                the data members of a regex_data object.
       
    18   */
       
    19 
       
    20 #ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
       
    21 #define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
       
    22 
       
    23 #ifdef BOOST_MSVC
       
    24 #pragma warning(push)
       
    25 #pragma warning(disable: 4103)
       
    26 #endif
       
    27 #ifdef BOOST_HAS_ABI_HEADERS
       
    28 #  include BOOST_ABI_PREFIX
       
    29 #endif
       
    30 #ifdef BOOST_MSVC
       
    31 #pragma warning(pop)
       
    32 #endif
       
    33 
       
    34 #ifdef BOOST_MSVC
       
    35 #  pragma warning(push)
       
    36 #  pragma warning(disable: 4800)
       
    37 #endif
       
    38 
       
    39 namespace boost{
       
    40 
       
    41 namespace re_detail{
       
    42 
       
    43 template <class charT>
       
    44 struct digraph : public std::pair<charT, charT>
       
    45 {
       
    46    digraph() : std::pair<charT, charT>(0, 0){}
       
    47    digraph(charT c1) : std::pair<charT, charT>(c1, 0){}
       
    48    digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2)
       
    49    {}
       
    50 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
       
    51    digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){}
       
    52 #endif
       
    53    template <class Seq>
       
    54    digraph(const Seq& s) : std::pair<charT, charT>()
       
    55    {
       
    56       BOOST_ASSERT(s.size() <= 2);
       
    57       BOOST_ASSERT(s.size());
       
    58       this->first = s[0];
       
    59       this->second = (s.size() > 1) ? s[1] : 0;
       
    60    }
       
    61 };
       
    62 
       
    63 template <class charT, class traits>
       
    64 class basic_char_set
       
    65 {
       
    66 public:
       
    67    typedef digraph<charT>                   digraph_type;
       
    68    typedef typename traits::string_type     string_type;
       
    69    typedef typename traits::char_class_type mask_type;
       
    70 
       
    71    basic_char_set()
       
    72    {
       
    73       m_negate = false;
       
    74       m_has_digraphs = false;
       
    75       m_classes = 0;
       
    76       m_negated_classes = 0;
       
    77       m_empty = true;
       
    78    }
       
    79 
       
    80    void add_single(const digraph_type& s)
       
    81    {
       
    82       m_singles.insert(m_singles.end(), s);
       
    83       if(s.second)
       
    84          m_has_digraphs = true;
       
    85       m_empty = false;
       
    86    }
       
    87    void add_range(const digraph_type& first, const digraph_type& end)
       
    88    {
       
    89       m_ranges.insert(m_ranges.end(), first);
       
    90       m_ranges.insert(m_ranges.end(), end);
       
    91       if(first.second)
       
    92       {
       
    93          m_has_digraphs = true;
       
    94          add_single(first);
       
    95       }
       
    96       if(end.second)
       
    97       {
       
    98          m_has_digraphs = true;
       
    99          add_single(end);
       
   100       }
       
   101       m_empty = false;
       
   102    }
       
   103    void add_class(mask_type m)
       
   104    {
       
   105       m_classes |= m;
       
   106       m_empty = false;
       
   107    }
       
   108    void add_negated_class(mask_type m)
       
   109    {
       
   110       m_negated_classes |= m;
       
   111       m_empty = false;
       
   112    }
       
   113    void add_equivalent(const digraph_type& s)
       
   114    {
       
   115       m_equivalents.insert(m_equivalents.end(), s);
       
   116       if(s.second)
       
   117       {
       
   118          m_has_digraphs = true;
       
   119          add_single(s);
       
   120       }
       
   121       m_empty = false;
       
   122    }
       
   123    void negate()
       
   124    { 
       
   125       m_negate = true;
       
   126       //m_empty = false;
       
   127    }
       
   128 
       
   129    //
       
   130    // accessor functions:
       
   131    //
       
   132    bool has_digraphs()const
       
   133    {
       
   134       return m_has_digraphs;
       
   135    }
       
   136    bool is_negated()const
       
   137    {
       
   138       return m_negate;
       
   139    }
       
   140    typedef typename std::vector<digraph_type>::const_iterator  list_iterator;
       
   141    list_iterator singles_begin()const
       
   142    {
       
   143       return m_singles.begin();
       
   144    }
       
   145    list_iterator singles_end()const
       
   146    {
       
   147       return m_singles.end();
       
   148    }
       
   149    list_iterator ranges_begin()const
       
   150    {
       
   151       return m_ranges.begin();
       
   152    }
       
   153    list_iterator ranges_end()const
       
   154    {
       
   155       return m_ranges.end();
       
   156    }
       
   157    list_iterator equivalents_begin()const
       
   158    {
       
   159       return m_equivalents.begin();
       
   160    }
       
   161    list_iterator equivalents_end()const
       
   162    {
       
   163       return m_equivalents.end();
       
   164    }
       
   165    mask_type classes()const
       
   166    {
       
   167       return m_classes;
       
   168    }
       
   169    mask_type negated_classes()const
       
   170    {
       
   171       return m_negated_classes;
       
   172    }
       
   173    bool empty()const
       
   174    {
       
   175       return m_empty;
       
   176    }
       
   177 private:
       
   178    std::vector<digraph_type> m_singles;         // a list of single characters to match
       
   179    std::vector<digraph_type> m_ranges;          // a list of end points of our ranges
       
   180    bool                      m_negate;          // true if the set is to be negated
       
   181    bool                      m_has_digraphs;    // true if we have digraphs present
       
   182    mask_type                 m_classes;         // character classes to match
       
   183    mask_type                 m_negated_classes; // negated character classes to match
       
   184    bool                      m_empty;           // whether we've added anything yet
       
   185    std::vector<digraph_type> m_equivalents;     // a list of equivalence classes
       
   186 };
       
   187    
       
   188 template <class charT, class traits>
       
   189 class basic_regex_creator
       
   190 {
       
   191 public:
       
   192    basic_regex_creator(regex_data<charT, traits>* data);
       
   193    std::ptrdiff_t getoffset(void* addr)
       
   194    {
       
   195       return getoffset(addr, m_pdata->m_data.data());
       
   196    }
       
   197    std::ptrdiff_t getoffset(const void* addr, const void* base)
       
   198    {
       
   199       return static_cast<const char*>(addr) - static_cast<const char*>(base);
       
   200    }
       
   201    re_syntax_base* getaddress(std::ptrdiff_t off)
       
   202    {
       
   203       return getaddress(off, m_pdata->m_data.data());
       
   204    }
       
   205    re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
       
   206    {
       
   207       return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
       
   208    }
       
   209    void init(unsigned l_flags)
       
   210    {
       
   211       m_pdata->m_flags = l_flags;
       
   212       m_icase = l_flags & regex_constants::icase;
       
   213    }
       
   214    regbase::flag_type flags()
       
   215    {
       
   216       return m_pdata->m_flags;
       
   217    }
       
   218    void flags(regbase::flag_type f)
       
   219    {
       
   220       m_pdata->m_flags = f;
       
   221       if(m_icase != static_cast<bool>(f & regbase::icase))
       
   222       {
       
   223          m_icase = static_cast<bool>(f & regbase::icase);
       
   224       }
       
   225    }
       
   226    re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
       
   227    re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
       
   228    re_literal* append_literal(charT c);
       
   229    re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
       
   230    re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
       
   231    re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
       
   232    void finalize(const charT* p1, const charT* p2);
       
   233 protected:
       
   234    regex_data<charT, traits>*    m_pdata;              // pointer to the basic_regex_data struct we are filling in
       
   235    const ::boost::regex_traits_wrapper<traits>&  
       
   236                                  m_traits;             // convenience reference to traits class
       
   237    re_syntax_base*               m_last_state;         // the last state we added
       
   238    bool                          m_icase;              // true for case insensitive matches
       
   239    unsigned                      m_repeater_id;        // the state_id of the next repeater
       
   240    bool                          m_has_backrefs;       // true if there are actually any backrefs
       
   241    unsigned                      m_backrefs;           // bitmask of permitted backrefs
       
   242    boost::uintmax_t              m_bad_repeats;        // bitmask of repeats we can't deduce a startmap for;
       
   243    typename traits::char_class_type m_word_mask;       // mask used to determine if a character is a word character
       
   244    typename traits::char_class_type m_mask_space;      // mask used to determine if a character is a word character
       
   245    typename traits::char_class_type m_lower_mask;       // mask used to determine if a character is a lowercase character
       
   246    typename traits::char_class_type m_upper_mask;      // mask used to determine if a character is an uppercase character
       
   247    typename traits::char_class_type m_alpha_mask;      // mask used to determine if a character is an alphabetic character
       
   248 private:
       
   249    basic_regex_creator& operator=(const basic_regex_creator&);
       
   250    basic_regex_creator(const basic_regex_creator&);
       
   251 
       
   252    void fixup_pointers(re_syntax_base* state);
       
   253    void create_startmaps(re_syntax_base* state);
       
   254    int calculate_backstep(re_syntax_base* state);
       
   255    void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
       
   256    unsigned get_restart_type(re_syntax_base* state);
       
   257    void set_all_masks(unsigned char* bits, unsigned char);
       
   258    bool is_bad_repeat(re_syntax_base* pt);
       
   259    void set_bad_repeat(re_syntax_base* pt);
       
   260    syntax_element_type get_repeat_type(re_syntax_base* state);
       
   261    void probe_leading_repeat(re_syntax_base* state);
       
   262 };
       
   263 
       
   264 template <class charT, class traits>
       
   265 basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
       
   266    : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0)
       
   267 {
       
   268    m_pdata->m_data.clear();
       
   269    m_pdata->m_status = ::boost::regex_constants::error_ok;
       
   270    static const charT w = 'w';
       
   271    static const charT s = 's';
       
   272    static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
       
   273    static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
       
   274    static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
       
   275    m_word_mask = m_traits.lookup_classname(&w, &w +1);
       
   276    m_mask_space = m_traits.lookup_classname(&s, &s +1);
       
   277    m_lower_mask = m_traits.lookup_classname(l, l + 5);
       
   278    m_upper_mask = m_traits.lookup_classname(u, u + 5);
       
   279    m_alpha_mask = m_traits.lookup_classname(a, a + 5);
       
   280    m_pdata->m_word_mask = m_word_mask;
       
   281    BOOST_ASSERT(m_word_mask != 0); 
       
   282    BOOST_ASSERT(m_mask_space != 0); 
       
   283    BOOST_ASSERT(m_lower_mask != 0); 
       
   284    BOOST_ASSERT(m_upper_mask != 0); 
       
   285    BOOST_ASSERT(m_alpha_mask != 0); 
       
   286 }
       
   287 
       
   288 template <class charT, class traits>
       
   289 re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
       
   290 {
       
   291    // if the state is a backref then make a note of it:
       
   292    if(t == syntax_element_backref)
       
   293       this->m_has_backrefs = true;
       
   294    // append a new state, start by aligning our last one:
       
   295    m_pdata->m_data.align();
       
   296    // set the offset to the next state in our last one:
       
   297    if(m_last_state)
       
   298       m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
       
   299    // now actually extent our data:
       
   300    m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
       
   301    // fill in boilerplate options in the new state:
       
   302    m_last_state->next.i = 0;
       
   303    m_last_state->type = t;
       
   304    return m_last_state;
       
   305 }
       
   306 
       
   307 template <class charT, class traits>
       
   308 re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
       
   309 {
       
   310    // append a new state, start by aligning our last one:
       
   311    m_pdata->m_data.align();
       
   312    // set the offset to the next state in our last one:
       
   313    if(m_last_state)
       
   314       m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
       
   315    // remember the last state position:
       
   316    std::ptrdiff_t off = getoffset(m_last_state) + s;
       
   317    // now actually insert our data:
       
   318    re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
       
   319    // fill in boilerplate options in the new state:
       
   320    new_state->next.i = s;
       
   321    new_state->type = t;
       
   322    m_last_state = getaddress(off);
       
   323    return new_state;
       
   324 }
       
   325 
       
   326 template <class charT, class traits>
       
   327 re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
       
   328 {
       
   329    re_literal* result;
       
   330    // start by seeing if we have an existing re_literal we can extend:
       
   331    if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
       
   332    {
       
   333       // no existing re_literal, create a new one:
       
   334       result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
       
   335       result->length = 1;
       
   336       *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
       
   337    }
       
   338    else
       
   339    {
       
   340       // we have an existing re_literal, extend it:
       
   341       std::ptrdiff_t off = getoffset(m_last_state);
       
   342       m_pdata->m_data.extend(sizeof(charT));
       
   343       m_last_state = result = static_cast<re_literal*>(getaddress(off));
       
   344       charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
       
   345       characters[result->length] = m_traits.translate(c, m_icase);
       
   346       ++(result->length);
       
   347    }
       
   348    return result;
       
   349 }
       
   350 
       
   351 template <class charT, class traits>
       
   352 inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
       
   353    const basic_char_set<charT, traits>& char_set)
       
   354 {
       
   355    typedef mpl::bool_< (sizeof(charT) == 1) > truth_type;
       
   356    return char_set.has_digraphs() 
       
   357       ? append_set(char_set, static_cast<mpl::false_*>(0))
       
   358       : append_set(char_set, static_cast<truth_type*>(0));
       
   359 }
       
   360 
       
   361 template <class charT, class traits>
       
   362 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
       
   363    const basic_char_set<charT, traits>& char_set, mpl::false_*)
       
   364 {
       
   365    typedef typename traits::string_type string_type;
       
   366    typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
       
   367    typedef typename traits::char_class_type mask_type;
       
   368    
       
   369    re_set_long<mask_type>* result = static_cast<re_set_long<mask_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<mask_type>)));
       
   370    //
       
   371    // fill in the basics:
       
   372    //
       
   373    result->csingles = static_cast<unsigned int>(::boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end()));
       
   374    result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
       
   375    result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
       
   376    result->cclasses = char_set.classes();
       
   377    result->cnclasses = char_set.negated_classes();
       
   378    if(flags() & regbase::icase)
       
   379    {
       
   380       // adjust classes as needed:
       
   381       if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
       
   382          result->cclasses |= m_alpha_mask;
       
   383       if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
       
   384          result->cnclasses |= m_alpha_mask;
       
   385    }
       
   386 
       
   387    result->isnot = char_set.is_negated();
       
   388    result->singleton = !char_set.has_digraphs();
       
   389    //
       
   390    // remember where the state is for later:
       
   391    //
       
   392    std::ptrdiff_t offset = getoffset(result);
       
   393    //
       
   394    // now extend with all the singles:
       
   395    //
       
   396    item_iterator first, last;
       
   397    first = char_set.singles_begin();
       
   398    last = char_set.singles_end();
       
   399    while(first != last)
       
   400    {
       
   401       charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
       
   402       p[0] = m_traits.translate(first->first, m_icase);
       
   403       if(first->second)
       
   404       {
       
   405          p[1] = m_traits.translate(first->second, m_icase);
       
   406          p[2] = 0;
       
   407       }
       
   408       else
       
   409          p[1] = 0;
       
   410       ++first;
       
   411    }
       
   412    //
       
   413    // now extend with all the ranges:
       
   414    //
       
   415    first = char_set.ranges_begin();
       
   416    last = char_set.ranges_end();
       
   417    while(first != last)
       
   418    {
       
   419       // first grab the endpoints of the range:
       
   420       digraph<charT> c1 = *first;
       
   421       c1.first = this->m_traits.translate(c1.first, this->m_icase);
       
   422       c1.second = this->m_traits.translate(c1.second, this->m_icase);
       
   423       ++first;
       
   424       digraph<charT> c2 = *first;
       
   425       c2.first = this->m_traits.translate(c2.first, this->m_icase);
       
   426       c2.second = this->m_traits.translate(c2.second, this->m_icase);
       
   427       ++first;
       
   428       string_type s1, s2;
       
   429       // different actions now depending upon whether collation is turned on:
       
   430       if(flags() & regex_constants::collate)
       
   431       {
       
   432          // we need to transform our range into sort keys:
       
   433 #if BOOST_WORKAROUND(__GNUC__, < 3)
       
   434          string_type in(3, charT(0));
       
   435          in[0] = c1.first;
       
   436          in[1] = c1.second;
       
   437          s1 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
       
   438          in[0] = c2.first;
       
   439          in[1] = c2.second;
       
   440          s2 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1));
       
   441 #else
       
   442          charT a1[3] = { c1.first, c1.second, charT(0), };
       
   443          charT a2[3] = { c2.first, c2.second, charT(0), };
       
   444          s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
       
   445          s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
       
   446 #endif
       
   447          if(s1.size() == 0)
       
   448             s1 = string_type(1, charT(0));
       
   449          if(s2.size() == 0)
       
   450             s2 = string_type(1, charT(0));
       
   451       }
       
   452       else
       
   453       {
       
   454          if(c1.second)
       
   455          {
       
   456             s1.insert(s1.end(), c1.first);
       
   457             s1.insert(s1.end(), c1.second);
       
   458          }
       
   459          else
       
   460             s1 = string_type(1, c1.first);
       
   461          if(c2.second)
       
   462          {
       
   463             s2.insert(s2.end(), c2.first);
       
   464             s2.insert(s2.end(), c2.second);
       
   465          }
       
   466          else
       
   467             s2.insert(s2.end(), c2.first);
       
   468       }
       
   469       if(s1 > s2)
       
   470       {
       
   471          // Oops error:
       
   472          return 0;
       
   473       }
       
   474       charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
       
   475       re_detail::copy(s1.begin(), s1.end(), p);
       
   476       p[s1.size()] = charT(0);
       
   477       p += s1.size() + 1;
       
   478       re_detail::copy(s2.begin(), s2.end(), p);
       
   479       p[s2.size()] = charT(0);
       
   480    }
       
   481    //
       
   482    // now process the equivalence classes:
       
   483    //
       
   484    first = char_set.equivalents_begin();
       
   485    last = char_set.equivalents_end();
       
   486    while(first != last)
       
   487    {
       
   488       string_type s;
       
   489       if(first->second)
       
   490       {
       
   491 #if BOOST_WORKAROUND(__GNUC__, < 3)
       
   492          string_type in(3, charT(0));
       
   493          in[0] = first->first;
       
   494          in[1] = first->second;
       
   495          s = m_traits.transform_primary(in.c_str(), in.c_str()+2);
       
   496 #else
       
   497          charT cs[3] = { first->first, first->second, charT(0), };
       
   498          s = m_traits.transform_primary(cs, cs+2);
       
   499 #endif
       
   500       }
       
   501       else
       
   502          s = m_traits.transform_primary(&first->first, &first->first+1);
       
   503       if(s.empty())
       
   504          return 0;  // invalid or unsupported equivalence class
       
   505       charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
       
   506       re_detail::copy(s.begin(), s.end(), p);
       
   507       p[s.size()] = charT(0);
       
   508       ++first;
       
   509    }
       
   510    //
       
   511    // finally reset the address of our last state:
       
   512    //
       
   513    m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
       
   514    return result;
       
   515 }
       
   516 
       
   517 namespace{
       
   518 
       
   519 template<class T>
       
   520 inline bool char_less(T t1, T t2)
       
   521 {
       
   522    return t1 < t2;
       
   523 }
       
   524 template<>
       
   525 inline bool char_less<char>(char t1, char t2)
       
   526 {
       
   527    return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
       
   528 }
       
   529 template<>
       
   530 inline bool char_less<signed char>(signed char t1, signed char t2)
       
   531 {
       
   532    return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
       
   533 }
       
   534 }
       
   535 
       
   536 template <class charT, class traits>
       
   537 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
       
   538    const basic_char_set<charT, traits>& char_set, mpl::true_*)
       
   539 {
       
   540    typedef typename traits::string_type string_type;
       
   541    typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
       
   542    
       
   543    re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
       
   544    bool negate = char_set.is_negated();
       
   545    std::memset(result->_map, 0, sizeof(result->_map));
       
   546    //
       
   547    // handle singles first:
       
   548    //
       
   549    item_iterator first, last;
       
   550    first = char_set.singles_begin();
       
   551    last = char_set.singles_end();
       
   552    while(first != last)
       
   553    {
       
   554       for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
       
   555       {
       
   556          if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
       
   557             == this->m_traits.translate(first->first, this->m_icase))
       
   558             result->_map[i] = true;
       
   559       }
       
   560       ++first;
       
   561    }
       
   562    //
       
   563    // OK now handle ranges:
       
   564    //
       
   565    first = char_set.ranges_begin();
       
   566    last = char_set.ranges_end();
       
   567    while(first != last)
       
   568    {
       
   569       // first grab the endpoints of the range:
       
   570       charT c1 = this->m_traits.translate(first->first, this->m_icase);
       
   571       ++first;
       
   572       charT c2 = this->m_traits.translate(first->first, this->m_icase);
       
   573       ++first;
       
   574       // different actions now depending upon whether collation is turned on:
       
   575       if(flags() & regex_constants::collate)
       
   576       {
       
   577          // we need to transform our range into sort keys:
       
   578          charT c3[2] = { c1, charT(0), };
       
   579          string_type s1 = this->m_traits.transform(c3, c3+1);
       
   580          c3[0] = c2;
       
   581          string_type s2 = this->m_traits.transform(c3, c3+1);
       
   582          if(s1 > s2)
       
   583          {
       
   584             // Oops error:
       
   585             return 0;
       
   586          }
       
   587          BOOST_ASSERT(c3[1] == charT(0));
       
   588          for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
   589          {
       
   590             c3[0] = static_cast<charT>(i);
       
   591             string_type s3 = this->m_traits.transform(c3, c3 +1);
       
   592             if((s1 <= s3) && (s3 <= s2))
       
   593                result->_map[i] = true;
       
   594          }
       
   595       }
       
   596       else
       
   597       {
       
   598          if(char_less<charT>(c2, c1))
       
   599          {
       
   600             // Oops error:
       
   601             return 0;
       
   602          }
       
   603          // everything in range matches:
       
   604          std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
       
   605       }
       
   606    }
       
   607    //
       
   608    // and now the classes:
       
   609    //
       
   610    typedef typename traits::char_class_type mask_type;
       
   611    mask_type m = char_set.classes();
       
   612    if(flags() & regbase::icase)
       
   613    {
       
   614       // adjust m as needed:
       
   615       if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
       
   616          m |= m_alpha_mask;
       
   617    }
       
   618    if(m != 0)
       
   619    {
       
   620       for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
   621       {
       
   622          if(this->m_traits.isctype(static_cast<charT>(i), m))
       
   623             result->_map[i] = true;
       
   624       }
       
   625    }
       
   626    //
       
   627    // and now the negated classes:
       
   628    //
       
   629    m = char_set.negated_classes();
       
   630    if(flags() & regbase::icase)
       
   631    {
       
   632       // adjust m as needed:
       
   633       if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
       
   634          m |= m_alpha_mask;
       
   635    }
       
   636    if(m != 0)
       
   637    {
       
   638       for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
   639       {
       
   640          if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
       
   641             result->_map[i] = true;
       
   642       }
       
   643    }
       
   644    //
       
   645    // now process the equivalence classes:
       
   646    //
       
   647    first = char_set.equivalents_begin();
       
   648    last = char_set.equivalents_end();
       
   649    while(first != last)
       
   650    {
       
   651       string_type s;
       
   652       BOOST_ASSERT(static_cast<charT>(0) == first->second);
       
   653       s = m_traits.transform_primary(&first->first, &first->first+1);
       
   654       if(s.empty())
       
   655          return 0;  // invalid or unsupported equivalence class
       
   656       for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
   657       {
       
   658          charT c[2] = { (static_cast<charT>(i)), charT(0), };
       
   659          string_type s2 = this->m_traits.transform_primary(c, c+1);
       
   660          if(s == s2)
       
   661             result->_map[i] = true;
       
   662       }
       
   663       ++first;
       
   664    }
       
   665    if(negate)
       
   666    {
       
   667       for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
   668       {
       
   669          result->_map[i] = !(result->_map[i]);
       
   670       }
       
   671    }
       
   672    return result;
       
   673 }
       
   674 
       
   675 template <class charT, class traits>
       
   676 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
       
   677 {
       
   678    // we've added all the states we need, now finish things off.
       
   679    // start by adding a terminating state:
       
   680    append_state(syntax_element_match);
       
   681    // extend storage to store original expression:
       
   682    std::ptrdiff_t len = p2 - p1;
       
   683    m_pdata->m_expression_len = len;
       
   684    charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
       
   685    m_pdata->m_expression = ps;
       
   686    re_detail::copy(p1, p2, ps);
       
   687    ps[p2 - p1] = 0;
       
   688    // fill in our other data...
       
   689    // successful parsing implies a zero status:
       
   690    m_pdata->m_status = 0;
       
   691    // get the first state of the machine:
       
   692    m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
       
   693    // fixup pointers in the machine:
       
   694    fixup_pointers(m_pdata->m_first_state);
       
   695    // create nested startmaps:
       
   696    create_startmaps(m_pdata->m_first_state);
       
   697    // create main startmap:
       
   698    std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
       
   699    m_pdata->m_can_be_null = 0;
       
   700 
       
   701    m_bad_repeats = 0;
       
   702    create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
       
   703    // get the restart type:
       
   704    m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
       
   705    // optimise a leading repeat if there is one:
       
   706    probe_leading_repeat(m_pdata->m_first_state);
       
   707 }
       
   708 
       
   709 template <class charT, class traits>
       
   710 void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
       
   711 {
       
   712    while(state)
       
   713    {
       
   714       switch(state->type)
       
   715       {
       
   716       case syntax_element_rep:
       
   717       case syntax_element_dot_rep:
       
   718       case syntax_element_char_rep:
       
   719       case syntax_element_short_set_rep:
       
   720       case syntax_element_long_set_rep:
       
   721          // set the state_id of this repeat:
       
   722          static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
       
   723          // fall through:
       
   724       case syntax_element_alt:
       
   725          std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
       
   726          static_cast<re_alt*>(state)->can_be_null = 0;
       
   727          // fall through:
       
   728       case syntax_element_jump:
       
   729          static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
       
   730          // fall through again:
       
   731       default:
       
   732          if(state->next.i)
       
   733             state->next.p = getaddress(state->next.i, state);
       
   734          else
       
   735             state->next.p = 0;
       
   736       }
       
   737       state = state->next.p;
       
   738    }
       
   739 }
       
   740 
       
   741 template <class charT, class traits>
       
   742 void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
       
   743 {
       
   744    // non-recursive implementation:
       
   745    // create the last map in the machine first, so that earlier maps
       
   746    // can make use of the result...
       
   747    //
       
   748    // This was originally a recursive implementation, but that caused stack
       
   749    // overflows with complex expressions on small stacks (think COM+).
       
   750 
       
   751    // start by saving the case setting:
       
   752    bool l_icase = m_icase;
       
   753    std::vector<std::pair<bool, re_syntax_base*> > v;
       
   754 
       
   755    while(state)
       
   756    {
       
   757       switch(state->type)
       
   758       {
       
   759       case syntax_element_toggle_case:
       
   760          // we need to track case changes here:
       
   761          m_icase = static_cast<re_case*>(state)->icase;
       
   762          state = state->next.p;
       
   763          continue;
       
   764       case syntax_element_alt:
       
   765       case syntax_element_rep:
       
   766       case syntax_element_dot_rep:
       
   767       case syntax_element_char_rep:
       
   768       case syntax_element_short_set_rep:
       
   769       case syntax_element_long_set_rep:
       
   770          // just push the state onto our stack for now:
       
   771          v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
       
   772          state = state->next.p;
       
   773          break;
       
   774       case syntax_element_backstep:
       
   775          // we need to calculate how big the backstep is:
       
   776          static_cast<re_brace*>(state)->index
       
   777             = this->calculate_backstep(state->next.p);
       
   778          if(static_cast<re_brace*>(state)->index < 0)
       
   779          {
       
   780             // Oops error:
       
   781             if(0 == this->m_pdata->m_status) // update the error code if not already set
       
   782                this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
       
   783             //
       
   784             // clear the expression, we should be empty:
       
   785             //
       
   786             this->m_pdata->m_expression = 0;
       
   787             this->m_pdata->m_expression_len = 0;
       
   788             //
       
   789             // and throw if required:
       
   790             //
       
   791             if(0 == (this->flags() & regex_constants::no_except))
       
   792             {
       
   793                std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
       
   794                boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
       
   795                e.raise();
       
   796             }
       
   797          }
       
   798          // fall through:
       
   799       default:
       
   800          state = state->next.p;
       
   801       }
       
   802    }
       
   803    // now work through our list, building all the maps as we go:
       
   804    while(v.size())
       
   805    {
       
   806       const std::pair<bool, re_syntax_base*>& p = v.back();
       
   807       m_icase = p.first;
       
   808       state = p.second;
       
   809       v.pop_back();
       
   810 
       
   811       // Build maps:
       
   812       m_bad_repeats = 0;
       
   813       create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
       
   814       m_bad_repeats = 0;
       
   815       create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
       
   816       // adjust the type of the state to allow for faster matching:
       
   817       state->type = this->get_repeat_type(state);
       
   818    }
       
   819    // restore case sensitivity:
       
   820    m_icase = l_icase;
       
   821 }
       
   822 
       
   823 template <class charT, class traits>
       
   824 int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
       
   825 {
       
   826    typedef typename traits::char_class_type mask_type;
       
   827    int result = 0;
       
   828    while(state)
       
   829    {
       
   830       switch(state->type)
       
   831       {
       
   832       case syntax_element_startmark:
       
   833          if((static_cast<re_brace*>(state)->index == -1)
       
   834             || (static_cast<re_brace*>(state)->index == -2))
       
   835          {
       
   836             state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
       
   837             continue;
       
   838          }
       
   839          else if(static_cast<re_brace*>(state)->index == -3)
       
   840          {
       
   841             state = state->next.p->next.p;
       
   842             continue;
       
   843          }
       
   844          break;
       
   845       case syntax_element_endmark:
       
   846          if((static_cast<re_brace*>(state)->index == -1)
       
   847             || (static_cast<re_brace*>(state)->index == -2))
       
   848             return result;
       
   849          break;
       
   850       case syntax_element_literal:
       
   851          result += static_cast<re_literal*>(state)->length;
       
   852          break;
       
   853       case syntax_element_wild:
       
   854       case syntax_element_set:
       
   855          result += 1;
       
   856          break;
       
   857       case syntax_element_dot_rep:
       
   858       case syntax_element_char_rep:
       
   859       case syntax_element_short_set_rep:
       
   860       case syntax_element_backref:
       
   861       case syntax_element_rep:
       
   862       case syntax_element_combining:
       
   863       case syntax_element_long_set_rep:
       
   864       case syntax_element_backstep:
       
   865          {
       
   866             re_repeat* rep = static_cast<re_repeat *>(state);
       
   867             // adjust the type of the state to allow for faster matching:
       
   868             state->type = this->get_repeat_type(state);
       
   869             if((state->type == syntax_element_dot_rep) 
       
   870                || (state->type == syntax_element_char_rep)
       
   871                || (state->type == syntax_element_short_set_rep))
       
   872             {
       
   873                if(rep->max != rep->min)
       
   874                   return -1;
       
   875                result += static_cast<int>(rep->min);
       
   876                state = rep->alt.p;
       
   877                continue;
       
   878             }
       
   879             else if((state->type == syntax_element_long_set_rep)) 
       
   880             {
       
   881                BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
       
   882                if(static_cast<re_set_long<mask_type>*>(rep->next.p)->singleton == 0)
       
   883                   return -1;
       
   884                if(rep->max != rep->min)
       
   885                   return -1;
       
   886                result += static_cast<int>(rep->min);
       
   887                state = rep->alt.p;
       
   888                continue;
       
   889             }
       
   890          }
       
   891          return -1;
       
   892       case syntax_element_long_set:
       
   893          if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0)
       
   894             return -1;
       
   895          result += 1;
       
   896          break;
       
   897       case syntax_element_jump:
       
   898          state = static_cast<re_jump*>(state)->alt.p;
       
   899          continue;
       
   900       default:
       
   901          break;
       
   902       }
       
   903       state = state->next.p;
       
   904    }
       
   905    return -1;
       
   906 }
       
   907 
       
   908 template <class charT, class traits>
       
   909 void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
       
   910 {
       
   911    int not_last_jump = 1;
       
   912 
       
   913    // track case sensitivity:
       
   914    bool l_icase = m_icase;
       
   915 
       
   916    while(state)
       
   917    {
       
   918       switch(state->type)
       
   919       {
       
   920       case syntax_element_toggle_case:
       
   921          l_icase = static_cast<re_case*>(state)->icase;
       
   922          state = state->next.p;
       
   923          break;
       
   924       case syntax_element_literal:
       
   925       {
       
   926          // don't set anything in *pnull, set each element in l_map
       
   927          // that could match the first character in the literal:
       
   928          if(l_map)
       
   929          {
       
   930             l_map[0] |= mask_init;
       
   931             charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
       
   932             for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
       
   933             {
       
   934                if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
       
   935                   l_map[i] |= mask;
       
   936             }
       
   937          }
       
   938          return;
       
   939       }
       
   940       case syntax_element_end_line:
       
   941       {
       
   942          // next character must be a line separator (if there is one):
       
   943          if(l_map)
       
   944          {
       
   945             l_map[0] |= mask_init;
       
   946             l_map['\n'] |= mask;
       
   947             l_map['\r'] |= mask;
       
   948             l_map['\f'] |= mask;
       
   949             l_map[0x85] |= mask;
       
   950          }
       
   951          // now figure out if we can match a NULL string at this point:
       
   952          if(pnull)
       
   953             create_startmap(state->next.p, 0, pnull, mask);
       
   954          return;
       
   955       }
       
   956       case syntax_element_backref:
       
   957          // can be null, and any character can match:
       
   958          if(pnull)
       
   959             *pnull |= mask;
       
   960          // fall through:
       
   961       case syntax_element_wild:
       
   962       {
       
   963          // can't be null, any character can match:
       
   964          set_all_masks(l_map, mask);
       
   965          return;
       
   966       }
       
   967       case syntax_element_match:
       
   968       {
       
   969          // must be null, any character can match:
       
   970          set_all_masks(l_map, mask);
       
   971          if(pnull)
       
   972             *pnull |= mask;
       
   973          return;
       
   974       }
       
   975       case syntax_element_word_start:
       
   976       {
       
   977          // recurse, then AND with all the word characters:
       
   978          create_startmap(state->next.p, l_map, pnull, mask);
       
   979          if(l_map)
       
   980          {
       
   981             l_map[0] |= mask_init;
       
   982             for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
       
   983             {
       
   984                if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
       
   985                   l_map[i] &= static_cast<unsigned char>(~mask);
       
   986             }
       
   987          }
       
   988          return;
       
   989       }
       
   990       case syntax_element_word_end:
       
   991       {
       
   992          // recurse, then AND with all the word characters:
       
   993          create_startmap(state->next.p, l_map, pnull, mask);
       
   994          if(l_map)
       
   995          {
       
   996             l_map[0] |= mask_init;
       
   997             for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
       
   998             {
       
   999                if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
       
  1000                   l_map[i] &= static_cast<unsigned char>(~mask);
       
  1001             }
       
  1002          }
       
  1003          return;
       
  1004       }
       
  1005       case syntax_element_buffer_end:
       
  1006       {
       
  1007          // we *must be null* :
       
  1008          if(pnull)
       
  1009             *pnull |= mask;
       
  1010          return;
       
  1011       }
       
  1012       case syntax_element_long_set:
       
  1013          if(l_map)
       
  1014          {
       
  1015             typedef typename traits::char_class_type mask_type;
       
  1016             if(static_cast<re_set_long<mask_type>*>(state)->singleton)
       
  1017             {
       
  1018                l_map[0] |= mask_init;
       
  1019                for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
       
  1020                {
       
  1021                   charT c = static_cast<charT>(i);
       
  1022                   if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase))
       
  1023                      l_map[i] |= mask;
       
  1024                }
       
  1025             }
       
  1026             else
       
  1027                set_all_masks(l_map, mask);
       
  1028          }
       
  1029          return;
       
  1030       case syntax_element_set:
       
  1031          if(l_map)
       
  1032          {
       
  1033             l_map[0] |= mask_init;
       
  1034             for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
       
  1035             {
       
  1036                if(static_cast<re_set*>(state)->_map[
       
  1037                   static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
       
  1038                   l_map[i] |= mask;
       
  1039             }
       
  1040          }
       
  1041          return;
       
  1042       case syntax_element_jump:
       
  1043          // take the jump:
       
  1044          state = static_cast<re_alt*>(state)->alt.p;
       
  1045          not_last_jump = -1;
       
  1046          break;
       
  1047       case syntax_element_alt:
       
  1048       case syntax_element_rep:
       
  1049       case syntax_element_dot_rep:
       
  1050       case syntax_element_char_rep:
       
  1051       case syntax_element_short_set_rep:
       
  1052       case syntax_element_long_set_rep:
       
  1053          {
       
  1054             re_alt* rep = static_cast<re_alt*>(state);
       
  1055             if(rep->_map[0] & mask_init)
       
  1056             {
       
  1057                if(l_map)
       
  1058                {
       
  1059                   // copy previous results:
       
  1060                   l_map[0] |= mask_init;
       
  1061                   for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
       
  1062                   {
       
  1063                      if(rep->_map[i] & mask_any)
       
  1064                         l_map[i] |= mask;
       
  1065                   }
       
  1066                }
       
  1067                if(pnull)
       
  1068                {
       
  1069                   if(rep->can_be_null & mask_any)
       
  1070                      *pnull |= mask;
       
  1071                }
       
  1072             }
       
  1073             else
       
  1074             {
       
  1075                // we haven't created a startmap for this alternative yet
       
  1076                // so take the union of the two options:
       
  1077                if(is_bad_repeat(state))
       
  1078                {
       
  1079                   set_all_masks(l_map, mask);
       
  1080                   if(pnull)
       
  1081                      *pnull |= mask;
       
  1082                   return;
       
  1083                }
       
  1084                set_bad_repeat(state);
       
  1085                create_startmap(state->next.p, l_map, pnull, mask);
       
  1086                if((state->type == syntax_element_alt)
       
  1087                   || (static_cast<re_repeat*>(state)->min == 0)
       
  1088                   || (not_last_jump == 0))
       
  1089                   create_startmap(rep->alt.p, l_map, pnull, mask);
       
  1090             }
       
  1091          }
       
  1092          return;
       
  1093       case syntax_element_soft_buffer_end:
       
  1094          // match newline or null:
       
  1095          if(l_map)
       
  1096          {
       
  1097             l_map[0] |= mask_init;
       
  1098             l_map['\n'] |= mask;
       
  1099             l_map['\r'] |= mask;
       
  1100          }
       
  1101          if(pnull)
       
  1102             *pnull |= mask;
       
  1103          return;
       
  1104       case syntax_element_endmark:
       
  1105          // need to handle independent subs as a special case:
       
  1106          if(static_cast<re_brace*>(state)->index < 0)
       
  1107          {
       
  1108             // can be null, any character can match:
       
  1109             set_all_masks(l_map, mask);
       
  1110             if(pnull)
       
  1111                *pnull |= mask;
       
  1112             return;
       
  1113          }
       
  1114          else
       
  1115          {
       
  1116             state = state->next.p;
       
  1117             break;
       
  1118          }
       
  1119 
       
  1120       case syntax_element_startmark:
       
  1121          // need to handle independent subs as a special case:
       
  1122          if(static_cast<re_brace*>(state)->index == -3)
       
  1123          {
       
  1124             state = state->next.p->next.p;
       
  1125             break;
       
  1126          }
       
  1127          // otherwise fall through:
       
  1128       default:
       
  1129          state = state->next.p;
       
  1130       }
       
  1131       ++not_last_jump;
       
  1132    }
       
  1133 }
       
  1134 
       
  1135 template <class charT, class traits>
       
  1136 unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
       
  1137 {
       
  1138    //
       
  1139    // find out how the machine starts, so we can optimise the search:
       
  1140    //
       
  1141    while(state)
       
  1142    {
       
  1143       switch(state->type)
       
  1144       {
       
  1145       case syntax_element_startmark:
       
  1146       case syntax_element_endmark:
       
  1147          state = state->next.p;
       
  1148          continue;
       
  1149       case syntax_element_start_line:
       
  1150          return regbase::restart_line;
       
  1151       case syntax_element_word_start:
       
  1152          return regbase::restart_word;
       
  1153       case syntax_element_buffer_start:
       
  1154          return regbase::restart_buf;
       
  1155       case syntax_element_restart_continue:
       
  1156          return regbase::restart_continue;
       
  1157       default:
       
  1158          state = 0;
       
  1159          continue;
       
  1160       }
       
  1161    }
       
  1162    return regbase::restart_any;
       
  1163 }
       
  1164 
       
  1165 template <class charT, class traits>
       
  1166 void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
       
  1167 {
       
  1168    //
       
  1169    // set mask in all of bits elements, 
       
  1170    // if bits[0] has mask_init not set then we can 
       
  1171    // optimise this to a call to memset:
       
  1172    //
       
  1173    if(bits)
       
  1174    {
       
  1175       if(bits[0] == 0)
       
  1176          (std::memset)(bits, mask, 1u << CHAR_BIT);
       
  1177       else
       
  1178       {
       
  1179          for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
       
  1180             bits[i] |= mask;
       
  1181       }
       
  1182       bits[0] |= mask_init;
       
  1183    }
       
  1184 }
       
  1185 
       
  1186 template <class charT, class traits>
       
  1187 bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
       
  1188 {
       
  1189    switch(pt->type)
       
  1190    {
       
  1191    case syntax_element_rep:
       
  1192    case syntax_element_dot_rep:
       
  1193    case syntax_element_char_rep:
       
  1194    case syntax_element_short_set_rep:
       
  1195    case syntax_element_long_set_rep:
       
  1196       {
       
  1197          unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
       
  1198          if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
       
  1199             return true;  // run out of bits, assume we can't traverse this one.
       
  1200          static const boost::uintmax_t one = 1uL;
       
  1201          return m_bad_repeats & (one << state_id);
       
  1202       }
       
  1203    default:
       
  1204       return false;
       
  1205    }
       
  1206 }
       
  1207 
       
  1208 template <class charT, class traits>
       
  1209 void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
       
  1210 {
       
  1211    switch(pt->type)
       
  1212    {
       
  1213    case syntax_element_rep:
       
  1214    case syntax_element_dot_rep:
       
  1215    case syntax_element_char_rep:
       
  1216    case syntax_element_short_set_rep:
       
  1217    case syntax_element_long_set_rep:
       
  1218       {
       
  1219          unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
       
  1220          static const boost::uintmax_t one = 1uL;
       
  1221          if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
       
  1222             m_bad_repeats |= (one << state_id);
       
  1223       }
       
  1224    default:
       
  1225       break;
       
  1226    }
       
  1227 }
       
  1228 
       
  1229 template <class charT, class traits>
       
  1230 syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
       
  1231 {
       
  1232    typedef typename traits::char_class_type mask_type;
       
  1233    if(state->type == syntax_element_rep)
       
  1234    {
       
  1235       // check to see if we are repeating a single state:
       
  1236       if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
       
  1237       {
       
  1238          switch(state->next.p->type)
       
  1239          {
       
  1240          case re_detail::syntax_element_wild:
       
  1241             return re_detail::syntax_element_dot_rep;
       
  1242          case re_detail::syntax_element_literal:
       
  1243             return re_detail::syntax_element_char_rep;
       
  1244          case re_detail::syntax_element_set:
       
  1245             return re_detail::syntax_element_short_set_rep;
       
  1246          case re_detail::syntax_element_long_set:
       
  1247             if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton)
       
  1248                return re_detail::syntax_element_long_set_rep;
       
  1249             break;
       
  1250          default:
       
  1251             break;
       
  1252          }
       
  1253       }
       
  1254    }
       
  1255    return state->type;
       
  1256 }
       
  1257 
       
  1258 template <class charT, class traits>
       
  1259 void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
       
  1260 {
       
  1261    // enumerate our states, and see if we have a leading repeat 
       
  1262    // for which failed search restarts can be optimised;
       
  1263    do
       
  1264    {
       
  1265       switch(state->type)
       
  1266       {
       
  1267       case syntax_element_startmark:
       
  1268          if(static_cast<re_brace*>(state)->index >= 0)
       
  1269          {
       
  1270             state = state->next.p;
       
  1271             continue;
       
  1272          }
       
  1273          if((static_cast<re_brace*>(state)->index == -1)
       
  1274             || (static_cast<re_brace*>(state)->index == -2))
       
  1275          {
       
  1276             // skip past the zero width assertion:
       
  1277             state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
       
  1278             continue;
       
  1279          }
       
  1280          if(static_cast<re_brace*>(state)->index == -3)
       
  1281          {
       
  1282             // Have to skip the leading jump state:
       
  1283             state = state->next.p->next.p;
       
  1284             continue;
       
  1285          }
       
  1286          return;
       
  1287       case syntax_element_endmark:
       
  1288       case syntax_element_start_line:
       
  1289       case syntax_element_end_line:
       
  1290       case syntax_element_word_boundary:
       
  1291       case syntax_element_within_word:
       
  1292       case syntax_element_word_start:
       
  1293       case syntax_element_word_end:
       
  1294       case syntax_element_buffer_start:
       
  1295       case syntax_element_buffer_end:
       
  1296       case syntax_element_restart_continue:
       
  1297          state = state->next.p;
       
  1298          break;
       
  1299       case syntax_element_dot_rep:
       
  1300       case syntax_element_char_rep:
       
  1301       case syntax_element_short_set_rep:
       
  1302       case syntax_element_long_set_rep:
       
  1303          if(this->m_has_backrefs == 0)
       
  1304             static_cast<re_repeat*>(state)->leading = true;
       
  1305          // fall through:
       
  1306       default:
       
  1307          return;
       
  1308       }
       
  1309    }while(state);
       
  1310 }
       
  1311 
       
  1312 
       
  1313 } // namespace re_detail
       
  1314 
       
  1315 } // namespace boost
       
  1316 
       
  1317 #ifdef BOOST_MSVC
       
  1318 #  pragma warning(pop)
       
  1319 #endif
       
  1320 
       
  1321 #ifdef BOOST_MSVC
       
  1322 #pragma warning(push)
       
  1323 #pragma warning(disable: 4103)
       
  1324 #endif
       
  1325 #ifdef BOOST_HAS_ABI_HEADERS
       
  1326 #  include BOOST_ABI_SUFFIX
       
  1327 #endif
       
  1328 #ifdef BOOST_MSVC
       
  1329 #pragma warning(pop)
       
  1330 #endif
       
  1331 
       
  1332 #endif