imgtools/imglib/boostlibrary/boost/regex/v4/states.hpp
changeset 600 6d08f4a05d93
equal deleted inserted replaced
599:fa7a3cc6effd 600:6d08f4a05d93
       
     1 /*
       
     2  *
       
     3  * Copyright (c) 1998-2002
       
     4  * John Maddock
       
     5  *
       
     6  * Use, modification and distribution are subject to the 
       
     7  * Boost Software License, Version 1.0. (See accompanying file 
       
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
       
     9  *
       
    10  */
       
    11 
       
    12  /*
       
    13   *   LOCATION:    see http://www.boost.org for most recent version.
       
    14   *   FILE         states.hpp
       
    15   *   VERSION      see <boost/version.hpp>
       
    16   *   DESCRIPTION: Declares internal state machine structures.
       
    17   */
       
    18 
       
    19 #ifndef BOOST_REGEX_V4_STATES_HPP
       
    20 #define BOOST_REGEX_V4_STATES_HPP
       
    21 
       
    // Include the ABI prefix header when BOOST_HAS_ABI_HEADERS is defined.
    // On MSVC, warning C4103 (alignment changed after including a header)
    // is disabled just for the duration of that include and restored after.
    #if defined(BOOST_MSVC)
    #  pragma warning(push)
    #  pragma warning(disable: 4103)
    #endif

    #if defined(BOOST_HAS_ABI_HEADERS)
    #  include BOOST_ABI_PREFIX
    #endif

    #if defined(BOOST_MSVC)
    #  pragma warning(pop)
    #endif
       
    32 
       
    33 namespace boost{
       
    34 namespace re_detail{
       
    35 
       
    /*** mask_type *******************************************************
    Wherever the machine has to choose between two alternatives, an array
    of bytes records, for every possible input character, which of the two
    alternatives may be taken:
      - mask_take set: the next state may be taken;
      - mask_skip set: the alternative may be taken.
    ***********************************************************************/
    enum mask_type
    {
       mask_take = 1 << 0,
       mask_skip = 1 << 1,
       mask_init = 1 << 2,
       mask_any  = mask_take | mask_skip,   // either alternative is possible
       mask_all  = mask_any                 // same value as mask_any
    };
       
    50 
       
    /*** helpers **********************************************************
    Tag types plus a small trait.  is_byte<charT>::width_type names
    _narrow_type for the three plain character types and _wide_type for
    every other charT, so that function overload resolution can tell narrow
    character strings from wide ones.
    ***********************************************************************/
    struct _narrow_type {};
    struct _wide_type {};

    // General case: any character type without a specialisation is "wide".
    template <class charT>
    struct is_byte
    {
       typedef _wide_type width_type;
    };

    // The narrow character types.
    template <>
    struct is_byte<char>
    {
       typedef _narrow_type width_type;
    };

    template <>
    struct is_byte<unsigned char>
    {
       typedef _narrow_type width_type;
    };

    template <>
    struct is_byte<signed char>
    {
       typedef _narrow_type width_type;
    };
       
    62 
       
    /*** enum syntax_element_type ******************************************
    Every record in the state machine is of one of the kinds listed here.
    The enumerators are numbered consecutively, starting from zero, in the
    order in which they appear.
    ***********************************************************************/
    enum syntax_element_type
    {
       syntax_element_startmark = 0,      // start of a marked sub-expression, or perl-style (?...) extension
       syntax_element_endmark,            // end of a marked sub-expression, or perl-style (?...) extension
       syntax_element_literal,            // any sequence of literal characters
       syntax_element_start_line,         // start of line assertion: ^
       syntax_element_end_line,           // end of line assertion: $
       syntax_element_wild,               // match any character: .
       syntax_element_match,              // end of expression: reaching this state means we have a match
       syntax_element_word_boundary,      // perl style word boundary: \b
       syntax_element_within_word,        // perl style within word boundary: \B
       syntax_element_word_start,         // start of word assertion: \<
       syntax_element_word_end,           // end of word assertion: \>
       syntax_element_buffer_start,       // start of buffer assertion: \`
       syntax_element_buffer_end,         // end of buffer assertion: \'
       syntax_element_backref,            // backreference to a previously matched sub-expression
       syntax_element_long_set,           // a wide character set [..], or one with multicharacter collating elements
       syntax_element_set,                // narrow character set: [...]
       syntax_element_jump,               // jump to a new state in the machine
       syntax_element_alt,                // choose between two production states
       syntax_element_rep,                // a repeat
       syntax_element_combining,          // match a combining character sequence
       syntax_element_soft_buffer_end,    // perl style soft buffer end: \z
       syntax_element_restart_continue,   // perl style continuation: \G

       // single character repeats:
       syntax_element_dot_rep,
       syntax_element_char_rep,
       syntax_element_short_set_rep,
       syntax_element_long_set_rep,

       syntax_element_backstep,           // a backstep for lookbehind repeats
       syntax_element_assert_backref,     // an assertion that a mark was matched
       syntax_element_toggle_case
    };
       
   123 
       
   #if defined(BOOST_REGEX_DEBUG)
   // Stream inserter for syntax_element_type, declared only in debug builds.
   // Original note (dwa 09/26/00): needed to suppress warnings about an
   // ambiguous conversion.
   std::ostream& operator<<(std::ostream& os, syntax_element_type kind);
   #endif
       
   128 
       
   // Forward declaration: offset_type only needs a pointer to this type.
   struct re_syntax_base;

   /*** union offset_type ************************************************
   Refers to another state in the machine.  While the machine is being
   constructed the integral offset (i) is used; offsets are converted to
   pointers (p) before the machine is executed.
   ***********************************************************************/
   union offset_type
   {
      re_syntax_base* p;   // pointer form
      std::ptrdiff_t  i;   // offset form
   };
       
   141 
       
   142 /*** struct re_syntax_base ********************************************
       
   143 Base class for all states in the machine.
       
   144 ***********************************************************************/
       
   145 struct re_syntax_base
       
   146 {
       
   147    syntax_element_type   type;         // what kind of state this is
       
   148    offset_type           next;         // next state in the machine
       
   149 };
       
   150 
       
   151 /*** struct re_brace **************************************************
       
   152 A marked parenthesis.
       
   153 ***********************************************************************/
       
   154 struct re_brace : public re_syntax_base
       
   155 {
       
   156    // The index to match, can be zero (don't mark the sub-expression)
       
   157    // or negative (for perl style (?...) extentions):
       
   158    int index;
       
   159 };
       
   160 
       
   /*** struct re_dot **************************************************
   Match anything.

   The unnamed enumeration declared here provides the named constants that
   accompany re_dot.
   NOTE(review): these look like the values stored in, and tested against,
   re_dot::mask -- confirm against the matcher code.
   ***********************************************************************/
   enum
   {
      force_not_newline = 0,
      dont_care         = 1,
      force_newline     = 2,

      test_not_newline  = 2,   // shares its value with force_newline
      test_newline      = 3
   };
       
   173 struct re_dot : public re_syntax_base
       
   174 {
       
   175    unsigned char mask;
       
   176 };
       
   177 
       
   178 /*** struct re_literal ************************************************
       
   179 A string of literals, following this structure will be an 
       
   180 array of characters: charT[length]
       
   181 ***********************************************************************/
       
   182 struct re_literal : public re_syntax_base
       
   183 {
       
   184    unsigned int length;
       
   185 };
       
   186 
       
   187 /*** struct re_case ************************************************
       
   188 Indicates whether we are moving to a case insensive block or not
       
   189 ***********************************************************************/
       
   190 struct re_case : public re_syntax_base
       
   191 {
       
   192    bool icase;
       
   193 };
       
   194 
       
   195 /*** struct re_set_long ***********************************************
       
   196 A wide character set of characters, following this structure will be
       
   197 an array of type charT:
       
   198 First csingles null-terminated strings
       
   199 Then 2 * cranges NULL terminated strings
       
   200 Then cequivalents NULL terminated strings
       
   201 ***********************************************************************/
       
   202 template <class mask_type>
       
   203 struct re_set_long : public re_syntax_base
       
   204 {
       
   205    unsigned int            csingles, cranges, cequivalents;
       
   206    mask_type               cclasses;
       
   207    mask_type               cnclasses;
       
   208    bool                    isnot;
       
   209    bool                    singleton;
       
   210 };
       
   211 
       
   212 /*** struct re_set ****************************************************
       
   213 A set of narrow-characters, matches any of _map which is none-zero
       
   214 ***********************************************************************/
       
   215 struct re_set : public re_syntax_base
       
   216 {
       
   217    unsigned char _map[1 << CHAR_BIT];
       
   218 };
       
   219 
       
   220 /*** struct re_jump ***************************************************
       
   221 Jump to a new location in the machine (not next).
       
   222 ***********************************************************************/
       
   223 struct re_jump : public re_syntax_base
       
   224 {
       
   225    offset_type     alt;                 // location to jump to
       
   226 };
       
   227 
       
   228 /*** struct re_alt ***************************************************
       
   229 Jump to a new location in the machine (possibly next).
       
   230 ***********************************************************************/
       
   231 struct re_alt : public re_jump
       
   232 {
       
   233    unsigned char   _map[1 << CHAR_BIT]; // which characters can take the jump
       
   234    unsigned int    can_be_null;         // true if we match a NULL string
       
   235 };
       
   236 
       
   237 /*** struct re_repeat *************************************************
       
   238 Repeat a section of the machine
       
   239 ***********************************************************************/
       
   240 struct re_repeat : public re_alt
       
   241 {
       
   242    std::size_t   min, max;  // min and max allowable repeats
       
   243    int           state_id;        // Unique identifier for this repeat
       
   244    bool          leading;   // True if this repeat is at the start of the machine (lets us optimize some searches)
       
   245    bool          greedy;    // True if this is a greedy repeat
       
   246 };
       
   247 
       
   248 /*** enum re_jump_size_type *******************************************
       
   249 Provides compiled size of re_jump structure (allowing for trailing alignment).
       
   250 We provide this so we know how manybytes to insert when constructing the machine
       
   251 (The value of padding_mask is defined in regex_raw_buffer.hpp).
       
   252 ***********************************************************************/
       
   253 enum re_jump_size_type
       
   254 {
       
   255    re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
       
   256    re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
       
   257    re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
       
   258 };
       
   259 
       
   260 /*** proc re_is_set_member *********************************************
       
   261 Forward declaration: we'll need this one later...
       
   262 ***********************************************************************/
       
   263 
       
   264 template<class charT, class traits>
       
   265 struct regex_data;
       
   266 
       
   267 template <class iterator, class charT, class traits_type, class char_classT>
       
   268 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
       
   269                           iterator last, 
       
   270                           const re_set_long<char_classT>* set_, 
       
   271                           const regex_data<charT, traits_type>& e, bool icase);
       
   272 
       
   273 } // namespace re_detail
       
   274 
       
   275 } // namespace boost
       
   276 
       
   // Include the ABI suffix header when BOOST_HAS_ABI_HEADERS is defined.
   // On MSVC, warning C4103 (alignment changed after including a header)
   // is disabled just for the duration of that include and restored after.
   #if defined(BOOST_MSVC)
   #  pragma warning(push)
   #  pragma warning(disable: 4103)
   #endif

   #if defined(BOOST_HAS_ABI_HEADERS)
   #  include BOOST_ABI_SUFFIX
   #endif

   #if defined(BOOST_MSVC)
   #  pragma warning(pop)
   #endif
       
   287 
       
   288 #endif
       
   289 
       
   290