imgtools/imglib/boostlibrary/boost/regex/v4/regex_format.hpp
changeset 600 6d08f4a05d93
equal deleted inserted replaced
599:fa7a3cc6effd 600:6d08f4a05d93
       
     1 /*
       
     2  *
       
     3  * Copyright (c) 1998-2002
       
     4  * John Maddock
       
     5  *
       
     6  * Use, modification and distribution are subject to the 
       
     7  * Boost Software License, Version 1.0. (See accompanying file 
       
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
       
     9  *
       
    10  */
       
    11 
       
    12  /*
       
    13   *   LOCATION:    see http://www.boost.org for most recent version.
       
    14   *   FILE         regex_format.hpp
       
    15   *   VERSION      see <boost/version.hpp>
       
    16   *   DESCRIPTION: Provides formatting output routines for search and replace
       
    17   *                operations.  Note this is an internal header file included
       
    18   *                by regex.hpp, do not include on its own.
       
    19   */
       
    20 
       
    21 #ifndef BOOST_REGEX_FORMAT_HPP
       
    22 #define BOOST_REGEX_FORMAT_HPP
       
    23 
       
    24 
       
    25 namespace boost{
       
    26 
       
    27 #ifdef BOOST_MSVC
       
    28 #pragma warning(push)
       
    29 #pragma warning(disable: 4103)
       
    30 #endif
       
    31 #ifdef BOOST_HAS_ABI_HEADERS
       
    32 #  include BOOST_ABI_PREFIX
       
    33 #endif
       
    34 #ifdef BOOST_MSVC
       
    35 #pragma warning(pop)
       
    36 #endif
       
    37 
       
    38 //
       
    39 // Forward declaration:
       
    40 //
       
    41    template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type >
       
    42 class match_results;
       
    43 
       
    44 namespace re_detail{
       
    45 
       
    46 //
       
    47 // struct trivial_format_traits:
       
    48 // defines minimum localisation support for formatting
       
    49 // in the case that the actual regex traits is unavailable.
       
    50 //
       
    51 template <class charT>
       
    52 struct trivial_format_traits
       
    53 {
       
    54    typedef charT char_type;
       
    55 
       
    56    static std::ptrdiff_t length(const charT* p)
       
    57    {
       
    58       return global_length(p);
       
    59    }
       
    60    static charT tolower(charT c)
       
    61    {
       
    62       return ::boost::re_detail::global_lower(c);
       
    63    }
       
    64    static charT toupper(charT c)
       
    65    {
       
    66       return ::boost::re_detail::global_upper(c);
       
    67    }
       
    68    static int value(const charT c, int radix)
       
    69    {
       
    70       int result = global_value(c);
       
    71       return result >= radix ? -1 : result;
       
    72    }
       
    73    int toi(const charT*& p1, const charT* p2, int radix)const
       
    74    {
       
    75       return global_toi(p1, p2, radix, *this);
       
    76    }
       
    77 };
       
    78 
       
    79 template <class OutputIterator, class Results, class traits>
       
    80 class basic_regex_formatter
       
    81 {
       
    82 public:
       
    83    typedef typename traits::char_type char_type;
       
    84    basic_regex_formatter(OutputIterator o, const Results& r, const traits& t)
       
    85       : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {}
       
    86    OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f);
       
    87    OutputIterator format(const char_type* p1, match_flag_type f)
       
    88    {
       
    89       return format(p1, p1 + m_traits.length(p1), f);
       
    90    }
       
    91 private:
       
    92    typedef typename Results::value_type sub_match_type;
       
    93    enum output_state
       
    94    {
       
    95       output_copy,
       
    96       output_next_lower,
       
    97       output_next_upper,
       
    98       output_lower,
       
    99       output_upper,
       
   100       output_none
       
   101    };
       
   102 
       
   103    void put(char_type c);
       
   104    void put(const sub_match_type& sub);
       
   105    void format_all();
       
   106    void format_perl();
       
   107    void format_escape();
       
   108    void format_conditional();
       
   109    void format_until_scope_end();
       
   110 
       
   111    const traits& m_traits;       // the traits class for localised formatting operations
       
   112    const Results& m_results;     // the match_results being used.
       
   113    OutputIterator m_out;         // where to send output.
       
   114    const char_type* m_position;  // format string, current position
       
   115    const char_type* m_end;       // format string end
       
   116    match_flag_type m_flags;      // format flags to use
       
   117    output_state    m_state;      // what to do with the next character
       
   118    output_state    m_restore_state;  // what state to restore to.
       
   119    bool            m_have_conditional; // we are parsing a conditional
       
   120 private:
       
   121    basic_regex_formatter(const basic_regex_formatter&);
       
   122    basic_regex_formatter& operator=(const basic_regex_formatter&);
       
   123 };
       
   124 
       
   125 template <class OutputIterator, class Results, class traits>
       
   126 OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f)
       
   127 {
       
   128    m_position = p1;
       
   129    m_end = p2;
       
   130    m_flags = f;
       
   131    format_all();
       
   132    return m_out;
       
   133 }
       
   134 
       
   135 template <class OutputIterator, class Results, class traits>
       
   136 void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
       
   137 {
       
   138    // over and over:
       
   139    while(m_position != m_end)
       
   140    {
       
   141       switch(*m_position)
       
   142       {
       
   143       case '&':
       
   144          if(m_flags & ::boost::regex_constants::format_sed)
       
   145          {
       
   146             ++m_position;
       
   147             put(m_results[0]);
       
   148             break;
       
   149          }
       
   150          put(*m_position++);
       
   151          break;
       
   152       case '\\':
       
   153          format_escape();
       
   154          break;
       
   155       case '(':
       
   156          if(m_flags & boost::regex_constants::format_all)
       
   157          {
       
   158             ++m_position;
       
   159             bool have_conditional = m_have_conditional;
       
   160             m_have_conditional = false;
       
   161             format_until_scope_end();
       
   162             m_have_conditional = have_conditional;
       
   163             if(m_position == m_end)
       
   164                return;
       
   165             BOOST_ASSERT(*m_position == static_cast<char_type>(')'));
       
   166             ++m_position;  // skip the closing ')'
       
   167             break;
       
   168          }
       
   169          put(*m_position);
       
   170          ++m_position;
       
   171          break;
       
   172       case ')':
       
   173          if(m_flags & boost::regex_constants::format_all)
       
   174          {
       
   175             return;
       
   176          }
       
   177          put(*m_position);
       
   178          ++m_position;
       
   179          break;
       
   180       case ':':
       
   181          if((m_flags & boost::regex_constants::format_all) && m_have_conditional)
       
   182          {
       
   183             return;
       
   184          }
       
   185          put(*m_position);
       
   186          ++m_position;
       
   187          break;
       
   188       case '?':
       
   189          if(m_flags & boost::regex_constants::format_all)
       
   190          {
       
   191             ++m_position;
       
   192             format_conditional();
       
   193             break;
       
   194          }
       
   195          put(*m_position);
       
   196          ++m_position;
       
   197          break;
       
   198       case '$':
       
   199          if((m_flags & format_sed) == 0)
       
   200          {
       
   201             format_perl();
       
   202             break;
       
   203          }
       
   204          // fall through, not a special character:
       
   205       default:
       
   206          put(*m_position);
       
   207          ++m_position;
       
   208          break;
       
   209       }
       
   210    }
       
   211 }
       
   212 
       
   213 template <class OutputIterator, class Results, class traits>
       
   214 void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
       
   215 {
       
   216    //
       
   217    // On entry *m_position points to a '$' character
       
   218    // output the information that goes with it:
       
   219    //
       
   220    BOOST_ASSERT(*m_position == '$');
       
   221    //
       
   222    // see if this is a trailing '$':
       
   223    //
       
   224    if(++m_position == m_end)
       
   225    {
       
   226       --m_position;
       
   227       put(*m_position);
       
   228       ++m_position;
       
   229       return;
       
   230    }
       
   231    //
       
   232    // OK find out what kind it is:
       
   233    //
       
   234    bool have_brace = false;
       
   235    const char_type* save_position = m_position;
       
   236    switch(*m_position)
       
   237    {
       
   238    case '&':
       
   239       ++m_position;
       
   240       put(this->m_results[0]);
       
   241       break;
       
   242    case '`':
       
   243       ++m_position;
       
   244       put(this->m_results.prefix());
       
   245       break;
       
   246    case '\'':
       
   247       ++m_position;
       
   248       put(this->m_results.suffix());
       
   249       break;
       
   250    case '$':
       
   251       put(*m_position++);
       
   252       break;
       
   253    case '{':
       
   254       have_brace = true;
       
   255       ++m_position;
       
   256       // fall through....
       
   257    default:
       
   258       // see if we have a number:
       
   259       {
       
   260          std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
       
   261          len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
       
   262          int v = m_traits.toi(m_position, m_position + len, 10);
       
   263          if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}'))))
       
   264          {
       
   265             // leave the $ as is, and carry on:
       
   266             m_position = --save_position;
       
   267             put(*m_position);
       
   268             ++m_position;
       
   269             break;
       
   270          }
       
   271          // otherwise output sub v:
       
   272          put(this->m_results[v]);
       
   273          if(have_brace)
       
   274             ++m_position;
       
   275       }
       
   276    }
       
   277 }
       
   278 
       
   279 template <class OutputIterator, class Results, class traits>
       
   280 void basic_regex_formatter<OutputIterator, Results, traits>::format_escape()
       
   281 {
       
   282    // skip the escape and check for trailing escape:
       
   283    if(++m_position == m_end)
       
   284    {
       
   285       put(static_cast<char_type>('\\'));
       
   286       return;
       
   287    }
       
   288    // now switch on the escape type:
       
   289    switch(*m_position)
       
   290    {
       
   291    case 'a':
       
   292       put(static_cast<char_type>('\a'));
       
   293       ++m_position;
       
   294       break;
       
   295    case 'f':
       
   296       put(static_cast<char_type>('\f'));
       
   297       ++m_position;
       
   298       break;
       
   299    case 'n':
       
   300       put(static_cast<char_type>('\n'));
       
   301       ++m_position;
       
   302       break;
       
   303    case 'r':
       
   304       put(static_cast<char_type>('\r'));
       
   305       ++m_position;
       
   306       break;
       
   307    case 't':
       
   308       put(static_cast<char_type>('\t'));
       
   309       ++m_position;
       
   310       break;
       
   311    case 'v':
       
   312       put(static_cast<char_type>('\v'));
       
   313       ++m_position;
       
   314       break;
       
   315    case 'x':
       
   316       if(++m_position == m_end)
       
   317       {
       
   318          put(static_cast<char_type>('x'));
       
   319          return;
       
   320       }
       
   321       // maybe have \x{ddd}
       
   322       if(*m_position == static_cast<char_type>('{'))
       
   323       {
       
   324          ++m_position;
       
   325          int val = m_traits.toi(m_position, m_end, 16);
       
   326          if(val < 0)
       
   327          {
       
   328             // invalid value treat everything as literals:
       
   329             put(static_cast<char_type>('x'));
       
   330             put(static_cast<char_type>('{'));
       
   331             return;
       
   332          }
       
   333          if(*m_position != static_cast<char_type>('}'))
       
   334          {
       
   335             while(*m_position != static_cast<char_type>('\\'))
       
   336                --m_position;
       
   337             ++m_position;
       
   338             put(*m_position++);
       
   339             return;
       
   340          }
       
   341          ++m_position;
       
   342          put(static_cast<char_type>(val));
       
   343          return;
       
   344       }
       
   345       else
       
   346       {
       
   347          std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
       
   348          len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
       
   349          int val = m_traits.toi(m_position, m_position + len, 16);
       
   350          if(val < 0)
       
   351          {
       
   352             --m_position;
       
   353             put(*m_position++);
       
   354             return;
       
   355          }
       
   356          put(static_cast<char_type>(val));
       
   357       }
       
   358       break;
       
   359    case 'c':
       
   360       if(++m_position == m_end)
       
   361       {
       
   362          --m_position;
       
   363          put(*m_position++);
       
   364          return;
       
   365       }
       
   366       put(static_cast<char_type>(*m_position++ % 32));
       
   367       break;
       
   368    case 'e':
       
   369       put(static_cast<char_type>(27));
       
   370       ++m_position;
       
   371       break;
       
   372    default:
       
   373       // see if we have a perl specific escape:
       
   374       if((m_flags & boost::regex_constants::format_sed) == 0)
       
   375       {
       
   376          bool breakout = false;
       
   377          switch(*m_position)
       
   378          {
       
   379          case 'l':
       
   380             ++m_position;
       
   381             m_restore_state = m_state;
       
   382             m_state = output_next_lower;
       
   383             breakout = true;
       
   384             break;
       
   385          case 'L':
       
   386             ++m_position;
       
   387             m_state = output_lower;
       
   388             breakout = true;
       
   389             break;
       
   390          case 'u':
       
   391             ++m_position;
       
   392             m_restore_state = m_state;
       
   393             m_state = output_next_upper;
       
   394             breakout = true;
       
   395             break;
       
   396          case 'U':
       
   397             ++m_position;
       
   398             m_state = output_upper;
       
   399             breakout = true;
       
   400             break;
       
   401          case 'E':
       
   402             ++m_position;
       
   403             m_state = output_copy;
       
   404             breakout = true;
       
   405             break;
       
   406          }
       
   407          if(breakout)
       
   408             break;
       
   409       }
       
   410       // see if we have a \n sed style backreference:
       
   411       int v = m_traits.toi(m_position, m_position+1, 10);
       
   412       if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed)))
       
   413       {
       
   414          put(m_results[v]);
       
   415          break;
       
   416       }
       
   417       else if(v == 0)
       
   418       {
       
   419          // octal ecape sequence:
       
   420          --m_position;
       
   421          std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
       
   422          len = (std::min)(static_cast<std::ptrdiff_t>(4), len);
       
   423          v = m_traits.toi(m_position, m_position + len, 8);
       
   424          BOOST_ASSERT(v >= 0);
       
   425          put(static_cast<char_type>(v));
       
   426          break;
       
   427       }
       
   428       // Otherwise output the character "as is":
       
   429       put(*m_position++);
       
   430       break;
       
   431    }
       
   432 }
       
   433 
       
   434 template <class OutputIterator, class Results, class traits>
       
   435 void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional()
       
   436 {
       
   437    if(m_position == m_end)
       
   438    {
       
   439       // oops trailing '?':
       
   440       put(static_cast<char_type>('?'));
       
   441       return;
       
   442    }
       
   443    std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
       
   444    len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
       
   445    int v = m_traits.toi(m_position, m_position + len, 10);
       
   446    if(v < 0)
       
   447    {
       
   448       // oops not a number:
       
   449       put(static_cast<char_type>('?'));
       
   450       return;
       
   451    }
       
   452 
       
   453    // output varies depending upon whether sub-expression v matched or not:
       
   454    if(m_results[v].matched)
       
   455    {
       
   456       m_have_conditional = true;
       
   457       format_all();
       
   458       m_have_conditional = false;
       
   459       if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
       
   460       {
       
   461          // skip the ':':
       
   462          ++m_position;
       
   463          // save output state, then turn it off:
       
   464          output_state saved_state = m_state;
       
   465          m_state = output_none;
       
   466          // format the rest of this scope:
       
   467          format_until_scope_end();
       
   468          // restore output state:
       
   469          m_state = saved_state;
       
   470       }
       
   471    }
       
   472    else
       
   473    {
       
   474       // save output state, then turn it off:
       
   475       output_state saved_state = m_state;
       
   476       m_state = output_none;
       
   477       // format until ':' or ')':
       
   478       m_have_conditional = true;
       
   479       format_all();
       
   480       m_have_conditional = false;
       
   481       // restore state:
       
   482       m_state = saved_state;
       
   483       if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
       
   484       {
       
   485          // skip the ':':
       
   486          ++m_position;
       
   487          // format the rest of this scope:
       
   488          format_until_scope_end();
       
   489       }
       
   490    }
       
   491 }
       
   492 
       
   493 template <class OutputIterator, class Results, class traits>
       
   494 void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end()
       
   495 {
       
   496    do
       
   497    {
       
   498       format_all();
       
   499       if((m_position == m_end) || (*m_position == static_cast<char_type>(')')))
       
   500          return;
       
   501       put(*m_position++);
       
   502    }while(m_position != m_end);
       
   503 }
       
   504 
       
   505 template <class OutputIterator, class Results, class traits>
       
   506 void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c)
       
   507 {
       
   508    // write a single character to output
       
   509    // according to which case translation mode we are in:
       
   510    switch(this->m_state)
       
   511    {
       
   512    case output_none:
       
   513       return;
       
   514    case output_next_lower:
       
   515       c = m_traits.tolower(c);
       
   516       this->m_state = m_restore_state;
       
   517       break;
       
   518    case output_next_upper:
       
   519       c = m_traits.toupper(c);
       
   520       this->m_state = m_restore_state;
       
   521       break;
       
   522    case output_lower:
       
   523       c = m_traits.tolower(c);
       
   524       break;
       
   525    case output_upper:
       
   526       c = m_traits.toupper(c);
       
   527       break;
       
   528    default:
       
   529       break;
       
   530    }
       
   531    *m_out = c;
       
   532    ++m_out;
       
   533 }
       
   534 
       
   535 template <class OutputIterator, class Results, class traits>
       
   536 void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub)
       
   537 {
       
   538    typedef typename sub_match_type::iterator iterator_type;
       
   539    iterator_type i = sub.first;
       
   540    while(i != sub.second)
       
   541    {
       
   542       put(*i);
       
   543       ++i;
       
   544    }
       
   545 }
       
   546 
       
   547 template <class S>
       
   548 class string_out_iterator
       
   549 #ifndef BOOST_NO_STD_ITERATOR
       
   550    : public std::iterator<std::output_iterator_tag, typename S::value_type>
       
   551 #endif
       
   552 {
       
   553    S* out;
       
   554 public:
       
   555    string_out_iterator(S& s) : out(&s) {}
       
   556    string_out_iterator& operator++() { return *this; }
       
   557    string_out_iterator& operator++(int) { return *this; }
       
   558    string_out_iterator& operator*() { return *this; }
       
   559    string_out_iterator& operator=(typename S::value_type v) 
       
   560    { 
       
   561       out->append(1, v); 
       
   562       return *this; 
       
   563    }
       
   564 
       
   565 #ifdef BOOST_NO_STD_ITERATOR
       
   566    typedef std::ptrdiff_t difference_type;
       
   567    typedef typename S::value_type value_type;
       
   568    typedef value_type* pointer;
       
   569    typedef value_type& reference;
       
   570    typedef std::output_iterator_tag iterator_category;
       
   571 #endif
       
   572 };
       
   573 
       
   574 template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
       
   575 OutputIterator regex_format_imp(OutputIterator out,
       
   576                           const match_results<Iterator, Alloc>& m,
       
   577                           const charT* p1, const charT* p2,
       
   578                           match_flag_type flags,
       
   579                           const traits& t
       
   580                          )
       
   581 {
       
   582    if(flags & regex_constants::format_literal)
       
   583    {
       
   584       return re_detail::copy(p1, p2, out);
       
   585    }
       
   586 
       
   587    re_detail::basic_regex_formatter<
       
   588       OutputIterator, 
       
   589       match_results<Iterator, Alloc>, 
       
   590       traits > f(out, m, t);
       
   591    return f.format(p1, p2, flags);
       
   592 }
       
   593 
       
   594 
       
   595 } // namespace re_detail
       
   596 
       
   597 template <class OutputIterator, class Iterator, class charT>
       
   598 OutputIterator regex_format(OutputIterator out,
       
   599                           const match_results<Iterator>& m,
       
   600                           const charT* fmt,
       
   601                           match_flag_type flags = format_all
       
   602                          )
       
   603 {
       
   604    re_detail::trivial_format_traits<charT> traits;
       
   605    return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits);
       
   606 }
       
   607 
       
   608 template <class OutputIterator, class Iterator, class charT>
       
   609 OutputIterator regex_format(OutputIterator out,
       
   610                           const match_results<Iterator>& m,
       
   611                           const std::basic_string<charT>& fmt,
       
   612                           match_flag_type flags = format_all
       
   613                          )
       
   614 {
       
   615    re_detail::trivial_format_traits<charT> traits;
       
   616    return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
       
   617 }  
       
   618 
       
   619 template <class Iterator, class charT>
       
   620 std::basic_string<charT> regex_format(const match_results<Iterator>& m, 
       
   621                                       const charT* fmt, 
       
   622                                       match_flag_type flags = format_all)
       
   623 {
       
   624    std::basic_string<charT> result;
       
   625    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
       
   626    re_detail::trivial_format_traits<charT> traits;
       
   627    re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits);
       
   628    return result;
       
   629 }
       
   630 
       
   631 template <class Iterator, class charT>
       
   632 std::basic_string<charT> regex_format(const match_results<Iterator>& m, 
       
   633                                       const std::basic_string<charT>& fmt, 
       
   634                                       match_flag_type flags = format_all)
       
   635 {
       
   636    std::basic_string<charT> result;
       
   637    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
       
   638    re_detail::trivial_format_traits<charT> traits;
       
   639    re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
       
   640    return result;
       
   641 }
       
   642 
       
   643 #ifdef BOOST_MSVC
       
   644 #pragma warning(push)
       
   645 #pragma warning(disable: 4103)
       
   646 #endif
       
   647 #ifdef BOOST_HAS_ABI_HEADERS
       
   648 #  include BOOST_ABI_SUFFIX
       
   649 #endif
       
   650 #ifdef BOOST_MSVC
       
   651 #pragma warning(pop)
       
   652 #endif
       
   653 
       
   654 } // namespace boost
       
   655 
       
   656 #endif  // BOOST_REGEX_FORMAT_HPP
       
   657 
       
   658 
       
   659 
       
   660 
       
   661 
       
   662