imgtools/imglib/boostlibrary/boost/regex/pending/unicode_iterator.hpp
changeset 600 6d08f4a05d93
equal deleted inserted replaced
599:fa7a3cc6effd 600:6d08f4a05d93
       
     1 /*
       
     2  *
       
     3  * Copyright (c) 2004
       
     4  * John Maddock
       
     5  *
       
     6  * Use, modification and distribution are subject to the 
       
     7  * Boost Software License, Version 1.0. (See accompanying file 
       
     8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
       
     9  *
       
    10  */
       
    11  
       
    12  /*
       
    13   *   LOCATION:    see http://www.boost.org for most recent version.
       
    14   *   FILE         unicode_iterator.hpp
       
    15   *   VERSION      see <boost/version.hpp>
       
    16   *   DESCRIPTION: Iterator adapters for converting between different Unicode encodings.
       
    17   */
       
    18 
       
    19 /****************************************************************************
       
    20 
       
    21 Contents:
       
    22 ~~~~~~~~~
       
    23 
       
    24 1) Read Only, Input Adapters:
       
    25 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       
    26 
       
    27 template <class BaseIterator, class U8Type = ::boost::uint8_t>
       
    28 class u32_to_u8_iterator;
       
    29 
       
    30 Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8.
       
    31 
       
    32 template <class BaseIterator, class U32Type = ::boost::uint32_t>
       
    33 class u8_to_u32_iterator;
       
    34 
       
    35 Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32.
       
    36 
       
    37 template <class BaseIterator, class U16Type = ::boost::uint16_t>
       
    38 class u32_to_u16_iterator;
       
    39 
       
    40 Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16.
       
    41 
       
    42 template <class BaseIterator, class U32Type = ::boost::uint32_t>
       
    43 class u16_to_u32_iterator;
       
    44 
       
    45 Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32.
       
    46 
       
    47 2) Single pass output iterator adapters:
       
    48 
       
    49 template <class BaseIterator>
       
    50 class utf8_output_iterator;
       
    51 
       
    52 Accepts UTF-32 code points and forwards them on as UTF-8 code points.
       
    53 
       
    54 template <class BaseIterator>
       
    55 class utf16_output_iterator;
       
    56 
       
    57 Accepts UTF-32 code points and forwards them on as UTF-16 code points.
       
    58 
       
    59 ****************************************************************************/
       
    60 
       
    61 #ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP
       
    62 #define BOOST_REGEX_UNICODE_ITERATOR_HPP
       
    63 #include <boost/cstdint.hpp>
       
    64 #include <boost/assert.hpp>
       
    65 #include <boost/iterator/iterator_facade.hpp>
       
    66 #include <boost/static_assert.hpp>
       
    67 #include <boost/throw_exception.hpp>
       
    68 #include <stdexcept>
       
    69 #ifndef BOOST_NO_STD_LOCALE
       
    70 #include <sstream>
       
    71 #include <ios>
       
    72 #endif
       
    73 #include <limits.h> // CHAR_BIT
       
    74 
       
    75 namespace boost{
       
    76 
       
    77 namespace detail{
       
    78 
       
    79 static const ::boost::uint16_t high_surrogate_base = 0xD7C0u;
       
    80 static const ::boost::uint16_t low_surrogate_base = 0xDC00u;
       
    81 static const ::boost::uint32_t ten_bit_mask = 0x3FFu;
       
    82 
       
    83 inline bool is_high_surrogate(::boost::uint16_t v)
       
    84 {
       
    85    return (v & 0xFC00u) == 0xd800u;
       
    86 }
       
    87 inline bool is_low_surrogate(::boost::uint16_t v)
       
    88 {
       
    89    return (v & 0xFC00u) == 0xdc00u;
       
    90 }
       
    91 template <class T>
       
    92 inline bool is_surrogate(T v)
       
    93 {
       
    94    return (v & 0xF800u) == 0xd800;
       
    95 }
       
    96 
       
    97 inline unsigned utf8_byte_count(boost::uint8_t c)
       
    98 {
       
    99    // if the most significant bit with a zero in it is in position
       
   100    // 8-N then there are N bytes in this UTF-8 sequence:
       
   101    boost::uint8_t mask = 0x80u;
       
   102    unsigned result = 0;
       
   103    while(c & mask)
       
   104    {
       
   105       ++result;
       
   106       mask >>= 1;
       
   107    }
       
   108    return (result == 0) ? 1 : ((result > 4) ? 4 : result);
       
   109 }
       
   110 
       
   111 inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
       
   112 {
       
   113    return utf8_byte_count(c) - 1;
       
   114 }
       
   115 
       
   116 inline void invalid_utf32_code_point(::boost::uint32_t val)
       
   117 {
       
   118 #ifndef BOOST_NO_STD_LOCALE
       
   119    std::stringstream ss;
       
   120    ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence";
       
   121    std::out_of_range e(ss.str());
       
   122 #else
       
   123    std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence");
       
   124 #endif
       
   125    boost::throw_exception(e);
       
   126 }
       
   127 
       
   128 
       
   129 } // namespace detail
       
   130 
       
   131 template <class BaseIterator, class U16Type = ::boost::uint16_t>
       
   132 class u32_to_u16_iterator
       
   133    : public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type>
       
   134 {
       
   135    typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
       
   136 
       
   137 #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
       
   138    typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
       
   139 
       
   140    BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
       
   141    BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16);
       
   142 #endif
       
   143 
       
   144 public:
       
   145    typename base_type::reference
       
   146       dereference()const
       
   147    {
       
   148       if(m_current == 2)
       
   149          extract_current();
       
   150       return m_values[m_current];
       
   151    }
       
   152    bool equal(const u32_to_u16_iterator& that)const
       
   153    {
       
   154       if(m_position == that.m_position)
       
   155       {
       
   156          // Both m_currents must be equal, or both even
       
   157          // this is the same as saying their sum must be even:
       
   158          return (m_current + that.m_current) & 1u ? false : true;
       
   159       }
       
   160       return false;
       
   161    }
       
   162    void increment()
       
   163    {
       
   164       // if we have a pending read then read now, so that we know whether
       
   165       // to skip a position, or move to a low-surrogate:
       
   166       if(m_current == 2)
       
   167       {
       
   168          // pending read:
       
   169          extract_current();
       
   170       }
       
   171       // move to the next surrogate position:
       
   172       ++m_current;
       
   173       // if we've reached the end skip a position:
       
   174       if(m_values[m_current] == 0)
       
   175       {
       
   176          m_current = 2;
       
   177          ++m_position;
       
   178       }
       
   179    }
       
   180    void decrement()
       
   181    {
       
   182       if(m_current != 1)
       
   183       {
       
   184          // decrementing an iterator always leads to a valid position:
       
   185          --m_position;
       
   186          extract_current();
       
   187          m_current = m_values[1] ? 1 : 0;
       
   188       }
       
   189       else
       
   190       {
       
   191          m_current = 0;
       
   192       }
       
   193    }
       
   194    BaseIterator base()const
       
   195    {
       
   196       return m_position;
       
   197    }
       
   198    // construct:
       
   199    u32_to_u16_iterator() : m_position(), m_current(0)
       
   200    {
       
   201       m_values[0] = 0;
       
   202       m_values[1] = 0;
       
   203       m_values[2] = 0;
       
   204    }
       
   205    u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2)
       
   206    {
       
   207       m_values[0] = 0;
       
   208       m_values[1] = 0;
       
   209       m_values[2] = 0;
       
   210    }
       
   211 private:
       
   212 
       
   213    void extract_current()const
       
   214    {
       
   215       // begin by checking for a code point out of range:
       
   216       ::boost::uint32_t v = *m_position;
       
   217       if(v >= 0x10000u)
       
   218       {
       
   219          if(v > 0x10FFFFu)
       
   220             detail::invalid_utf32_code_point(*m_position);
       
   221          // split into two surrogates:
       
   222          m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base;
       
   223          m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
       
   224          m_current = 0;
       
   225          BOOST_ASSERT(detail::is_high_surrogate(m_values[0]));
       
   226          BOOST_ASSERT(detail::is_low_surrogate(m_values[1]));
       
   227       }
       
   228       else
       
   229       {
       
   230          // 16-bit code point:
       
   231          m_values[0] = static_cast<U16Type>(*m_position);
       
   232          m_values[1] = 0;
       
   233          m_current = 0;
       
   234          // value must not be a surrogate:
       
   235          if(detail::is_surrogate(m_values[0]))
       
   236             detail::invalid_utf32_code_point(*m_position);
       
   237       }
       
   238    }
       
   239    BaseIterator m_position;
       
   240    mutable U16Type m_values[3];
       
   241    mutable unsigned m_current;
       
   242 };
       
   243 
       
   244 template <class BaseIterator, class U32Type = ::boost::uint32_t>
       
   245 class u16_to_u32_iterator
       
   246    : public boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
       
   247 {
       
   248    typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
       
   249    // special values for pending iterator reads:
       
   250    BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
       
   251 
       
   252 #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
       
   253    typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
       
   254 
       
   255    BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16);
       
   256    BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
       
   257 #endif
       
   258 
       
   259 public:
       
   260    typename base_type::reference
       
   261       dereference()const
       
   262    {
       
   263       if(m_value == pending_read)
       
   264          extract_current();
       
   265       return m_value;
       
   266    }
       
   267    bool equal(const u16_to_u32_iterator& that)const
       
   268    {
       
   269       return m_position == that.m_position;
       
   270    }
       
   271    void increment()
       
   272    {
       
   273       // skip high surrogate first if there is one:
       
   274       if(detail::is_high_surrogate(*m_position)) ++m_position;
       
   275       ++m_position;
       
   276       m_value = pending_read;
       
   277    }
       
   278    void decrement()
       
   279    {
       
   280       --m_position;
       
   281       // if we have a low surrogate then go back one more:
       
   282       if(detail::is_low_surrogate(*m_position)) 
       
   283          --m_position;
       
   284       m_value = pending_read;
       
   285    }
       
   286    BaseIterator base()const
       
   287    {
       
   288       return m_position;
       
   289    }
       
   290    // construct:
       
   291    u16_to_u32_iterator() : m_position()
       
   292    {
       
   293       m_value = pending_read;
       
   294    }
       
   295    u16_to_u32_iterator(BaseIterator b) : m_position(b)
       
   296    {
       
   297       m_value = pending_read;
       
   298    }
       
   299 private:
       
   300    static void invalid_code_point(::boost::uint16_t val)
       
   301    {
       
   302 #ifndef BOOST_NO_STD_LOCALE
       
   303       std::stringstream ss;
       
   304       ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence";
       
   305       std::out_of_range e(ss.str());
       
   306 #else
       
   307       std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence");
       
   308 #endif
       
   309       boost::throw_exception(e);
       
   310    }
       
   311    void extract_current()const
       
   312    {
       
   313       m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position));
       
   314       // if the last value is a high surrogate then adjust m_position and m_value as needed:
       
   315       if(detail::is_high_surrogate(*m_position))
       
   316       {
       
   317          // precondition; next value must have be a low-surrogate:
       
   318          BaseIterator next(m_position);
       
   319          ::boost::uint16_t t = *++next;
       
   320          if((t & 0xFC00u) != 0xDC00u)
       
   321             invalid_code_point(t);
       
   322          m_value = (m_value - detail::high_surrogate_base) << 10;
       
   323          m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask);
       
   324       }
       
   325       // postcondition; result must not be a surrogate:
       
   326       if(detail::is_surrogate(m_value))
       
   327          invalid_code_point(static_cast< ::boost::uint16_t>(m_value));
       
   328    }
       
   329    BaseIterator m_position;
       
   330    mutable U32Type m_value;
       
   331 };
       
   332 
       
   333 template <class BaseIterator, class U8Type = ::boost::uint8_t>
       
   334 class u32_to_u8_iterator
       
   335    : public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type>
       
   336 {
       
   337    typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type;
       
   338    
       
   339 #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
       
   340    typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
       
   341 
       
   342    BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
       
   343    BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8);
       
   344 #endif
       
   345 
       
   346 public:
       
   347    typename base_type::reference
       
   348       dereference()const
       
   349    {
       
   350       if(m_current == 4)
       
   351          extract_current();
       
   352       return m_values[m_current];
       
   353    }
       
   354    bool equal(const u32_to_u8_iterator& that)const
       
   355    {
       
   356       if(m_position == that.m_position)
       
   357       {
       
   358          // either the m_current's must be equal, or one must be 0 and 
       
   359          // the other 4: which means neither must have bits 1 or 2 set:
       
   360          return (m_current == that.m_current)
       
   361             || (((m_current | that.m_current) & 3) == 0);
       
   362       }
       
   363       return false;
       
   364    }
       
   365    void increment()
       
   366    {
       
   367       // if we have a pending read then read now, so that we know whether
       
   368       // to skip a position, or move to a low-surrogate:
       
   369       if(m_current == 4)
       
   370       {
       
   371          // pending read:
       
   372          extract_current();
       
   373       }
       
   374       // move to the next surrogate position:
       
   375       ++m_current;
       
   376       // if we've reached the end skip a position:
       
   377       if(m_values[m_current] == 0)
       
   378       {
       
   379          m_current = 4;
       
   380          ++m_position;
       
   381       }
       
   382    }
       
   383    void decrement()
       
   384    {
       
   385       if((m_current & 3) == 0)
       
   386       {
       
   387          --m_position;
       
   388          extract_current();
       
   389          m_current = 3;
       
   390          while(m_current && (m_values[m_current] == 0))
       
   391             --m_current;
       
   392       }
       
   393       else
       
   394          --m_current;
       
   395    }
       
   396    BaseIterator base()const
       
   397    {
       
   398       return m_position;
       
   399    }
       
   400    // construct:
       
   401    u32_to_u8_iterator() : m_position(), m_current(0)
       
   402    {
       
   403       m_values[0] = 0;
       
   404       m_values[1] = 0;
       
   405       m_values[2] = 0;
       
   406       m_values[3] = 0;
       
   407       m_values[4] = 0;
       
   408    }
       
   409    u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4)
       
   410    {
       
   411       m_values[0] = 0;
       
   412       m_values[1] = 0;
       
   413       m_values[2] = 0;
       
   414       m_values[3] = 0;
       
   415       m_values[4] = 0;
       
   416    }
       
   417 private:
       
   418 
       
   419    void extract_current()const
       
   420    {
       
   421       boost::uint32_t c = *m_position;
       
   422       if(c > 0x10FFFFu)
       
   423          detail::invalid_utf32_code_point(c);
       
   424       if(c < 0x80u)
       
   425       {
       
   426          m_values[0] = static_cast<unsigned char>(c);
       
   427          m_values[1] = static_cast<unsigned char>(0u);
       
   428          m_values[2] = static_cast<unsigned char>(0u);
       
   429          m_values[3] = static_cast<unsigned char>(0u);
       
   430       }
       
   431       else if(c < 0x800u)
       
   432       {
       
   433          m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6));
       
   434          m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   435          m_values[2] = static_cast<unsigned char>(0u);
       
   436          m_values[3] = static_cast<unsigned char>(0u);
       
   437       }
       
   438       else if(c < 0x10000u)
       
   439       {
       
   440          m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12));
       
   441          m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
       
   442          m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   443          m_values[3] = static_cast<unsigned char>(0u);
       
   444       }
       
   445       else
       
   446       {
       
   447          m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18));
       
   448          m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
       
   449          m_values[2] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
       
   450          m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   451       }
       
   452       m_current= 0;
       
   453    }
       
   454    BaseIterator m_position;
       
   455    mutable U8Type m_values[5];
       
   456    mutable unsigned m_current;
       
   457 };
       
   458 
       
   459 template <class BaseIterator, class U32Type = ::boost::uint32_t>
       
   460 class u8_to_u32_iterator
       
   461    : public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
       
   462 {
       
   463    typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
       
   464    // special values for pending iterator reads:
       
   465    BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
       
   466 
       
   467 #if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
       
   468    typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
       
   469 
       
   470    BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8);
       
   471    BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
       
   472 #endif
       
   473 
       
   474 public:
       
   475    typename base_type::reference
       
   476       dereference()const
       
   477    {
       
   478       if(m_value == pending_read)
       
   479          extract_current();
       
   480       return m_value;
       
   481    }
       
   482    bool equal(const u8_to_u32_iterator& that)const
       
   483    {
       
   484       return m_position == that.m_position;
       
   485    }
       
   486    void increment()
       
   487    {
       
   488       // skip high surrogate first if there is one:
       
   489       unsigned c = detail::utf8_byte_count(*m_position);
       
   490       std::advance(m_position, c);
       
   491       m_value = pending_read;
       
   492    }
       
   493    void decrement()
       
   494    {
       
   495       // Keep backtracking until we don't have a trailing character:
       
   496       unsigned count = 0;
       
   497       while((*--m_position & 0xC0u) == 0x80u) ++count;
       
   498       // now check that the sequence was valid:
       
   499       if(count != detail::utf8_trailing_byte_count(*m_position))
       
   500          invalid_sequnce();
       
   501       m_value = pending_read;
       
   502    }
       
   503    BaseIterator base()const
       
   504    {
       
   505       return m_position;
       
   506    }
       
   507    // construct:
       
   508    u8_to_u32_iterator() : m_position()
       
   509    {
       
   510       m_value = pending_read;
       
   511    }
       
   512    u8_to_u32_iterator(BaseIterator b) : m_position(b)
       
   513    {
       
   514       m_value = pending_read;
       
   515    }
       
   516 private:
       
   517    static void invalid_sequnce()
       
   518    {
       
   519       std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character");
       
   520       boost::throw_exception(e);
       
   521    }
       
   522    void extract_current()const
       
   523    {
       
   524       m_value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position));
       
   525       // we must not have a continuation character:
       
   526       if((m_value & 0xC0u) == 0x80u)
       
   527          invalid_sequnce();
       
   528       // see how many extra byts we have:
       
   529       unsigned extra = detail::utf8_trailing_byte_count(*m_position);
       
   530       // extract the extra bits, 6 from each extra byte:
       
   531       BaseIterator next(m_position);
       
   532       for(unsigned c = 0; c < extra; ++c)
       
   533       {
       
   534          ++next;
       
   535          m_value <<= 6;
       
   536          m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
       
   537       }
       
   538       // we now need to remove a few of the leftmost bits, but how many depends
       
   539       // upon how many extra bytes we've extracted:
       
   540       static const boost::uint32_t masks[4] = 
       
   541       {
       
   542          0x7Fu,
       
   543          0x7FFu,
       
   544          0xFFFFu,
       
   545          0x1FFFFFu,
       
   546       };
       
   547       m_value &= masks[extra];
       
   548       // check the result:
       
   549       if(m_value > static_cast<U32Type>(0x10FFFFu))
       
   550          invalid_sequnce();
       
   551    }
       
   552    BaseIterator m_position;
       
   553    mutable U32Type m_value;
       
   554 };
       
   555 
       
   556 template <class BaseIterator>
       
   557 class utf16_output_iterator
       
   558 {
       
   559 public:
       
   560    typedef void                                   difference_type;
       
   561    typedef void                                   value_type;
       
   562    typedef boost::uint32_t*                       pointer;
       
   563    typedef boost::uint32_t&                       reference;
       
   564    typedef std::output_iterator_tag               iterator_category;
       
   565 
       
   566    utf16_output_iterator(const BaseIterator& b)
       
   567       : m_position(b){}
       
   568    utf16_output_iterator(const utf16_output_iterator& that)
       
   569       : m_position(that.m_position){}
       
   570    utf16_output_iterator& operator=(const utf16_output_iterator& that)
       
   571    {
       
   572       m_position = that.m_position;
       
   573       return *this;
       
   574    }
       
   575    const utf16_output_iterator& operator*()const
       
   576    {
       
   577       return *this;
       
   578    }
       
   579    void operator=(boost::uint32_t val)const
       
   580    {
       
   581       push(val);
       
   582    }
       
   583    utf16_output_iterator& operator++()
       
   584    {
       
   585       return *this;
       
   586    }
       
   587    utf16_output_iterator& operator++(int)
       
   588    {
       
   589       return *this;
       
   590    }
       
   591    BaseIterator base()const
       
   592    {
       
   593       return m_position;
       
   594    }
       
   595 private:
       
   596    void push(boost::uint32_t v)const
       
   597    {
       
   598       if(v >= 0x10000u)
       
   599       {
       
   600          // begin by checking for a code point out of range:
       
   601          if(v > 0x10FFFFu)
       
   602             detail::invalid_utf32_code_point(v);
       
   603          // split into two surrogates:
       
   604          *m_position++ = static_cast<boost::uint16_t>(v >> 10) + detail::high_surrogate_base;
       
   605          *m_position++ = static_cast<boost::uint16_t>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
       
   606       }
       
   607       else
       
   608       {
       
   609          // 16-bit code point:
       
   610          // value must not be a surrogate:
       
   611          if(detail::is_surrogate(v))
       
   612             detail::invalid_utf32_code_point(v);
       
   613          *m_position++ = static_cast<boost::uint16_t>(v);
       
   614       }
       
   615    }
       
   616    mutable BaseIterator m_position;
       
   617 };
       
   618 
       
   619 template <class BaseIterator>
       
   620 class utf8_output_iterator
       
   621 {
       
   622 public:
       
   623    typedef void                                   difference_type;
       
   624    typedef void                                   value_type;
       
   625    typedef boost::uint32_t*                       pointer;
       
   626    typedef boost::uint32_t&                       reference;
       
   627    typedef std::output_iterator_tag               iterator_category;
       
   628 
       
   629    utf8_output_iterator(const BaseIterator& b)
       
   630       : m_position(b){}
       
   631    utf8_output_iterator(const utf8_output_iterator& that)
       
   632       : m_position(that.m_position){}
       
   633    utf8_output_iterator& operator=(const utf8_output_iterator& that)
       
   634    {
       
   635       m_position = that.m_position;
       
   636       return *this;
       
   637    }
       
   638    const utf8_output_iterator& operator*()const
       
   639    {
       
   640       return *this;
       
   641    }
       
   642    void operator=(boost::uint32_t val)const
       
   643    {
       
   644       push(val);
       
   645    }
       
   646    utf8_output_iterator& operator++()
       
   647    {
       
   648       return *this;
       
   649    }
       
   650    utf8_output_iterator& operator++(int)
       
   651    {
       
   652       return *this;
       
   653    }
       
   654    BaseIterator base()const
       
   655    {
       
   656       return m_position;
       
   657    }
       
   658 private:
       
   659    void push(boost::uint32_t c)const
       
   660    {
       
   661       if(c > 0x10FFFFu)
       
   662          detail::invalid_utf32_code_point(c);
       
   663       if(c < 0x80u)
       
   664       {
       
   665          *m_position++ = static_cast<unsigned char>(c);
       
   666       }
       
   667       else if(c < 0x800u)
       
   668       {
       
   669          *m_position++ = static_cast<unsigned char>(0xC0u + (c >> 6));
       
   670          *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   671       }
       
   672       else if(c < 0x10000u)
       
   673       {
       
   674          *m_position++ = static_cast<unsigned char>(0xE0u + (c >> 12));
       
   675          *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
       
   676          *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   677       }
       
   678       else
       
   679       {
       
   680          *m_position++ = static_cast<unsigned char>(0xF0u + (c >> 18));
       
   681          *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
       
   682          *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
       
   683          *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
       
   684       }
       
   685    }
       
   686    mutable BaseIterator m_position;
       
   687 };
       
   688 
       
   689 } // namespace boost
       
   690 
       
   691 #endif // BOOST_REGEX_UNICODE_ITERATOR_HPP
       
   692