|
1 /* |
|
2 * |
|
3 * Copyright (c) 1998-2002 |
|
4 * John Maddock |
|
5 * |
|
6 * Use, modification and distribution are subject to the |
|
7 * Boost Software License, Version 1.0. (See accompanying file |
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|
9 * |
|
10 */ |
|
11 |
|
12 /* |
|
13 * LOCATION: see http://www.boost.org for most recent version. |
|
14 * FILE regex_format.hpp |
|
15 * VERSION see <boost/version.hpp> |
|
16 * DESCRIPTION: Provides formatting output routines for search and replace |
|
17 * operations. Note this is an internal header file included |
|
18 * by regex.hpp, do not include on its own. |
|
19 */ |
|
20 |
|
21 #ifndef BOOST_REGEX_FORMAT_HPP |
|
22 #define BOOST_REGEX_FORMAT_HPP |
|
23 |
|
24 |
|
25 namespace boost{ |
|
26 |
|
27 #ifdef BOOST_MSVC |
|
28 #pragma warning(push) |
|
29 #pragma warning(disable: 4103) |
|
30 #endif |
|
31 #ifdef BOOST_HAS_ABI_HEADERS |
|
32 # include BOOST_ABI_PREFIX |
|
33 #endif |
|
34 #ifdef BOOST_MSVC |
|
35 #pragma warning(pop) |
|
36 #endif |
|
37 |
|
38 // |
|
39 // Forward declaration: |
|
40 // |
|
41 template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type > |
|
42 class match_results; |
|
43 |
|
44 namespace re_detail{ |
|
45 |
|
46 // |
|
47 // struct trivial_format_traits: |
|
48 // defines minimum localisation support for formatting |
|
49 // in the case that the actual regex traits is unavailable. |
|
50 // |
|
51 template <class charT> |
|
52 struct trivial_format_traits |
|
53 { |
|
54 typedef charT char_type; |
|
55 |
|
56 static std::ptrdiff_t length(const charT* p) |
|
57 { |
|
58 return global_length(p); |
|
59 } |
|
60 static charT tolower(charT c) |
|
61 { |
|
62 return ::boost::re_detail::global_lower(c); |
|
63 } |
|
64 static charT toupper(charT c) |
|
65 { |
|
66 return ::boost::re_detail::global_upper(c); |
|
67 } |
|
68 static int value(const charT c, int radix) |
|
69 { |
|
70 int result = global_value(c); |
|
71 return result >= radix ? -1 : result; |
|
72 } |
|
73 int toi(const charT*& p1, const charT* p2, int radix)const |
|
74 { |
|
75 return global_toi(p1, p2, radix, *this); |
|
76 } |
|
77 }; |
|
78 |
|
79 template <class OutputIterator, class Results, class traits> |
|
80 class basic_regex_formatter |
|
81 { |
|
82 public: |
|
83 typedef typename traits::char_type char_type; |
|
84 basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) |
|
85 : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} |
|
86 OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f); |
|
87 OutputIterator format(const char_type* p1, match_flag_type f) |
|
88 { |
|
89 return format(p1, p1 + m_traits.length(p1), f); |
|
90 } |
|
91 private: |
|
92 typedef typename Results::value_type sub_match_type; |
|
93 enum output_state |
|
94 { |
|
95 output_copy, |
|
96 output_next_lower, |
|
97 output_next_upper, |
|
98 output_lower, |
|
99 output_upper, |
|
100 output_none |
|
101 }; |
|
102 |
|
103 void put(char_type c); |
|
104 void put(const sub_match_type& sub); |
|
105 void format_all(); |
|
106 void format_perl(); |
|
107 void format_escape(); |
|
108 void format_conditional(); |
|
109 void format_until_scope_end(); |
|
110 |
|
111 const traits& m_traits; // the traits class for localised formatting operations |
|
112 const Results& m_results; // the match_results being used. |
|
113 OutputIterator m_out; // where to send output. |
|
114 const char_type* m_position; // format string, current position |
|
115 const char_type* m_end; // format string end |
|
116 match_flag_type m_flags; // format flags to use |
|
117 output_state m_state; // what to do with the next character |
|
118 output_state m_restore_state; // what state to restore to. |
|
119 bool m_have_conditional; // we are parsing a conditional |
|
120 private: |
|
121 basic_regex_formatter(const basic_regex_formatter&); |
|
122 basic_regex_formatter& operator=(const basic_regex_formatter&); |
|
123 }; |
|
124 |
|
125 template <class OutputIterator, class Results, class traits> |
|
126 OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f) |
|
127 { |
|
128 m_position = p1; |
|
129 m_end = p2; |
|
130 m_flags = f; |
|
131 format_all(); |
|
132 return m_out; |
|
133 } |
|
134 |
|
135 template <class OutputIterator, class Results, class traits> |
|
136 void basic_regex_formatter<OutputIterator, Results, traits>::format_all() |
|
137 { |
|
138 // over and over: |
|
139 while(m_position != m_end) |
|
140 { |
|
141 switch(*m_position) |
|
142 { |
|
143 case '&': |
|
144 if(m_flags & ::boost::regex_constants::format_sed) |
|
145 { |
|
146 ++m_position; |
|
147 put(m_results[0]); |
|
148 break; |
|
149 } |
|
150 put(*m_position++); |
|
151 break; |
|
152 case '\\': |
|
153 format_escape(); |
|
154 break; |
|
155 case '(': |
|
156 if(m_flags & boost::regex_constants::format_all) |
|
157 { |
|
158 ++m_position; |
|
159 bool have_conditional = m_have_conditional; |
|
160 m_have_conditional = false; |
|
161 format_until_scope_end(); |
|
162 m_have_conditional = have_conditional; |
|
163 if(m_position == m_end) |
|
164 return; |
|
165 BOOST_ASSERT(*m_position == static_cast<char_type>(')')); |
|
166 ++m_position; // skip the closing ')' |
|
167 break; |
|
168 } |
|
169 put(*m_position); |
|
170 ++m_position; |
|
171 break; |
|
172 case ')': |
|
173 if(m_flags & boost::regex_constants::format_all) |
|
174 { |
|
175 return; |
|
176 } |
|
177 put(*m_position); |
|
178 ++m_position; |
|
179 break; |
|
180 case ':': |
|
181 if((m_flags & boost::regex_constants::format_all) && m_have_conditional) |
|
182 { |
|
183 return; |
|
184 } |
|
185 put(*m_position); |
|
186 ++m_position; |
|
187 break; |
|
188 case '?': |
|
189 if(m_flags & boost::regex_constants::format_all) |
|
190 { |
|
191 ++m_position; |
|
192 format_conditional(); |
|
193 break; |
|
194 } |
|
195 put(*m_position); |
|
196 ++m_position; |
|
197 break; |
|
198 case '$': |
|
199 if((m_flags & format_sed) == 0) |
|
200 { |
|
201 format_perl(); |
|
202 break; |
|
203 } |
|
204 // fall through, not a special character: |
|
205 default: |
|
206 put(*m_position); |
|
207 ++m_position; |
|
208 break; |
|
209 } |
|
210 } |
|
211 } |
|
212 |
|
213 template <class OutputIterator, class Results, class traits> |
|
214 void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() |
|
215 { |
|
216 // |
|
217 // On entry *m_position points to a '$' character |
|
218 // output the information that goes with it: |
|
219 // |
|
220 BOOST_ASSERT(*m_position == '$'); |
|
221 // |
|
222 // see if this is a trailing '$': |
|
223 // |
|
224 if(++m_position == m_end) |
|
225 { |
|
226 --m_position; |
|
227 put(*m_position); |
|
228 ++m_position; |
|
229 return; |
|
230 } |
|
231 // |
|
232 // OK find out what kind it is: |
|
233 // |
|
234 bool have_brace = false; |
|
235 const char_type* save_position = m_position; |
|
236 switch(*m_position) |
|
237 { |
|
238 case '&': |
|
239 ++m_position; |
|
240 put(this->m_results[0]); |
|
241 break; |
|
242 case '`': |
|
243 ++m_position; |
|
244 put(this->m_results.prefix()); |
|
245 break; |
|
246 case '\'': |
|
247 ++m_position; |
|
248 put(this->m_results.suffix()); |
|
249 break; |
|
250 case '$': |
|
251 put(*m_position++); |
|
252 break; |
|
253 case '{': |
|
254 have_brace = true; |
|
255 ++m_position; |
|
256 // fall through.... |
|
257 default: |
|
258 // see if we have a number: |
|
259 { |
|
260 std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
|
261 len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
|
262 int v = m_traits.toi(m_position, m_position + len, 10); |
|
263 if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) |
|
264 { |
|
265 // leave the $ as is, and carry on: |
|
266 m_position = --save_position; |
|
267 put(*m_position); |
|
268 ++m_position; |
|
269 break; |
|
270 } |
|
271 // otherwise output sub v: |
|
272 put(this->m_results[v]); |
|
273 if(have_brace) |
|
274 ++m_position; |
|
275 } |
|
276 } |
|
277 } |
|
278 |
|
279 template <class OutputIterator, class Results, class traits> |
|
280 void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() |
|
281 { |
|
282 // skip the escape and check for trailing escape: |
|
283 if(++m_position == m_end) |
|
284 { |
|
285 put(static_cast<char_type>('\\')); |
|
286 return; |
|
287 } |
|
288 // now switch on the escape type: |
|
289 switch(*m_position) |
|
290 { |
|
291 case 'a': |
|
292 put(static_cast<char_type>('\a')); |
|
293 ++m_position; |
|
294 break; |
|
295 case 'f': |
|
296 put(static_cast<char_type>('\f')); |
|
297 ++m_position; |
|
298 break; |
|
299 case 'n': |
|
300 put(static_cast<char_type>('\n')); |
|
301 ++m_position; |
|
302 break; |
|
303 case 'r': |
|
304 put(static_cast<char_type>('\r')); |
|
305 ++m_position; |
|
306 break; |
|
307 case 't': |
|
308 put(static_cast<char_type>('\t')); |
|
309 ++m_position; |
|
310 break; |
|
311 case 'v': |
|
312 put(static_cast<char_type>('\v')); |
|
313 ++m_position; |
|
314 break; |
|
315 case 'x': |
|
316 if(++m_position == m_end) |
|
317 { |
|
318 put(static_cast<char_type>('x')); |
|
319 return; |
|
320 } |
|
321 // maybe have \x{ddd} |
|
322 if(*m_position == static_cast<char_type>('{')) |
|
323 { |
|
324 ++m_position; |
|
325 int val = m_traits.toi(m_position, m_end, 16); |
|
326 if(val < 0) |
|
327 { |
|
328 // invalid value treat everything as literals: |
|
329 put(static_cast<char_type>('x')); |
|
330 put(static_cast<char_type>('{')); |
|
331 return; |
|
332 } |
|
333 if(*m_position != static_cast<char_type>('}')) |
|
334 { |
|
335 while(*m_position != static_cast<char_type>('\\')) |
|
336 --m_position; |
|
337 ++m_position; |
|
338 put(*m_position++); |
|
339 return; |
|
340 } |
|
341 ++m_position; |
|
342 put(static_cast<char_type>(val)); |
|
343 return; |
|
344 } |
|
345 else |
|
346 { |
|
347 std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
|
348 len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
|
349 int val = m_traits.toi(m_position, m_position + len, 16); |
|
350 if(val < 0) |
|
351 { |
|
352 --m_position; |
|
353 put(*m_position++); |
|
354 return; |
|
355 } |
|
356 put(static_cast<char_type>(val)); |
|
357 } |
|
358 break; |
|
359 case 'c': |
|
360 if(++m_position == m_end) |
|
361 { |
|
362 --m_position; |
|
363 put(*m_position++); |
|
364 return; |
|
365 } |
|
366 put(static_cast<char_type>(*m_position++ % 32)); |
|
367 break; |
|
368 case 'e': |
|
369 put(static_cast<char_type>(27)); |
|
370 ++m_position; |
|
371 break; |
|
372 default: |
|
373 // see if we have a perl specific escape: |
|
374 if((m_flags & boost::regex_constants::format_sed) == 0) |
|
375 { |
|
376 bool breakout = false; |
|
377 switch(*m_position) |
|
378 { |
|
379 case 'l': |
|
380 ++m_position; |
|
381 m_restore_state = m_state; |
|
382 m_state = output_next_lower; |
|
383 breakout = true; |
|
384 break; |
|
385 case 'L': |
|
386 ++m_position; |
|
387 m_state = output_lower; |
|
388 breakout = true; |
|
389 break; |
|
390 case 'u': |
|
391 ++m_position; |
|
392 m_restore_state = m_state; |
|
393 m_state = output_next_upper; |
|
394 breakout = true; |
|
395 break; |
|
396 case 'U': |
|
397 ++m_position; |
|
398 m_state = output_upper; |
|
399 breakout = true; |
|
400 break; |
|
401 case 'E': |
|
402 ++m_position; |
|
403 m_state = output_copy; |
|
404 breakout = true; |
|
405 break; |
|
406 } |
|
407 if(breakout) |
|
408 break; |
|
409 } |
|
410 // see if we have a \n sed style backreference: |
|
411 int v = m_traits.toi(m_position, m_position+1, 10); |
|
412 if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) |
|
413 { |
|
414 put(m_results[v]); |
|
415 break; |
|
416 } |
|
417 else if(v == 0) |
|
418 { |
|
419 // octal ecape sequence: |
|
420 --m_position; |
|
421 std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
|
422 len = (std::min)(static_cast<std::ptrdiff_t>(4), len); |
|
423 v = m_traits.toi(m_position, m_position + len, 8); |
|
424 BOOST_ASSERT(v >= 0); |
|
425 put(static_cast<char_type>(v)); |
|
426 break; |
|
427 } |
|
428 // Otherwise output the character "as is": |
|
429 put(*m_position++); |
|
430 break; |
|
431 } |
|
432 } |
|
433 |
|
434 template <class OutputIterator, class Results, class traits> |
|
435 void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional() |
|
436 { |
|
437 if(m_position == m_end) |
|
438 { |
|
439 // oops trailing '?': |
|
440 put(static_cast<char_type>('?')); |
|
441 return; |
|
442 } |
|
443 std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
|
444 len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
|
445 int v = m_traits.toi(m_position, m_position + len, 10); |
|
446 if(v < 0) |
|
447 { |
|
448 // oops not a number: |
|
449 put(static_cast<char_type>('?')); |
|
450 return; |
|
451 } |
|
452 |
|
453 // output varies depending upon whether sub-expression v matched or not: |
|
454 if(m_results[v].matched) |
|
455 { |
|
456 m_have_conditional = true; |
|
457 format_all(); |
|
458 m_have_conditional = false; |
|
459 if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) |
|
460 { |
|
461 // skip the ':': |
|
462 ++m_position; |
|
463 // save output state, then turn it off: |
|
464 output_state saved_state = m_state; |
|
465 m_state = output_none; |
|
466 // format the rest of this scope: |
|
467 format_until_scope_end(); |
|
468 // restore output state: |
|
469 m_state = saved_state; |
|
470 } |
|
471 } |
|
472 else |
|
473 { |
|
474 // save output state, then turn it off: |
|
475 output_state saved_state = m_state; |
|
476 m_state = output_none; |
|
477 // format until ':' or ')': |
|
478 m_have_conditional = true; |
|
479 format_all(); |
|
480 m_have_conditional = false; |
|
481 // restore state: |
|
482 m_state = saved_state; |
|
483 if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) |
|
484 { |
|
485 // skip the ':': |
|
486 ++m_position; |
|
487 // format the rest of this scope: |
|
488 format_until_scope_end(); |
|
489 } |
|
490 } |
|
491 } |
|
492 |
|
493 template <class OutputIterator, class Results, class traits> |
|
494 void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end() |
|
495 { |
|
496 do |
|
497 { |
|
498 format_all(); |
|
499 if((m_position == m_end) || (*m_position == static_cast<char_type>(')'))) |
|
500 return; |
|
501 put(*m_position++); |
|
502 }while(m_position != m_end); |
|
503 } |
|
504 |
|
505 template <class OutputIterator, class Results, class traits> |
|
506 void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c) |
|
507 { |
|
508 // write a single character to output |
|
509 // according to which case translation mode we are in: |
|
510 switch(this->m_state) |
|
511 { |
|
512 case output_none: |
|
513 return; |
|
514 case output_next_lower: |
|
515 c = m_traits.tolower(c); |
|
516 this->m_state = m_restore_state; |
|
517 break; |
|
518 case output_next_upper: |
|
519 c = m_traits.toupper(c); |
|
520 this->m_state = m_restore_state; |
|
521 break; |
|
522 case output_lower: |
|
523 c = m_traits.tolower(c); |
|
524 break; |
|
525 case output_upper: |
|
526 c = m_traits.toupper(c); |
|
527 break; |
|
528 default: |
|
529 break; |
|
530 } |
|
531 *m_out = c; |
|
532 ++m_out; |
|
533 } |
|
534 |
|
535 template <class OutputIterator, class Results, class traits> |
|
536 void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub) |
|
537 { |
|
538 typedef typename sub_match_type::iterator iterator_type; |
|
539 iterator_type i = sub.first; |
|
540 while(i != sub.second) |
|
541 { |
|
542 put(*i); |
|
543 ++i; |
|
544 } |
|
545 } |
|
546 |
|
//
// class string_out_iterator:
// output iterator that appends every value assigned through it to the
// string of type S it was constructed from.  Incrementing and
// dereferencing do nothing; only assignment has an effect.
//
// The iterator typedefs are declared directly rather than inherited from
// std::iterator: that base is deprecated as of C++17, and these are the
// same five types it would have supplied.
//
template <class S>
class string_out_iterator
{
   S* out;   // the string being appended to, never null
public:
   typedef std::ptrdiff_t difference_type;
   typedef typename S::value_type value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;

   string_out_iterator(S& s) : out(&s) {}
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
   string_out_iterator& operator*() { return *this; }
   // "*it = v" ends up here: append one copy of v to the target string.
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
|
573 |
|
574 template <class OutputIterator, class Iterator, class Alloc, class charT, class traits> |
|
575 OutputIterator regex_format_imp(OutputIterator out, |
|
576 const match_results<Iterator, Alloc>& m, |
|
577 const charT* p1, const charT* p2, |
|
578 match_flag_type flags, |
|
579 const traits& t |
|
580 ) |
|
581 { |
|
582 if(flags & regex_constants::format_literal) |
|
583 { |
|
584 return re_detail::copy(p1, p2, out); |
|
585 } |
|
586 |
|
587 re_detail::basic_regex_formatter< |
|
588 OutputIterator, |
|
589 match_results<Iterator, Alloc>, |
|
590 traits > f(out, m, t); |
|
591 return f.format(p1, p2, flags); |
|
592 } |
|
593 |
|
594 |
|
595 } // namespace re_detail |
|
596 |
|
597 template <class OutputIterator, class Iterator, class charT> |
|
598 OutputIterator regex_format(OutputIterator out, |
|
599 const match_results<Iterator>& m, |
|
600 const charT* fmt, |
|
601 match_flag_type flags = format_all |
|
602 ) |
|
603 { |
|
604 re_detail::trivial_format_traits<charT> traits; |
|
605 return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits); |
|
606 } |
|
607 |
|
608 template <class OutputIterator, class Iterator, class charT> |
|
609 OutputIterator regex_format(OutputIterator out, |
|
610 const match_results<Iterator>& m, |
|
611 const std::basic_string<charT>& fmt, |
|
612 match_flag_type flags = format_all |
|
613 ) |
|
614 { |
|
615 re_detail::trivial_format_traits<charT> traits; |
|
616 return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); |
|
617 } |
|
618 |
|
619 template <class Iterator, class charT> |
|
620 std::basic_string<charT> regex_format(const match_results<Iterator>& m, |
|
621 const charT* fmt, |
|
622 match_flag_type flags = format_all) |
|
623 { |
|
624 std::basic_string<charT> result; |
|
625 re_detail::string_out_iterator<std::basic_string<charT> > i(result); |
|
626 re_detail::trivial_format_traits<charT> traits; |
|
627 re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits); |
|
628 return result; |
|
629 } |
|
630 |
|
631 template <class Iterator, class charT> |
|
632 std::basic_string<charT> regex_format(const match_results<Iterator>& m, |
|
633 const std::basic_string<charT>& fmt, |
|
634 match_flag_type flags = format_all) |
|
635 { |
|
636 std::basic_string<charT> result; |
|
637 re_detail::string_out_iterator<std::basic_string<charT> > i(result); |
|
638 re_detail::trivial_format_traits<charT> traits; |
|
639 re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); |
|
640 return result; |
|
641 } |
|
642 |
|
643 #ifdef BOOST_MSVC |
|
644 #pragma warning(push) |
|
645 #pragma warning(disable: 4103) |
|
646 #endif |
|
647 #ifdef BOOST_HAS_ABI_HEADERS |
|
648 # include BOOST_ABI_SUFFIX |
|
649 #endif |
|
650 #ifdef BOOST_MSVC |
|
651 #pragma warning(pop) |
|
652 #endif |
|
653 |
|
654 } // namespace boost |
|
655 |
|
656 #endif // BOOST_REGEX_FORMAT_HPP |
|
657 |
|
658 |
|
659 |
|
660 |
|
661 |
|
662 |