|
1 /* |
|
2 * |
|
3 * Copyright (c) 2004 |
|
4 * John Maddock |
|
5 * |
|
6 * Use, modification and distribution are subject to the |
|
7 * Boost Software License, Version 1.0. (See accompanying file |
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|
9 * |
|
10 */ |
|
11 |
|
12 /* |
|
13 * LOCATION: see http://www.boost.org for most recent version. |
|
14 * FILE regex_traits_defaults.hpp |
|
15 * VERSION see <boost/version.hpp> |
|
16 * DESCRIPTION: Declares APIs for access to regex_traits default properties. |
|
17 */ |
|
18 |
|
19 #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED |
|
20 #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED |
|
21 |
|
22 #ifdef BOOST_MSVC |
|
23 #pragma warning(push) |
|
24 #pragma warning(disable: 4103) |
|
25 #endif |
|
26 #ifdef BOOST_HAS_ABI_HEADERS |
|
27 # include BOOST_ABI_PREFIX |
|
28 #endif |
|
29 #ifdef BOOST_MSVC |
|
30 #pragma warning(pop) |
|
31 #endif |
|
32 |
|
33 #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP |
|
34 #include <boost/regex/v4/syntax_type.hpp> |
|
35 #endif |
|
36 #ifndef BOOST_REGEX_ERROR_TYPE_HPP |
|
37 #include <boost/regex/v4/error_type.hpp> |
|
38 #endif |
|
39 |
|
40 #ifdef BOOST_NO_STDC_NAMESPACE |
|
41 namespace std{ |
|
42 using ::strlen; |
|
43 } |
|
44 #endif |
|
45 |
|
46 namespace boost{ namespace re_detail{ |
|
47 |
|
48 |
|
49 // |
|
50 // helpers to suppress warnings: |
|
51 // |
|
// Returns true when the character code c lies outside the 8-bit range,
// i.e. its value is 256 or greater.  (The previous test, c > 256, wrongly
// reported the code 256 itself as not extended.)
template <class charT>
inline bool is_extended(charT c)
{ return c >= 256; }
// Overload for plain char: always false.  Kept as a separate non-template
// function so that no comparison against 256 is generated for char, which
// is what this helper pair exists to avoid (warning suppression).
inline bool is_extended(char)
{ return false; }
|
57 |
|
58 |
|
59 BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); |
|
60 BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); |
|
61 BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); |
|
62 BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); |
|
63 |
|
64 // is charT c a combining character? |
|
65 BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); |
|
66 |
|
67 template <class charT> |
|
68 inline bool is_combining(charT c) |
|
69 { |
|
70 return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c))); |
|
71 } |
|
72 template <> |
|
73 inline bool is_combining<char>(char) |
|
74 { |
|
75 return false; |
|
76 } |
|
77 template <> |
|
78 inline bool is_combining<signed char>(signed char) |
|
79 { |
|
80 return false; |
|
81 } |
|
82 template <> |
|
83 inline bool is_combining<unsigned char>(unsigned char) |
|
84 { |
|
85 return false; |
|
86 } |
|
//
// wchar_t specialisations of is_combining.  Which one is compiled is chosen
// by the preprocessor:
//   * _MSC_VER: forward the value straight to the implementation;
//   * unsigned wchar_t (WCHAR_MIN == 0) no wider than unsigned short:
//     likewise forward directly;
//   * unsigned wchar_t that is wider: reject values at or above the
//     uint_least16_t maximum before forwarding.
// In every other configuration no specialisation is provided here.
//
#ifndef __hpux // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code = static_cast<unsigned short>(c);
   return is_combining_implementation(code);
}
#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code = static_cast<unsigned short>(c);
   return is_combining_implementation(code);
}
#else
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   if(c >= (std::numeric_limits<uint_least16_t>::max)())
      return false;
   return is_combining_implementation(static_cast<unsigned short>(c));
}
#endif
#endif
#endif
|
110 |
|
111 // |
|
112 // is a charT c a line separator? |
|
113 // |
|
114 template <class charT> |
|
115 inline bool is_separator(charT c) |
|
116 { |
|
117 return BOOST_REGEX_MAKE_BOOL( |
|
118 (c == static_cast<charT>('\n')) |
|
119 || (c == static_cast<charT>('\r')) |
|
120 || (c == static_cast<charT>('\f')) |
|
121 || (static_cast<boost::uint16_t>(c) == 0x2028u) |
|
122 || (static_cast<boost::uint16_t>(c) == 0x2029u) |
|
123 || (static_cast<boost::uint16_t>(c) == 0x85u)); |
|
124 } |
|
125 template <> |
|
126 inline bool is_separator<char>(char c) |
|
127 { |
|
128 return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); |
|
129 } |
|
130 |
|
131 // |
|
132 // get a default collating element: |
|
133 // |
|
134 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); |
|
135 |
|
136 // |
|
137 // get the state_id of a character classification, the individual |
|
138 // traits classes then transform that state_id into a bitmask: |
|
139 // |
|
140 template <class charT> |
|
141 struct character_pointer_range |
|
142 { |
|
143 const charT* p1; |
|
144 const charT* p2; |
|
145 |
|
146 bool operator < (const character_pointer_range& r)const |
|
147 { |
|
148 return std::lexicographical_compare(p1, p2, r.p1, r.p2); |
|
149 } |
|
150 bool operator == (const character_pointer_range& r)const |
|
151 { |
|
152 // Not only do we check that the ranges are of equal size before |
|
153 // calling std::equal, but there is no other algorithm available: |
|
154 // not even a non-standard MS one. So forward to unchecked_equal |
|
155 // in the MS case. |
|
156 return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1); |
|
157 } |
|
158 }; |
|
159 template <class charT> |
|
160 int get_default_class_id(const charT* p1, const charT* p2) |
|
161 { |
|
162 static const charT data[72] = { |
|
163 'a', 'l', 'n', 'u', 'm', |
|
164 'a', 'l', 'p', 'h', 'a', |
|
165 'b', 'l', 'a', 'n', 'k', |
|
166 'c', 'n', 't', 'r', 'l', |
|
167 'd', 'i', 'g', 'i', 't', |
|
168 'g', 'r', 'a', 'p', 'h', |
|
169 'l', 'o', 'w', 'e', 'r', |
|
170 'p', 'r', 'i', 'n', 't', |
|
171 'p', 'u', 'n', 'c', 't', |
|
172 's', 'p', 'a', 'c', 'e', |
|
173 'u', 'n', 'i', 'c', 'o', 'd', 'e', |
|
174 'u', 'p', 'p', 'e', 'r', |
|
175 'w', 'o', 'r', 'd', |
|
176 'x', 'd', 'i', 'g', 'i', 't', |
|
177 }; |
|
178 |
|
179 static const character_pointer_range<charT> ranges[19] = |
|
180 { |
|
181 {data+0, data+5,}, // alnum |
|
182 {data+5, data+10,}, // alpha |
|
183 {data+10, data+15,}, // blank |
|
184 {data+15, data+20,}, // cntrl |
|
185 {data+20, data+21,}, // d |
|
186 {data+20, data+25,}, // digit |
|
187 {data+25, data+30,}, // graph |
|
188 {data+30, data+31,}, // l |
|
189 {data+30, data+35,}, // lower |
|
190 {data+35, data+40,}, // print |
|
191 {data+40, data+45,}, // punct |
|
192 {data+45, data+46,}, // s |
|
193 {data+45, data+50,}, // space |
|
194 {data+57, data+58,}, // u |
|
195 {data+50, data+57,}, // unicode |
|
196 {data+57, data+62,}, // upper |
|
197 {data+62, data+63,}, // w |
|
198 {data+62, data+66,}, // word |
|
199 {data+66, data+72,}, // xdigit |
|
200 }; |
|
201 static const character_pointer_range<charT>* ranges_begin = ranges; |
|
202 static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); |
|
203 |
|
204 character_pointer_range<charT> t = { p1, p2, }; |
|
205 const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t); |
|
206 if((p != ranges_end) && (t == *p)) |
|
207 return static_cast<int>(p - ranges); |
|
208 return -1; |
|
209 } |
|
210 |
|
211 // |
|
212 // helper functions: |
|
213 // |
|
// Number of characters in the zero-terminated string p, not counting the
// terminator.  The generic version walks the string one element at a time.
template <class charT>
std::ptrdiff_t global_length(const charT* p)
{
   const charT* cursor = p;
   while(*cursor)
      ++cursor;
   return cursor - p;
}
// char: use std::strlen.
template<>
inline std::ptrdiff_t global_length<char>(const char* p)
{
   return static_cast<std::ptrdiff_t>((std::strlen)(p));
}
#ifndef BOOST_NO_WREGEX
// wchar_t: use std::wcslen.
template<>
inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
{
   return static_cast<std::ptrdiff_t>((std::wcslen)(p));
}
#endif
|
237 template <class charT> |
|
238 inline charT BOOST_REGEX_CALL global_lower(charT c) |
|
239 { |
|
240 return c; |
|
241 } |
|
242 template <class charT> |
|
243 inline charT BOOST_REGEX_CALL global_upper(charT c) |
|
244 { |
|
245 return c; |
|
246 } |
|
247 |
|
248 BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); |
|
249 BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); |
|
250 #ifndef BOOST_NO_WREGEX |
|
251 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); |
|
252 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); |
|
253 #endif |
|
254 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T |
|
255 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); |
|
256 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); |
|
257 #endif |
|
258 // |
|
259 // This sucks: declare template specialisations of global_lower/global_upper |
|
260 // that just forward to the non-template implementation functions. We do |
|
261 // this because there is one compiler (Compaq Tru64 C++) that doesn't seem |
|
262 // to differentiate between templates and non-template overloads.... |
|
263 // what's more, the primary template, plus all overloads have to be |
|
264 // defined in the same translation unit (if one is inline they all must be) |
|
265 // otherwise the "local template instantiation" compiler option can pick |
|
266 // the wrong instantiation when linking: |
|
267 // |
|
268 template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); } |
|
269 template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); } |
|
270 #ifndef BOOST_NO_WREGEX |
|
271 template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); } |
|
272 template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); } |
|
273 #endif |
|
274 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T |
|
275 template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); } |
|
276 template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); } |
|
277 #endif |
|
278 |
|
// Numeric value of the character c read as a hexadecimal digit:
// '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15, anything else gives -1.
// The tests run from the highest band to the lowest, so they rely on the
// lowercase letters sorting above the uppercase letters, which in turn sort
// above the decimal digits.
template <class charT>
int global_value(charT c)
{
   const charT digit_lo = '0';
   const charT digit_hi = '9';
   const charT lower_lo = 'a';
   const charT lower_hi = 'f';
   const charT upper_lo = 'A';
   const charT upper_hi = 'F';

   if(c >= lower_lo)
   {
      if(c > lower_hi)
         return -1;
      return 10 + (c - lower_lo);
   }
   if(c >= upper_lo)
   {
      if(c > upper_hi)
         return -1;
      return 10 + (c - upper_lo);
   }
   if(c >= digit_lo)
   {
      if(c > digit_hi)
         return -1;
      return c - digit_lo;
   }
   return -1;
}
|
// Parses an unsigned integer in the given radix from the characters in
// [p1, p2), using t.value(ch, radix) to obtain the value of each digit.
//
// p1    - in/out: start of the text; on return it points at the first
//         character that was not consumed.
// p2    - end of the text (one past the last character).
// radix - numeric base; a character is a digit when its value lies in
//         [0, radix).
// t     - traits object supplying value(charT, int).
//
// Returns the parsed value, or -1 when the range is empty or does not start
// with a valid digit (p1 is then left unchanged).  No overflow check is
// performed on the accumulated result.
template <class charT, class traits>
int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
{
   (void)t; // warning suppression
   // The range must be tested for emptiness before *p1 is read: with an
   // empty range p1 is a past-the-end pointer and must not be dereferenced.
   if(p1 == p2)
      return -1;
   int next_value = t.value(*p1, radix);
   if((next_value < 0) || (next_value >= radix))
      return -1;
   int result = 0;
   while(p1 != p2)
   {
      next_value = t.value(*p1, radix);
      if((next_value < 0) || (next_value >= radix))
         break;
      result *= radix;
      result += next_value;
      ++p1;
   }
   return result;
}
|
316 |
|
317 } // re_detail |
|
318 } // boost |
|
319 |
|
320 #ifdef BOOST_MSVC |
|
321 #pragma warning(push) |
|
322 #pragma warning(disable: 4103) |
|
323 #endif |
|
324 #ifdef BOOST_HAS_ABI_HEADERS |
|
325 # include BOOST_ABI_SUFFIX |
|
326 #endif |
|
327 #ifdef BOOST_MSVC |
|
328 #pragma warning(pop) |
|
329 #endif |
|
330 |
|
331 #endif |