|
1 /* |
|
2 * |
|
3 * Copyright (c) 1998-2002 |
|
4 * John Maddock |
|
5 * |
|
6 * Use, modification and distribution are subject to the |
|
7 * Boost Software License, Version 1.0. (See accompanying file |
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|
9 * |
|
10 */ |
|
11 |
|
12 /* |
|
13 * LOCATION: see http://www.boost.org for most recent version. |
|
14 * FILE: primary_transform.hpp |
|
15 * VERSION: see <boost/version.hpp> |
|
16 * DESCRIPTION: Heuristically determines the sort string format in use |
|
17 * by the current locale. |
|
18 */ |
|
19 |
|
20 #ifndef BOOST_REGEX_PRIMARY_TRANSFORM |
|
21 #define BOOST_REGEX_PRIMARY_TRANSFORM |
|
22 |
|
23 #ifdef BOOST_MSVC |
|
24 #pragma warning(push) |
|
25 #pragma warning(disable: 4103) |
|
26 #endif |
|
27 #ifdef BOOST_HAS_ABI_HEADERS |
|
28 # include BOOST_ABI_PREFIX |
|
29 #endif |
|
30 #ifdef BOOST_MSVC |
|
31 #pragma warning(pop) |
|
32 #endif |
|
33 |
|
namespace boost{
namespace re_detail{

//
// Classification of the sort key format, as returned by
// find_sort_syntax() below:
//
enum{
   sort_C,        // the sort key of "a" is "a" itself
   sort_fixed,    // keys look like fixed width fields
   sort_delim,    // keys look like fields separated by a delimiter character
   sort_unknown   // none of the above could be established
};

//
// Count how many occurrences of character c there are in string s.
//
// If c is a delimiter between collation fields, then this should be
// the same value for all sort keys.
//
// S only needs to provide size() and operator[].
//
template <class S, class charT>
unsigned count_chars(const S& s, charT c)
{
   unsigned int matches = 0;
   for(unsigned idx = 0; idx < s.size(); ++idx)
   {
      if(s[idx] == c)
         ++matches;
   }
   return matches;
}

//
// Heuristically work out the format of the sort keys produced by
// pt->transform().
//
// The keys for "a", "A" and ";" are obtained and compared, we should
// really use a-acute rather than 'A', but we can't portably do that.
//
// Returns one of the sort_* constants above, and sets *delim to:
//
//    sort_C:        0
//    sort_delim:    the character that appears to separate the fields
//    sort_fixed:    one more than the index of the last position at which
//                   the keys for "a" and "A" agree (the presumed width of
//                   the leading field)
//    sort_unknown:  0
//
// pt is dereferenced unconditionally and so must not be null.
//
template <class traits, class charT>
unsigned find_sort_syntax(const traits* pt, charT* delim)
{
   typedef typename traits::string_type string_type;
   typedef typename traits::char_type char_type;

   // Suppress incorrect warning for MSVC
   (void)pt;

   //
   // The keys are const on purpose: the comparison loop below reads
   // element [size()] of each key, that is only guaranteed to be valid
   // (and to yield charT()) for the const operator[] of
   // std::basic_string prior to C++11.
   //
   char_type a[2] = {'a', '\0', };
   const string_type sa(pt->transform(a, a+1));
   if(sa == a)
   {
      // the key is the original string:
      *delim = 0;
      return sort_C;
   }
   char_type A[2] = { 'A', '\0', };
   const string_type sA(pt->transform(A, A+1));
   char_type c[2] = { ';', '\0', };
   const string_type sc(pt->transform(c, c+1));

   //
   // find the last position at which the keys for "a" and "A" agree;
   // note that the position one past the end of the keys is deliberately
   // included in the comparison:
   //
   int pos = 0;
   while((pos <= static_cast<int>(sa.size()))
      && (pos <= static_cast<int>(sA.size()))
      && (sa[pos] == sA[pos]))
   {
      ++pos;
   }
   --pos;
   if(pos < 0)
   {
      // the keys differ in their very first character:
      *delim = 0;
      return sort_unknown;
   }
   //
   // at this point sa[pos] is either the end of a fixed width field
   // or the character that acts as a delimiter:
   //
   const charT maybe_delim = sa[pos];
   if(pos != 0)
   {
      // a delimiter should occur equally often in every key:
      const unsigned expected = count_chars(sa, maybe_delim);
      if((expected == count_chars(sA, maybe_delim))
         && (expected == count_chars(sc, maybe_delim)))
      {
         *delim = maybe_delim;
         return sort_delim;
      }
   }
   //
   // OK doesn't look like a delimiter, try for fixed width field:
   //
   if((sa.size() == sA.size()) && (sa.size() == sc.size()))
   {
      // note assumes that the fixed width field is less than
      // (numeric_limits<charT>::max)(), should be true for all types
      // I can't imagine 127 character fields...
      *delim = static_cast<charT>(++pos);
      return sort_fixed;
   }
   //
   // don't know what it is:
   //
   *delim = 0;
   return sort_unknown;
}


} // namespace re_detail
} // namespace boost
|
127 |
|
128 #ifdef BOOST_MSVC |
|
129 #pragma warning(push) |
|
130 #pragma warning(disable: 4103) |
|
131 #endif |
|
132 #ifdef BOOST_HAS_ABI_HEADERS |
|
133 # include BOOST_ABI_SUFFIX |
|
134 #endif |
|
135 #ifdef BOOST_MSVC |
|
136 #pragma warning(pop) |
|
137 #endif |
|
138 |
|
139 #endif |
|
140 |
|
141 |
|
142 |
|
143 |
|
144 |
|
145 |
|
146 |