|
1 // (C) Copyright Jeremy Siek 2004 |
|
2 // Distributed under the Boost Software License, Version 1.0. (See |
|
3 // accompanying file LICENSE_1_0.txt or copy at |
|
4 // http://www.boost.org/LICENSE_1_0.txt) |
|
5 |
|
6 /* |
|
7 * stringtok.hpp -- Breaks a string into tokens. This is an example for lib3. |
|
8 * |
|
9 * Template function looks like this: |
|
10 * |
|
11 * template <typename Container> |
|
12 * void stringtok (Container &l, |
|
13 * string const &s, |
|
14 * char const * const ws = " \t\n"); |
|
15 * |
|
16 * A nondestructive version of strtok() that handles its own memory and can |
|
17 * be broken up by any character(s). Does all the work at once rather than |
|
18 * in an invocation loop like strtok() requires. |
|
19 * |
|
20 * Container is any type that supports push_back(a_string), although using |
|
21 * list<string> and deque<string> are indicated due to their O(1) push_back. |
|
22 * (I prefer deque<> because op[]/at() is available as well.) The first |
|
23 * parameter references an existing Container. |
|
24 * |
|
25 * s is the string to be tokenized. From the parameter declaration, it can |
|
26 * be seen that s is not affected. Since references-to-const may refer to |
|
27 * temporaries, you could use stringtok(some_container, readline("")) when |
|
28 * using the GNU readline library. |
|
29 * |
|
30 * The final parameter is an array of characters that serve as whitespace. |
|
31 * Whitespace characters default to one or more of tab, space, and newline, |
|
32 * in any combination. |
|
33 * |
|
34 * 'l' need not be empty on entry. On return, 'l' will have the token |
|
35 * strings appended. |
|
36 * |
|
37 * |
|
38 * [Example: |
|
39 * list<string> ls; |
|
40 * stringtok (ls, " this \t is\t\n a test "); |
|
41 * for (list<string>::const_iterator i = ls.begin(); |
|
42 * i != ls.end(); ++i) |
|
43 * { |
|
44 * cerr << ':' << (*i) << ":\n"; |
|
45 * } |
|
46 * |
|
47 * would print |
|
48 * :this: |
|
49 * :is: |
|
50 * :a: |
|
51 * :test: |
|
52 * -end example] |
|
53 * |
|
54 * pedwards@jaj.com May 1999 |
|
55 */ |
|
56 |
|
57 |
|
58 #include <string> |
|
59 #include <cstring> // for strchr |
|
60 |
|
61 |
|
/*****************************************************************
 * isws -- tells whether a character belongs to a delimiter set.
 *
 *   c     the character to classify.
 *   wstr  NUL-terminated string listing every character that counts as
 *         whitespace.  It is handed straight to strchr(), so it must not
 *         be a null pointer.
 *
 * Returns true when c occurs in wstr, false otherwise.
 *
 * The original author flagged this helper as the part of the
 * implementation most open to improvement.
 */
inline bool
isws (char c, char const * const wstr)
{
    // strchr() treats the terminating NUL as part of the string it searches,
    // so looking up '\0' would always succeed.  Reject it explicitly so that a
    // NUL embedded in the text being tokenized is not mistaken for whitespace.
    if (c == '\0')
        return false;

    return std::strchr(wstr, c) != NULL;
}
|
73 |
|
74 |
|
75 namespace boost { |
|
76 |
|
77 /***************************************************************** |
|
78 * Simplistic and quite Standard, but a bit slow. This should be |
|
79 * templatized on basic_string instead, or on a more generic StringT |
|
80 * that just happens to support ::size_type, .substr(), and so on. |
|
81 * I had hoped that "whitespace" would be a trait, but it isn't, so |
|
82 * the user must supply it. Enh, this lets them break up strings on |
|
83 * different things easier than traits would anyhow. |
|
84 */ |
|
85 template <typename Container> |
|
86 void |
|
87 stringtok (Container &l, std::string const &s, char const * const ws = " \t\n") |
|
88 { |
|
89 typedef std::string::size_type size_type; |
|
90 const size_type S = s.size(); |
|
91 size_type i = 0; |
|
92 |
|
93 while (i < S) { |
|
94 // eat leading whitespace |
|
95 while ((i < S) && (isws(s[i],ws))) ++i; |
|
96 if (i == S) return; // nothing left but WS |
|
97 |
|
98 // find end of word |
|
99 size_type j = i+1; |
|
100 while ((j < S) && (!isws(s[j],ws))) ++j; |
|
101 |
|
102 // add word |
|
103 l.push_back(s.substr(i,j-i)); |
|
104 |
|
105 // set up for next loop |
|
106 i = j+1; |
|
107 } |
|
108 } |
|
109 |
|
110 |
|
111 } // namespace boost |