|
1 // Copyright (c) 2005, Google Inc. |
|
2 // All rights reserved. |
|
3 // |
|
4 // Redistribution and use in source and binary forms, with or without |
|
5 // modification, are permitted provided that the following conditions are |
|
6 // met: |
|
7 // |
|
8 // * Redistributions of source code must retain the above copyright |
|
9 // notice, this list of conditions and the following disclaimer. |
|
10 // * Redistributions in binary form must reproduce the above |
|
11 // copyright notice, this list of conditions and the following disclaimer |
|
12 // in the documentation and/or other materials provided with the |
|
13 // distribution. |
|
14 // * Neither the name of Google Inc. nor the names of its |
|
15 // contributors may be used to endorse or promote products derived from |
|
16 // this software without specific prior written permission. |
|
17 // |
|
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 // |
|
30 // Author: Sanjay Ghemawat |
|
31 |
|
32 #ifdef HAVE_CONFIG_H |
|
33 #include "config.h" |
|
34 #endif |
|
35 |
|
36 #include <vector> |
|
37 #include <assert.h> |
|
38 |
|
39 #include "pcrecpp_internal.h" |
|
40 #include "pcre_scanner.h" |
|
41 |
|
42 using std::vector; |
|
43 |
|
44 namespace pcrecpp { |
|
45 |
|
46 Scanner::Scanner() |
|
47 : data_(), |
|
48 input_(data_), |
|
49 skip_(NULL), |
|
50 should_skip_(false), |
|
51 skip_repeat_(false), |
|
52 save_comments_(false), |
|
53 comments_(NULL), |
|
54 comments_offset_(0) { |
|
55 } |
|
56 |
|
57 Scanner::Scanner(const string& in) |
|
58 : data_(in), |
|
59 input_(data_), |
|
60 skip_(NULL), |
|
61 should_skip_(false), |
|
62 skip_repeat_(false), |
|
63 save_comments_(false), |
|
64 comments_(NULL), |
|
65 comments_offset_(0) { |
|
66 } |
|
67 |
|
68 Scanner::~Scanner() { |
|
69 delete skip_; |
|
70 delete comments_; |
|
71 } |
|
72 |
|
73 void Scanner::SetSkipExpression(const char* re) { |
|
74 delete skip_; |
|
75 if (re != NULL) { |
|
76 skip_ = new RE(re); |
|
77 should_skip_ = true; |
|
78 skip_repeat_ = true; |
|
79 ConsumeSkip(); |
|
80 } else { |
|
81 skip_ = NULL; |
|
82 should_skip_ = false; |
|
83 skip_repeat_ = false; |
|
84 } |
|
85 } |
|
86 |
|
87 void Scanner::Skip(const char* re) { |
|
88 delete skip_; |
|
89 if (re != NULL) { |
|
90 skip_ = new RE(re); |
|
91 should_skip_ = true; |
|
92 skip_repeat_ = false; |
|
93 ConsumeSkip(); |
|
94 } else { |
|
95 skip_ = NULL; |
|
96 should_skip_ = false; |
|
97 skip_repeat_ = false; |
|
98 } |
|
99 } |
|
100 |
|
101 void Scanner::DisableSkip() { |
|
102 assert(skip_ != NULL); |
|
103 should_skip_ = false; |
|
104 } |
|
105 |
|
106 void Scanner::EnableSkip() { |
|
107 assert(skip_ != NULL); |
|
108 should_skip_ = true; |
|
109 ConsumeSkip(); |
|
110 } |
|
111 |
|
112 int Scanner::LineNumber() const { |
|
113 // TODO: Make it more efficient by keeping track of the last point |
|
114 // where we computed line numbers and counting newlines since then. |
|
115 // We could use std:count, but not all systems have it. :-( |
|
116 int count = 1; |
|
117 for (const char* p = data_.data(); p < input_.data(); ++p) |
|
118 if (*p == '\n') |
|
119 ++count; |
|
120 return count; |
|
121 } |
|
122 |
|
123 int Scanner::Offset() const { |
|
124 return input_.data() - data_.c_str(); |
|
125 } |
|
126 |
|
127 bool Scanner::LookingAt(const RE& re) const { |
|
128 int consumed; |
|
129 return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); |
|
130 } |
|
131 |
|
132 |
|
133 bool Scanner::Consume(const RE& re, |
|
134 const Arg& arg0, |
|
135 const Arg& arg1, |
|
136 const Arg& arg2) { |
|
137 const bool result = re.Consume(&input_, arg0, arg1, arg2); |
|
138 if (result && should_skip_) ConsumeSkip(); |
|
139 return result; |
|
140 } |
|
141 |
|
142 // helper function to consume *skip_ and honour save_comments_ |
|
143 void Scanner::ConsumeSkip() { |
|
144 const char* start_data = input_.data(); |
|
145 while (skip_->Consume(&input_)) { |
|
146 if (!skip_repeat_) { |
|
147 // Only one skip allowed. |
|
148 break; |
|
149 } |
|
150 } |
|
151 if (save_comments_) { |
|
152 if (comments_ == NULL) { |
|
153 comments_ = new vector<StringPiece>; |
|
154 } |
|
155 // already pointing one past end, so no need to +1 |
|
156 int length = input_.data() - start_data; |
|
157 if (length > 0) { |
|
158 comments_->push_back(StringPiece(start_data, length)); |
|
159 } |
|
160 } |
|
161 } |
|
162 |
|
163 |
|
164 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { |
|
165 // short circuit out if we've not yet initialized comments_ |
|
166 // (e.g., when save_comments is false) |
|
167 if (!comments_) { |
|
168 return; |
|
169 } |
|
170 // TODO: if we guarantee that comments_ will contain StringPieces |
|
171 // that are ordered by their start, then we can do a binary search |
|
172 // for the first StringPiece at or past start and then scan for the |
|
173 // ones contained in the range, quit early (use equal_range or |
|
174 // lower_bound) |
|
175 for (vector<StringPiece>::const_iterator it = comments_->begin(); |
|
176 it != comments_->end(); ++it) { |
|
177 if ((it->data() >= data_.c_str() + start && |
|
178 it->data() + it->size() <= data_.c_str() + end)) { |
|
179 ranges->push_back(*it); |
|
180 } |
|
181 } |
|
182 } |
|
183 |
|
184 |
|
185 void Scanner::GetNextComments(vector<StringPiece> *ranges) { |
|
186 // short circuit out if we've not yet initialized comments_ |
|
187 // (e.g., when save_comments is false) |
|
188 if (!comments_) { |
|
189 return; |
|
190 } |
|
191 for (vector<StringPiece>::const_iterator it = |
|
192 comments_->begin() + comments_offset_; |
|
193 it != comments_->end(); ++it) { |
|
194 ranges->push_back(*it); |
|
195 ++comments_offset_; |
|
196 } |
|
197 } |
|
198 |
|
199 } // namespace pcrecpp |