libraries/spcre/libpcre/pcre/pcre_scanner.cc
changeset 0 7f656887cf89
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libraries/spcre/libpcre/pcre/pcre_scanner.cc	Wed Jun 23 15:52:26 2010 +0100
@@ -0,0 +1,199 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <vector>
+#include <assert.h>
+
+#include "pcrecpp_internal.h"
+#include "pcre_scanner.h"
+
+using std::vector;
+
+namespace pcrecpp {
+
+// Default constructor: data_ starts out empty and input_ is initialised
+// from it.  No skip pattern is installed, skipping and comment saving are
+// both off, and no comment storage has been allocated yet.
+Scanner::Scanner()
+    : data_()
+    , input_(data_)
+    , skip_(NULL)
+    , should_skip_(false)
+    , skip_repeat_(false)
+    , save_comments_(false)
+    , comments_(NULL)
+    , comments_offset_(0)
+{}
+
+// Constructs a scanner over `in`: data_ is initialised from `in` and
+// input_ is initialised from data_.  No skip pattern is installed,
+// skipping and comment saving are both off, and no comment storage has
+// been allocated yet.
+Scanner::Scanner(const string& in)
+    : data_(in)
+    , input_(data_)
+    , skip_(NULL)
+    , should_skip_(false)
+    , skip_repeat_(false)
+    , save_comments_(false)
+    , comments_(NULL)
+    , comments_offset_(0)
+{}
+
+// Frees the objects held through comments_ and skip_.  Either pointer may
+// still be NULL, in which case the corresponding delete does nothing.
+Scanner::~Scanner() {
+  delete comments_;
+  delete skip_;
+}
+
+// Installs `re` as the skip pattern in repeating mode: the pattern is
+// consumed from the input immediately, and again after every successful
+// Consume(), as many times in a row as it keeps matching.
+// Passing NULL removes any installed skip pattern and turns skipping off.
+void Scanner::SetSkipExpression(const char* re) {
+  // Build the replacement before releasing the old pattern.  This keeps
+  // skip_ from dangling (and being deleted twice by the destructor) if
+  // construction throws, and makes it safe for `re` to point into storage
+  // owned by the old pattern.
+  RE* const replacement = (re != NULL) ? new RE(re) : NULL;
+  delete skip_;
+  skip_ = replacement;
+  should_skip_ = (replacement != NULL);
+  skip_repeat_ = (replacement != NULL);
+  if (replacement != NULL) {
+    ConsumeSkip();
+  }
+}
+
+// Installs `re` as the skip pattern in single-shot mode: the pattern is
+// consumed from the input immediately, and after every successful
+// Consume(), but at most once each time.
+// Passing NULL removes any installed skip pattern and turns skipping off.
+void Scanner::Skip(const char* re) {
+  // Build the replacement before releasing the old pattern.  This keeps
+  // skip_ from dangling (and being deleted twice by the destructor) if
+  // construction throws, and makes it safe for `re` to point into storage
+  // owned by the old pattern.
+  RE* const replacement = (re != NULL) ? new RE(re) : NULL;
+  delete skip_;
+  skip_ = replacement;
+  should_skip_ = (replacement != NULL);
+  skip_repeat_ = false;
+  if (replacement != NULL) {
+    ConsumeSkip();
+  }
+}
+
+// Turns skipping off while keeping the installed skip pattern, so that
+// Consume() no longer consumes it after a match.  A skip pattern is
+// expected to be installed already; that is checked only by the assert,
+// i.e. not when NDEBUG is defined.
+void Scanner::DisableSkip() {
+  assert(skip_ != NULL);
+  should_skip_ = false;
+}
+
+// Turns skipping back on and immediately consumes the skip pattern at the
+// current position.  A skip pattern is expected to be installed already.
+void Scanner::EnableSkip() {
+  assert(skip_ != NULL);
+  if (skip_ == NULL) {
+    // assert() compiles away under NDEBUG.  Without this guard the
+    // ConsumeSkip() call below, and every later one triggered by
+    // Consume(), would dereference a null pattern.  Leave skipping off.
+    return;
+  }
+  should_skip_ = true;
+  ConsumeSkip();
+}
+
+// Returns the 1-based line number of the current position: one plus the
+// number of '\n' characters between the start of data_ and input_.data().
+int Scanner::LineNumber() const {
+  // TODO: Make it more efficient by remembering where the previous count
+  // stopped and only counting the newlines seen since then.
+  // std::count would do the job, but not all systems have it. :-(
+  const char* cursor = data_.data();
+  const char* const stop = input_.data();
+  int line = 1;
+  while (cursor < stop) {
+    if (*cursor == '\n') {
+      line++;
+    }
+    cursor++;
+  }
+  return line;
+}
+
+// Returns the distance, in chars, from the start of data_ to the current
+// position (input_.data()).
+int Scanner::Offset() const {
+  const char* const buffer_start = data_.c_str();
+  const char* const position = input_.data();
+  return position - buffer_start;
+}
+
+// Reports whether `re` matches the remaining input when anchored at its
+// start (RE::ANCHOR_START).  No capture arguments are passed to the match.
+bool Scanner::LookingAt(const RE& re) const {
+  int match_length;  // handed to DoMatch(); its value is not used here
+  const bool matched =
+      re.DoMatch(input_, RE::ANCHOR_START, &match_length, 0, 0);
+  return matched;
+}
+
+
+// Tries to consume `re` from the front of the remaining input, passing
+// arg0..arg2 through to RE::Consume().  When that succeeds and skipping
+// is enabled, the skip pattern is consumed as well.
+// Returns the result of RE::Consume().
+bool Scanner::Consume(const RE& re,
+                      const Arg& arg0,
+                      const Arg& arg1,
+                      const Arg& arg2) {
+  if (!re.Consume(&input_, arg0, arg1, arg2)) {
+    return false;
+  }
+  if (should_skip_) {
+    ConsumeSkip();
+  }
+  return true;
+}
+
+// Helper that consumes *skip_ from the front of input_ and honours
+// save_comments_.  In repeating mode (skip_repeat_) the pattern is consumed
+// for as long as it keeps matching and making progress; otherwise at most
+// once.  When save_comments_ is set, the text skipped by this call (if
+// any) is appended to comments_ as a single StringPiece.
+void Scanner::ConsumeSkip() {
+  if (skip_ == NULL) {
+    // No skip pattern installed; there is nothing to consume.
+    return;
+  }
+  const char* const start_data = input_.data();
+  const char* last_pos = start_data;
+  while (skip_->Consume(&input_)) {
+    if (!skip_repeat_) {
+      // Only one skip allowed.
+      break;
+    }
+    if (input_.data() == last_pos) {
+      // The pattern matched without consuming anything (a pattern such as
+      // "\\s*" can match the empty string).  Repeating would match the same
+      // empty string forever, so stop here.
+      break;
+    }
+    last_pos = input_.data();
+  }
+  if (save_comments_) {
+    if (comments_ == NULL) {
+      comments_ = new vector<StringPiece>;
+    }
+    // input_ already points one past the skipped text, so no need to +1
+    const int length = input_.data() - start_data;
+    if (length > 0) {
+      comments_->push_back(StringPiece(start_data, length));
+    }
+  }
+}
+
+
+// Appends to *ranges every saved comment that begins at or after offset
+// `start` of data_ and ends at or before offset `end` of data_.
+// *ranges is left untouched when comments_ has not been created.
+void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
+  // Nothing to report until comments_ has been initialized
+  // (e.g., when save_comments is false).
+  if (comments_ == NULL) return;
+
+  // TODO: if comments_ were guaranteed to hold StringPieces ordered by
+  // their start, a binary search (equal_range or lower_bound) could find
+  // the first one at or past start, and the scan could quit early once
+  // it leaves the range.
+  typedef vector<StringPiece>::const_iterator CommentIter;
+  for (CommentIter cur = comments_->begin(); cur != comments_->end(); ++cur) {
+    const bool starts_too_early = cur->data() < data_.c_str() + start;
+    if (starts_too_early) continue;
+
+    const bool ends_too_late =
+        cur->data() + cur->size() > data_.c_str() + end;
+    if (ends_too_late) continue;
+
+    ranges->push_back(*cur);
+  }
+}
+
+
+// Appends to *ranges the saved comments from index comments_offset_ to the
+// end of comments_, advancing comments_offset_ by one for each comment
+// appended.  *ranges is left untouched when comments_ has not been created.
+void Scanner::GetNextComments(vector<StringPiece> *ranges) {
+  // Nothing to report until comments_ has been initialized
+  // (e.g., when save_comments is false).
+  if (comments_ == NULL) return;
+
+  vector<StringPiece>::const_iterator next =
+      comments_->begin() + comments_offset_;
+  while (next != comments_->end()) {
+    ranges->push_back(*next);
+    ++comments_offset_;
+    ++next;
+  }
+}
+
+}   // namespace pcrecpp