/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/

// File: searchengine/oss/loc/analysis/inc/public/tinyiterator.h

#ifndef TINYITERATOR_H_
#define TINYITERATOR_H_

#include <exception>

namespace lucene {
    namespace util {
        class Reader;
    }
}

namespace analysis {

    /**
     * This package provides basic iterator building blocks for walking
     * character streams.
     */
    namespace tiny {

        /*
         * Meta code for describing the iterator concept used here.
         * Used to iterate character streams.
         * Follows closely the STL forward iterator.
         * Note: Comparisons of form x < y may not work properly.
         * Note: x - y will not provide distance in characters,
         *       but instead offset distance in the original text.
         *
         * Only operators of form ++i are provided. --i is not provided
         * because of complications with utf16 and with unicode
         * decomposition and composition. i++ would lead to unnecessary
         * code.
         *
         * concept Iterator {
         *
         *     // Accessor to the iterator character
         *     wchar_t operator*(); // 16 bit unicode
         *         OR
         *     int operator*();     // 32 bit unicode
         *
         *     // Next location
         *     Iterator& operator++();
         *
         *     // Returns offset in the original text. Note that one
         *     // character may be transformed into a number of characters.
         *     // This means that comparisons of form (int)i < (int)j are
         *     // unreliable and should not be used. Also lengths i - j are
         *     // unreliable. With Korean, i - j may produce length 2, but
         *     // iterating for (; i < j; ++i) may step through more than
         *     // two characters.
         *     operator int();
         * }
         */

        /**
         * Iterator over anything indexable with operator[](int), such as
         * a string or a buffer object.
         *
         * Only a pointer to the array is stored, so the array has to stay
         * alive for as long as the iterator is used. The offset reported
         * by operator int() is the current index.
         *
         * @tparam T array type; (*array)[i] must convert to wchar_t
         */
        template <class T>
        struct ArrayIterator {
        public:
            /** Iterator pointing to index i of array. */
            inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}
            /** Iterator pointing to the first element (index 0) of array. */
            inline ArrayIterator(T& array) : array_(&array), i_(0) {}
            /**
             * Iterator bound to no array. It must be assigned from a bound
             * iterator before being dereferenced.
             */
            inline ArrayIterator() : array_(0), i_(0) {}

            /** Character at the current index. */
            inline wchar_t operator*() const { return (*array_)[i_]; }
            /** Moves to the next index. */
            inline ArrayIterator& operator++() { ++i_; return *this; }
            /** Current index. Const, so it also works on const iterators. */
            inline operator int() const { return i_; }
        private:
            T* array_;
            int i_;
        };

        /**
         * Iterates from starting position to up to length characters.
         *
         * Once the budget is used up, operator*() yields '\0' and
         * operator++() no longer moves the wrapped iterator.
         *
         * @tparam Iterator type fulfilling the Iterator concept above
         */
        template <class Iterator>
        struct RangeIterator {
        public:
            /**
             * @param begin  first location of the range; copied, so a
             *               temporary may be passed
             * @param length maximum number of characters to iterate.
             *               NOTE(review): a negative value is never counted
             *               down to zero, so the range is then effectively
             *               unbounded - confirm callers pass length >= 0
             */
            RangeIterator(const Iterator& begin, int length)
                : i_(begin), left_(length) {}

            /** Current character, or '\0' when the range is exhausted. */
            inline int operator*() { return left_ ? *i_ : '\0'; }

            /** Advances unless the range is already exhausted. */
            inline RangeIterator& operator++() {
                if (left_) {
                    ++i_;
                    --left_;
                }
                return *this;
            }

            /** Offset of the wrapped iterator. */
            inline operator int() { return i_; }
        private:
            Iterator i_;
            int left_;
        };

        /**
         * Turns iterator into a C++ stream. Allows out<<'c'<<'\0';
         * kind of syntax to be used with iterators.
         *
         * @tparam Iterator writable iterator; *i = value and ++i are used
         */
        template <class Iterator>
        struct IteratorOutput {
        public:
            /** Starts writing at location i. The iterator is copied. */
            IteratorOutput(Iterator i) : i_(i) {}

            /** Stores t at the current location and moves to the next one. */
            template <class T>
            inline IteratorOutput& operator<<(T t) {
                *i_ = t;
                ++i_;
                return *this;
            }
        private:
            Iterator i_;
        };

        /**
         * CLucene IO support
         */
        namespace cl {

            /**
             * Informs that caller has attempted to read a location
             * from the reader source, that is no longer stored in the
             * buffer.
             */
            class TooOldIndexException : public std::exception {
            public:
                /**
                 * Description of the failure.
                 *
                 * Carries throw() because std::exception::what() is
                 * declared non-throwing and an override may not be
                 * looser. Declared only; the definition lives outside
                 * this header and has to repeat the same specification.
                 */
                const char* what() const throw();
            };

            /**
             * Provides buffer & array like interface to be used with
             * CLucene readers. If reader r provides access to file X,
             * and we have buf(r), we can sort of 'random access' file
             * X with buf[0], buf[i], buf[j+3] style syntax. Still, the
             * buffer is of limited size. There is always the most recent
             * location L that is read. Trying to access buf[L-SIZE-1]
             * will raise an exception, where SIZE is the buffer size.
             * NOTE(review): presumably TooOldIndexException; the
             * implementation is not part of this header.
             *
             * @tparam SIZE number of characters the rotating buffer holds
             */
            template <int SIZE>
            class ReaderBuffer {
            public:
                /**
                 * Iterator for iterating the underlying source.
                 * NOTE(review): the template argument was reconstructed;
                 * confirm it against the out-of-line member definitions.
                 */
                typedef ArrayIterator<ReaderBuffer<SIZE> > iterator;
                /** Constructs buffer for a reader reading some source. */
                ReaderBuffer(lucene::util::Reader& reader);
                /** Returns character at location i */
                wchar_t operator[](int i);
                /** Returns iterator pointing to location i */
                inline iterator at(int i);
                /** Returns iterator pointing to the beginning of character source */
                inline iterator begin();
            private:
                /** Rotating buffer. */
                wchar_t buf_[SIZE];
                /** How many characters have been read from reader */
                int read_;
                /** Points to the next character to be overwritten in buffer */
                int cut_;
                /** Index of oldest character inside the original source */
                int offset_;
                /** Reader reading original source */
                lucene::util::Reader& reader_;
            };

        } // namespace cl

    } // namespace tiny

} // namespace analysis

#endif /* TINYITERATOR_H_ */