searchengine/oss/loc/analysis/inc/public/tinyiterator.h
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #ifndef TINYITERATOR_H_
       
    19 #define TINYITERATOR_H_
       
    20 
       
    21 #include <exception>
       
    22 
       
    23 namespace lucene {
       
    24     namespace util {
       
    25         class Reader; 
       
    26     }
       
    27 }
       
    28 namespace analysis {
       
    29 
       
    30 	 /**
       
     31 	  * This package provides basic iterator and buffer utilities for character streams.
       
    32 	  */
       
    33      namespace tiny {
       
    34       
       
    35         /*
       
    36          *
       
    37          * Meta code for describing the iterator concept used here.
       
    38          * Used to iterate character streams. 
       
    39          * Follows closely the STL forward iterator.
       
    40          * Note: Comparisons of form x < y may not work properly.
       
    41          * Note: x - y will not provide distance in characters, 
       
    42          * but instead offset distance in the original text.  
       
    43          * 
       
     44          * Only operators of the form ++i are provided. --i is not provided
       
    45          * because complications with utf16 or unicode decomposition/ 
       
     46          * composition. i++ would lead to unnecessary code.
       
    47          * 
       
    48          
       
    49         concept Iterator {
       
    50            
       
    51 		   // Accessor to the iterator character
       
    52            wchar_t operator*();     // 16 bit unicode
       
    53            OR
       
    54            int operator*();         // 32 bit unicode
       
    55        
       
    56            // Next location
       
    57 		   Iterator& operator++();
       
    58 		   
       
    59 		   // Returns offset in the original text. Note that one character 
       
    60 		   // may be transformed into a number of characters. This means 
       
    61 		   // that comparisons of form (int)i < (int)j are unreliable and
       
    62 		   // should not be used. Also lengths i - j are unreliable. With korean 
       
     63 		   // i - j may produce length 2, but iterating for (;i<j;++i); may
       
     64 		   // iterate through e.g. 6 characters. Or, with 32 bit unicode,
       
    65 		   // length 2 may contain only one character. 
       
    66            operator int(); 
       
    67         
       
    68         };
       
    69          
       
    70          */
       
    71      
       
    72 		
       
    73         /**
       
    74          * Iterates throught some T that provides array/pointer like interface
       
    75          */
       
    76         template <typename T>
       
    77         struct ArrayIterator {
       
    78             public:
       
    79                 inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}
       
    80                 inline ArrayIterator(T& array) : array_(&array), i_(0) {}
       
    81                 inline ArrayIterator() : array_(0), i_(0) {}
       
    82                 
       
    83                 inline wchar_t operator*() const { return (*array_)[i_]; };  
       
    84                 inline ArrayIterator<T>& operator++() { i_++; return *this; };  
       
    85                 inline operator int() { return i_; }
       
    86             private: 
       
    87                 T* array_;
       
    88                 int i_;
       
    89                 
       
    90         };
       
    91 
       
    92         /**
       
    93          * Iterates from starting position to up to length characters. 
       
    94          */
       
    95         template <typename Iterator>
       
    96         struct RangeIterator {
       
    97             public:
       
    98                 RangeIterator(Iterator& begin, int length) : i_(begin), left_(length) {}
       
    99                 inline int operator*() { return left_ ? *i_: '\0'; }
       
   100                 inline RangeIterator& operator++() { 
       
   101                     if (left_ ) { 
       
   102                         ++i_; left_--;
       
   103                     }
       
   104                     return *this;
       
   105                 }
       
   106                 inline operator int() { return i_; }
       
   107             private: 
       
   108                 Iterator i_;
       
   109                 int left_; 
       
   110         };
       
   111 
       
   112         /**
       
   113          * Turns iterator into a C++ stream. Allows out<<'c'<<'\0'; 
       
   114          * kind of syntax to be used with iterators.  
       
   115          */
       
   116         template <typename Iterator>
       
   117         struct IteratorOutput {
       
   118             public:
       
   119                 IteratorOutput(Iterator i) : i_(i) {}
       
   120                 template <typename T>
       
   121                 inline IteratorOutput& operator<<(T t) {
       
   122                     *i_= t;
       
   123                     ++i_;
       
   124                     return *this;
       
   125                 }
       
   126             private:
       
   127                 Iterator i_;
       
   128         };
       
   129         
       
   130         /**
       
   131          * CLucene IO support
       
   132          */
       
   133         namespace cl {
       
   134         
       
   135 			/**
       
   136 			 * Informs that caller has attempted to read a location
       
   137 			 * from the reader source, that is no more stored in the 
       
   138 			 * buffer.
       
   139 			 */
       
   140 			class TooOldIndexException : public std::exception {
       
   141             public: 
       
   142                 const char* what() const;
       
   143             };
       
   144             
       
   145             /**
       
   146              * Provides buffer & array like interface to be used with
       
   147              * CLucene readers. If reader r provides access to file X, 
       
   148              * and we have buf(r), we can sort of 'random access' file 
       
   149              * X with buf[0], buf[X], buf[Z+3] syntaxes. Still, the buffer
       
   150              * is of limited size. There is always the most recent location
       
   151              * L that is read. Trying to access buf[L-SIZE-1] will raise 
       
   152              * exception, where SIZE is the buffer size. 
       
   153              */
       
   154             template<int SIZE>
       
   155             class ReaderBuffer {
       
   156                 public:
       
   157 					/** Iterator for iterating the underlying source */
       
   158                     typedef ArrayIterator<ReaderBuffer> iterator;
       
   159                     /* Constructs buffer for a reader reading some source. */
       
   160                     ReaderBuffer(lucene::util::Reader& reader);
       
   161                     /** Returns character at location i  */
       
   162                     wchar_t operator[](int i);
       
   163                     /** Returns iterator pointing to location i */
       
   164                     inline iterator at(int i);
       
   165                     /** Returns iterator pointing to the beginning of character source */
       
   166                     inline iterator begin();
       
   167                 private:
       
   168                     /** Rotating buffer. */
       
   169                     wchar_t buf_[SIZE]; 
       
   170                     /** How many characters have been read from reader */
       
   171                     int read_; 
       
   172                     /** Points to the next character to be overwritten in buffer */
       
   173                     int cut_; 
       
   174                     /** Index of oldest character inside the original source */
       
   175                     int offset_; 
       
   176                     /** Reader reading original source */
       
   177                     lucene::util::Reader& reader_;
       
   178             };
       
   179             
       
   180         }
       
   181 
       
   182     }
       
   183 
       
   184 }
       
   185 
       
   186 #endif /* TINYITERATOR_H_ */