searchengine/oss/loc/analysis/inc/public/tinyiterator.h
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
hgs
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
hgs
parents:
diff changeset
     3
* All rights reserved.
hgs
parents:
diff changeset
     4
* This component and the accompanying materials are made available
hgs
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
hgs
parents:
diff changeset
     6
* which accompanies this distribution, and is available
hgs
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
hgs
parents:
diff changeset
     8
*
hgs
parents:
diff changeset
     9
* Initial Contributors:
hgs
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
hgs
parents:
diff changeset
    11
*
hgs
parents:
diff changeset
    12
* Contributors:
hgs
parents:
diff changeset
    13
*
hgs
parents:
diff changeset
    14
* Description: 
hgs
parents:
diff changeset
    15
*
hgs
parents:
diff changeset
    16
*/
hgs
parents:
diff changeset
    17
hgs
parents:
diff changeset
    18
#ifndef TINYITERATOR_H_
hgs
parents:
diff changeset
    19
#define TINYITERATOR_H_
hgs
parents:
diff changeset
    20
hgs
parents:
diff changeset
    21
#include <exception>
hgs
parents:
diff changeset
    22
hgs
parents:
diff changeset
    23
namespace lucene {
hgs
parents:
diff changeset
    24
    namespace util {
hgs
parents:
diff changeset
    25
        class Reader; 
hgs
parents:
diff changeset
    26
    }
hgs
parents:
diff changeset
    27
}
hgs
parents:
diff changeset
    28
namespace analysis {
hgs
parents:
diff changeset
    29
hgs
parents:
diff changeset
    30
	 /**
hgs
parents:
diff changeset
    31
	  * This package provides basic iterator utilities.
hgs
parents:
diff changeset
    32
	  */
hgs
parents:
diff changeset
    33
     namespace tiny {
hgs
parents:
diff changeset
    34
      
hgs
parents:
diff changeset
    35
        /*
hgs
parents:
diff changeset
    36
         *
hgs
parents:
diff changeset
    37
         * Meta code for describing the iterator concept used here.
hgs
parents:
diff changeset
    38
         * Used to iterate character streams. 
hgs
parents:
diff changeset
    39
         * Follows closely the STL forward iterator.
hgs
parents:
diff changeset
    40
         * Note: Comparisons of form x < y may not work properly.
hgs
parents:
diff changeset
    41
         * Note: x - y will not provide distance in characters, 
hgs
parents:
diff changeset
    42
         * but instead offset distance in the original text.  
hgs
parents:
diff changeset
    43
         * 
hgs
parents:
diff changeset
    44
         * Only operators of form ++i are provided. --i is not provided
hgs
parents:
diff changeset
    45
         * because of complications with UTF-16 or Unicode decomposition/
hgs
parents:
diff changeset
    46
         * composition. i++ would lead to unnecessary code.
hgs
parents:
diff changeset
    47
         * 
hgs
parents:
diff changeset
    48
         
hgs
parents:
diff changeset
    49
        concept Iterator {
hgs
parents:
diff changeset
    50
           
hgs
parents:
diff changeset
    51
		   // Accessor to the iterator character
hgs
parents:
diff changeset
    52
           wchar_t operator*();     // 16 bit unicode
hgs
parents:
diff changeset
    53
           OR
hgs
parents:
diff changeset
    54
           int operator*();         // 32 bit unicode
hgs
parents:
diff changeset
    55
       
hgs
parents:
diff changeset
    56
           // Next location
hgs
parents:
diff changeset
    57
		   Iterator& operator++();
hgs
parents:
diff changeset
    58
		   
hgs
parents:
diff changeset
    59
		   // Returns offset in the original text. Note that one character 
hgs
parents:
diff changeset
    60
		   // may be transformed into a number of characters. This means 
hgs
parents:
diff changeset
    61
		   // that comparisons of form (int)i < (int)j are unreliable and
hgs
parents:
diff changeset
    62
		   // should not be used. Also lengths i - j are unreliable. With Korean
hgs
parents:
diff changeset
    63
		   // i - j may produce length 2, but iterating for (;i<j;++i); may
hgs
parents:
diff changeset
    64
		   // iterate through e.g. 6 characters. Or, with 32 bit unicode,
hgs
parents:
diff changeset
    65
		   // length 2 may contain only one character. 
hgs
parents:
diff changeset
    66
           operator int(); 
hgs
parents:
diff changeset
    67
        
hgs
parents:
diff changeset
    68
        };
hgs
parents:
diff changeset
    69
         
hgs
parents:
diff changeset
    70
         */
hgs
parents:
diff changeset
    71
     
hgs
parents:
diff changeset
    72
		
hgs
parents:
diff changeset
    73
        /**
         * Forward iterator over some T that provides an array/pointer like
         * interface, i.e. that can be indexed with operator[].
         *
         * The iterator stores a pointer to the container and an integer
         * index. It does not own the container, so the container has to
         * stay alive for as long as the iterator is dereferenced.
         */
        template <typename T>
        struct ArrayIterator {
            public:
                /** Creates an iterator positioned at index i of array. */
                inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}
                /** Creates an iterator positioned at index 0 of array. */
                inline ArrayIterator(T& array) : array_(&array), i_(0) {}
                /**
                 * Creates a detached iterator (null container, index 0).
                 * It must not be dereferenced before it has been assigned
                 * from an iterator that is bound to a container.
                 */
                inline ArrayIterator() : array_(0), i_(0) {}

                /** Returns the element at the current index, (*array)[i]. */
                inline wchar_t operator*() const { return (*array_)[i_]; }
                /** Moves to the next index and returns this iterator. */
                inline ArrayIterator<T>& operator++() { ++i_; return *this; }
                /**
                 * Returns the current index. Const-qualified so that the
                 * offset can also be read from a const iterator.
                 */
                inline operator int() const { return i_; }
            private:
                T* array_; // Not owned; null for a default-constructed iterator
                int i_;    // Current index into *array_
        };
hgs
parents:
diff changeset
    91
hgs
parents:
diff changeset
    92
        /**
         * Iterates from a starting position over up to length characters.
         *
         * Once the budget of characters is used up, dereferencing yields
         * '\0' and operator++ no longer advances the wrapped iterator.
         */
        template <typename Iterator>
        struct RangeIterator {
            public:
                /**
                 * @param begin  position to start from. It is copied, so the
                 *               caller's iterator is never advanced. Taken by
                 *               const reference so temporaries are accepted.
                 * @param length maximum number of characters to iterate. Zero
                 *               or a negative value gives an already
                 *               exhausted range.
                 */
                RangeIterator(const Iterator& begin, int length)
                    : i_(begin), left_(length > 0 ? length : 0) {}

                /** Returns the current character, or '\0' when exhausted. */
                inline int operator*() { return left_ ? *i_ : '\0'; }

                /**
                 * Advances by one character while characters remain;
                 * does nothing once the range is exhausted.
                 */
                inline RangeIterator& operator++() {
                    if (left_) {
                        ++i_;
                        --left_;
                    }
                    return *this;
                }

                /** Returns the wrapped iterator converted to int. */
                inline operator int() { return i_; }
            private:
                Iterator i_; // Current position (private copy of begin)
                int left_;   // Characters still available; never negative
        };
hgs
parents:
diff changeset
   111
hgs
parents:
diff changeset
   112
        /**
         * Turns an iterator into a C++ stream like object. Allows the
         * out<<'c'<<'\0'; kind of syntax to be used with iterators.
         */
        template <typename Iterator>
        struct IteratorOutput {
            public:
                /** Starts writing at position i; the iterator is copied. */
                IteratorOutput(Iterator i) : i_(i) {}

                /**
                 * Assigns t to the current position, advances the position
                 * by one and returns this object so that calls can be
                 * chained. No bounds checking is done here.
                 */
                template <typename T>
                inline IteratorOutput& operator<<(T t) {
                    *i_ = t;
                    ++i_;
                    return *this;
                }
            private:
                Iterator i_; // Next position to be written
        };
hgs
parents:
diff changeset
   129
        
hgs
parents:
diff changeset
   130
        /**
hgs
parents:
diff changeset
   131
         * CLucene IO support
hgs
parents:
diff changeset
   132
         */
hgs
parents:
diff changeset
   133
        namespace cl {
hgs
parents:
diff changeset
   134
        
hgs
parents:
diff changeset
   135
			/**
			 * Informs that the caller has attempted to read a location
			 * from the reader source that is no longer stored in the
			 * buffer.
			 */
			class TooOldIndexException : public std::exception {
			public:
				/**
				 * Returns a description of the error (defined out of line).
				 *
				 * Declared throw() because std::exception::what() is
				 * non-throwing and an override must not loosen that.
				 * The out-of-line definition has to repeat the specifier.
				 */
				const char* what() const throw();
			};
hgs
parents:
diff changeset
   144
            
hgs
parents:
diff changeset
   145
            /**
hgs
parents:
diff changeset
   146
             * Provides buffer & array like interface to be used with
hgs
parents:
diff changeset
   147
             * CLucene readers. If reader r provides access to file X, 
hgs
parents:
diff changeset
   148
             * and we have buf(r), we can sort of 'random access' file 
hgs
parents:
diff changeset
   149
             * X with buf[0], buf[X], buf[Z+3] syntaxes. Still, the buffer
hgs
parents:
diff changeset
   150
             * is of limited size. There is always the most recent location
hgs
parents:
diff changeset
   151
             * L that is read. Trying to access buf[L-SIZE-1] will raise 
hgs
parents:
diff changeset
   152
             * exception, where SIZE is the buffer size. 
hgs
parents:
diff changeset
   153
             */
hgs
parents:
diff changeset
   154
            template<int SIZE>
hgs
parents:
diff changeset
   155
            class ReaderBuffer {
hgs
parents:
diff changeset
   156
                public:
hgs
parents:
diff changeset
   157
					/** Iterator for iterating the underlying source */
hgs
parents:
diff changeset
   158
                    typedef ArrayIterator<ReaderBuffer> iterator;
hgs
parents:
diff changeset
   159
                    /* Constructs buffer for a reader reading some source. */
hgs
parents:
diff changeset
   160
                    ReaderBuffer(lucene::util::Reader& reader);
hgs
parents:
diff changeset
   161
                    /** Returns character at location i  */
hgs
parents:
diff changeset
   162
                    wchar_t operator[](int i);
hgs
parents:
diff changeset
   163
                    /** Returns iterator pointing to location i */
hgs
parents:
diff changeset
   164
                    inline iterator at(int i);
hgs
parents:
diff changeset
   165
                    /** Returns iterator pointing to the beginning of character source */
hgs
parents:
diff changeset
   166
                    inline iterator begin();
hgs
parents:
diff changeset
   167
                private:
hgs
parents:
diff changeset
   168
                    /** Rotating buffer. */
hgs
parents:
diff changeset
   169
                    wchar_t buf_[SIZE]; 
hgs
parents:
diff changeset
   170
                    /** How many characters have been read from reader */
hgs
parents:
diff changeset
   171
                    int read_; 
hgs
parents:
diff changeset
   172
                    /** Points to the next character to be overwritten in buffer */
hgs
parents:
diff changeset
   173
                    int cut_; 
hgs
parents:
diff changeset
   174
                    /** Index of oldest character inside the original source */
hgs
parents:
diff changeset
   175
                    int offset_; 
hgs
parents:
diff changeset
   176
                    /** Reader reading original source */
hgs
parents:
diff changeset
   177
                    lucene::util::Reader& reader_;
hgs
parents:
diff changeset
   178
            };
hgs
parents:
diff changeset
   179
            
hgs
parents:
diff changeset
   180
        }
hgs
parents:
diff changeset
   181
hgs
parents:
diff changeset
   182
    }
hgs
parents:
diff changeset
   183
hgs
parents:
diff changeset
   184
}
hgs
parents:
diff changeset
   185
hgs
parents:
diff changeset
   186
#endif /* TINYITERATOR_H_ */