searchengine/oss/loc/analysis/inc/public/tinyiterator.h
changeset 24 65456528cac2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysis/inc/public/tinyiterator.h	Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,186 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+#ifndef TINYITERATOR_H_
+#define TINYITERATOR_H_
+
+#include <exception>
+
+namespace lucene {
+    namespace util {
+        class Reader; 
+    }
+}
+namespace analysis {
+
+	 /**
+	  * This package provides basic 
+	  */
+     namespace tiny {
+      
+        /*
+         *
+         * Meta code for describing the iterator concept used here.
+         * Used to iterate character streams. 
+         * Follows closely the STL forward iterator.
+         * Note: Comparisons of form x < y may not work properly.
+         * Note: x - y will not provide distance in characters, 
+         * but instead offset distance in the original text.  
+         * 
+         * Only operators of form ++i is provided. --i is not provided
+         * because complications with utf16 or unicode decomposition/ 
+         * composition. i++ would lead to innecessary code.
+         * 
+         
+        concept Iterator {
+           
+		   // Accessor to the iterator character
+           wchar_t operator*();     // 16 bit unicode
+           OR
+           int operator*();         // 32 bit unicode
+       
+           // Next location
+		   Iterator& operator++();
+		   
+		   // Returns offset in the original text. Note that one character 
+		   // may be transformed into a number of characters. This means 
+		   // that comparisons of form (int)i < (int)j are unreliable and
+		   // should not be used. Also lengths i - j are unreliable. With korean 
+		   // i - j may be produce length 2, but iterating for (;i<j;++i); may
+		   // iterate throught e.g. 6 characters. Or, with 32 bit unicode,
+		   // length 2 may contain only one character. 
+           operator int(); 
+        
+        };
+         
+         */
+     
+		
+        /**
+         * Iterates throught some T that provides array/pointer like interface
+         */
+        template <typename T>
+        struct ArrayIterator {
+            public:
+                inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}
+                inline ArrayIterator(T& array) : array_(&array), i_(0) {}
+                inline ArrayIterator() : array_(0), i_(0) {}
+                
+                inline wchar_t operator*() const { return (*array_)[i_]; };  
+                inline ArrayIterator<T>& operator++() { i_++; return *this; };  
+                inline operator int() { return i_; }
+            private: 
+                T* array_;
+                int i_;
+                
+        };
+
+        /**
+         * Iterates from starting position to up to length characters. 
+         */
+        template <typename Iterator>
+        struct RangeIterator {
+            public:
+                RangeIterator(Iterator& begin, int length) : i_(begin), left_(length) {}
+                inline int operator*() { return left_ ? *i_: '\0'; }
+                inline RangeIterator& operator++() { 
+                    if (left_ ) { 
+                        ++i_; left_--;
+                    }
+                    return *this;
+                }
+                inline operator int() { return i_; }
+            private: 
+                Iterator i_;
+                int left_; 
+        };
+
+        /**
+         * Turns iterator into a C++ stream. Allows out<<'c'<<'\0'; 
+         * kind of syntax to be used with iterators.  
+         */
+        template <typename Iterator>
+        struct IteratorOutput {
+            public:
+                IteratorOutput(Iterator i) : i_(i) {}
+                template <typename T>
+                inline IteratorOutput& operator<<(T t) {
+                    *i_= t;
+                    ++i_;
+                    return *this;
+                }
+            private:
+                Iterator i_;
+        };
+        
+        /**
+         * CLucene IO support
+         */
+        namespace cl {
+        
+			/**
+			 * Informs that caller has attempted to read a location
+			 * from the reader source, that is no more stored in the 
+			 * buffer.
+			 */
+			class TooOldIndexException : public std::exception {
+            public: 
+                const char* what() const;
+            };
+            
+            /**
+             * Provides buffer & array like interface to be used with
+             * CLucene readers. If reader r provides access to file X, 
+             * and we have buf(r), we can sort of 'random access' file 
+             * X with buf[0], buf[X], buf[Z+3] syntaxes. Still, the buffer
+             * is of limited size. There is always the most recent location
+             * L that is read. Trying to access buf[L-SIZE-1] will raise 
+             * exception, where SIZE is the buffer size. 
+             */
+            template<int SIZE>
+            class ReaderBuffer {
+                public:
+					/** Iterator for iterating the underlying source */
+                    typedef ArrayIterator<ReaderBuffer> iterator;
+                    /* Constructs buffer for a reader reading some source. */
+                    ReaderBuffer(lucene::util::Reader& reader);
+                    /** Returns character at location i  */
+                    wchar_t operator[](int i);
+                    /** Returns iterator pointing to location i */
+                    inline iterator at(int i);
+                    /** Returns iterator pointing to the beginning of character source */
+                    inline iterator begin();
+                private:
+                    /** Rotating buffer. */
+                    wchar_t buf_[SIZE]; 
+                    /** How many characters have been read from reader */
+                    int read_; 
+                    /** Points to the next character to be overwritten in buffer */
+                    int cut_; 
+                    /** Index of oldest character inside the original source */
+                    int offset_; 
+                    /** Reader reading original source */
+                    lucene::util::Reader& reader_;
+            };
+            
+        }
+
+    }
+
+}
+
+#endif /* TINYITERATOR_H_ */