|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 #ifndef TINYITERATOR_H_ |
|
19 #define TINYITERATOR_H_ |
|
20 |
|
21 #include <exception> |
|
22 |
|
23 namespace lucene { |
|
24 namespace util { |
|
25 class Reader; |
|
26 } |
|
27 } |
|
28 namespace analysis { |
|
29 |
|
/**
 * This package provides basic iterator utilities for working with
 * character streams.
 */
|
33 namespace tiny { |
|
34 |
|
/*
 *
 * Meta code for describing the iterator concept used here.
 * Used to iterate character streams.
 * Follows closely the STL forward iterator.
 * Note: Comparisons of form x < y may not work properly.
 * Note: x - y will not provide the distance in characters,
 *       but instead the offset distance in the original text.
 *
 * Only operators of form ++i are provided. --i is not provided
 * because of complications with UTF-16 or Unicode decomposition/
 * composition. i++ would lead to unnecessary code.
 *

concept Iterator {

    // Accessor to the iterator character
    wchar_t operator*(); // 16 bit unicode
    OR
    int operator*(); // 32 bit unicode

    // Next location
    Iterator& operator++();

    // Returns offset in the original text. Note that one character
    // may be transformed into a number of characters. This means
    // that comparisons of form (int)i < (int)j are unreliable and
    // should not be used. Also lengths i - j are unreliable. With Korean,
    // i - j may produce length 2, but iterating for (;i<j;++i); may
    // iterate through e.g. 6 characters. Or, with 32 bit unicode,
    // length 2 may contain only one character.
    operator int();

};

*/
|
71 |
|
72 |
|
/**
 * Forward iterator over any T that offers an array/pointer-like
 * interface, i.e. (*array)[i] yields a value convertible to wchar_t.
 *
 * The iterator keeps a pointer to the indexed object plus an integer
 * position; the object itself is neither copied nor released here.
 * A default-constructed iterator holds a null pointer and must not be
 * dereferenced.
 */
template <typename T>
struct ArrayIterator {
public:
    /** Creates an iterator over array, positioned at index i. */
    inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}
    /** Creates an iterator over array, positioned at index 0. */
    inline ArrayIterator(T& array) : array_(&array), i_(0) {}
    /** Creates a detached iterator (null array pointer, index 0). */
    inline ArrayIterator() : array_(0), i_(0) {}

    /** Returns the character found at the current index. */
    inline wchar_t operator*() const { return (*array_)[i_]; }
    /** Moves to the next index and returns this iterator. */
    inline ArrayIterator<T>& operator++() { ++i_; return *this; }
    /**
     * Returns the current index. Declared const (like operator*) so the
     * offset can also be queried through a const iterator.
     */
    inline operator int() const { return i_; }
private:
    T* array_;  // indexed object; null for a default-constructed iterator
    int i_;     // current index into *array_
};
|
91 |
|
/**
 * Iterates from a starting position for up to length characters.
 *
 * Wraps a copy of another iterator together with a countdown. While the
 * countdown is non-zero the wrapped iterator is read and advanced; once
 * it reaches zero, dereferencing yields '\0' and ++ does nothing.
 * Note that any non-zero count is treated as "characters remaining",
 * so a negative length is not handled as an empty range.
 */
template <typename Iterator>
struct RangeIterator {
public:
    /**
     * Starts a range at begin that spans length characters.
     * begin is only copied, so it is taken by const reference; this
     * also allows temporaries (e.g. the result of a begin() call).
     */
    RangeIterator(const Iterator& begin, int length) : i_(begin), left_(length) {}

    /** Returns the current character, or '\0' when the range is used up. */
    inline int operator*() { return left_ ? *i_ : '\0'; }

    /** Advances by one character unless the range is already used up. */
    inline RangeIterator& operator++() {
        if (left_) {
            ++i_;
            left_--;
        }
        return *this;
    }

    /** Returns the wrapped iterator's int conversion (its offset). */
    inline operator int() { return i_; }
private:
    Iterator i_;  // current position (copy of the iterator given at construction)
    int left_;    // number of characters still available in the range
};
|
111 |
|
/**
 * Stream-style writer layered on top of an output iterator. Each value
 * pushed with << is assigned to the current position, after which the
 * position is moved one step forward, so writes can be chained:
 *     out << 'c' << '\0';
 */
template <typename Iterator>
struct IteratorOutput {
private:
    Iterator i_;  // position that receives the next written value

public:
    /** Starts writing at the given position. */
    IteratorOutput(Iterator start) : i_(start) {}

    /** Writes value at the current position, steps forward, returns *this. */
    template <typename T>
    IteratorOutput& operator<<(T value) {
        *i_ = value;
        ++i_;  // only pre-increment is used on the wrapped iterator
        return *this;
    }
};
|
129 |
|
130 /** |
|
131 * CLucene IO support |
|
132 */ |
|
133 namespace cl { |
|
134 |
|
/**
 * Informs that the caller has attempted to read a location
 * from the reader source that is no longer stored in the
 * buffer.
 */
class TooOldIndexException : public std::exception {
public:
    /**
     * Returns a description of the error.
     *
     * std::exception::what() is declared non-throwing, and an override
     * may not have a looser exception specification, hence the throw().
     * Language rule: the out-of-line definition of this function has to
     * repeat the same specifier.
     */
    virtual const char* what() const throw();
};
|
144 |
|
145 /** |
|
146 * Provides buffer & array like interface to be used with |
|
147 * CLucene readers. If reader r provides access to file X, |
|
148 * and we have buf(r), we can sort of 'random access' file |
|
149 * X with buf[0], buf[X], buf[Z+3] syntaxes. Still, the buffer |
|
150 * is of limited size. There is always the most recent location |
|
151 * L that is read. Trying to access buf[L-SIZE-1] will raise |
|
152 * exception, where SIZE is the buffer size. |
|
153 */ |
|
154 template<int SIZE> |
|
155 class ReaderBuffer { |
|
156 public: |
|
157 /** Iterator for iterating the underlying source */ |
|
158 typedef ArrayIterator<ReaderBuffer> iterator; |
|
159 /* Constructs buffer for a reader reading some source. */ |
|
160 ReaderBuffer(lucene::util::Reader& reader); |
|
161 /** Returns character at location i */ |
|
162 wchar_t operator[](int i); |
|
163 /** Returns iterator pointing to location i */ |
|
164 inline iterator at(int i); |
|
165 /** Returns iterator pointing to the beginning of character source */ |
|
166 inline iterator begin(); |
|
167 private: |
|
168 /** Rotating buffer. */ |
|
169 wchar_t buf_[SIZE]; |
|
170 /** How many characters have been read from reader */ |
|
171 int read_; |
|
172 /** Points to the next character to be overwritten in buffer */ |
|
173 int cut_; |
|
174 /** Index of oldest character inside the original source */ |
|
175 int offset_; |
|
176 /** Reader reading original source */ |
|
177 lucene::util::Reader& reader_; |
|
178 }; |
|
179 |
|
180 } |
|
181 |
|
182 } |
|
183 |
|
184 } |
|
185 |
|
186 #endif /* TINYITERATOR_H_ */ |