/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
|
17 |
|
|
18 |
#ifndef TINYITERATOR_H_
|
|
19 |
#define TINYITERATOR_H_
|
|
20 |
|
|
21 |
#include <exception>
|
|
22 |
|
|
23 |
// Forward declaration of the CLucene reader type, so that this header can
// refer to lucene::util::Reader by reference without including CLucene headers.
namespace lucene {
    namespace util {
        class Reader;
    }
}
|
|
28 |
namespace analysis {
|
|
29 |
|
|
30 |
/**
 * This package provides basic iterator utilities for iterating
 * character streams.
 */
|
|
33 |
namespace tiny {
|
|
34 |
|
|
35 |
/*
 *
 * Meta code for describing the iterator concept used here.
 * Used to iterate character streams.
 * Follows closely the STL forward iterator.
 * Note: Comparisons of form x < y may not work properly.
 * Note: x - y will not provide distance in characters,
 *       but instead offset distance in the original text.
 *
 * Only operators of the form ++i are provided. --i is not provided
 * because of complications with UTF-16 or Unicode decomposition/
 * composition. i++ would lead to unnecessary code.
 *

concept Iterator {

    // Accessor to the iterator character
    wchar_t operator*(); // 16 bit unicode
    OR
    int operator*(); // 32 bit unicode

    // Next location
    Iterator& operator++();

    // Returns offset in the original text. Note that one character
    // may be transformed into a number of characters. This means
    // that comparisons of form (int)i < (int)j are unreliable and
    // should not be used. Also lengths i - j are unreliable. With Korean
    // i - j may produce length 2, but iterating for (;i<j;++i); may
    // iterate through e.g. 6 characters. Or, with 32 bit unicode,
    // length 2 may contain only one character.
    operator int();

};

*/
|
|
71 |
|
|
72 |
|
|
73 |
/**
 * Iterates through some T that provides an array/pointer-like interface,
 * i.e. any T for which (*array)[i] yields a character.
 *
 * The iterator stores a pointer to the array and an integer index; it does
 * not own the array, so the array must outlive the iterator.
 */
template <typename T>
struct ArrayIterator {
public:
    /** Creates an iterator referring to index i of the given array. */
    inline ArrayIterator(T& array, int i) : array_(&array), i_(i) {}

    /** Creates an iterator referring to index 0 of the given array. */
    inline ArrayIterator(T& array) : array_(&array), i_(0) {}

    /**
     * Creates an iterator that refers to no array (null pointer, index 0).
     * Such an iterator must not be dereferenced.
     */
    inline ArrayIterator() : array_(0), i_(0) {}

    /** Returns the character at the current index, via T's operator[]. */
    inline wchar_t operator*() const { return (*array_)[i_]; }

    /** Advances to the next index and returns this iterator. */
    inline ArrayIterator<T>& operator++() { ++i_; return *this; }

    /**
     * Returns the current index. Declared const so that the index can
     * also be queried through const iterators.
     */
    inline operator int() const { return i_; }

private:
    /** Array being iterated; not owned. */
    T* array_;
    /** Current index into the array. */
    int i_;
};
|
|
91 |
|
|
92 |
/**
 * Iterates from a starting position over up to length characters.
 *
 * Wraps a copy of another iterator together with a countdown of the
 * remaining characters. Once the countdown reaches zero, dereferencing
 * yields '\0' and incrementing has no effect.
 */
template <typename Iterator>
struct RangeIterator {
public:
    /**
     * @param begin  iterator marking the start of the range; it is copied,
     *               so temporaries and const iterators are accepted
     * @param length number of characters that may be iterated
     */
    RangeIterator(const Iterator& begin, int length) : i_(begin), left_(length) {}

    /** Returns the current character, or '\0' when the range is exhausted. */
    inline int operator*() { return left_ ? *i_ : '\0'; }

    /**
     * Advances the wrapped iterator and consumes one character of the
     * range. Does nothing when the range is exhausted.
     */
    inline RangeIterator& operator++() {
        if (left_) {
            ++i_;
            left_--;
        }
        return *this;
    }

    /** Returns the wrapped iterator converted to int. */
    inline operator int() { return i_; }

private:
    /** Wrapped iterator, pointing at the current character. */
    Iterator i_;
    /** Number of characters left in the range. */
    int left_;
};
|
|
111 |
|
|
112 |
/**
 * Turns an iterator into a C++ stream-like sink. Allows the
 * out<<'c'<<'\0'; kind of syntax to be used with iterators.
 *
 * The iterator is stored by value; each inserted value is assigned
 * through the iterator, which is then advanced by one.
 */
template <typename Iterator>
struct IteratorOutput {
public:
    /** Creates an output that starts writing at position i. */
    IteratorOutput(Iterator i) : i_(i) {}

    /**
     * Writes t to the current position and moves to the next one.
     * Returns this output, so that insertions can be chained.
     */
    template <typename T>
    inline IteratorOutput& operator<<(T t) {
        *i_ = t;
        ++i_;
        return *this;
    }

private:
    /** Position the next value is written to. */
    Iterator i_;
};
|
|
129 |
|
|
130 |
/**
|
|
131 |
* CLucene IO support
|
|
132 |
*/
|
|
133 |
namespace cl {
|
|
134 |
|
|
135 |
/**
 * Informs that the caller has attempted to read a location
 * from the reader source that is no longer stored in the
 * buffer.
 */
class TooOldIndexException : public std::exception {
public:
    /**
     * Returns a description of the error.
     *
     * std::exception::what() is declared non-throwing, and an override
     * may not have a looser exception specification, hence throw().
     * The out-of-line definition must repeat the same specifier.
     */
    const char* what() const throw();
};
|
|
144 |
|
|
145 |
/**
|
|
146 |
* Provides buffer & array like interface to be used with
|
|
147 |
* CLucene readers. If reader r provides access to file X,
|
|
148 |
* and we have buf(r), we can sort of 'random access' file
|
|
149 |
* X with buf[0], buf[X], buf[Z+3] syntaxes. Still, the buffer
|
|
150 |
* is of limited size. There is always the most recent location
|
|
151 |
* L that is read. Trying to access buf[L-SIZE-1] will raise
|
|
152 |
* exception, where SIZE is the buffer size.
|
|
153 |
*/
|
|
154 |
template<int SIZE>
|
|
155 |
class ReaderBuffer {
|
|
156 |
public:
|
|
157 |
/** Iterator for iterating the underlying source */
|
|
158 |
typedef ArrayIterator<ReaderBuffer> iterator;
|
|
159 |
/* Constructs buffer for a reader reading some source. */
|
|
160 |
ReaderBuffer(lucene::util::Reader& reader);
|
|
161 |
/** Returns character at location i */
|
|
162 |
wchar_t operator[](int i);
|
|
163 |
/** Returns iterator pointing to location i */
|
|
164 |
inline iterator at(int i);
|
|
165 |
/** Returns iterator pointing to the beginning of character source */
|
|
166 |
inline iterator begin();
|
|
167 |
private:
|
|
168 |
/** Rotating buffer. */
|
|
169 |
wchar_t buf_[SIZE];
|
|
170 |
/** How many characters have been read from reader */
|
|
171 |
int read_;
|
|
172 |
/** Points to the next character to be overwritten in buffer */
|
|
173 |
int cut_;
|
|
174 |
/** Index of oldest character inside the original source */
|
|
175 |
int offset_;
|
|
176 |
/** Reader reading original source */
|
|
177 |
lucene::util::Reader& reader_;
|
|
178 |
};
|
|
179 |
|
|
180 |
}
|
|
181 |
|
|
182 |
}
|
|
183 |
|
|
184 |
}
|
|
185 |
|
|
186 |
#endif /* TINYITERATOR_H_ */
|