8
|
1 |
/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
|
17 |
|
|
18 |
|
|
19 |
#include "CLucene.h"
|
|
20 |
|
|
21 |
#include "cpixmaindefs.h"
|
|
22 |
|
|
23 |
// internal libs
|
|
24 |
#include "cpixparsetools.h"
|
|
25 |
|
|
26 |
// internal
|
|
27 |
#include "analyzer.h"
|
|
28 |
|
|
29 |
#include "prefixqueryparser.h"
|
|
30 |
|
|
31 |
#include "cpixanalyzer.h"
|
|
32 |
#include "cluceneext.h"
|
|
33 |
|
|
34 |
#include "tinyunicode.h"
|
|
35 |
|
|
36 |
#include "cpixexc.h"
|
|
37 |
|
|
38 |
namespace Cpix {
|
|
39 |
|
|
40 |
using namespace lucene::analysis;
|
|
41 |
using namespace lucene::search;
|
|
42 |
using namespace lucene::document;
|
|
43 |
using namespace lucene::util;
|
|
44 |
using lucene::index::Term;
|
|
45 |
using namespace std;
|
|
46 |
|
|
47 |
namespace {
|
|
48 |
|
|
49 |
/**
|
|
50 |
* Small optimization to avoid creating extra boolean queries
|
|
51 |
*/
|
|
52 |
class QueryConstructor {
|
|
53 |
|
|
54 |
public:
|
|
55 |
QueryConstructor() : q_(), bq_(0) {}
|
|
56 |
|
|
57 |
auto_ptr<Query> operator()() {
|
|
58 |
return q_;
|
|
59 |
}
|
|
60 |
void add(auto_ptr<Query> q) {
|
|
61 |
if ( q.get() ) {
|
|
62 |
if ( bq_ ) {
|
16
|
63 |
bq_->add( q.release(), true, false, false );
|
8
|
64 |
} else {
|
|
65 |
if ( q_.get() ) {
|
|
66 |
auto_ptr<BooleanQuery> bq( new BooleanQuery() );
|
|
67 |
bq_ = bq.get();
|
14
|
68 |
bq_->add( q_.release(), true, false, false );
|
|
69 |
bq_->add( q.release(), true, false, false );
|
8
|
70 |
q_.reset( bq.release() );
|
|
71 |
} else {
|
|
72 |
q_ = q;
|
|
73 |
}
|
|
74 |
}
|
|
75 |
}
|
|
76 |
}
|
|
77 |
inline void add(Query* q) {
|
|
78 |
add( auto_ptr<Query>( q ) );
|
|
79 |
}
|
|
80 |
|
|
81 |
private:
|
|
82 |
|
|
83 |
auto_ptr<Query> q_;
|
|
84 |
BooleanQuery* bq_;
|
|
85 |
|
|
86 |
};
|
|
87 |
|
|
88 |
/**
|
|
89 |
* TokenStream interface with one modification:
|
|
90 |
* * Ability to check if returned token was last one in the stream
|
|
91 |
*/
|
|
92 |
class HasNextTokenStream {
|
|
93 |
|
|
94 |
public:
|
|
95 |
|
|
96 |
HasNextTokenStream(TokenStream* tokens)
|
|
97 |
: i_(true),
|
|
98 |
next_(),
|
|
99 |
buf_(),
|
|
100 |
tokens_( tokens ){
|
|
101 |
next_ = tokens_->next(&buf_[0]);
|
|
102 |
}
|
|
103 |
|
|
104 |
inline Token& next() {
|
|
105 |
next_ = tokens_->next(&buf_[i_]);
|
|
106 |
i_ = !i_;
|
|
107 |
return buf_[i_];
|
|
108 |
}
|
|
109 |
|
|
110 |
inline bool hasNext() {
|
|
111 |
return next_;
|
|
112 |
}
|
|
113 |
|
|
114 |
private:
|
|
115 |
bool i_, next_;
|
|
116 |
Token buf_[2];
|
|
117 |
auto_ptr<TokenStream> tokens_;
|
|
118 |
};
|
|
119 |
|
|
120 |
|
|
121 |
}
|
|
122 |
|
|
123 |
/**
 * Creates a parser whose queries target the given field; the field name
 * is stored in field_.
 */
PrefixQueryParser::PrefixQueryParser(const wchar_t* field)
:   field_( field )
{
}
|
|
125 |
|
|
126 |
/** Nothing to release explicitly; members clean up after themselves. */
PrefixQueryParser::~PrefixQueryParser()
{
}
|
|
127 |
|
|
128 |
/**
 * Splits the query string with Cpt::Lex::WhitespaceSplitter, converts
 * each piece with toQuery() and combines the results through a
 * QueryConstructor.
 *
 * @param query the query text to parse
 * @return the combined query; empty if no piece produced a query
 */
auto_ptr<Query> PrefixQueryParser::parse(const wchar_t* query) {
    Cpt::Lex::WhitespaceSplitter words( query );
    QueryConstructor combined;

    while ( words ) {
        Cpt::Lex::Token word = words++;
        combined.add( toQuery( word ) );
    }

    return combined();
}
|
|
136 |
|
|
137 |
const wchar_t* PrefixQueryParser::getField() const {
|
|
138 |
return field_.c_str();
|
|
139 |
}
|
|
140 |
|
|
141 |
/**
 * Unsupported operation: the argument is ignored and the call always
 * reports an error through THROW_CPIXEXC.
 */
void PrefixQueryParser::setDefaultOperator(cpix_QP_Operator op)
{
    THROW_CPIXEXC("Prefix query parser does not support setting the default operator.");
}
|
|
144 |
|
|
145 |
/**
 * Decides whether a token may be turned into a prefix query.
 *
 * @return false when analysis::unicode::IsCjk accepts the first
 *         character of the token text, true otherwise.
 */
bool PrefixQueryParser::usePrefixFor(lucene::analysis::Token& token) {
    const bool startsWithCjk = analysis::unicode::IsCjk( token.termText()[0] );
    return !startsWithCjk;
}
|
|
148 |
|
|
149 |
/**
 * Converts one whitespace-separated word of the user query into a query.
 *
 * The word is tokenized with the prefix analyzer. For each resulting
 * token:
 *  - if usePrefixFor() accepts it, the last token of the word becomes a
 *    PrefixQuery and every earlier token a TermQuery;
 *  - otherwise the token text is run through the query analyzer; one
 *    resulting token yields a TermQuery, several yield a PhraseQuery,
 *    and none yields nothing.
 *
 * @param word one piece of the query as produced by the splitter
 * @return the combination of all generated queries; empty if the word
 *         produced no tokens
 */
auto_ptr<Query>
PrefixQueryParser::toQuery(Cpt::Lex::Token word) {
    Analyzer& preAnalyzer( Analysis::getPrefixAnalyzer() );
    // The reader is declared before the stream that reads from it.
    StringReader reader( word.begin(), word.length() );
    HasNextTokenStream tokens(
        preAnalyzer.tokenStream( field_.c_str(),
                                 &reader ) );

    QueryConstructor ret;

    while ( tokens.hasNext() ) {
        // Valid only until the next-but-one call of tokens.next().
        lucene::analysis::Token& token = tokens.next();

        if ( usePrefixFor(token) ) {
            if (!tokens.hasNext()) {
                // Turn only last token of this word into prefix query
                ret.add(
                    _CLNEW PrefixQuery( freeref( _CLNEW Term( field_.c_str(),
                                                             token.termText() ) ) ) );
            } else {
                // Other tokens can be normal term queries
                ret.add(
                    _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(),
                                                           token.termText() ) ) ) );
            }
        } else {
            Analyzer& termAnalyzer = Analysis::getQueryAnalyzer();
            StringReader termReader( token.termText(), token.termTextLength() );
            HasNextTokenStream termTokens(
                termAnalyzer.tokenStream( field_.c_str(),
                                          &termReader ) );

            if ( !termTokens.hasNext() ) {
                // The query analyzer produced nothing for this token;
                // do not build a query from an unfilled Token.
                continue;
            }

            // 'first' must be consumed before termTokens.next() is
            // called twice more, because the token slots are reused.
            Token& first = termTokens.next();
            if (termTokens.hasNext()) { // more than one
                auto_ptr<PhraseQuery> phrase( _CLNEW PhraseQuery() );
                phrase->add( freeref( _CLNEW Term( field_.c_str(),
                                                   first.termText() ) ) );
                while (termTokens.hasNext()) {
                    phrase->add( freeref( _CLNEW Term( field_.c_str(),
                                                       termTokens.next().termText() ) ) );
                }
                ret.add( std::auto_ptr<Query>( phrase.release() ) );
            } else {
                ret.add(
                    _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(),
                                                           first.termText() ) ) ) );
            }
        }
    }
    return ret();
}
|
|
200 |
|
|
201 |
}
|