|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include "CLucene.h" |
|
20 |
|
21 #include "cpixmaindefs.h" |
|
22 |
|
23 // internal libs |
|
24 #include "cpixparsetools.h" |
|
25 |
|
26 // internal |
|
27 #include "analyzer.h" |
|
28 |
|
29 #include "prefixqueryparser.h" |
|
30 |
|
31 #include "cpixanalyzer.h" |
|
32 #include "cluceneext.h" |
|
33 |
|
34 #include "tinyunicode.h" |
|
35 |
|
36 #include "cpixexc.h" |
|
37 |
|
38 namespace Cpix { |
|
39 |
|
40 using namespace lucene::analysis; |
|
41 using namespace lucene::search; |
|
42 using namespace lucene::document; |
|
43 using namespace lucene::util; |
|
44 using lucene::index::Term; |
|
45 using namespace std; |
|
46 |
|
47 namespace { |
|
48 |
|
49 /** |
|
50 * Small optimization to avoid creating extra boolean queries |
|
51 */ |
|
52 class QueryConstructor { |
|
53 |
|
54 public: |
|
55 QueryConstructor() : q_(), bq_(0) {} |
|
56 |
|
57 auto_ptr<Query> operator()() { |
|
58 return q_; |
|
59 } |
|
60 void add(auto_ptr<Query> q) { |
|
61 if ( q.get() ) { |
|
62 if ( bq_ ) { |
|
63 bq_->add( q.release(), true, true, false ); |
|
64 } else { |
|
65 if ( q_.get() ) { |
|
66 auto_ptr<BooleanQuery> bq( new BooleanQuery() ); |
|
67 bq_ = bq.get(); |
|
68 bq_->add( q_.release(), true, true, false ); |
|
69 bq_->add( q.release(), true, true, false ); |
|
70 q_.reset( bq.release() ); |
|
71 } else { |
|
72 q_ = q; |
|
73 } |
|
74 } |
|
75 } |
|
76 } |
|
77 inline void add(Query* q) { |
|
78 add( auto_ptr<Query>( q ) ); |
|
79 } |
|
80 |
|
81 private: |
|
82 |
|
83 auto_ptr<Query> q_; |
|
84 BooleanQuery* bq_; |
|
85 |
|
86 }; |
|
87 |
|
88 /** |
|
89 * TokenStream interface with one modification: |
|
90 * * Ability to check if returned token was last one in the stream |
|
91 */ |
|
92 class HasNextTokenStream { |
|
93 |
|
94 public: |
|
95 |
|
96 HasNextTokenStream(TokenStream* tokens) |
|
97 : i_(true), |
|
98 next_(), |
|
99 buf_(), |
|
100 tokens_( tokens ){ |
|
101 next_ = tokens_->next(&buf_[0]); |
|
102 } |
|
103 |
|
104 inline Token& next() { |
|
105 next_ = tokens_->next(&buf_[i_]); |
|
106 i_ = !i_; |
|
107 return buf_[i_]; |
|
108 } |
|
109 |
|
110 inline bool hasNext() { |
|
111 return next_; |
|
112 } |
|
113 |
|
114 private: |
|
115 bool i_, next_; |
|
116 Token buf_[2]; |
|
117 auto_ptr<TokenStream> tokens_; |
|
118 }; |
|
119 |
|
120 |
|
121 } |
|
122 |
|
123 PrefixQueryParser::PrefixQueryParser(const wchar_t* field) |
|
124 : field_(field) {} |
|
125 |
|
126 PrefixQueryParser::~PrefixQueryParser() {} |
|
127 |
|
128 auto_ptr<Query> PrefixQueryParser::parse(const wchar_t* query) { |
|
129 Cpt::Lex::WhitespaceSplitter split(query); |
|
130 QueryConstructor ret; |
|
131 while ( split ) { |
|
132 ret.add( toQuery( split++ ) ); |
|
133 } |
|
134 return ret(); |
|
135 } |
|
136 |
|
137 const wchar_t* PrefixQueryParser::getField() const { |
|
138 return field_.c_str(); |
|
139 } |
|
140 |
|
141 void PrefixQueryParser::setDefaultOperator(cpix_QP_Operator op) { |
|
142 THROW_CPIXEXC("Prefix query parser does not support setting the default operator."); |
|
143 } |
|
144 |
|
145 bool PrefixQueryParser::usePrefixFor(lucene::analysis::Token& token) { |
|
146 return !analysis::unicode::IsCjk(token.termText()[0]); |
|
147 } |
|
148 |
|
149 auto_ptr<Query> |
|
150 PrefixQueryParser::toQuery(Cpt::Lex::Token word) { |
|
151 Analyzer& preAnalyzer( Analysis::getPrefixAnalyzer() ); |
|
152 StringReader reader( word.begin(), word.length() ); |
|
153 HasNextTokenStream tokens( |
|
154 preAnalyzer.tokenStream( field_.c_str(), |
|
155 &reader ) ); |
|
156 |
|
157 QueryConstructor ret; |
|
158 |
|
159 while ( tokens.hasNext() ) { |
|
160 lucene::analysis::Token& token = tokens.next(); |
|
161 |
|
162 if ( usePrefixFor(token) ) { |
|
163 if (!tokens.hasNext()) { |
|
164 // Turn only last token of this word into prefix query |
|
165 ret.add( |
|
166 _CLNEW PrefixQuery( freeref( _CLNEW Term( field_.c_str(), |
|
167 token.termText() ) ) ) ); |
|
168 } else { |
|
169 // Others tokens can be normal term queries |
|
170 ret.add( |
|
171 _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(), |
|
172 token.termText() ) ) ) ); |
|
173 } |
|
174 } else { |
|
175 Analyzer& termAnalyzer = Analysis::getQueryAnalyzer(); |
|
176 StringReader reader( token.termText(), token.termTextLength() ); |
|
177 HasNextTokenStream tokens( |
|
178 termAnalyzer.tokenStream( field_.c_str(), |
|
179 &reader ) ); |
|
180 |
|
181 Token& first = tokens.next(); |
|
182 if (tokens.hasNext()) { // more than one |
|
183 auto_ptr<PhraseQuery> phrase( _CLNEW PhraseQuery() ); |
|
184 phrase->add( freeref( _CLNEW Term( field_.c_str(), |
|
185 first.termText() ) ) ); |
|
186 while (tokens.hasNext()) { |
|
187 phrase->add( freeref( _CLNEW Term( field_.c_str(), |
|
188 tokens.next().termText() ) ) ); |
|
189 } |
|
190 ret.add( std::auto_ptr<Query>( phrase.release() ) ); |
|
191 } else { |
|
192 ret.add( |
|
193 _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(), |
|
194 first.termText() ) ) ) ); |
|
195 } |
|
196 } |
|
197 } |
|
198 return ret(); |
|
199 } |
|
200 |
|
201 } |