10 #include "clucene/document/document.h" |
10 #include "clucene/document/document.h" |
11 #include "clucene/index/indexreader.h" |
11 #include "clucene/index/indexreader.h" |
12 #include "filter.h" |
12 #include "filter.h" |
13 #include "clucene/search/searchheader.h" |
13 #include "clucene/search/searchheader.h" |
14 //#ifdef USE_HIGHLIGHTER |
14 //#ifdef USE_HIGHLIGHTER |
15 |
|
16 #include "CLucene/highlighter/QueryTermExtractor.h" |
15 #include "CLucene/highlighter/QueryTermExtractor.h" |
17 #include "CLucene/highlighter/QueryScorer.h" |
16 #include "CLucene/highlighter/QueryScorer.h" |
18 #include "CLucene/highlighter/Highlighter.h" |
17 #include "CLucene/highlighter/Highlighter.h" |
19 #include "CLucene/highlighter/SimpleHTMLFormatter.h" |
18 #include "CLucene/highlighter/SimpleHTMLFormatter.h" |
20 #include "CLucene/analysis/standard/StandardAnalyzer.h" |
19 #include "CLucene/analysis/standard/StandardAnalyzer.h" |
21 #include "clucene/search/prefixquery.h" |
20 #include "clucene/search/prefixquery.h" |
22 |
21 |
23 // internal libs |
22 #include "prefixfilter.h" |
24 #include "cpixparsetools.h" |
23 #include "koreananalyzer.h" |
25 |
24 |
26 //#endif |
25 //#endif |
27 |
26 |
28 CL_NS_USE(document) |
27 CL_NS_USE(document) |
29 CL_NS_USE(util) |
28 CL_NS_USE(util) |
79 |
81 |
80 } |
82 } |
81 int32_t Hits::length() const { |
83 int32_t Hits::length() const { |
82 return _length; |
84 return _length; |
83 } |
85 } |
84 |
86 |
|
87 void Hits::getHighlightedText(CL_NS(document)::Document* document) |
|
88 { |
|
89 /* TODO :: Important consideration for getting locale |
|
90 * Highlighting is based on the locale, the current implementation is |
|
91 * only for symbian devices, this dependency should be complete before |
|
92 * porting to any other OS. so all code is under symbian macro. |
|
93 * |
|
94 */ |
|
95 #if defined (__SYMBIAN32__) |
|
96 TCHAR* result = NULL; |
|
97 CL_NS2(search,highlight)::QueryScorer hl_scorer(query); |
|
98 CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); |
|
99 highlighter.setTextFragmenter(&hl_frag); |
|
100 |
|
101 const TCHAR* fieldtxt = document->get(LCPIX_HL_EXCERPT_FIELD); |
|
102 |
|
103 if(fieldtxt) |
|
104 { |
|
105 StringReader strreader(fieldtxt); |
|
106 |
|
107 switch(lang) |
|
108 { |
|
109 case ELangEnglish: |
|
110 case ELangCanadianEnglish: |
|
111 case ELangInternationalEnglish: |
|
112 case ELangSouthAfricanEnglish: |
|
113 { |
|
114 CL_NS(analysis)::TokenStream* tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardTokenizer(&strreader); |
|
115 tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardFilter(tokenstream,true); |
|
116 tokenstream = _CLNEW CL_NS(analysis)::LowerCaseFilter(tokenstream,true); |
|
117 result = highlighter.getBestFragments(tokenstream, fieldtxt, 2, L"..."); |
|
118 break; |
|
119 } |
|
120 case ELangFrench: |
|
121 case ELangSwissFrench: |
|
122 case ELangBelgianFrench: |
|
123 case ELangInternationalFrench: |
|
124 case ELangCanadianFrench: |
|
125 { |
|
126 ::analysis::FrenchAnalyzer hl_analyzer; |
|
127 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); |
|
128 result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); |
|
129 break; |
|
130 } |
|
131 case ELangHebrew: |
|
132 { |
|
133 ::analysis::HebrewAnalyzer hl_analyzer; |
|
134 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); |
|
135 result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); |
|
136 break; |
|
137 } |
|
138 case ELangTaiwanChinese: |
|
139 case ELangHongKongChinese: |
|
140 case ELangPrcChinese: |
|
141 case ELangJapanese: |
|
142 case ELangKorean: |
|
143 { |
|
144 ::analysis::CjkNGramTokenizer hl_analyzer(&strreader,1); |
|
145 lucene::analysis::TokenStream * ts1 = &hl_analyzer; |
|
146 result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); |
|
147 break; |
|
148 } |
|
149 case ELangNone: |
|
150 default: |
|
151 { |
|
152 CL_NS(analysis)::TokenStream* tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardTokenizer(&strreader); |
|
153 tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardFilter(tokenstream,true); |
|
154 tokenstream = _CLNEW CL_NS(analysis)::LowerCaseFilter(tokenstream,true); |
|
155 result = highlighter.getBestFragments(tokenstream, fieldtxt, 2, L"..."); |
|
156 } |
|
157 } |
|
158 |
|
159 if (result != NULL && *((int*)result) != 0x00) |
|
160 { |
|
161 document->removeField( LCPIX_HL_EXCERPT_FIELD ); |
|
162 document->add(*_CLNEW Field(LCPIX_HL_EXCERPT_FIELD, |
|
163 result, lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); |
|
164 result = NULL; |
|
165 } |
|
166 } |
|
167 |
|
168 const TCHAR* fieldtxt2 = document->get(LCPIX_EXCERPT_FIELD); |
|
169 |
|
170 if(fieldtxt2 ) |
|
171 { |
|
172 StringReader strreader2(fieldtxt2); |
|
173 switch(lang) |
|
174 { |
|
175 case ELangEnglish: |
|
176 case ELangCanadianEnglish: |
|
177 case ELangInternationalEnglish: |
|
178 case ELangSouthAfricanEnglish: |
|
179 { |
|
180 CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; |
|
181 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); |
|
182 result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); |
|
183 break; |
|
184 } |
|
185 case ELangFrench: |
|
186 case ELangSwissFrench: |
|
187 case ELangBelgianFrench: |
|
188 case ELangInternationalFrench: |
|
189 case ELangCanadianFrench: |
|
190 { |
|
191 ::analysis::FrenchAnalyzer hl_analyzer; |
|
192 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); |
|
193 result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); |
|
194 break; |
|
195 } |
|
196 case ELangHebrew: |
|
197 { |
|
198 ::analysis::HebrewAnalyzer hl_analyzer; |
|
199 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); |
|
200 result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); |
|
201 break; |
|
202 } |
|
203 case ELangTaiwanChinese: |
|
204 case ELangHongKongChinese: |
|
205 case ELangPrcChinese: |
|
206 case ELangJapanese: |
|
207 case ELangKorean: |
|
208 { |
|
209 ::analysis::CjkNGramTokenizer hl_analyzer(&strreader2,1); |
|
210 lucene::analysis::TokenStream * ts1 = &hl_analyzer; |
|
211 result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); |
|
212 break; |
|
213 } |
|
214 case ELangNone: |
|
215 default: |
|
216 { |
|
217 CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; |
|
218 lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); |
|
219 result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); |
|
220 } |
|
221 } |
|
222 if (result != NULL && *((int*)result) != 0x00) |
|
223 { |
|
224 document->removeField( LCPIX_EXCERPT_FIELD ); |
|
225 document->add(*_CLNEW Field(LCPIX_EXCERPT_FIELD, |
|
226 result, lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); |
|
227 } |
|
228 } |
|
229 #endif |
|
230 |
|
231 } |
|
232 |
85 Document& Hits::doc(const int32_t n){ |
233 Document& Hits::doc(const int32_t n){ |
86 HitDoc* hitDoc = getHitDoc(n); |
234 HitDoc* hitDoc = getHitDoc(n); |
87 |
235 |
88 // Update LRU cache of documents |
236 // Update LRU cache of documents |
89 remove(hitDoc); // remove from list, if there |
237 remove(hitDoc); // remove from list, if there |
98 |
246 |
99 if (hitDoc->doc == NULL){ |
247 if (hitDoc->doc == NULL){ |
100 hitDoc->doc = _CLNEW Document; |
248 hitDoc->doc = _CLNEW Document; |
101 searcher->doc(hitDoc->id, hitDoc->doc); // cache miss: read document |
249 searcher->doc(hitDoc->id, hitDoc->doc); // cache miss: read document |
102 //#ifdef USE_HIGHLIGHTER |
250 //#ifdef USE_HIGHLIGHTER |
103 |
|
104 CL_NS(document)::Document* document = hitDoc->doc; |
251 CL_NS(document)::Document* document = hitDoc->doc; |
105 |
252 getHighlightedText(document); |
106 TCHAR* result = NULL; |
253 //#endif |
107 Query* rwquery[2]; |
254 |
108 searcher->getrewritten(hitDoc->id, query, rwquery); |
|
109 |
|
110 const TCHAR* firstlnHLtxt = document->get(LCPIX_HL_EXCERPT_FIELD); |
|
111 |
|
112 if(firstlnHLtxt && rwquery[1]) |
|
113 { |
|
114 CL_NS2(search,highlight)::QueryScorer hl_scorer(rwquery[1]); |
|
115 |
|
116 CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); |
|
117 |
|
118 highlighter.setTextFragmenter(&hl_frag); |
|
119 |
|
120 wstring hlText; |
|
121 |
|
122 StringReader strreader(firstlnHLtxt); |
|
123 |
|
124 lucene::analysis::TokenStream * tokenStream = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); |
|
125 |
|
126 result = highlighter.getBestFragments(tokenStream, firstlnHLtxt, 2,L"..."); |
|
127 |
|
128 if (result != NULL && *((int*)result) != 0x00) |
|
129 { |
|
130 hlText.append(result); |
|
131 |
|
132 document->removeField( LCPIX_HL_EXCERPT_FIELD ); |
|
133 |
|
134 document->add(*_CLNEW Field(LCPIX_HL_EXCERPT_FIELD, |
|
135 hlText.c_str(), lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); |
|
136 } |
|
137 |
|
138 } |
|
139 |
|
140 const TCHAR* text = document->get(LCPIX_EXCERPT_FIELD); |
|
141 |
|
142 if(text && rwquery[1]) |
|
143 { |
|
144 CL_NS2(search,highlight)::QueryScorer hl_scorer(rwquery[1]); |
|
145 |
|
146 CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); |
|
147 |
|
148 highlighter.setTextFragmenter(&hl_frag); |
|
149 |
|
150 wstring hlText; |
|
151 |
|
152 StringReader strreader(text); |
|
153 |
|
154 lucene::analysis::TokenStream * tokenStream = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader); |
|
155 |
|
156 result = highlighter.getBestFragments(tokenStream, text, 2,L"..."); |
|
157 |
|
158 if (result != NULL && *((int*)result) != 0x00) |
|
159 { |
|
160 hlText.append(result); |
|
161 |
|
162 document->removeField( LCPIX_EXCERPT_FIELD ); |
|
163 |
|
164 document->add(*_CLNEW Field(LCPIX_EXCERPT_FIELD, |
|
165 hlText.c_str(), lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); |
|
166 } |
|
167 } |
|
168 //#endif |
|
169 } |
255 } |
170 |
256 |
171 return *hitDoc->doc; |
257 return *hitDoc->doc; |
172 } |
258 } |
173 |
259 |