|
1 /** |
|
2 * Copyright 2002-2004 The Apache Software Foundation |
|
3 * |
|
4 * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 * you may not use this file except in compliance with the License. |
|
6 * You may obtain a copy of the License at |
|
7 * |
|
8 * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 * |
|
10 * Unless required by applicable law or agreed to in writing, software |
|
11 * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 * See the License for the specific language governing permissions and |
|
14 * limitations under the License. |
|
15 */ |
|
16 |
|
17 #ifndef _lucene_search_highlighter_queryscorer_ |
|
18 #define _lucene_search_highlighter_queryscorer_ |
|
19 |
|
20 #if defined(_LUCENE_PRAGMA_ONCE) |
|
21 # pragma once |
|
22 #endif |
|
23 |
|
24 #include <math.h> // float_t |
|
25 #include "CLucene/analysis/AnalysisHeader.h" |
|
26 #include "CLucene/search/SearchHeader.h" |
|
27 #include "CLucene/index/IndexReader.h" |
|
28 #include "CLucene/highlighter/WeightedTerm.h" |
|
29 #include "CLucene/highlighter/HighlightScorer.h" |
|
30 #include "CLucene/highlighter/QueryTermExtractor.h" |
|
31 #include "CLucene/highlighter/TextFragment.h" |
|
32 |
|
33 CL_NS_DEF2(search,highlight) |
|
34 |
|
35 /** |
|
36 * {@link Scorer} implementation which scores text fragments by the number of unique query terms found. |
|
37 * This class uses the {@link QueryTermExtractor} class to process determine the query terms and |
|
38 * their boosts to be used. |
|
39 */ |
|
40 //TODO: provide option to boost score of fragments near beginning of document |
|
41 // based on fragment.getFragNum() |
|
42 class QueryScorer : public HighlightScorer |
|
43 { |
|
44 private: |
|
45 TextFragment * _currentTextFragment; |
|
46 CL_NS(util)::CLHashSet<const TCHAR*, |
|
47 CL_NS(util)::Compare::TChar, |
|
48 CL_NS(util)::Deletor::tcArray> _uniqueTermsInFragment; |
|
49 double _totalScore; |
|
50 double _maxTermWeight; |
|
51 CL_NS(util)::LHashMap<const TCHAR*, const WeightedTerm *, |
|
52 CL_NS(util)::Compare::TChar, |
|
53 CL_NS(util)::Equals::TChar, |
|
54 CL_NS(util)::Deletor::tcArray, |
|
55 CL_NS(util)::Deletor::Object<const WeightedTerm> > _termsToFind; |
|
56 |
|
57 public: |
|
58 /** |
|
59 * |
|
60 * @param query a Lucene query (ideally rewritten using query.rewrite |
|
61 * before being passed to this class and the searcher) |
|
62 */ |
|
63 QueryScorer(const Query * query); |
|
64 |
|
65 /** |
|
66 * |
|
67 * @param query a Lucene query (ideally rewritten using query.rewrite |
|
68 * before being passed to this class and the searcher) |
|
69 * @param reader used to compute IDF which can be used to a) score selected fragments better |
|
70 * b) use graded highlights eg set font color intensity |
|
71 * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based |
|
72 */ |
|
73 QueryScorer(const Query* query, CL_NS(index)::IndexReader* reader, const TCHAR* fieldName); |
|
74 |
|
75 QueryScorer(WeightedTerm** weightedTerms); |
|
76 |
|
77 ~QueryScorer(); |
|
78 |
|
79 /* (non-Javadoc) |
|
80 * @see org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache.lucene.search.highlight.TextFragment) |
|
81 */ |
|
82 void startFragment(TextFragment* newFragment); |
|
83 |
|
84 /* (non-Javadoc) |
|
85 * @see org.apache.lucene.search.highlight.FragmentScorer#scoreToken(org.apache.lucene.analysis.Token) |
|
86 */ |
|
87 float_t getTokenScore(CL_NS(analysis)::Token * token); |
|
88 |
|
89 /* (non-Javadoc) |
|
90 * @see org.apache.lucene.search.highlight.FragmentScorer#endFragment(org.apache.lucene.search.highlight.TextFragment) |
|
91 */ |
|
92 float_t getFragmentScore(); |
|
93 |
|
94 /* (non-Javadoc) |
|
95 * @see org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed() |
|
96 */ |
|
97 void allFragmentsProcessed(); |
|
98 |
|
99 /** |
|
100 * |
|
101 * @return The highest weighted term (useful for passing to GradientFormatter to set |
|
102 * top end of coloring scale. |
|
103 */ |
|
104 float_t getMaxTermWeight(); |
|
105 |
|
106 private: |
|
107 void initialize(WeightedTerm** weightedTerms); |
|
108 |
|
109 }; |
|
110 |
|
111 CL_NS_END2 |
|
112 |
|
113 #endif |
|
114 |