|
1 #include "clucene/stdheader.h" |
|
2 #include "queryscorer.h" |
|
3 |
|
4 CL_NS_DEF2(search,highlight) |
|
5 CL_NS_USE(index) |
|
6 CL_NS_USE(analysis) |
|
7 |
|
8 QueryScorer::QueryScorer(const Query * query): |
|
9 _uniqueTermsInFragment(true), |
|
10 _termsToFind(false,true) |
|
11 |
|
12 { |
|
13 WeightedTerm** _weighted_terms = QueryTermExtractor::getTerms(query); |
|
14 initialize(_weighted_terms); |
|
15 _CLDELETE_ARRAY(_weighted_terms); |
|
16 } |
|
17 QueryScorer::~QueryScorer() |
|
18 { |
|
19 } |
|
20 |
|
21 /* QueryScorer(Query* query, CL_NS(index)::IndexReader* reader, const TCHAR* fieldName) |
|
22 { |
|
23 WeightedTerm** _weighted_terms = QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName); |
|
24 initialize(_weighted_terms); |
|
25 }*/ |
|
26 |
|
27 |
|
28 QueryScorer::QueryScorer(WeightedTerm** weightedTerms) |
|
29 { |
|
30 initialize(weightedTerms); |
|
31 } |
|
32 |
|
33 void QueryScorer::initialize(WeightedTerm** weightedTerms) |
|
34 { |
|
35 _currentTextFragment = NULL; |
|
36 _totalScore = 0; |
|
37 _maxTermWeight = 0; |
|
38 |
|
39 // Copy external weighted terms |
|
40 int i=0; |
|
41 while ( weightedTerms[i] != NULL ){ |
|
42 const WeightedTerm* existingTerm=_termsToFind.get(weightedTerms[i]->getTerm()); |
|
43 if( (existingTerm==NULL) ||(existingTerm->getWeight()<weightedTerms[i]->getWeight()) ) |
|
44 { |
|
45 //if a term is defined more than once, always use the highest scoring weight |
|
46 WeightedTerm* term = weightedTerms[i]; |
|
47 _termsToFind.put(term->getTerm(), term); |
|
48 |
|
49 _maxTermWeight=max(_maxTermWeight,weightedTerms[i]->getWeight()); |
|
50 }else |
|
51 _CLDELETE(weightedTerms[i]); |
|
52 |
|
53 i++; |
|
54 } |
|
55 } |
|
56 |
|
57 void QueryScorer::startFragment(TextFragment * newFragment) |
|
58 { |
|
59 _uniqueTermsInFragment.clear(); |
|
60 _currentTextFragment=newFragment; |
|
61 _totalScore=0; |
|
62 |
|
63 } |
|
64 |
|
65 float_t QueryScorer::getTokenScore(Token * token) |
|
66 { |
|
67 const TCHAR* termText=token->termText(); |
|
68 |
|
69 const WeightedTerm* queryTerm = _termsToFind.get(termText); |
|
70 if(queryTerm==NULL) |
|
71 { |
|
72 //not a query term - return |
|
73 return 0; |
|
74 } |
|
75 //found a query term - is it unique in this doc? |
|
76 if(_uniqueTermsInFragment.find(termText)==_uniqueTermsInFragment.end()) |
|
77 { |
|
78 _totalScore+=queryTerm->getWeight(); |
|
79 TCHAR* owned_term = stringDuplicate(termText); |
|
80 _uniqueTermsInFragment.insert(owned_term); |
|
81 } |
|
82 return queryTerm->getWeight(); |
|
83 } |
|
84 |
|
85 /** |
|
86 * |
|
87 * @return The highest weighted term (useful for passing to GradientFormatter to set |
|
88 * top end of coloring scale. |
|
89 */ |
|
90 float_t QueryScorer::getMaxTermWeight() |
|
91 { |
|
92 return _maxTermWeight; |
|
93 } |
|
94 |
|
95 |
|
96 float_t QueryScorer::getFragmentScore(){ |
|
97 return _totalScore; |
|
98 } |
|
99 |
|
100 CL_NS_END2 |