searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.h
changeset 7 a5fbfefd615f
child 21 2c484ac32ef0
equal deleted inserted replaced
3:ae3f1779f6da 7:a5fbfefd615f
       
     1 /**
       
     2  * Copyright 2002-2004 The Apache Software Foundation
       
     3  *
       
     4  * Licensed under the Apache License, Version 2.0 (the "License");
       
     5  * you may not use this file except in compliance with the License.
       
     6  * You may obtain a copy of the License at
       
     7  *
       
     8  *     http://www.apache.org/licenses/LICENSE-2.0
       
     9  *
       
    10  * Unless required by applicable law or agreed to in writing, software
       
    11  * distributed under the License is distributed on an "AS IS" BASIS,
       
    12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    13  * See the License for the specific language governing permissions and
       
    14  * limitations under the License.
       
    15  */
       
    16 
       
    17 #ifndef _lucene_search_highlighter_queryscorer_
       
    18 #define _lucene_search_highlighter_queryscorer_
       
    19 
       
    20 #if defined(_LUCENE_PRAGMA_ONCE)
       
    21 # pragma once
       
    22 #endif
       
    23 
       
    24 #include <math.h> // float_t
       
    25 #include "CLucene/analysis/AnalysisHeader.h"
       
    26 #include "CLucene/search/SearchHeader.h"
       
    27 #include "CLucene/index/IndexReader.h"
       
    28 #include "CLucene/highlighter/WeightedTerm.h"
       
    29 #include "CLucene/highlighter/HighlightScorer.h"
       
    30 #include "CLucene/highlighter/QueryTermExtractor.h"
       
    31 #include "CLucene/highlighter/TextFragment.h"
       
    32 
       
    33 CL_NS_DEF2(search,highlight)
       
    34 
       
    35 /**
       
    36  * {@link HighlightScorer} implementation which scores text fragments by the number of unique query terms found.
       
    37  * This class uses the {@link QueryTermExtractor} class to determine the query terms and 
       
    38  * their boosts to be used. 
       
    39  */
       
    40 //TODO: provide option to boost score of fragments near beginning of document 
       
    41 // based on fragment.getFragNum()
       
    42 class QueryScorer : public HighlightScorer
       
    43 {
       
    44 private:
       
    45 	TextFragment * _currentTextFragment; // fragment currently being scored; presumably assigned by startFragment() -- confirm in the .cpp
       
    46 	CL_NS(util)::CLHashSet<const TCHAR*,
       
    47 		CL_NS(util)::Compare::TChar,
       
    48 		CL_NS(util)::Deletor::tcArray> _uniqueTermsInFragment; // set of term strings; NOTE(review): Deletor::tcArray suggests the set frees its keys -- confirm
       
    49 	double _totalScore; // presumably the running score reported by getFragmentScore() -- confirm in the .cpp
       
    50 	double _maxTermWeight; // stored as double although getMaxTermWeight() returns float_t
       
    51 	CL_NS(util)::LHashMap<const TCHAR*, const WeightedTerm *,
       
    52 		CL_NS(util)::Compare::TChar,
       
    53 		CL_NS(util)::Equals::TChar,
       
    54 		CL_NS(util)::Deletor::tcArray,
       
    55 		CL_NS(util)::Deletor::Object<const WeightedTerm> > _termsToFind; // term text -> WeightedTerm; NOTE(review): the Deletor arguments suggest the map frees keys and values -- confirm
       
    56 
       
    57 public:
       
    58 	/**
       
    59 	* Constructs a scorer from a query.
       
    60 	* @param query a Lucene query (ideally rewritten using query.rewrite 
       
    61 	* before being passed to this class and the searcher)
       
    62 	*/
       
    63 	QueryScorer(const Query * query);
       
    64 
       
    65 	/**
       
    66 	* Constructs a scorer from a query, with an index reader and field name for IDF calculations.
       
    67 	* @param query a Lucene query (ideally rewritten using query.rewrite 
       
    68 	* before being passed to this class and the searcher)
       
    69 	* @param reader used to compute IDF which can be used to a) score selected fragments better 
       
    70 	* b) use graded highlights eg set font color intensity
       
    71 	* @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
       
    72 	*/
       
    73 	QueryScorer(const Query* query, CL_NS(index)::IndexReader* reader, const TCHAR* fieldName);
       
    74 
       
    75 	QueryScorer(WeightedTerm** weightedTerms); // NOTE(review): no length parameter, so the array is presumably NULL-terminated; ownership is not visible here -- confirm
       
    76 
       
    77 	~QueryScorer();
       
    78 
       
    79 	/* C++ counterpart of the Java method referenced below.
       
    80 	 * @see org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
       
    81 	 */
       
    82 	void startFragment(TextFragment* newFragment);
       
    83 	
       
    84 	/* C++ counterpart of the Java method referenced below (named scoreToken there).
       
    85 	 * @see org.apache.lucene.search.highlight.FragmentScorer#scoreToken(org.apache.lucene.analysis.Token)
       
    86 	 */
       
    87 	float_t getTokenScore(CL_NS(analysis)::Token * token);
       
    88 	
       
    89 	/* C++ counterpart of the Java method referenced below (named endFragment there).
       
    90 	 * @see org.apache.lucene.search.highlight.FragmentScorer#endFragment(org.apache.lucene.search.highlight.TextFragment)
       
    91 	 */
       
    92 	float_t getFragmentScore();
       
    93 
       
    94 	/* C++ counterpart of the Java method referenced below.
       
    95 	 * @see org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
       
    96 	 */
       
    97 	void allFragmentsProcessed();
       
    98 
       
    99 	/**
       
   100 	 * 
       
   101 	 * @return The weight of the highest weighted term (useful for passing to GradientFormatter to set
       
   102 	 * the top end of the coloring scale).
       
   103 	 */
       
   104 	float_t getMaxTermWeight();
       
   105 
       
   106 private:
       
   107 	void initialize(WeightedTerm** weightedTerms); // setup helper taking the same array type as the WeightedTerm** constructor; presumably shared by the constructors -- confirm in the .cpp
       
   108 
       
   109 };
       
   110 
       
   111 CL_NS_END2
       
   112 
       
   113 #endif
       
   114