searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.cpp
changeset 7 a5fbfefd615f
child 21 2c484ac32ef0
equal deleted inserted replaced
3:ae3f1779f6da 7:a5fbfefd615f
       
     1 #include "clucene/stdheader.h"
       
     2 #include "queryscorer.h"
       
     3 
       
     4 CL_NS_DEF2(search,highlight)
       
     5 CL_NS_USE(index)
       
     6 CL_NS_USE(analysis)
       
     7 
       
     8 	QueryScorer::QueryScorer(const Query * query):
       
     9         _uniqueTermsInFragment(true),
       
    10 		_termsToFind(false,true)
       
    11 		
       
    12 	 {
       
    13 		 WeightedTerm** _weighted_terms = QueryTermExtractor::getTerms(query);
       
    14 		 initialize(_weighted_terms);
       
    15 		 _CLDELETE_ARRAY(_weighted_terms);
       
    16 	 }
       
    17 	 QueryScorer::~QueryScorer()
       
    18 	 {
       
    19 	 }
       
    20 
       
    21 /*	 QueryScorer(Query* query, CL_NS(index)::IndexReader* reader, const TCHAR* fieldName)
       
    22 	 {
       
    23 		 WeightedTerm** _weighted_terms = QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName);
       
    24 		 initialize(_weighted_terms);
       
    25 	 }*/
       
    26 
       
    27 
       
    28 	QueryScorer::QueryScorer(WeightedTerm** weightedTerms)
       
    29 	{
       
    30 		 initialize(weightedTerms);
       
    31 	}
       
    32 	
       
    33 	void QueryScorer::initialize(WeightedTerm** weightedTerms)
       
    34 	{
       
    35 		_currentTextFragment = NULL;
       
    36 		_totalScore = 0;
       
    37 		_maxTermWeight = 0;
       
    38 
       
    39 		// Copy external weighted terms
       
    40 		 int i=0;
       
    41 		 while ( weightedTerms[i] != NULL ){
       
    42 			const WeightedTerm* existingTerm=_termsToFind.get(weightedTerms[i]->getTerm());
       
    43 			if( (existingTerm==NULL) ||(existingTerm->getWeight()<weightedTerms[i]->getWeight()) )
       
    44   	        {
       
    45   				//if a term is defined more than once, always use the highest scoring weight
       
    46 				WeightedTerm* term = weightedTerms[i];
       
    47 				_termsToFind.put(term->getTerm(), term);
       
    48 
       
    49 				_maxTermWeight=max(_maxTermWeight,weightedTerms[i]->getWeight());
       
    50   	        }else
       
    51 				_CLDELETE(weightedTerms[i]);
       
    52 
       
    53 			i++;
       
    54 		 }
       
    55 	}
       
    56 
       
    57 	void QueryScorer::startFragment(TextFragment * newFragment)
       
    58 	{
       
    59 		_uniqueTermsInFragment.clear();
       
    60 		_currentTextFragment=newFragment;
       
    61 		_totalScore=0;
       
    62 		
       
    63 	}
       
    64 	
       
    65 	float_t QueryScorer::getTokenScore(Token * token)
       
    66 	{
       
    67 		const TCHAR* termText=token->termText();
       
    68 		
       
    69 		const WeightedTerm* queryTerm = _termsToFind.get(termText);
       
    70 		if(queryTerm==NULL)
       
    71 		{
       
    72 			//not a query term - return
       
    73 			return 0;
       
    74 		}
       
    75 		//found a query term - is it unique in this doc?
       
    76 		if(_uniqueTermsInFragment.find(termText)==_uniqueTermsInFragment.end())
       
    77 		{
       
    78 			_totalScore+=queryTerm->getWeight();
       
    79 			TCHAR* owned_term = stringDuplicate(termText);
       
    80 			_uniqueTermsInFragment.insert(owned_term);
       
    81 		}
       
    82 		return queryTerm->getWeight();
       
    83 	}
       
    84 	
       
    85 	/**
       
    86   	*
       
    87   	* @return The highest weighted term (useful for passing to GradientFormatter to set
       
    88   	* top end of coloring scale.
       
    89   	*/
       
    90 	float_t QueryScorer::getMaxTermWeight()
       
    91 	{
       
    92   		return _maxTermWeight;
       
    93 	}
       
    94 
       
    95 
       
    96 	float_t QueryScorer::getFragmentScore(){
       
    97 		return _totalScore;
       
    98 	}
       
    99 
       
   100 CL_NS_END2