searchengine/oss/cl/clucene/src/clucene/search/termscorer.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 03 May 2010 13:33:22 +0300
changeset 1 6f2c1c46032b
parent 0 671dee74050a
permissions -rw-r--r--
Revision: 201015 Kit: 201018

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "clucene/stdheader.h"
#include "termscorer.h"

#include "clucene/index/terms.h"
#include "termquery.h"

CL_NS_USE(index)
CL_NS_DEF(search)

	//TermScorer takes TermDocs and delets it when TermScorer is cleaned up
	TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td, 
			Similarity* similarity,uint8_t* _norms):
	    Scorer(similarity),
	    termDocs(td),
	    norms(_norms),
	    weight(w),
	    weightValue(w->getValue()),
	    _doc(0),
	    pointer(0),
	    pointerMax(0)
	{
		memset(docs,0,32*sizeof(int32_t));
		memset(freqs,0,32*sizeof(int32_t));

		for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++)
			scoreCache[i] = getSimilarity()->tf(i) * weightValue;
	}

	TermScorer::~TermScorer(){
		_CLDELETE(termDocs);
	}
  bool TermScorer::next(){
    pointer++;
    if (pointer >= pointerMax) {
      pointerMax = termDocs->read(docs, freqs, 32);    // refill buffer
      if (pointerMax != 0) {
        pointer = 0;
      } else {
        termDocs->close();			  // close stream
        _doc = LUCENE_INT32_MAX_SHOULDBE;		  // set to sentinel value
        return false;
      }
    } 
    _doc = docs[pointer];
    return true;
  }

  bool TermScorer::skipTo(int32_t target) {
    // first scan in cache
    for (pointer++; pointer < pointerMax; pointer++) {
      if (docs[pointer] >= target) {
        _doc = docs[pointer];
        return true;
      }
    }

    // not found in cache, seek underlying stream
    bool result = termDocs->skipTo(target);
      if (result) {
         pointerMax = 1;
         pointer = 0;
         docs[pointer] = _doc = termDocs->doc();
         freqs[pointer] = termDocs->freq();
      } else {
         _doc = LUCENE_INT32_MAX_SHOULDBE;
      }
      return result;
  }

  void TermScorer::explain(int32_t doc, Explanation* tfExplanation) {
    TermQuery* query = (TermQuery*)weight->getQuery();
    int32_t tf = 0;
    while (pointer < pointerMax) {
      if (docs[pointer] == doc)
        tf = freqs[pointer];
      pointer++;
    }
    if (tf == 0) {
      while (termDocs->next()) {
        if (termDocs->doc() == doc) {
          tf = termDocs->freq();
        }
      }
    }
    termDocs->close();
    tfExplanation->setValue(getSimilarity()->tf(tf));

    TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1];
	TCHAR* termToString = query->getTerm(false)->toString();
	_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf);
    _CLDELETE_CARRAY(termToString);
    tfExplanation->setDescription(buf);
  }

  TCHAR* TermScorer::toString() { 
     TCHAR* wb = weight->toString();
     int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer("  ")") + 1
     TCHAR* ret = _CL_NEWARRAY(TCHAR,rl);
	 _sntprintf(ret,rl,_T("scorer(%s)"), wb);
     _CLDELETE_ARRAY(wb);
     return ret;
  }

  float_t TermScorer::score(){
	 int32_t f = freqs[pointer];
     float_t raw =                                   // compute tf(f)*weight
      f < LUCENE_SCORE_CACHE_SIZE			  // check cache
      ? scoreCache[f]                             // cache hit
      : getSimilarity()->tf(f) * weightValue;        // cache miss

     return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field
  }
	
CL_NS_END