searchengine/oss/cl/clucene/src/clucene/search/searchheader.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 17 Sep 2010 08:35:54 +0300
changeset 21 2c484ac32ef0
parent 18 3e1f76dd2722
permissions -rw-r--r--
Revision: 201035 Kit: 201037

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_search_SearchHeader_
#define _lucene_search_SearchHeader_

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "clucene/index/indexreader.h"
#include "clucene/index/term.h"
#include "clucene/search/filter.h"
#include "clucene/document/document.h"
#include "clucene/search/sort.h"
#include "clucene/util/voidlist.h"
#include "clucene/search/explanation.h"
#include "clucene/search/similarity.h"

//#ifdef USE_HIGHLIGHTER
#include "CLucene/highlighter/SimpleFragmenter.h"
#include "CLucene/highlighter/SimpleHTMLFormatter.h"
#include "CLucene/analysis/standard/StandardAnalyzer.h"
#define LCPIX_HL_EXCERPT_FIELD  L"_hlexcerpt"
#define LCPIX_EXCERPT_FIELD   L"_excerpt"
#if defined (__SYMBIAN32__)
#include <e32std.h>
#endif
//#endif

CL_NS_DEF(search)

	// Forward declarations of the search types that this header names by
	// pointer before (or without) seeing their full definitions.
	class Scorer;
	class Query;
	class Hits;
	// Hits stores and accepts a Searcher* ahead of the Searcher definition
	// further down; declare it here so this header does not depend on some
	// other include happening to forward-declare it.
	class Searcher;
	class Sort;
	class FieldDoc;
	class TopFieldDocs;
   
   /** Expert: One hit as returned by the low-level search implementations:
	* a document number together with the score it received.
	* TopDocs exposes its hits as an array of these.
	* @see TopDocs */
	struct ScoreDoc {
		/** Expert: A hit document's number.
		* @see Searchable#doc(int32_t, Document*)
		*/
		int32_t doc;

		/** Expert: The score of this document for the query. */
		float_t score;
	};

	/** Expert: Returned by low-level search implementations.
	* Carries the total hit count plus an array of the top hits.
	* @see Searchable#_search(Query*,Filter*,int32_t) */
	class TopDocs:LUCENE_BASE {
	public:
		/** Expert: The total number of hits for the query.
		 * @see Hits#length()
		*/
		int32_t totalHits;

		/** Expert: The top hits for the query. */
		ScoreDoc* scoreDocs;
		/** Presumably the number of entries in scoreDocs; the constructor
		 * that sets it is defined outside this header - TODO confirm. */
		int32_t scoreDocsLength;

		/** Expert: Constructs a TopDocs. TopDocs takes ownership of the ScoreDoc array.
		 * @param th presumably stored as totalHits - confirm in the definition
		 * @param sds the hit array; owned by this object from now on
		 * @param scoreDocsLength presumably the number of entries in sds
		 *
		 * NOTE(review): the class owns a raw array but declares no copy
		 * constructor or assignment operator; if the destructor frees
		 * scoreDocs (as the ownership note implies), copying a TopDocs would
		 * presumably release the array twice - confirm and avoid copies.
		 */
		TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength);
		~TopDocs();
	};

    // Lower-level search API: an abstract callback that is handed each hit.
    // Subclasses implement collect().
    // @see Searcher#_search(Query*,HitCollector*)
	class HitCollector: LUCENE_BASE {
    public:
      /** Called once for every non-zero scoring document, with the document number
      * and its score.
      *
      * <P>If, for example, an application wished to collect all of the hits for a
      * query in a BitSet, then it might (the example is written in Java syntax,
      * not in C++):<pre>
      *   Searcher searcher = new IndexSearcher(indexReader);
      *   final BitSet bits = new BitSet(indexReader.maxDoc());
      *   searcher.search(query, new HitCollector() {
      *       public void collect(int32_t doc, float score) {
      *         bits.set(doc);
      *       }
      *     });
      * </pre>
      *
      * <p>Note: This is called in an inner search loop.  For good search
      * performance, implementations of this method should not call
      * {@link Searcher#doc(int32_t)} or
      * {@link IndexReader#document(int32_t)} on every
      * document number encountered.  Doing so can slow searches by an order
      * of magnitude or more.
      * <p>Note: The <code>score</code> passed to this method is a raw score.
      * In other words, the score will not necessarily be a float whose value is
      * between 0 and 1.
      *
      * @param doc the document number of the hit
      * @param score the raw score of the hit
      */
      virtual void collect(const int32_t doc, const float_t score) = 0;
		/** Virtual so that collectors can be destroyed through a HitCollector pointer. */
		virtual ~HitCollector(){}
    };

   /** Expert: Computes query weights and builds query scorers.
   *
   * <p>Intended sequence of use:
   * <ol>
   * <li>a query constructs its Weight for a given Searcher
   *     ({@link Query#_createWeight(Searcher)});
   * <li>{@link #sumOfSquaredWeights()} is called on the top-level query so that
   *     the query normalization factor can be computed
   *     ({@link Similarity#queryNorm(float_t)});
   * <li>that factor is handed to {@link #normalize(float_t)};
   * <li>weighting is now complete and {@link #scorer(IndexReader)} may be
   *     called to obtain a scorer.
   * </ol>
   */
	class Weight: LUCENE_BASE {
    public:
		/** Nothing to release at this level; virtual for deletion via Weight*. */
		virtual ~Weight() {}

      /** @return the query that this weight concerns. */
      virtual Query* getQuery() = 0;

      /** @return the weight for this query. */
      virtual float_t getValue() = 0;

      /** @return the sum of squared weights of the contained query clauses. */
      virtual float_t sumOfSquaredWeights() = 0;

      /** Assigns the query normalization factor to this weight.
      * @param norm the normalization factor
      */
      virtual void normalize(float_t norm) = 0;

      /** Constructs a scorer for this weight over the given reader. */
      virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0;

      /** Writes an explanation of the score computation for the named
      * document into <code>ret</code>.
      */
      virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0;

      /** Default textual form: the literal name "Weight".
      * The result comes from STRDUP_TtoT, so it is presumably a fresh copy
      * that the caller has to release - confirm against the macro.
      */
      virtual TCHAR* toString() {
         TCHAR* label = STRDUP_TtoT(_T("Weight"));
         return label;
      }
   };

   /** One entry of the hit cache kept by Hits: the score and document id of
    * a hit, an optional pointer to its Document, and the links that place
    * the entry in the doubly-linked cache list.
    */
   class HitDoc:LUCENE_BASE {
    public:
		float_t score;	// score of this hit
		int32_t id;	// document id of this hit
		// Document for this hit. Presumably loaded lazily and deleted by
		// ~HitDoc - the definitions are outside this header, TODO confirm.
		CL_NS(document)::Document* doc;
		
		HitDoc* next;					  // next entry in the doubly-linked cache
		HitDoc* prev;					  // previous entry in the doubly-linked cache
		
		/** @param s presumably the score, @param i presumably the document id
		 * (matching the member types) - confirm in the definition. */
		HitDoc(const float_t s, const int32_t i);
		~HitDoc();
    };



    // A ranked list of documents, used to hold search results.
    // The Searcher::search overloads in this header create instances of it.
   class Hits:LUCENE_BASE {
    private:
		// Search parameters. Presumably the values given to the constructor
		// (its definition is not in this header - TODO confirm).
		Query* query;
		Searcher* searcher;
		Filter* filter;
		const Sort* sort;

		size_t _length;				  // the total number of hits
		CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> > hitDocs;	  // cache of hits retrieved

		HitDoc* first;				  // head of LRU cache
		HitDoc* last;				  // tail of LRU cache
		int32_t numDocs;			  // number cached
		int32_t maxDocs;			  // max to cache
		// Highlighter support. The USE_HIGHLIGHTER guard below is commented
		// out, so these members are always part of the class.
//#ifdef USE_HIGHLIGHTER		
		CL_NS2(search,highlight)::SimpleHTMLFormatter hl_formatter;
		
		CL_NS2(search,highlight)::SimpleFragmenter hl_frag;
#if defined (__SYMBIAN32__)        
		// Symbian builds only. Presumably the language used when preparing
		// highlighted text - confirm against the definition.
		TLanguage lang;
#endif		
//#endif		
    public:
		/** Creates the hit list for a search.
		* @param s the searcher to query
		* @param q the query
		* @param f optional filter; the Searcher::search overloads pass NULL for "none"
		* @param sort optional sort criteria; defaults to NULL
		*/
		Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL);
		~Hits();

		/** Returns the total number of hits available in this set.
		* NOTE(review): the return type is int32_t while the _length member
		* is size_t; the conversion happens in the definition.
		*/
		int32_t length() const;
	    
		/** Returns the stored fields of the n<sup>th</sup> document in this set.
		<p>Documents are cached, so that repeated requests for the same element may
		return the same Document object. 
		*
		* @memory Memory belongs to the hits object. Don't delete the return value.
		*/
		CL_NS(document)::Document& doc(const int32_t n);
	      
		/** Returns the id for the nth document in this set. */
		int32_t id (const int32_t n);
	    
		/** Returns the score for the nth document in this set. */
		float_t score(const int32_t n);
	      
	private:
		// Tries to add new documents to hitDocs.
		// Ensures that the hit numbered <code>_min</code> has been retrieved.
		void getMoreDocs(const size_t _min);
	    
		// Returns the cache entry for hit number n (definition not shown here).
		HitDoc* getHitDoc(const size_t n);
	    
		// Cache list maintenance: link an entry in at the head of the list.
		void addToFront(HitDoc* hitDoc);
	    
		// Cache list maintenance: unlink an entry from the list.
		void remove(const HitDoc* hitDoc);
		
		/* Highlighting helper that works on the given document.
		 * @ document the document to process
		 *
		 * NOTE(review): the earlier comment listed parameters (text, result,
		 * firstline) that this signature does not have. Presumably this
		 * produces the highlighted excerpt for the document using
		 * hl_formatter and hl_frag - confirm against the definition.
		 */			
		void getHighlightedText(CL_NS(document)::Document* document);

  };

   /** The interface for search implementations.
   *
   * <p>Implementations provide search over a single index, over multiple
   * indices, and over indices on remote servers.
   *
   * <p>Everything except the deprecated <code>doc(int32_t)</code> overload
   * is pure virtual here.
   */
   class Searchable: LUCENE_BASE {
   public:
   	virtual ~Searchable(){
	}

      /** Lower-level search API.
      *
      * <p>{@link HitCollector#collect(int32_t,float_t)} is called for every non-zero
      * scoring document.
      *
      * <p>Applications should only use this if they need <i>all</i> of the
      * matching documents.  The high-level search API ({@link
      * Searcher#search(Query*)}) is usually more efficient, as it skips
      * non-high-scoring hits.
      *
      * @param query to match documents
      * @param filter if non-null, a bitset used to eliminate some documents
      * @param results to receive hits
      */
      virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0;

      /** Frees resources associated with this Searcher.
      * Be careful not to call this method while you are still using objects
      * like {@link Hits}.
      */
      virtual void close() = 0;

      /** Expert: Returns the number of documents containing <code>term</code>.
      * Called by search code to compute term weights.
      * @see IndexReader#docFreq(Term).
      */
      virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0;

      /** Expert: Returns one greater than the largest possible document number.
      * Called by search code to compute term weights.
      * @see IndexReader#maxDoc().
      */
      virtual int32_t maxDoc() const = 0;

      /** Expert: Low-level search implementation.  Finds the top <code>n</code>
      * hits for <code>query</code>, applying <code>filter</code> if non-null.
      *
      * <p>Called by {@link Hits}.
      *
      * <p>Applications should usually call {@link Searcher#search(Query*)} or
      * {@link Searcher#search(Query*,Filter*)} instead.
      */
      virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0;

      /** Expert: Retrieves the stored fields of document <code>i</code> into
      * the caller-supplied document <code>d</code>.
      * Called by {@link HitCollector} implementations.
      * The bool result presumably reports whether the document could be
      * loaded - confirm with the implementations.
      * @see IndexReader#document(int32_t).
      */
      virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0;
      /** Deprecated overload that returns the document instead of filling one
      * in; the deprecation marker points to doc(i, document) as its replacement.
      */
      _CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i);

      /** Expert: called to re-write queries into primitive queries. */
      virtual Query* rewrite(Query* query) = 0;

      /** Fills <code>ret</code> with an Explanation that describes how
      * <code>doc</code> scored against <code>query</code>.
      *
      * <p>This is intended to be used in developing Similarity implementations,
      * and, for good performance, should not be displayed with every hit.
      * Computing an explanation is as expensive as executing the query over the
      * entire index.
      */
      virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0;

      /** Expert: Low-level search implementation with arbitrary sorting.  Finds
      * the top <code>n</code> hits for <code>query</code>, applying
      * <code>filter</code> if non-null, and sorting the hits by the criteria in
      * <code>sort</code>.
      *
      * <p>Applications should usually call {@link
      * Searcher#search(Query*,Filter*,const Sort*)} instead.
      */
	  	virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0;
	  	
   };



	/** An abstract base class for search implementations.
	* Implements some common utility methods.
	*/
	class Searcher:public Searchable {
	private:
		/** The Similarity implementation used by this searcher. */
		Similarity* similarity;

		public:
		Searcher(){
			similarity = Similarity::getDefault();
		}
		virtual ~Searcher(){
		}

		// Returns the documents matching <code>query</code>.
		Hits* search(Query* query) {
			return search(query, (Filter*)NULL );
		}

		// Returns the documents matching <code>query</code> and
		//	<code>filter</code>. 
		Hits* search(Query* query, Filter* filter) {
			return _CLNEW Hits(this, query, filter);
		}

		/** Returns documents matching <code>query</code> sorted by
		* <code>sort</code>.
		*/
		Hits* search(Query* query, const Sort* sort){
			return _CLNEW Hits(this, query, NULL, sort);
		}

		/** Returns documents matching <code>query</code> and <code>filter</code>,
			* sorted by <code>sort</code>.
			*/
		Hits* search(Query* query, Filter* filter, const Sort* sort){
			return _CLNEW Hits(this, query, filter, sort);
		}

		/** Lower-level search API.
		*
		* <p>{@link HitCollector#collect(int32_t	,float_t)} is called for every non-zero
		* scoring document.
		*
		* <p>Applications should only use this if they need <i>all</i> of the
		* matching documents.  The high-level search API ({@link
		* Searcher#search(Query*)}) is usually more efficient, as it skips
		* non-high-scoring hits.
		* <p>Note: The <code>score</code> passed to this method is a raw score.
		* In other words, the score will not necessarily be a float whose value is
		* between 0 and 1.
		*/
		void _search(Query* query, HitCollector* results) {
			Searchable::_search(query, NULL, results);
		}

		/** Expert: Set the Similarity implementation used by this Searcher.
		*
		* @see Similarity#setDefault(Similarity)
		*/
		void setSimilarity(Similarity* similarity) {
			this->similarity = similarity;
		}

		/** Expert: Return the Similarity implementation used by this Searcher.
		*
		* <p>This defaults to the current value of {@link Similarity#getDefault()}.
		*/
		Similarity* getSimilarity(){
			return this->similarity;
		}
	};

	/** The abstract base class for queries.
    <p>Instantiable subclasses are:
    <ul>
    <li> {@link TermQuery}
    <li> {@link MultiTermQuery}
    <li> {@link BooleanQuery}
    <li> {@link WildcardQuery}
    <li> {@link PhraseQuery}
    <li> {@link PrefixQuery}
    <li> {@link PhrasePrefixQuery}
    <li> {@link FuzzyQuery}
    <li> {@link RangeQuery}
    <li> {@link spans.SpanQuery}
    </ul>
    <p>A parser for queries is contained in:
    <ul>
    <li>{@link queryParser.QueryParser QueryParser}
    </ul>
    <p>Subclasses must implement clone(), getQueryName(), toString(field),
    equals() and hashCode(), which are pure virtual here.
	*/
	class Query :LUCENE_BASE {
	private:
		// query boost factor
		float_t boost;
	protected:
		/** Copy constructor, available to subclasses only (presumably in
		* support of their clone() implementations - confirm). */
		Query(const Query& clone);
	public:
		Query();
		virtual ~Query();

		/** Sets the boost for this query clause to <code>b</code>.  Documents
		* matching this clause will (in addition to the normal weightings) have
		* their score multiplied by <code>b</code>.
		*/
		void setBoost(float_t b);

		/** Gets the boost for this clause.  Documents matching
		* this clause will (in addition to the normal weightings) have their score
		* multiplied by <code>b</code>.   The boost is 1.0 by default.
		*/
		float_t getBoost() const;

        /** Expert: Constructs and initializes a Weight for a top-level query. */
        Weight* weight(Searcher* searcher);

        /** Expert: called to re-write queries into primitive queries. */
        virtual Query* rewrite(CL_NS(index)::IndexReader* reader);
        
        /** Expert: called when re-writing queries under MultiSearcher.
         *
         * <p>Only implemented by derived queries, with no
         * {@link #_createWeight(Searcher)} implementation.
         *
         * NOTE(review): <code>queries</code> carries no length; presumably
         * the array is NULL-terminated - confirm with the definition.
         */
         virtual Query* combine(Query** queries);

        /** Expert: merges the clauses of a set of BooleanQuery's into a single
         * BooleanQuery.
         *
         *<p>A utility for use by {@link #combine(Query**)} implementations.
         * The same open question about how the end of <code>queries</code>
         * is detected applies here - TODO confirm.
	     */
        static Query* mergeBooleanQueries(Query** queries);

        /** Expert: Returns the Similarity implementation to be used for this query.
        * By default the Searcher's Similarity implementation is returned.
        * NOTE(review): this method is not declared virtual in this header,
        * so subclasses can hide it but not override it.
        */
        Similarity* getSimilarity(Searcher* searcher);
        
        /** Returns a clone of this query. */
        virtual Query* clone() const = 0;
        /** Returns the name that identifies the concrete query class. */
        virtual const TCHAR* getQueryName() const = 0;
        /** Tests this query against a query class name; presumably compares
        * <code>other</code> with getQueryName() - confirm in the definition. */
        bool instanceOf(const TCHAR* other) const;
        
        /** Prints a query to a string, with <code>field</code> as the default field
        * for terms.  <p>The representation used is one that is readable by
        * {@link queryParser.QueryParser QueryParser}
        * (although, if the query was created by the parser, the printed
        * representation may not be exactly what was parsed).
        */
        virtual TCHAR* toString(const TCHAR* field) const = 0;
        
        /** Compares this query with <code>other</code> for equality. */
        virtual bool equals(Query* other) const = 0;
        /** Returns a hash code for this query. */
        virtual size_t hashCode() const = 0;
        
        /** Prints a query to a string. */
        TCHAR* toString() const;

		
		/** Expert: Constructs an appropriate Weight implementation for this query.
		*
		* <p>Only implemented by primitive queries, which re-write to themselves.
		* <i>This is an Internal function</i>
		*/
		virtual Weight* _createWeight(Searcher* searcher);
		
	};


CL_NS_END
#endif