searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 17 Sep 2010 08:35:54 +0300
changeset 21 2c484ac32ef0
parent 7 a5fbfefd615f
permissions -rw-r--r--
Revision: 201035 Kit: 201037

/**
 * Copyright 2002-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef _lucene_search_highlight_querytermextractor_
#define _lucene_search_highlight_querytermextractor_

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "CLucene/util/VoidList.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/IndexReader.h"
#include "CLucene/search/BooleanQuery.h"
#include "CLucene/search/PhraseQuery.h"
#include "CLucene/search/TermQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "CLucene/highlighter/WeightedTerm.h"

CL_NS_DEF2(search,highlight)

/**
 * Utility class used to extract the terms used in a query, plus any weights.
 * This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes
 * so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of 
 * expanded terms. 
 * 
 */
class QueryTermExtractor
{
private:
	/**
	 * Private, empty constructor. Every public member of this class is
	 * static, so there is nothing to instantiate from the outside.
	 */
	QueryTermExtractor() {}

public:

	/**
	 * Collects the term texts found in a Query and returns them as an
	 * array of WeightedTerm pointers.
	 *
	 * @param query  the Query whose term texts are extracted
	 * @return an array holding the terms used in the query together with
	 *         their weights
	 */
	static WeightedTerm** getTerms(const Query* query);

	/**
	 * Collects the term texts found in a Query and returns them as an
	 * array of WeightedTerm pointers, optionally including prohibited terms.
	 *
	 * @param query       the Query whose term texts are extracted
	 * @param prohibited  pass <code>true</code> to extract "prohibited"
	 *                    terms as well
	 * @return an array holding the terms used in the query together with
	 *         their weights. The memory is owned by the caller.
	 */
	static WeightedTerm** getTerms(const Query* query, bool prohibited);

	/**
	 * Collects the term texts found in a Query and returns them as an
	 * array of WeightedTerm pointers, using an index reader for
	 * Inverse Document Frequency (IDF) information.
	 *
	 * @param query      the Query whose term texts are extracted
	 * @param reader     used to compute IDF, which can be used to
	 *                   a) score selected fragments better, or
	 *                   b) produce graded highlights, e.g. changing the
	 *                      intensity of the font color
	 * @param fieldName  the field on which the IDF calculations are based
	 * @return an array holding the terms used in the query together with
	 *         their weights
	 */
	static WeightedTerm** getIdfWeightedTerms(const Query* query,
	                                          CL_NS(index)::IndexReader* reader,
	                                          const TCHAR* fieldName);

	/*
	 * List-based helpers. Each one receives a query of a particular type
	 * and a WeightedTermList.
	 * NOTE(review): presumably these append the query's terms to the
	 * supplied list; the definitions are not part of this header, so
	 * confirm against the implementation file.
	 */

	/** Helper taking any Query, a term list and the "prohibited" flag. */
	static void getTerms(const Query* query, WeightedTermList* terms, bool prohibited);

	/** Helper for BooleanQuery; also receives the "prohibited" flag. */
	static void getTermsFromBooleanQuery(const BooleanQuery* query, WeightedTermList* terms, bool prohibited);

	/** Helper for PhraseQuery. */
	static void getTermsFromPhraseQuery(const PhraseQuery* query, WeightedTermList* terms);

	/** Helper for TermQuery. */
	static void getTermsFromTermQuery(const TermQuery* query, WeightedTermList* terms);

	/** Helper for PrefixQuery. Unlike its siblings, the query argument is not const. */
	static void getTermsFromPrefixQuery(PrefixQuery* query, WeightedTermList* terms);

	// Span-near variant is currently disabled:
//	static void getTermsFromSpanNearQuery(SpanNearQuery* query, WeightedTermList* terms);
};

CL_NS_END2



#endif