searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.h
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
parent 7 a5fbfefd615f
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/**
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
 * Copyright 2002-2004 The Apache Software Foundation
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
 * Licensed under the Apache License, Version 2.0 (the "License");
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
 * you may not use this file except in compliance with the License.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
 * You may obtain a copy of the License at
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
 *     http://www.apache.org/licenses/LICENSE-2.0
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
 * Unless required by applicable law or agreed to in writing, software
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
 * distributed under the License is distributed on an "AS IS" BASIS,
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
 * See the License for the specific language governing permissions and
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
 * limitations under the License.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
 */
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
#ifndef _lucene_search_highlight_querytermextractor_
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
#define _lucene_search_highlight_querytermextractor_
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
#if defined(_LUCENE_PRAGMA_ONCE)
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
# pragma once
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
#endif
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
#include "CLucene/util/VoidList.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
#include "CLucene/search/SearchHeader.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
#include "CLucene/index/IndexReader.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
#include "CLucene/search/BooleanQuery.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
#include "CLucene/search/PhraseQuery.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
#include "CLucene/search/TermQuery.h"
24
hgs
parents: 7
diff changeset
    29
#include "CLucene/search/PrefixQuery.h"
7
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
#include "CLucene/highlighter/WeightedTerm.h"
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
CL_NS_DEF2(search,highlight)
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
/**
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
 * Utility class used to extract the terms used in a query, plus any weights.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
 * This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
 * so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of 
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
 * expanded terms. 
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
 * 
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
 */
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
class QueryTermExtractor
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
{
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
	QueryTermExtractor(){
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
	}
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
public:
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
	/**
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
	 * Extracts all terms texts of a given Query into an array of WeightedTerms
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
	 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
	 * @param query      Query to extract term texts from
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
	 * @return an array of the terms used in a query, plus their weights.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
	 */
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
	static WeightedTerm** getTerms(const Query *query);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
	/**
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
	 * Extracts all terms texts of a given Query into an array of WeightedTerms
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
	 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
	 * @param query      Query to extract term texts from
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
	 * @param reader used to compute IDF which can be used to a) score selected fragments better 
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
	 * b) use graded highlights eg chaning intensity of font color
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
	 * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
	 * @return an array of the terms used in a query, plus their weights.
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
	 */
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
	 static WeightedTerm** getIdfWeightedTerms(const Query* query, CL_NS(index)::IndexReader* reader, const TCHAR* fieldName);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
	/**
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
	 * Extracts all terms texts of a given Query into an array of WeightedTerms
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
	 *
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
	 * @param query      Query to extract term texts from
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
	 * @param prohibited <code>true</code> to extract "prohibited" terms, too
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
     * @return an array of the terms used in a query, plus their weights.Memory owned by the caller
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
     */
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
	static WeightedTerm** getTerms(const Query * query, bool prohibited);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
	static void getTerms(const Query * query, WeightedTermList* terms,bool prohibited);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
	static void getTermsFromBooleanQuery(const BooleanQuery * query, WeightedTermList* terms, bool prohibited);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
	static void getTermsFromPhraseQuery(const PhraseQuery * query, WeightedTermList* terms);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
	static void getTermsFromTermQuery(const TermQuery * query, WeightedTermList* terms);
24
hgs
parents: 7
diff changeset
    80
	static void getTermsFromPrefixQuery( PrefixQuery * query, WeightedTermList* terms);
7
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
//	static void getTermsFromSpanNearQuery(SpanNearQuery* query, WeightedTermList* terms);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
};
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
CL_NS_END2
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
#endif