# HG changeset patch # User Dremov Kirill (Nokia-D-MSW/Tampere) # Date 1284701754 -10800 # Node ID 2c484ac32ef041093de966f7b420268e679ebc23 # Parent 3e1f76dd2722094006fb68c291d4b93e29f23039 Revision: 201035 Kit: 201037 diff -r 3e1f76dd2722 -r 2c484ac32ef0 harvester/harvesterserver/src/cindexingmanager.cpp --- a/harvester/harvesterserver/src/cindexingmanager.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/harvester/harvesterserver/src/cindexingmanager.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -297,7 +297,9 @@ } // Always issue new wait - iTimer.After(iStatus, KDefaultWaitTime); + timenow += TTimeIntervalDays(1); + //iTimer.After(iStatus, KDefaultWaitTime); + iTimer.At(iStatus, timenow); SetActive(); } diff -r 3e1f76dd2722 -r 2c484ac32ef0 package_definition.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/package_definition.xml Fri Sep 17 08:35:54 2010 +0300 @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 3e1f76dd2722 -r 2c484ac32ef0 package_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/package_map.xml Fri Sep 17 08:35:54 2010 +0300 @@ -0,0 +1,1 @@ + diff -r 3e1f76dd2722 -r 2c484ac32ef0 qcpix/cpixsearch.pro --- a/qcpix/cpixsearch.pro Thu Sep 02 21:37:32 2010 +0300 +++ b/qcpix/cpixsearch.pro Fri Sep 17 08:35:54 2010 +0300 @@ -34,7 +34,7 @@ DEFINES += BUILD_DLL symbian{ - TARGET.CAPABILITY = CAP_GENERAL_DLL -DRM + TARGET.CAPABILITY = CAP_GENERAL_DLL TARGET.EPOCALLOWDLLDATA = 1 TARGET.UID3 = 0xE3B89364 TARGET.VID = VID_DEFAULT diff -r 3e1f76dd2722 -r 2c484ac32ef0 rom/bld.inf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rom/bld.inf Fri Sep 17 08:35:54 2010 +0300 @@ -0,0 +1,35 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: Build information file +* +*/ + + +#include + + +PRJ_PLATFORMS + DEFAULT + +PRJ_EXPORTS +CPix_mw.iby CORE_MW_LAYER_IBY_EXPORT_PATH(CPix_mw.iby) +../sis/cpixsearch_stub.sis /epoc32/data/z/system/install/cpixsearch_stub.sis +../cenrep/2001f6fb.cre /epoc32/winscw/c/private/10202be9/2001f6fb.cre +../cenrep/2001f6fb.cre /epoc32/data/z/private/10202be9/2001f6fb.cre + +PRJ_TESTMMPFILES + +PRJ_TESTEXPORTS +//To DO +// ids_testers.iby CORE_MW_LAYER_IBY_EXPORT_PATH(ids_testers.iby) \ No newline at end of file diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/cpix/cpix/inc/private/document.h --- a/searchengine/cpix/cpix/inc/private/document.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/document.h Fri Sep 17 08:35:54 2010 +0300 @@ -76,7 +76,9 @@ bool isAggregated() const; - bool isFreeText() const; + bool isFreeText() const; + + bool isPhoneNumber() const; float_t boost() const; @@ -97,6 +99,8 @@ bool aggregate_; bool freeText_; + + bool phoneNumber_; }; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/cpix/cpix/inc/public/cpixdoc.h --- a/searchengine/cpix/cpix/inc/public/cpixdoc.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/cpix/cpix/inc/public/cpixdoc.h Fri Sep 17 08:35:54 2010 +0300 @@ -76,7 +76,11 @@ // index the value of the field without the stop word analyzer and // store it in _aggregate - cpix_FREE_TEXT = 128 + cpix_FREE_TEXT= 128, + + // index the value of the field using phonenumber analyser to split numbers + // and store it in _aggregate + cpix_PHONE_NUMBER = 256 }; typedef enum cpix_Index_ cpix_Index; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/cpix/cpix/src/analyzer.cpp --- a/searchengine/cpix/cpix/src/analyzer.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/cpix/cpix/src/analyzer.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -273,6 +273,15 @@ stream_ = _CLNEW standard::StandardFilter(stream_,true); stream_ = _CLNEW LowerCaseFilter(stream_,true); } + // if it is phonenumber, use phone number analyser + else if(field->isPhoneNumber()) + { + lucene::analysis::Analyzer *PhoneNumerAnalyzer_; + + PhoneNumerAnalyzer_ = _CLNEW lucene::analysis::PhoneNumberAnalyzer(); + stream_ = PhoneNumerAnalyzer_->tokenStream( field->name(), reader_ ); + _CLDELETE(PhoneNumerAnalyzer_); + } else stream_ = analyzer_.tokenStream( field->name(), reader_ ); } diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/cpix/cpix/src/customanalyzer.cpp --- a/searchengine/cpix/cpix/src/customanalyzer.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/cpix/cpix/src/customanalyzer.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -652,6 +652,7 @@ {CPIX_TOKENIZER_KOREAN_QUERY,TokenizerFactoryCtor::create}, {CPIX_ANALYZER_STANDARD, AnalyzerWrapCtor::create}, + {CPIX_ANALYZER_PHONENUMBER, AnalyzerWrapCtor::create}, {CPIX_ANALYZER_DEFAULT, TokenStreamFactoryCtor::create}, // TODO: Add more Tokenizers/Analyzers diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/cpix/cpix/src/document.cpp --- a/searchengine/cpix/cpix/src/document.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/cpix/cpix/src/document.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -36,6 +36,7 @@ int configs) : own_(true), field_(0) { freeText_ = false; + phoneNumber_ = false; resolveConfig(configs); field_ = _CLNEW lucene::document::Field(name, value, configs); } @@ -46,6 +47,7 @@ int configs) : own_(true), field_(0) { freeText_ = false; + phoneNumber_ = false; resolveConfig(configs); field_ = _CLNEW lucene::document::Field(name, stream, configs); } @@ -70,6 +72,11 @@ freeText_ = true; configs &= (~cpix_FREE_TEXT); } + + if(configs & cpix_PHONE_NUMBER){ + phoneNumber_ = true; + configs &= (~cpix_PHONE_NUMBER); + } } @@ -81,6 +88,7 @@ aggregate_( aggregate ) { freeText_ = false; + phoneNumber_ = false; } Field::~Field() { @@ -118,6 +126,10 @@ return freeText_; } + bool Field::isPhoneNumber() const { + return phoneNumber_; + } + bool Field::isAggregated() const { return aggregate_; } diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/group/clucene.mmp --- a/searchengine/oss/cl/clucene/group/clucene.mmp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/group/clucene.mmp Fri Sep 17 08:35:54 2010 +0300 @@ -20,6 +20,8 @@ USERINCLUDE ../../../../../searchsrv_plat/cpix_utility_api/inc USERINCLUDE ../src USERINCLUDE ../../../../util/cpixtools/inc/public +// Included path for NON English langauage for HIGHLIGHTER +USERINCLUDE ../../../loc/analysis/inc/public SYSTEMINCLUDE /epoc32/include OS_LAYER_LIBC_SYSTEMINCLUDE diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.cpp --- a/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -371,6 +371,32 @@ return false; } +TokenStream* PhoneNumberAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader){ + TokenStream* ret = _CLNEW PhoneNumberTokenizer(reader); + return ret; + +} + +PhoneNumberTokenizer::PhoneNumberTokenizer(CL_NS(util)::Reader* input): + Tokenizer(input) +{ + termLen = input->read(termText); + + tokenLen = 0; +} + bool PhoneNumberTokenizer::next(Token* token) { + + if(tokenLen < termLen) + { + token->set(termText+tokenLen,0,termLen-tokenLen); + tokenLen ++; + return true; + } + + return false; + + } + LengthFilter::LengthFilter(TokenStream* in, int _min, int _max): TokenFilter(in) diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h --- a/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h Fri Sep 17 08:35:54 2010 +0300 @@ -100,6 +100,20 @@ bool isTokenChar(const TCHAR c) const; }; +/** A PhoneNumberTokenizer is a tokenizer that strips a number to its subset. + * ex: A number 567 is tokenized as 567, 67 and 7. This is introduced to make + * number/word searchable from middle*/ +class PhoneNumberTokenizer: public Tokenizer { +private: + const wchar_t *termText; + int32_t tokenLen; + int32_t termLen; +public: + /** Construct a new PhoneNumberTokenizer. */ + PhoneNumberTokenizer(CL_NS(util)::Reader* input); + ~PhoneNumberTokenizer(){} + bool next(Token* token); +}; /** An Analyzer that uses WhitespaceTokenizer. */ class WhitespaceAnalyzer: public Analyzer { @@ -285,6 +299,13 @@ virtual ~KeywordAnalyzer(){} }; + +/** An Analyzer that uses PhoneNumberTokenizer. */ +class PhoneNumberAnalyzer: public Analyzer { +public: + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + virtual ~PhoneNumberAnalyzer(){} +}; /** * Removes words that are too long and too short from the stream. diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.cpp --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -181,6 +181,7 @@ int32_t startOffset; int32_t endOffset; int32_t lastEndOffset = 0; + int32_t highlightedfrags = 0; _textFragmenter->start(text); TCHAR substringBuffer[LUCENE_MAX_WORD_LEN]; @@ -223,7 +224,9 @@ //check if current token marks the start of a new fragment if (_textFragmenter->isNewFragment(&token)) { - currentFrag->setScore(_fragmentScorer->getFragmentScore()); + float_t score = _fragmentScorer->getFragmentScore(); + if(score > 0) highlightedfrags++; + currentFrag->setScore(score); //record stats for a new fragment currentFrag->setTextEndPos( writeTo->length() ); currentFrag =_CLNEW TextFragment(writeTo->length(), docFrags.size()); @@ -241,7 +244,7 @@ tokenGroup->addToken(&token,_fragmentScorer->getTokenScore(&token)); - if(lastEndOffset>maxDocBytesToAnalyze) + if(lastEndOffset>maxDocBytesToAnalyze || highlightedfrags>MAX_FRAGMENTS_TO_HIGHLIGHT) { break; } diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.h --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/Highlighter.h Fri Sep 17 08:35:54 2010 +0300 @@ -67,6 +67,8 @@ public: LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=50*1024); + + LUCENE_STATIC_CONSTANT(int32_t, MAX_FRAGMENTS_TO_HIGHLIGHT=2); /** * Constructs a Highlighter object with the provided scorer. The HighlightScorer object is owned diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.cpp --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -1,5 +1,6 @@ #include "clucene/stdheader.h" #include "queryscorer.h" +#include CL_NS_DEF2(search,highlight) CL_NS_USE(index) @@ -61,13 +62,54 @@ _totalScore=0; } + /* + * Compares the Query term to Field text token if match, returns 1 + * + */ + + int QueryScorer::matchQuryText(const TCHAR* termText) + { + CL_NS(util)::LHashMap >::iterator i = _termsToFind.begin(); + for(; i != _termsToFind.end() ; i ++) + { + const TCHAR * temp = i->first; + int tempLen = wcslen(temp); + int matchFlag = 1; + if(tempLen <= wcslen(termText)) + { + for(int j = tempLen - 1 ; j >=0 ; j--) + { + if(temp[j] != termText[j]) + { + matchFlag = 0; + break; + } + } + } + else + { + matchFlag = 0; + } + if(matchFlag) + return 1; + + } + return 0; + } + float_t QueryScorer::getTokenScore(Token * token) { const TCHAR* termText=token->termText(); - const WeightedTerm* queryTerm = _termsToFind.get(termText); - if(queryTerm==NULL) + // const WeightedTerm* queryTerm = _termsToFind.get(termText); + // Instead of checking for weighted terms directly match qurey text to field text. + int isQueryTerm = matchQuryText(termText); + if(isQueryTerm==0) { //not a query term - return return 0; @@ -75,11 +117,17 @@ //found a query term - is it unique in this doc? if(_uniqueTermsInFragment.find(termText)==_uniqueTermsInFragment.end()) { - _totalScore+=queryTerm->getWeight(); + //_totalScore+=queryTerm->getWeight(); + /* + * Keeping the Score value to 1 + * + */ + _totalScore+=1; TCHAR* owned_term = stringDuplicate(termText); _uniqueTermsInFragment.insert(owned_term); } - return queryTerm->getWeight(); + //return queryTerm->getWeight(); + return 1; } /** diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.h --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryScorer.h Fri Sep 17 08:35:54 2010 +0300 @@ -102,6 +102,11 @@ * top end of coloring scale. */ float_t getMaxTermWeight(); + /** + * Comapres @parameter text to Query text + * + */ + int matchQuryText(const TCHAR* ); private: void initialize(WeightedTerm** weightedTerms); diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.cpp --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -30,6 +30,9 @@ getTermsFromPhraseQuery((PhraseQuery *) query, terms); else if (query->instanceOf( TermQuery::getClassName() )) getTermsFromTermQuery((TermQuery *) query, terms); + // Adding support for prefix Query to have direct comparision of Query text + else if (query->instanceOf( PrefixQuery::getClassName() )) + getTermsFromPrefixQuery((PrefixQuery *) query, terms); //else if(query->instanceOf(_T("SpanNearQuery")) // getTermsFromSpanNearQuery((SpanNearQuery*) query, terms); } @@ -121,6 +124,18 @@ _CLDELETE(pWT); } + + void QueryTermExtractor::getTermsFromPrefixQuery( PrefixQuery * query, WeightedTermList * terms) + { + Term *term = query->getPrefix(); + WeightedTerm * pWT = _CLNEW WeightedTerm(query->getBoost(),term->text()); + _CLDECDELETE(term); + if (terms->find(pWT)==terms->end()) // possible memory leak if key already present + terms->insert(pWT); + else + _CLDELETE(pWT); + } + //todo: implement this when span queries are implemented /*void getTermsFromSpanNearQuery(SpanNearQuery* query, WeightedTermList* terms){ Collection queryTerms = query.getTerms(); diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.h --- a/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/highlighter/QueryTermExtractor.h Fri Sep 17 08:35:54 2010 +0300 @@ -26,6 +26,7 @@ #include "CLucene/search/BooleanQuery.h" #include "CLucene/search/PhraseQuery.h" #include "CLucene/search/TermQuery.h" +#include "CLucene/search/PrefixQuery.h" #include "CLucene/highlighter/WeightedTerm.h" CL_NS_DEF2(search,highlight) @@ -76,6 +77,7 @@ static void getTermsFromBooleanQuery(const BooleanQuery * query, WeightedTermList* terms, bool prohibited); static void getTermsFromPhraseQuery(const PhraseQuery * query, WeightedTermList* terms); static void getTermsFromTermQuery(const TermQuery * query, WeightedTermList* terms); + static void getTermsFromPrefixQuery( PrefixQuery * query, WeightedTermList* terms); // static void getTermsFromSpanNearQuery(SpanNearQuery* query, WeightedTermList* terms); }; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/hits.cpp --- a/searchengine/oss/cl/clucene/src/clucene/search/hits.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/hits.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -12,7 +12,6 @@ #include "filter.h" #include "clucene/search/searchheader.h" //#ifdef USE_HIGHLIGHTER - #include "CLucene/highlighter/QueryTermExtractor.h" #include "CLucene/highlighter/QueryScorer.h" #include "CLucene/highlighter/Highlighter.h" @@ -20,8 +19,8 @@ #include "CLucene/analysis/standard/StandardAnalyzer.h" #include "clucene/search/prefixquery.h" -// internal libs -#include "cpixparsetools.h" +#include "prefixfilter.h" +#include "koreananalyzer.h" //#endif @@ -56,7 +55,10 @@ Hits::Hits(Searcher* s, Query* q, Filter* f, const Sort* _sort): query(q), searcher(s), filter(f), sort(_sort) //#ifdef USE_HIGHLIGHTER - , hl_frag(20) + , hl_frag(15) +#if defined (__SYMBIAN32__) + ,lang(User::Language()) +#endif //#endif { //Func - Constructor @@ -81,7 +83,153 @@ int32_t Hits::length() const { return _length; } + + void Hits::getHighlightedText(CL_NS(document)::Document* document) + { +/* TODO :: Important consideration for getting locale + * Highlighting is based on the locale, the current implementation is + * only for symbian devices, this dependency should be complete before + * porting to any other OS. so all code is under symbian macro. + * + */ +#if defined (__SYMBIAN32__) + TCHAR* result = NULL; + CL_NS2(search,highlight)::QueryScorer hl_scorer(query); + CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); + highlighter.setTextFragmenter(&hl_frag); + const TCHAR* fieldtxt = document->get(LCPIX_HL_EXCERPT_FIELD); + + if(fieldtxt) + { + StringReader strreader(fieldtxt); + + switch(lang) + { + case ELangEnglish: + case ELangCanadianEnglish: + case ELangInternationalEnglish: + case ELangSouthAfricanEnglish: + { + CL_NS(analysis)::TokenStream* tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardTokenizer(&strreader); + tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardFilter(tokenstream,true); + tokenstream = _CLNEW CL_NS(analysis)::LowerCaseFilter(tokenstream,true); + result = highlighter.getBestFragments(tokenstream, fieldtxt, 2, L"..."); + break; + } + case ELangFrench: + case ELangSwissFrench: + case ELangBelgianFrench: + case ELangInternationalFrench: + case ELangCanadianFrench: + { + ::analysis::FrenchAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); + result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); + break; + } + case ELangHebrew: + { + ::analysis::HebrewAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); + result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); + break; + } + case ELangTaiwanChinese: + case ELangHongKongChinese: + case ELangPrcChinese: + case ELangJapanese: + case ELangKorean: + { + ::analysis::CjkNGramTokenizer hl_analyzer(&strreader,1); + lucene::analysis::TokenStream * ts1 = &hl_analyzer; + result = highlighter.getBestFragments(ts1, fieldtxt, 2, L"..."); + break; + } + case ELangNone: + default: + { + CL_NS(analysis)::TokenStream* tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardTokenizer(&strreader); + tokenstream = _CLNEW CL_NS2(analysis,standard)::StandardFilter(tokenstream,true); + tokenstream = _CLNEW CL_NS(analysis)::LowerCaseFilter(tokenstream,true); + result = highlighter.getBestFragments(tokenstream, fieldtxt, 2, L"..."); + } + } + + if (result != NULL && *((int*)result) != 0x00) + { + document->removeField( LCPIX_HL_EXCERPT_FIELD ); + document->add(*_CLNEW Field(LCPIX_HL_EXCERPT_FIELD, + result, lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); + result = NULL; + } + } + + const TCHAR* fieldtxt2 = document->get(LCPIX_EXCERPT_FIELD); + + if(fieldtxt2 ) + { + StringReader strreader2(fieldtxt2); + switch(lang) + { + case ELangEnglish: + case ELangCanadianEnglish: + case ELangInternationalEnglish: + case ELangSouthAfricanEnglish: + { + CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); + result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); + break; + } + case ELangFrench: + case ELangSwissFrench: + case ELangBelgianFrench: + case ELangInternationalFrench: + case ELangCanadianFrench: + { + ::analysis::FrenchAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); + result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); + break; + } + case ELangHebrew: + { + ::analysis::HebrewAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); + result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); + break; + } + case ELangTaiwanChinese: + case ELangHongKongChinese: + case ELangPrcChinese: + case ELangJapanese: + case ELangKorean: + { + ::analysis::CjkNGramTokenizer hl_analyzer(&strreader2,1); + lucene::analysis::TokenStream * ts1 = &hl_analyzer; + result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); + break; + } + case ELangNone: + default: + { + CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; + lucene::analysis::TokenStream * ts1 = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader2); + result = highlighter.getBestFragments(ts1, fieldtxt2, 2, L"..."); + } + } + if (result != NULL && *((int*)result) != 0x00) + { + document->removeField( LCPIX_EXCERPT_FIELD ); + document->add(*_CLNEW Field(LCPIX_EXCERPT_FIELD, + result, lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); + } + } +#endif + + } + Document& Hits::doc(const int32_t n){ HitDoc* hitDoc = getHitDoc(n); @@ -100,72 +248,10 @@ hitDoc->doc = _CLNEW Document; searcher->doc(hitDoc->id, hitDoc->doc); // cache miss: read document //#ifdef USE_HIGHLIGHTER - CL_NS(document)::Document* document = hitDoc->doc; - - TCHAR* result = NULL; - Query* rwquery[2]; - searcher->getrewritten(hitDoc->id, query, rwquery); - - const TCHAR* firstlnHLtxt = document->get(LCPIX_HL_EXCERPT_FIELD); - - if(firstlnHLtxt && rwquery[1]) - { - CL_NS2(search,highlight)::QueryScorer hl_scorer(rwquery[1]); - - CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); - - highlighter.setTextFragmenter(&hl_frag); - - wstring hlText; - - StringReader strreader(firstlnHLtxt); - - lucene::analysis::TokenStream * tokenStream = hl_analyzer.tokenStream(LCPIX_HL_EXCERPT_FIELD, &strreader); - - result = highlighter.getBestFragments(tokenStream, firstlnHLtxt, 2,L"..."); - - if (result != NULL && *((int*)result) != 0x00) - { - hlText.append(result); - - document->removeField( LCPIX_HL_EXCERPT_FIELD ); - - document->add(*_CLNEW Field(LCPIX_HL_EXCERPT_FIELD, - hlText.c_str(), lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); - } - - } - - const TCHAR* text = document->get(LCPIX_EXCERPT_FIELD); - - if(text && rwquery[1]) - { - CL_NS2(search,highlight)::QueryScorer hl_scorer(rwquery[1]); - - CL_NS2(search,highlight)::Highlighter highlighter(&hl_formatter, &hl_scorer); - - highlighter.setTextFragmenter(&hl_frag); - - wstring hlText; - - StringReader strreader(text); - - lucene::analysis::TokenStream * tokenStream = hl_analyzer.tokenStream(LCPIX_EXCERPT_FIELD, &strreader); - - result = highlighter.getBestFragments(tokenStream, text, 2,L"..."); - - if (result != NULL && *((int*)result) != 0x00) - { - hlText.append(result); - - document->removeField( LCPIX_EXCERPT_FIELD ); - - document->add(*_CLNEW Field(LCPIX_EXCERPT_FIELD, - hlText.c_str(), lucene::document::Field::STORE_YES | lucene::document::Field::INDEX_NO)); - } - } -//#endif + getHighlightedText(document); +//#endif + } return *hitDoc->doc; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.cpp --- a/searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -16,14 +16,6 @@ #include "clucene/index/term.h" #include "clucene/util/bitset.h" #include "fieldsortedhitqueue.h" -//#ifdef USE_HIGHLIGHTER -#include "CLucene/highlighter/QueryTermExtractor.h" -#include "CLucene/highlighter/QueryScorer.h" -#include "CLucene/highlighter/Highlighter.h" -#include "CLucene/highlighter/SimpleHTMLFormatter.h" -#include "CLucene/analysis/standard/StandardAnalyzer.h" -#include "CLucene/queryParser/QueryParser.h" -//#endif CL_NS_USE(index) CL_NS_USE(util) CL_NS_USE(document) @@ -119,10 +111,7 @@ reader = IndexReader::open(path); readerOwner = true; -//#ifdef USE_HIGHLIGHTER - fistlnHLQuery = NULL; - excerptrwQuery = NULL; -//#endif + } @@ -136,10 +125,7 @@ reader = IndexReader::open(directory); readerOwner = true; -//#ifdef USE_HIGHLIGHTER - fistlnHLQuery = NULL; - excerptrwQuery = NULL; -//#endif + } @@ -151,10 +137,7 @@ reader = r; readerOwner = false; -//#ifdef USE_HIGHLIGHTER - fistlnHLQuery = NULL; - excerptrwQuery = NULL; -//#endif + } IndexSearcher::~IndexSearcher(){ @@ -223,12 +206,8 @@ //Post - CND_PRECONDITION(reader != NULL, "reader is NULL"); CND_PRECONDITION(query != NULL, "query is NULL"); -//#ifdef USE_HIGHLIGHTER - if(!excerptrwQuery || !fistlnHLQuery) - { - excerptrwQuery = query->rewrite(reader); - } -//#endif + + Weight* weight = query->weight(this); Scorer* scorer = weight->scorer(reader); @@ -277,13 +256,8 @@ CND_PRECONDITION(reader != NULL, "reader is NULL"); CND_PRECONDITION(query != NULL, "query is NULL"); + -//#ifdef USE_HIGHLIGHTER - if(!excerptrwQuery || !fistlnHLQuery) - { - excerptrwQuery = query->rewrite(reader); - } -//#endif Weight* weight = query->weight(this); Scorer* scorer = weight->scorer(reader); @@ -333,12 +307,7 @@ CND_PRECONDITION(reader != NULL, "reader is NULL"); CND_PRECONDITION(query != NULL, "query is NULL"); -//#ifdef USE_HIGHLIGHTER - if(!excerptrwQuery || !fistlnHLQuery) - { - excerptrwQuery = query->rewrite(reader); - } -//#endif + BitSet* bits = NULL; @@ -380,13 +349,7 @@ } return query; } -//#ifdef USE_HIGHLIGHTER -void IndexSearcher::getrewritten(int32_t n, Query* original, Query* rwQuery[]) - { - rwQuery[0] = fistlnHLQuery; - rwQuery[1] = excerptrwQuery; - } -//#endif + void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret){ Weight* weight = query->weight(this); weight->explain(reader, doc, ret); diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.h --- a/searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/indexsearcher.h Fri Sep 17 08:35:54 2010 +0300 @@ -28,12 +28,7 @@ */ class IndexSearcher:public Searcher{ CL_NS(index)::IndexReader* reader; - bool readerOwner; -//#ifdef USE_HIGHLIGHTER - Query *fistlnHLQuery; - Query *excerptrwQuery; - CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; -//#endif + bool readerOwner; public: /// Creates a searcher searching the index in the named directory. IndexSearcher(const char* path); @@ -67,9 +62,6 @@ Query* rewrite(Query* original); -//#ifdef USE_HIGHLIGHTER - void getrewritten(int32_t n, Query* original, Query* rwQuery[]); -//#endif void explain(Query* query, int32_t doc, Explanation* ret); }; CL_NS_END diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/multisearcher.cpp --- a/searchengine/oss/cl/clucene/src/clucene/search/multisearcher.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/multisearcher.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -62,14 +62,7 @@ int32_t i = subSearcher(n); // find searcher index return searchables[i]->doc(n - starts[i], d); // dispatch to searcher } - -//#ifdef USE_HIGHLIGHTER - void MultiSearcher::getrewritten(int32_t n, Query* original, Query* rwQuery[]) { - int32_t i = subSearcher(n); // find searcher index - // changed to get already rewritten query - searchables[i]->getrewritten(n, original, rwQuery); - } -//#endif + int32_t MultiSearcher::searcherIndex(int32_t n) const{ return subSearcher(n); diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/multisearcher.h --- a/searchengine/oss/cl/clucene/src/clucene/search/multisearcher.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/multisearcher.h Fri Sep 17 08:35:54 2010 +0300 @@ -88,9 +88,7 @@ void _search(Query* query, Filter* filter, HitCollector* results); Query* rewrite(Query* original); -//#ifdef USE_HIGHLIGHTER - void getrewritten(int32_t n, Query* original, Query* rwQuery[]); -//#endif + void explain(Query* query, int32_t doc, Explanation* ret); }; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/search/searchheader.h --- a/searchengine/oss/cl/clucene/src/clucene/search/searchheader.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/search/searchheader.h Fri Sep 17 08:35:54 2010 +0300 @@ -24,9 +24,11 @@ #include "CLucene/highlighter/SimpleFragmenter.h" #include "CLucene/highlighter/SimpleHTMLFormatter.h" #include "CLucene/analysis/standard/StandardAnalyzer.h" -#define LCPIX_DEFAULT_FIELD L"_aggregate" #define LCPIX_HL_EXCERPT_FIELD L"_hlexcerpt" #define LCPIX_EXCERPT_FIELD L"_excerpt" +#if defined (__SYMBIAN32__) +#include +#endif //#endif CL_NS_DEF(search) @@ -172,8 +174,9 @@ CL_NS2(search,highlight)::SimpleHTMLFormatter hl_formatter; CL_NS2(search,highlight)::SimpleFragmenter hl_frag; - - CL_NS2(analysis,standard)::StandardAnalyzer hl_analyzer; +#if defined (__SYMBIAN32__) + TLanguage lang; +#endif //#endif public: Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL); @@ -206,6 +209,13 @@ void addToFront(HitDoc* hitDoc); void remove(const HitDoc* hitDoc); + + /* Get the tokenstream for Highlighting. + * @ text Text to be analyzed + * @ result wchar double pointer to return highlighted text + * @ firstline Flag for checking first line or Excerpt field. + */ + void getHighlightedText(CL_NS(document)::Document* document); }; @@ -292,9 +302,7 @@ * Searcher#search(Query,Filter,Sort)} instead. */ virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0; -//#ifdef USE_HIGHLIGHTER - virtual void getrewritten(int32_t n, Query* original, Query* rwQuery[])= 0; -//#endif + }; diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/sortproto/group/sortproto.mmp --- a/searchengine/oss/cl/sortproto/group/sortproto.mmp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/sortproto/group/sortproto.mmp Fri Sep 17 08:35:54 2010 +0300 @@ -51,6 +51,7 @@ STATICLIBRARY libcrt0.lib STATICLIBRARY libclucene.lib STATICLIBRARY libcpixtools.lib +STATICLIBRARY libanalysis.lib // libc and euser are always needed when using main() entry point LIBRARY libc.lib diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/tsrc/clucene_extraunittest/group/clucene_extraunittest.mmp --- a/searchengine/oss/cl/tsrc/clucene_extraunittest/group/clucene_extraunittest.mmp Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/tsrc/clucene_extraunittest/group/clucene_extraunittest.mmp Fri Sep 17 08:35:54 2010 +0300 @@ -51,6 +51,7 @@ STATICLIBRARY libitk.lib STATICLIBRARY libcpixtools.lib STATICLIBRARY libclucene.lib +STATICLIBRARY libanalysis.lib // libc and euser are always needed when using main() entry point LIBRARY libc.lib diff -r 3e1f76dd2722 -r 2c484ac32ef0 searcher/searchclient/src/csearchdocument.cpp --- a/searcher/searchclient/src/csearchdocument.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searcher/searchclient/src/csearchdocument.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -246,10 +246,12 @@ return iBoost; } -#ifdef USE_HIGHLIGHTER + EXPORT_C void CSearchDocument::AddHLDisplayFieldL(const TDesC& aField) { +#ifdef USE_HIGHLIGHTER // Needs to be tokenised to rewrite the query, but should not be searchable so EAggregateNo. AddFieldL( _L( CPIX_HL_EXCERPT_FIELD ), aField, CDocumentField::EStoreYes | CDocumentField::EIndexTokenized | CDocumentField::EAggregateNo ); +#endif } -#endif + diff -r 3e1f76dd2722 -r 2c484ac32ef0 searcher/searchserver/inc/csearchserversession.h --- a/searcher/searchserver/inc/csearchserversession.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searcher/searchserver/inc/csearchserversession.h Fri Sep 17 08:35:54 2010 +0300 @@ -108,6 +108,11 @@ void ContinueHouseKeeping(const RMessage2& aMessage); /** + * Continue housekeeping + */ + void ContinueHouseKeeping(); + + /** * Force housekeeping */ void ForceHouseKeeping(const RMessage2& aMessage); diff -r 3e1f76dd2722 -r 2c484ac32ef0 searcher/searchserver/src/cheartbeattimer.cpp --- a/searcher/searchserver/src/cheartbeattimer.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searcher/searchserver/src/cheartbeattimer.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -61,5 +61,5 @@ { if (iObserver) iObserver->HandleHeartBeatL(); - After(HEARTBEAT_PERIOD_USEC); + //After(HEARTBEAT_PERIOD_USEC); } diff -r 3e1f76dd2722 -r 2c484ac32ef0 searcher/searchserver/src/csearchserversession.cpp --- a/searcher/searchserver/src/csearchserversession.cpp Thu Sep 02 21:37:32 2010 +0300 +++ b/searcher/searchserver/src/csearchserversession.cpp Fri Sep 17 08:35:54 2010 +0300 @@ -252,30 +252,35 @@ case ESearchServerAdd: RECORDED_EXECUTION_BEGIN subsession->AddL(aMessage); + ContinueHouseKeeping(); RECORDED_EXECUTION_END("add") break; case ESearchServerUpdate: RECORDED_EXECUTION_BEGIN subsession->UpdateL(aMessage); + ContinueHouseKeeping(); RECORDED_EXECUTION_END("update") break; case ESearchServerDelete: RECORDED_EXECUTION_BEGIN subsession->DeleteL(aMessage); + ContinueHouseKeeping(); RECORDED_EXECUTION_END("delete") break; case ESearchServerReset: RECORDED_EXECUTION_BEGIN subsession->ResetL(aMessage); + ContinueHouseKeeping(); RECORDED_EXECUTION_END("reset") break; case ESearchServerFlush: RECORDED_EXECUTION_BEGIN subsession->FlushL(aMessage); + ContinueHouseKeeping(); RECORDED_EXECUTION_END("flush") break; @@ -458,6 +463,17 @@ } // ----------------------------------------------------------------------------- +// CSearchServerSession::ContinueHouseKeeping() +// Continue housekeeping +// ----------------------------------------------------------------------------- +// +void CSearchServerSession::ContinueHouseKeeping() + { + CServer2* server = const_cast( Server() ); + static_cast( server )->ContinueHouseKeeping(); + } + +// ----------------------------------------------------------------------------- // CSearchServerSession::ForceHouseKeeping() // Force housekeeping // ----------------------------------------------------------------------------- diff -r 3e1f76dd2722 -r 2c484ac32ef0 searcher/tsrc/group/bld.inf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searcher/tsrc/group/bld.inf Fri Sep 17 08:35:54 2010 +0300 @@ -0,0 +1,21 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "../RobustnessTest/group/bld.inf" +#include "../LogPlayer/group/bld.inf" +#include "../cpixsearchertest/group/bld.inf" \ No newline at end of file diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchsrv_info/nokia_searchsrv_metadata/package_definition.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchsrv_info/nokia_searchsrv_metadata/package_definition.xml Fri Sep 17 08:35:54 2010 +0300 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchsrv_plat/cpix_framework_api/inc/cdocumentfield.h --- a/searchsrv_plat/cpix_framework_api/inc/cdocumentfield.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchsrv_plat/cpix_framework_api/inc/cdocumentfield.h Fri Sep 17 08:35:54 2010 +0300 @@ -61,7 +61,8 @@ EIndexNo = 16, EIndexTokenized = 32, EIndexUnTokenized = 64, - EIndexFreeText = 128 + EIndexFreeText = 128, + EIndexPhoneNumber = 256 }; /** diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchsrv_plat/cpix_framework_api/inc/csearchdocument.h --- a/searchsrv_plat/cpix_framework_api/inc/csearchdocument.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchsrv_plat/cpix_framework_api/inc/csearchdocument.h Fri Sep 17 08:35:54 2010 +0300 @@ -182,7 +182,7 @@ */ IMPORT_C TReal32 Boost() const; -#ifdef USE_HIGHLIGHTER + /** * Adds the contents to field which will be * shown in the first line of searchUI. @@ -191,7 +191,7 @@ * @param aExcerpt Excerpt text to add to the document. */ IMPORT_C void AddHLDisplayFieldL(const TDesC& aField); -#endif + private: // Constructors diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h --- a/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Fri Sep 17 08:35:54 2010 +0300 @@ -254,7 +254,8 @@ // Default prefix analyzer #define CPIX_ANALYZER_DEFAULT_PREFIX L"natural(prefix)" -#define CPIX_ANALYZER_STANDARD L"standard" +#define CPIX_ANALYZER_STANDARD L"standard" +#define CPIX_ANALYZER_PHONENUMBER L"PhoneNumberAnalyzer" //#define CPIX_ANALYZER_DEFAULT L"standard" #define CPIX_TOKENIZER_STANDARD L"stdtokens" diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchsrv_plat/cpix_utility_api/inc/cpixwatchdogcommon.h --- a/searchsrv_plat/cpix_utility_api/inc/cpixwatchdogcommon.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchsrv_plat/cpix_utility_api/inc/cpixwatchdogcommon.h Fri Sep 17 08:35:54 2010 +0300 @@ -19,7 +19,7 @@ #define CPIXWATCHDOGCOMMON_H_ //Length of uid string in cenrep -const TInt KCenrepUidLength = 20; +const TInt KCenrepUidLength = 256; //Uid of watchdog cetral repository database const TUid KWDrepoUidMenu = {0x20029ab8}; const TUint32 KHarvesterServerKey = 0x1;