diff -r 000000000000 -r 671dee74050a searchengine/cpix/tsrc/cpixunittest/src/analysis.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/src/analysis.cpp Mon Apr 19 14:40:16 2010 +0300 @@ -0,0 +1,235 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + +#include +#include + +#include + +#include "cpixidxdb.h" + +#include "itk.h" + +#include "config.h" +#include "testutils.h" +#include "cpixanalyzer.h" + +#include "cpixdoc.h" + +const char * AnalysisTestDocsToIndex[5] = { + FILE_TEST_CORPUS_PATH "\\en\\1.txt", + FILE_TEST_CORPUS_PATH "\\en\\2.txt", + FILE_TEST_CORPUS_PATH "\\en\\3.txt", + FILE_TEST_CORPUS_PATH "\\en\\4.txt", + NULL +}; + +const wchar_t * AnalyzerTestTermsToSearch[5] = { + L"happy", + L"happiness", + L"happening", + NULL +}; + + +void TestAnalyzerParsing(Itk::TestMgr * , const wchar_t* definition) +{ + cpix_Result result; + + printf("Creating analyzer %S\n", definition); + + cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition); + + if ( cpix_Failed( &result) ) { + printf("Analyzer creation failed with %S\n", result.err_->msg_); + return; + } + cpix_Analyzer_destroy( analyzer ); +} + +void TestAnalyzersParsing(Itk::TestMgr * testMgr) +{ + TestAnalyzerParsing(testMgr, L"stdtokens>lowercase"); + TestAnalyzerParsing(testMgr, L"whitespace>lowercase"); + TestAnalyzerParsing(testMgr, L"letter>lowercase"); + // special syntax + TestAnalyzerParsing(testMgr, L"stdtokens()>lowercase"); + TestAnalyzerParsing(testMgr, L"stdtokens>lowercase()"); + // parameteres + TestAnalyzerParsing(testMgr, L"stdtokens>lowercase>stem(en)"); + TestAnalyzerParsing(testMgr, L"letter>lowercase>stop(en)"); + TestAnalyzerParsing(testMgr, L"letter>lowercase>stop('a', 'an', 'the')"); + + // bad syntaxes + TestAnalyzerParsing(testMgr, L"letter>>lowercase" ); + TestAnalyzerParsing(testMgr, L">letter>>lowercase lowercase" ); + TestAnalyzerParsing(testMgr, L"letter lowercase" ); +} + +void TestSwitchParsing(Itk::TestMgr * testMgr) +{ + // Per field query syntax + TestAnalyzerParsing(testMgr, L"switch {" + L"case '_docuid': keyword; " + L"case '_appclass': whitespace>lowercase;" + L"case 'title', 'message': standard>lowercase>stem(en)>stop(en);" + L"default: standard;" + L"}"); + TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': whitespace; default: standard; }>lowercase"); + TestAnalyzerParsing(testMgr, L"switch{ default: standard; }"); + TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': switch{ case '_docuid': keyword; default: whitespace; }; default: standard; }"); +} + +void TestAnalyzerUsage(Itk::TestMgr * testMgr, const wchar_t* definition) +{ + printf("Indexing and searching with %S\n", definition); + + cpix_Result + result; + + cpix_IdxDb_dbgScrapAll(&result); + + std::auto_ptr util( new FileIdxUtil ); + + util->init(); + + cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition ); + + if ( cpix_Failed( &result) ) { + printf("Analyzer creation failed with %S\n", result.err_->msg_); + return; + } + + // + // Add first few simple documents from english stem corpus + // English test corpus is used, because part of the analyzers contain + // english specific functinality, like stop words and stemming. + + for (int i = 0; AnalysisTestDocsToIndex[i]; i++) + { + util->indexFile( AnalysisTestDocsToIndex[i], analyzer, testMgr ); + } + + // + // Then continue by adding an empty document. It is inserted as + // a special case. + + cpix_Document* doc = cpix_Document_create( &result, L"empty", "root file text", L"", LTEXTFILE_MIMETYPE ); + + cpix_Field field; + cpix_Field_initialize( &field, CONTENTS_FIELD, L"", cpix_STORE_YES | cpix_INDEX_TOKENIZED ); + + cpix_Document_add( doc, &field ); + + cpix_IdxDb_add( util->idxDb(), doc, analyzer ); + + cpix_Document_destroy( doc ); + + printf("\nIndexed empty item.\n"); + + util->flush(); + + // + // Commit searches and print the results + + cpix_QueryParser + * queryParser = cpix_QueryParser_create(&result, + CONTENTS_FIELD, + analyzer ); + if (queryParser == NULL) + { + cpix_Analyzer_destroy( analyzer ); + ITK_PANIC("Could not create query parser"); + } + + for (int i = 0; AnalyzerTestTermsToSearch[i]; i++) + { + cpix_Query* query = cpix_QueryParser_parse(queryParser, + AnalyzerTestTermsToSearch[i]); + if (cpix_Failed(queryParser) + || query == NULL) + { + cpix_Analyzer_destroy(analyzer); + cpix_ClearError(queryParser); + cpix_QueryParser_destroy(queryParser); + ITK_PANIC("Could not parse query string"); + } + cpix_Hits + * hits = cpix_IdxDb_search(util->idxDb(), + query ); + + cpix_Query_destroy( query ); + + if (cpix_Failed(util->idxDb())) + { + cpix_Analyzer_destroy(analyzer); + cpix_ClearError(queryParser); + cpix_QueryParser_destroy(queryParser); + ITK_PANIC("Searching index database failed."); + } + else + { + util->printHits( hits, testMgr ); + cpix_Hits_destroy( hits ); + } + } + cpix_QueryParser_destroy(queryParser); + cpix_Analyzer_destroy( analyzer ); +} + +void TestAnalyzersUsage(Itk::TestMgr * testMgr) + { + TestAnalyzerUsage(testMgr, L"whitespace" ); + TestAnalyzerUsage(testMgr, L"letter>lowercase" ); + TestAnalyzerUsage(testMgr, L"stdtokens>lowercase>stem(en)"); + TestAnalyzerUsage(testMgr, L"letter>lowercase>stop(en)"); + TestAnalyzerUsage(testMgr, L"letter>lowercase>stop('a', 'an', 'the')"); + } + + +Itk::TesterBase * CreateAnalysisWhiteBoxTests(); + + +Itk::TesterBase * CreateAnalysisTests() +{ + using namespace Itk; + + SuiteTester + * analysis = new SuiteTester("analysis"); + + + analysis->add(CreateAnalysisWhiteBoxTests()); + + analysis->add("parsing", + &TestAnalyzersParsing, + "parsing"); + + analysis->add("switchParsing", + &TestSwitchParsing, + "switchParsing"); + + analysis->add("usage", + &TestAnalyzersUsage, + "usage"); + + // TODO add more + + return analysis; +}