--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/src/analysis.cpp Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,235 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#include <wchar.h>
+#include <stddef.h>
+
+#include <iostream>
+
+#include "cpixidxdb.h"
+
+#include "itk.h"
+
+#include "config.h"
+#include "testutils.h"
+#include "cpixanalyzer.h"
+
+#include "cpixdoc.h"
+
+const char * AnalysisTestDocsToIndex[5] = { // corpus files that TestAnalyzerUsage indexes, in this order
+ FILE_TEST_CORPUS_PATH "\\en\\1.txt",
+ FILE_TEST_CORPUS_PATH "\\en\\2.txt",
+ FILE_TEST_CORPUS_PATH "\\en\\3.txt",
+ FILE_TEST_CORPUS_PATH "\\en\\4.txt",
+ NULL // terminator: the indexing loop stops at the first NULL entry
+};
+
+const wchar_t * AnalyzerTestTermsToSearch[5] = { // query terms that TestAnalyzerUsage searches for, in this order
+ L"happy",
+ L"happiness",
+ L"happening",
+ NULL // terminator: the search loop stops here (the fifth slot has no explicit initializer)
+};
+
+
+// Tries to create an analyzer from 'definition' and prints the outcome. A
+// failure is only printed, since some callers pass bad syntax on purpose.
+void TestAnalyzerParsing(Itk::TestMgr * , const wchar_t* definition)
+{
+    cpix_Result result;
+    printf("Creating analyzer %S\n", definition);
+    cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition);
+    if ( cpix_Failed( &result) ) {
+        printf("Analyzer creation failed with %S\n", result.err_->msg_);
+        cpix_ClearError( &result );   // clear the error recorded on 'result' before leaving
+        return;
+    }
+    cpix_Analyzer_destroy( analyzer );
+}
+
+// Runs TestAnalyzerParsing over valid and deliberately malformed definitions, in this order.
+void TestAnalyzersParsing(Itk::TestMgr * testMgr)
+{
+    static const wchar_t * const definitions[] = {
+        L"stdtokens>lowercase",
+        L"whitespace>lowercase",
+        L"letter>lowercase",
+        L"stdtokens()>lowercase",                     // special syntax
+        L"stdtokens>lowercase()",                     // special syntax
+        L"stdtokens>lowercase>stem(en)",              // parameters
+        L"letter>lowercase>stop(en)",                 // parameters
+        L"letter>lowercase>stop('a', 'an', 'the')",   // parameters
+        L"letter><lowercase",                         // bad syntax
+        L"38j_d fad23 4?q ca'wRA",                    // bad syntax
+        L"letter>>lowercase",                         // parsing failure
+        L">letter>>lowercase lowercase",              // parsing failure
+        L"letter lowercase"                           // parsing failure
+    };
+    for (size_t idx = 0; idx < sizeof(definitions) / sizeof(definitions[0]); ++idx)
+        TestAnalyzerParsing(testMgr, definitions[idx]);
+}
+
+// Runs TestAnalyzerParsing over per-field "switch" definitions: multi-case, piped, default-only, nested.
+void TestSwitchParsing(Itk::TestMgr * testMgr)
+{
+    static const wchar_t * const switchDefinitions[] = {
+        L"switch {" L"case '_docuid': keyword; " L"case '_appclass': whitespace>lowercase;"
+        L"case 'title', 'message': standard>lowercase>stem(en)>stop(en);" L"default: standard;" L"}",
+        L"switch{ case '_qnr': whitespace; default: standard; }>lowercase",
+        L"switch{ default: standard; }",
+        L"switch{ case '_qnr': switch{ case '_docuid': keyword; default: whitespace; }; default: standard; }"
+    };
+    for (size_t idx = 0; idx < sizeof(switchDefinitions) / sizeof(switchDefinitions[0]); ++idx)
+        TestAnalyzerParsing(testMgr, switchDefinitions[idx]);
+}
+
+// Indexes the English test corpus plus one empty document with the analyzer
+// built from 'definition', then searches for each AnalyzerTestTermsToSearch
+// entry and prints the hits via 'testMgr'. Returns early when the analyzer
+// cannot be created; later failures release local resources and ITK_PANIC.
+void TestAnalyzerUsage(Itk::TestMgr * testMgr, const wchar_t* definition)
+{
+    printf("Indexing and searching with %S\n", definition);
+
+    cpix_Result result;
+    cpix_IdxDb_dbgScrapAll(&result);
+
+    std::auto_ptr<FileIdxUtil> util( new FileIdxUtil );
+    util->init();
+
+    cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition );
+    if ( cpix_Failed( &result) ) {
+        printf("Analyzer creation failed with %S\n", result.err_->msg_);
+        cpix_ClearError( &result );   // clear the error recorded on 'result' before leaving
+        return;
+    }
+
+    //
+    // Add first few simple documents from english stem corpus
+    // English test corpus is used, because part of the analyzers contain
+    // english specific functionality, like stop words and stemming.
+
+    for (int i = 0; AnalysisTestDocsToIndex[i]; i++)
+    {
+        util->indexFile( AnalysisTestDocsToIndex[i], analyzer, testMgr );
+    }
+
+    //
+    // Then continue by adding an empty document. It is inserted as
+    // a special case.
+
+    cpix_Document* doc = cpix_Document_create( &result, L"empty", "root file text", L"", LTEXTFILE_MIMETYPE );
+    if ( cpix_Failed( &result) )
+    {
+        // Stop here rather than passing a failed document to the calls below.
+        cpix_ClearError( &result );
+        cpix_Analyzer_destroy( analyzer );
+        ITK_PANIC("Could not create empty document");
+    }
+
+    cpix_Field field;
+    cpix_Field_initialize( &field, CONTENTS_FIELD, L"", cpix_STORE_YES | cpix_INDEX_TOKENIZED );
+    cpix_Document_add( doc, &field );
+
+    cpix_IdxDb_add( util->idxDb(), doc, analyzer );
+    cpix_Document_destroy( doc );
+
+    printf("\nIndexed empty item.\n");
+
+    util->flush();
+
+    //
+    // Commit searches and print the results
+
+    cpix_QueryParser * queryParser =
+        cpix_QueryParser_create(&result, CONTENTS_FIELD, analyzer );
+    if (queryParser == NULL)
+    {
+        cpix_Analyzer_destroy( analyzer );
+        ITK_PANIC("Could not create query parser");
+    }
+
+    for (int i = 0; AnalyzerTestTermsToSearch[i]; i++)
+    {
+        cpix_Query* query = cpix_QueryParser_parse(queryParser, AnalyzerTestTermsToSearch[i]);
+        if (cpix_Failed(queryParser) || query == NULL)
+        {
+            cpix_Analyzer_destroy(analyzer);
+            cpix_ClearError(queryParser);
+            cpix_QueryParser_destroy(queryParser);
+            ITK_PANIC("Could not parse query string");
+        }
+        cpix_Hits * hits = cpix_IdxDb_search(util->idxDb(), query );
+        cpix_Query_destroy( query );
+
+        if (cpix_Failed(util->idxDb()))
+        {
+            cpix_Analyzer_destroy(analyzer);
+            // The failure was detected on the index database, so clear it there.
+            cpix_ClearError(util->idxDb());
+            cpix_QueryParser_destroy(queryParser);
+            ITK_PANIC("Searching index database failed.");
+        }
+        else
+        {
+            util->printHits( hits, testMgr );
+            cpix_Hits_destroy( hits );
+        }
+    }
+    cpix_QueryParser_destroy(queryParser);
+    cpix_Analyzer_destroy( analyzer );
+}
+
+// Runs the index-and-search scenario of TestAnalyzerUsage once per analyzer definition, in order.
+void TestAnalyzersUsage(Itk::TestMgr * testMgr)
+{
+    static const wchar_t * const definitions[] = { L"whitespace", L"letter>lowercase", L"stdtokens>lowercase>stem(en)",
+        L"letter>lowercase>stop(en)", L"letter>lowercase>stop('a', 'an', 'the')" };
+    for (size_t idx = 0; idx < sizeof(definitions) / sizeof(definitions[0]); ++idx)
+        TestAnalyzerUsage(testMgr, definitions[idx]);
+}
+
+
+// Not defined in this file; provides the white-box tests added to the suite below.
+Itk::TesterBase * CreateAnalysisWhiteBoxTests();
+
+
+// Builds the "analysis" test suite: whatever CreateAnalysisWhiteBoxTests()
+// returns is added first, followed by the "parsing", "switchParsing" and
+// "usage" cases implemented in this file. Each case passes its name again as
+// the third argument of add(). Returns the newly allocated suite.
+Itk::TesterBase * CreateAnalysisTests()
+{
+    using Itk::SuiteTester;
+    SuiteTester * const suite = new SuiteTester("analysis");
+
+    // White-box tests.
+    suite->add(CreateAnalysisWhiteBoxTests());
+
+    // Analyzer definition strings, well-formed and malformed.
+    suite->add("parsing", &TestAnalyzersParsing, "parsing");
+
+    // "switch" analyzer definitions.
+    suite->add("switchParsing", &TestSwitchParsing, "switchParsing");
+
+    // Indexing and searching with several analyzers.
+    suite->add("usage", &TestAnalyzersUsage, "usage");
+
+    // TODO add more
+
+    return suite;
+}