searchengine/cpix/tsrc/cpixunittest/src/analysis.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 11 Jun 2010 14:43:47 +0300
changeset 7 a5fbfefd615f
parent 3 ae3f1779f6da
child 24 65456528cac2
permissions -rw-r--r--
Revision: 201021 Kit: 2010123

/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: 
*
*/

#include <wchar.h>
#include <stddef.h>

#include <iostream>

#include "cpixidxdb.h"

#include "itk.h"

#include "config.h"
#include "testutils.h"
#include "cpixanalyzer.h"

#include "cpixdoc.h"

#include "std_log_result.h"

const char * AnalysisTestDocsToIndex[5] = {
    FILE_TEST_CORPUS_PATH "\\en\\1.txt",
    FILE_TEST_CORPUS_PATH "\\en\\2.txt",
    FILE_TEST_CORPUS_PATH "\\en\\3.txt",
    FILE_TEST_CORPUS_PATH "\\en\\4.txt",
    NULL
};

const wchar_t * AnalyzerTestTermsToSearch[5] = {
	L"happy",
	L"happiness",
	L"happening",
    NULL
};


void TestAnalyzerParsing(Itk::TestMgr * , const wchar_t* definition, int expected = 1) 
{
	cpix_Result result; 
	
	printf("Creating analyzer %S\n", definition);
	
	cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition);

	if ( cpix_Failed( &result) ) {
        if(expected)
            assert_failed = 1;
        else
            assert_failed = 0;            
		printf("Analyzer creation failed with %S\n", result.err_->msg_);
		return; 
	}
	cpix_Analyzer_destroy( analyzer ); 
}

void TestAnalyzersParsing(Itk::TestMgr * testMgr) 
{
    char *xml_file = (char*)__FUNCTION__;
    assert_failed = 0;
	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase"); 
	TestAnalyzerParsing(testMgr, L"whitespace>lowercase"); 
	TestAnalyzerParsing(testMgr, L"letter>lowercase"); 
	// special syntax
	TestAnalyzerParsing(testMgr, L"stdtokens()>lowercase"); 
	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase()"); 
	// parameteres
	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase>stem(en)"); 
	TestAnalyzerParsing(testMgr, L"letter>lowercase>stop(en)"); 
	TestAnalyzerParsing(testMgr, L"letter>lowercase>stop('a', 'an', 'the')");

	// bad syntaxes
	TestAnalyzerParsing(testMgr, L"letter><lowercase" ,0); 
	TestAnalyzerParsing(testMgr, L"38j_d fad23 4?q ca'wRA", 0 );
	TestAnalyzerParsing(testMgr, L"38.45_d fd23<ca'wRA", 0 ); 
	// parsing failures
	TestAnalyzerParsing(testMgr, L"letter>>lowercase", 0 ); 
	TestAnalyzerParsing(testMgr, L">letter>>lowercase lowercase", 0 ); 
	TestAnalyzerParsing(testMgr, L"letter lowercase", 0 );
	testResultXml(xml_file);
}
void TestSwitchParsing(Itk::TestMgr * testMgr) 
{
    char *xml_file = (char*)__FUNCTION__;
        assert_failed = 0;
	// Per field query syntax
	TestAnalyzerParsing(testMgr, L"switch {"
									 L"case '_docuid':          keyword; "
									 L"case '_appclass':        whitespace>lowercase;"
									 L"case 'title', 'message': standard>lowercase>stem(en)>stop(en);"
									 L"default:                 standard;"
								 L"}");
	TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': whitespace; default: standard; }>lowercase");
	TestAnalyzerParsing(testMgr, L"switch{ default: 	standard; }");
	TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': switch{ case '_docuid': keyword; default: whitespace; }; default: standard; }");
	TestAnalyzerParsing(testMgr, L"switch{ case '_mimetype': standard; default: whitespace; }; default: standard; }");
	testResultXml(xml_file);
}

void TestAnalyzerUsage(Itk::TestMgr * testMgr, const wchar_t* definition) 
{
	printf("Indexing and searching with %S\n", definition); 
	cpix_Result
        result;

    cpix_IdxDb_dbgScrapAll(&result);

	std::auto_ptr<FileIdxUtil> util( new FileIdxUtil ); 
	
	util->init(); 
	
	cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition );
			
	if ( cpix_Failed( &result) ) {
		printf("Analyzer creation failed with %S\n", result.err_->msg_); 
		return; 
	}
	
	//
	// Add first few simple documents from english stem corpus
	// English test corpus is used, because part of the analyzers contain
	// english specific functinality, like stop words and stemming. 
	
	for (int i = 0; AnalysisTestDocsToIndex[i]; i++) 
	{
		util->indexFile( AnalysisTestDocsToIndex[i], analyzer, testMgr ); 
	}
	
	//
	// Then continue by adding an empty document. It is inserted as 
	// a special case.
	
	cpix_Document* doc = cpix_Document_create( &result, L"empty", "root file text", L"", LTEXTFILE_MIMETYPE );
	
	cpix_Field field;
	cpix_Field_initialize( &field, CONTENTS_FIELD, L"", cpix_STORE_YES | cpix_INDEX_TOKENIZED );
	
	cpix_Document_add( doc, &field ); 
	
	cpix_IdxDb_add( util->idxDb(), doc, analyzer );
	
	cpix_Document_destroy( doc );
	
	printf("\nIndexed empty item.\n"); 
	
        util->flush();

	// 
	// Commit searches and print the results
	
	cpix_QueryParser
		* queryParser = cpix_QueryParser_create(&result,
												CONTENTS_FIELD,
												analyzer );
	if (queryParser == NULL)
		{
			cpix_Analyzer_destroy( analyzer );
			ITK_PANIC("Could not create query parser");
		}
	
	for (int i = 0; AnalyzerTestTermsToSearch[i]; i++) 
	{
		cpix_Query* query = cpix_QueryParser_parse(queryParser,
												    AnalyzerTestTermsToSearch[i]);
		if (cpix_Failed(queryParser)
			|| query == NULL)
			{
				cpix_Analyzer_destroy(analyzer);
				cpix_ClearError(queryParser);
				cpix_QueryParser_destroy(queryParser);
				ITK_PANIC("Could not parse query string");
			}
		cpix_Hits
			* hits = cpix_IdxDb_search(util->idxDb(),
									   query );

		cpix_Query_destroy( query ); 

		if (cpix_Failed(util->idxDb())) 
			{
            assert_failed = 1;
			cpix_Analyzer_destroy(analyzer);
			cpix_ClearError(queryParser);
			cpix_QueryParser_destroy(queryParser);
			ITK_PANIC("Searching index database failed.");
			}
		else 
			{
			util->printHits( hits, testMgr ); 
			cpix_Hits_destroy( hits ); 
			}
	}
	cpix_QueryParser_destroy(queryParser);
	cpix_Analyzer_destroy( analyzer ); 
	
}

void TestAnalyzersUsage(Itk::TestMgr * testMgr) 
	{
    char *xml_file = (char*)__FUNCTION__;
    assert_failed = 0;
	TestAnalyzerUsage(testMgr, L"whitespace" ); 
	TestAnalyzerUsage(testMgr, L"letter>lowercase" ); 
	TestAnalyzerUsage(testMgr, L"stdtokens>lowercase>stem(en)"); 
	TestAnalyzerUsage(testMgr, L"letter>lowercase>stop(en)"); 
	TestAnalyzerUsage(testMgr, L"letter>lowercase>stop('a', 'an', 'the')");
	testResultXml(xml_file);
	}


Itk::TesterBase * CreateAnalysisWhiteBoxTests();


Itk::TesterBase * CreateAnalysisTests()
{
    using namespace Itk;

    SuiteTester
        * analysis = new SuiteTester("analysis");


    analysis->add(CreateAnalysisWhiteBoxTests());

    analysis->add("parsing",
                  &TestAnalyzersParsing,
                  "parsing");

    analysis->add("switchParsing",
                  &TestSwitchParsing,
                  "switchParsing");

    analysis->add("usage",
                  &TestAnalyzersUsage,
                  "usage");

    // TODO add more

    return analysis;
}