diff -r 000000000000 -r 671dee74050a searchengine/cpix/tsrc/cpixunittest/src/randomtest.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/src/randomtest.cpp Mon Apr 19 14:40:16 2010 +0300 @@ -0,0 +1,769 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ +#include "randomtest.h" +#include "CPixdoc.h" + +#include +#include + +using namespace std; +using namespace Itk; + +SuiteTester* CreateSuiteTesterRandom() + { + // "all/random" + RandomTest * idxDbTest = new RandomTest; + ContextTester * random = new ContextTester("random", idxDbTest); + random->add( "AddingDeleting", idxDbTest, + &RandomTest::testIndexAndDelete ); + + random->add( "AddingDeletingAll", idxDbTest, + &RandomTest::testDeleteAll ); + + random->add( "AssertContent", idxDbTest, &RandomTest::testContent ); + + random->add( "Persistence", idxDbTest, + &RandomTest::testPersistence ); + + random->add("Searches", idxDbTest, &RandomTest::testSearches ); + + random->add("CreatedIndex", idxDbTest, &RandomTest::testCreatedIndex ); + + return random; + } + + +RandomTest::RandomTest() +: // core + randomSeed_(0), + idxUtil_(NULL), + testCorpus_(DEFAULT_TEST_CORPUS_PATH), + // text processing + analyzer_(NULL), + uidQueryParser_(NULL), + contentQueryParser_(NULL), + // tracing + indexed_(), + tracedTerms_(), + tracedTermsFreq_() + { + } + +RandomTest::~RandomTest() + { + } + +void RandomTest::setup() throw (Itk::PanicExc) +{ + cpix_Result + result; + + cpix_IdxDb_dbgScrapAll(&result); + if (cpix_Failed(&result)) + { + ITK_PANIC("Could not dbg scrapp all indexes"); + } + + randomSeed_ = (unsigned int)time(0); + srandom(randomSeed_); + + idxUtil_ = new SmsIdxUtil; + idxUtil_->init( true ); + + analyzer_ = cpix_CreateSimpleAnalyzer(&result); + if ( !analyzer_ ) + { + ITK_PANIC("Analyzer could not be created"); + } + + uidQueryParser_ = cpix_QueryParser_create( &result, + LCPIX_DOCUID_FIELD, + analyzer_ ); + if ( !uidQueryParser_) + { + ITK_PANIC("Query parser could not be created"); + } + + contentQueryParser_ = cpix_QueryParser_create( &result, + LBODY_FIELD, + analyzer_ ); + if ( !contentQueryParser_ ) + { + ITK_PANIC("Query parser could not be created"); + } +} + +void RandomTest::tearDown() throw () + { + delete idxUtil_; + cpix_Analyzer_destroy( analyzer_ ); + cpix_QueryParser_destroy( uidQueryParser_ ); + cpix_QueryParser_destroy( contentQueryParser_ ); + } + +void * RandomTest::subject() + { + return NULL; + } + + +int RandomTest::getRandomItem(bool isIndexed) + { + if (isIndexed) + { // pick indexed one from list + int i = rand() % indexed_.size(); + for (set::iterator itr = indexed_.begin(); itr != indexed_.end(); itr++) + { + if (i-- == 0) + { + return *itr; + } + } + // TODO: Panic + return -1; + } + else + { + while (true) + { + int i = rand() % testCorpus_.size(); + + if (indexed_.count(i) == 0) + { + return i; + } + } + } + } + + +// +// TRACING -> +// + +void RandomTest::markIndexed(int index, bool indexed) + { + if (indexed_.count(index) == 1 && !indexed) + { + indexed_.erase(index); + } + else if (indexed_.count(index) == 0 && indexed) + { + indexed_.insert(index); + } + } + +bool RandomTest::indexed(int item) + { + return indexed_.count(item)==1; + } + +void RandomTest::traceTerm(Itk::TestMgr* , const wchar_t* term) + { + tracedTerms_.push_back( wstring( term ) ); + tracedTermsFreq_.push_back( 0 ); + } + +void RandomTest::untraceTerm(Itk::TestMgr* , const wchar_t* term) + { + int i = 0; + for (; i < tracedTerms_.size(); i++) + { + if ( term == tracedTerms_[i] ) + { + tracedTerms_.erase( tracedTerms_.begin() + i ); + tracedTermsFreq_.erase( tracedTermsFreq_.begin() + i ); + return; + } + } + ITK_PANIC( "Untracing non-traced term" ); + } + +void RandomTest::untraceTerms(Itk::TestMgr* ) + { + tracedTerms_.empty(); + tracedTermsFreq_.empty(); + } + +// Would be useful to get access to the tokenizers +bool RandomTest::isTokenChar(wchar_t character) const + { + // only for ASCII + if ( character >= 'a' && character <= 'z') + { + return true; + } + if ( character >= 'A' && character <= 'Z') + { + return true; + } + return false; + } + +bool RandomTest::containsTerm(const std::wstring& content, const std::wstring& term) const +{ + wostringstream tolowcase; + for (int i = 0; i < content.size(); i++) { + wchar_t c = content[i]; + if (c >= 'A' && c <= 'Z') { + c = (c - 'A' + 'a'); + } + tolowcase<= lowcase.size() || !isTokenChar(lowcase[i+term.length()]))) + { + return true; + } + i++; + } +} + + + +void RandomTest::incTermFreqs(Itk::TestMgr* /*testMgr*/, const std::wstring& content) + { + for (int i = 0; i < tracedTerms_.size(); i++) + { + if ( containsTerm( content, tracedTerms_[i] ) ) + { + tracedTermsFreq_[i]++; + } + } + } + +void RandomTest::decTermFreqs(Itk::TestMgr* , const std::wstring& content) + { + for (int i = 0; i < tracedTerms_.size(); i++) + { + if ( containsTerm( content, tracedTerms_[i] ) ) + { + tracedTermsFreq_[i]--; + } + } + } + +// +// Indexing & deleting +// + + +bool RandomTest::tryIndexItem(Itk::TestMgr* testMgr, size_t item) + { + wstring id = GetItemId( item ); + wstring content = testCorpus_.item( item ); + + const wchar_t + * fields[4]; + fields[0] = L"+3585553412"; // to + fields[1] = L"+3585559078"; // from + fields[2] = L"inbox"; // folder + fields[3] = content.c_str();// body + + cpix_IdxDb_add2( idxUtil_->idxDb(), + idxUtil_->schemaId(), + id.c_str(), + SMSAPPCLASS, + content.c_str(), + NULL, + fields, + analyzer_ ); + + if ( cpix_Succeeded( idxUtil_->idxDb() ) ) + { + markIndexed(item, true); + incTermFreqs(testMgr, content); + return true; + } + else + { + return false; + } + } + +void RandomTest::indexItem(Itk::TestMgr* testMgr, size_t item) +{ + ITK_ASSERT( testMgr, tryIndexItem(testMgr, item), "Inserting %d failed", item ); +} + +bool RandomTest::tryDeleteItem(Itk::TestMgr* testMgr, size_t item) +{ + wstring id = GetItemId(item ); + + cpix_IdxDb_deleteDocuments(idxUtil_->idxDb(), id.c_str()); + + if ( cpix_Succeeded( idxUtil_->idxDb() ) ) + { + markIndexed( item, false ); + wstring content = testCorpus_.item( item ); + decTermFreqs( testMgr, content ); + return true; + } + return false; +} +void RandomTest::deleteItem(Itk::TestMgr* testMgr, size_t item) +{ + ITK_ASSERT( testMgr, tryDeleteItem(testMgr, item), "Deleting %d failed", item); +} + +size_t RandomTest::indexRandom(Itk::TestMgr* testMgr) + { + int ret = getRandomItem( false ); + indexItem( testMgr, ret ); + return ret; + } + +void RandomTest::indexRandoms(Itk::TestMgr* testMgr, size_t n) + { + for (int i = 0; i < n; i++) + { + indexRandom(testMgr); + } + idxUtil_->flush(); + } + +size_t RandomTest::deleteRandom(Itk::TestMgr* testMgr) + { + int ret = getRandomItem( true ); + deleteItem( testMgr, ret ); + return ret; + } + +void RandomTest::deleteRandoms(Itk::TestMgr* testMgr, size_t n) + { + for (int i = 0; i < n; i++) + { + deleteRandom(testMgr); + } + idxUtil_->flush(); + } + +bool RandomTest::validState(Itk::TestMgr* testMgr, size_t item) + { + bool ret = false; + wstring id = GetItemId( item ); + + cpix_Query* query = + cpix_QueryParser_parse( uidQueryParser_, id.c_str() ); + + if ( query ) + { + cpix_Hits *hits = + cpix_IdxDb_search(idxUtil_->idxDb(), + query); + + if ( hits ) + { + if ( indexed( item ) ) + { + ret = ( cpix_Hits_length( hits ) == 1 ); + } + else + { + ret = ( cpix_Hits_length( hits ) == 0 ); + } + cpix_Hits_destroy( hits ); + } + cpix_Query_destroy( query ); + } + else + { + ITK_MSG( testMgr, "Creating query failed" ); + } + return ret; + } + +void RandomTest::assertState(Itk::TestMgr* testMgr, size_t item) + { + bool valid = validState( testMgr, item ); + + if ( indexed( item ) ) + { + ITK_ASSERT( testMgr, + valid, + "Indexed item not found" ); + } + else + { + ITK_ASSERT( testMgr, + valid, + "Removed item found" ); + } + } + +void RandomTest::assertContent(Itk::TestMgr* testMgr, size_t item) +{ + wstring content = testCorpus_.item(item); + + wostringstream queryText; + queryText<<"\""; + queryText<idxDb(), + query); + + if ( hits ) + { + if ( indexed( item ) ) + { + bool found = false; + for ( int i = 0; i < cpix_Hits_length( hits ); i++ ) + { + cpix_Document doc; + cpix_Hits_doc( hits, i, &doc ); + + const wchar_t* id = cpix_Document_getFieldValue( &doc, LCPIX_DOCUID_FIELD ); + + if ( id ) + { + wstring str( id ); + if ( GetItemIndex( str.c_str() ) == item ) + { + found = true; + break; + } + } + } + if ( !found ) + { + ITK_MSG( testMgr, "Failed content %d", item ); + } + + // TODO should not assert here, because then + // cpix_Hits instance leaks here. Use + // ITK_EXPECT - that does not throw + ITK_ASSERT( testMgr, + found, + "Content check failed for %d" ); + + } + else + { + ITK_MSG( testMgr, "Content asserting is unreliable for item not in index" ); + } + cpix_Hits_destroy( hits ); + } + cpix_Query_destroy( query ); + } + else + { + ITK_MSG( testMgr, "Creating query failed for %S", queryText.str().c_str() ); + } +} + +void RandomTest::assertAllItemsState(Itk::TestMgr* testMgr) + { + for ( int i = 0; i < testCorpus_.size(); i++ ) + { + assertState(testMgr, i); + } + } + +void RandomTest::assertItemsState(Itk::TestMgr* testMgr, vector& vector) + { + for (int i = 0; i < vector.size(); i++) + { + assertState(testMgr, vector[i]); + } + } + +size_t RandomTest::termMatchCount(Itk::TestMgr* testMgr, wstring& term) + { + wostringstream queryText; + queryText<<"\""; + queryText<idxDb(), + query); + + if ( hits ) + { + for ( int i = 0; i < cpix_Hits_length( hits ); i++ ) + { + cpix_Document doc; + cpix_Hits_doc( hits, i, &doc ); + + const wchar_t* id = cpix_Document_getFieldValue( &doc, LCPIX_DOCUID_FIELD ); + + if ( id ) + { + wstring str( id ); + size_t index = GetItemIndex( str.c_str() ); + wstring content( testCorpus_.item( index ) ); + + // TODO expect instead of + // assert otherwise cpix_Hits + // instance leaks + ITK_ASSERT( testMgr, + containsTerm( content, term ), + "False positive? Term %S not found in %S", term.c_str(), content.c_str() ); + matches++; + } + } + cpix_Hits_destroy( hits ); + } + cpix_Query_destroy( query ); + } + else + { + ITK_MSG( testMgr, "Creating query failed" ); + } + return matches; + } + +void RandomTest::assertTracedTerms(Itk::TestMgr* testMgr) + { + for (int i = 0; i < tracedTerms_.size(); i++) + { + int matches = termMatchCount( testMgr, tracedTerms_[i] ); + ITK_ASSERT(testMgr, + matches == tracedTermsFreq_[i], + "Term %S frequence is %d. Expected %d", + tracedTerms_[i].c_str(), + matches, + tracedTermsFreq_[i] ); + } + } + +void RandomTest::assertIndexedState(Itk::TestMgr* testMgr) + { + bool valid = true; + for (set::iterator itr = indexed_.begin(); itr != indexed_.end(); itr++) + { + if ( !( valid &= validState( testMgr, *itr ) ) ) + { + ITK_MSG( testMgr, "Indexed item %d not in index", *itr ); + } + } + ITK_ASSERT( testMgr, valid, "Operations were not succesful" ); + } + + +// +// Test cases +// + +void RandomTest::testIndexAndDelete(Itk::TestMgr* testMgr) + { + ITK_REPORT(testMgr, + "RandomSeed", + "%d", + randomSeed_); + + indexRandoms(testMgr, 50); + assertIndexedState(testMgr); + + bool valid = true; + for (int i = 0; i < 5; i++) + { + for (int i = 0; i < 10; i++) + { + // OBS int item = -1; + int + item = indexRandom(testMgr); + idxUtil_->flush(); + valid = valid && validState(testMgr, + item); + // OBS if ( !( valid &= validState( testMgr, item = indexRandom( testMgr ) ) ) ) + if (!valid) + { + ITK_MSG( testMgr, "Indexing item %d had failed", item ); + goto mainloop_exit; // double break + } + } + for (int i = 0; i < 10; i++) + { + // OBS int item = -1; + int + item = deleteRandom(testMgr); + idxUtil_->flush(); + valid = valid && validState(testMgr, + item); + // OBS if ( !( valid &= validState( testMgr, item = deleteRandom( testMgr ) ) ) ) + if (!valid) + { + ITK_MSG( testMgr, "Deleting item %d had failed", item ); + goto mainloop_exit; // double break + } + } + } + deleteRandoms(testMgr, 50); + + mainloop_exit: + ITK_ASSERT( testMgr, valid, "Operations were not succesful" ); + } + +void RandomTest::testDeleteAll(Itk::TestMgr* testMgr) + { + indexRandoms(testMgr, 50); + assertIndexedState(testMgr); + vector deleted; + for (int i = 0; i < 50; i++) + { + deleted.push_back( deleteRandom(testMgr) ); + } + idxUtil_->flush(); + bool valid = true; + for (int i = 0; i < deleted.size(); i++) + { + // OBS if ( !( valid &= validState( testMgr, deleted[i] ) ) ) + valid = valid && validState( testMgr, deleted[i] ); + if ( !( valid ) ) + { + ITK_MSG( testMgr, "Deleting item %d had failed", deleted[i] ); + break; + } + } + ITK_ASSERT( testMgr, valid, "Deletes were not succesful" ); + } + +void RandomTest::testContent(Itk::TestMgr* testMgr) + { + for (int i = 0; i < 10; i++) + { + int item = indexRandom(testMgr); + idxUtil_->flush(); + assertContent(testMgr, item); + } + + // clean up + deleteRandoms( testMgr, 10 ); + idxUtil_->flush(); + } + +void RandomTest::testPersistence(Itk::TestMgr* testMgr) + { + indexRandoms(testMgr, 20); + assertIndexedState(testMgr); + + idxUtil_->reload(); + + assertIndexedState(testMgr); + vector deleted; + for (int i = 0; i < 20; i++) + { + deleted.push_back( deleteRandom(testMgr) ); + } + + idxUtil_->reload(); + + bool valid = true; + for (int i = 0; i < deleted.size(); i++) + { + // OBS if ( !( valid &= validState( testMgr, deleted[i] ) ) ) + valid = valid && validState( testMgr, deleted[i] ); + if ( !( valid ) ) + { + ITK_MSG( testMgr, "Deleting item %d had failed", deleted[i] ); + break; + } + } + ITK_ASSERT( testMgr, valid, "Deletes were not succesful" ); + } + +void RandomTest::testSearches(Itk::TestMgr* testMgr) +{ + traceTerm(testMgr, L"happy"); + traceTerm(testMgr, L"people"); + traceTerm(testMgr, L"come"); + traceTerm(testMgr, L"guy"); + traceTerm(testMgr, L"done"); + traceTerm(testMgr, L"folder"); + traceTerm(testMgr, L"hello"); + traceTerm(testMgr, L"mister"); + traceTerm(testMgr, L"would"); + traceTerm(testMgr, L"could"); + traceTerm(testMgr, L"tomorrow"); + traceTerm(testMgr, L"sorry" ); + traceTerm(testMgr, L"about"); + traceTerm(testMgr, L"what"); + + indexRandom( testMgr ); deleteRandom( testMgr ); + idxUtil_->flush(); + + assertTracedTerms( testMgr ); + indexRandoms( testMgr, 20 ); + assertTracedTerms( testMgr ); + deleteRandoms( testMgr, 20 ); + assertTracedTerms( testMgr ); + + untraceTerms(testMgr); +} + +void RandomTest::testCreatedIndex(Itk::TestMgr* testMgr) { + + int item = indexRandom( testMgr ); + idxUtil_->flush(); + assertState( testMgr, item ); + + assertContent( testMgr, item ); + + // recreate + idxUtil_->recreate(); + markIndexed(item, + false); + + // test search + assertState( testMgr, item ); + + // test delete + idxUtil_->recreate(); + + wstring id = GetItemId(item ); + + int32_t + deleted = cpix_IdxDb_deleteDocuments(idxUtil_->idxDb(), + id.c_str()); + + ITK_ASSERT(testMgr, + cpix_Succeeded(idxUtil_->idxDb()), + "Trying to delete failed"); + ITK_EXPECT(testMgr, + deleted == 0, + "There should not have been an item to delete"); +} + +