# HG changeset patch # User hgs # Date 1277701493 -19800 # Node ID 6547bf8ca13a30cbcdce6a57bd93e67cdde9fce8 # Parent a5fbfefd615f449c2297c3399562df9cdd0cd2af 201025 diff -r a5fbfefd615f -r 6547bf8ca13a cenrep/readme --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cenrep/readme Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,13 @@ + +The 2001f6fb.cre file can be updated with the list of plugins to be excluded from indexing. +Only the UID of each plugin has to be added. + +Steps to update the 2001f6fb.cre +===================================== +1. Update the list of UIDs in the 2001f6fb.txt file. +2. Copy the file to the \epoc32\winscw\c folder. +3. Open a command prompt in the \epoc32\release\winscw\udeb folder. +4. Execute the "centrepconv.exe 2001f6fb.txt" command. This command creates the 2001f6fb.cre file in the \epoc32\winscw\c folder. + Before executing this command, make sure the emulator is not running. The command starts the emulator, performs the conversion and then closes the emulator. +5. Copy the generated 2001f6fb.cre file to the \searchsrv\cenrep folder and build the searchsrv component. +6. Create the ROM image. \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a harvester/group/bld.inf --- a/harvester/group/bld.inf Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/group/bld.inf Mon Jun 28 10:34:53 2010 +0530 @@ -15,7 +15,6 @@ * */ #include "../pluginfw/group/bld.inf" -#include "../HarvesterServer/group/bld.inf" PRJ_TESTMMPFILES // Make ARMV5 UREL/UDEB SISX package //gnumakefile ../sis/makeSis.mk diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/group/bld.inf --- a/harvester/harvesterserver/group/bld.inf Fri Jun 11 14:43:47 2010 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -/* -* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). -* All rights reserved. 
-* This component and the accompanying materials are made available -* under the terms of "Eclipse Public License v1.0" -* which accompanies this distribution, and is available -* at the URL "http://www.eclipse.org/legal/epl-v10.html". -* -* Initial Contributors: -* Nokia Corporation - initial contribution. -* -* Contributors: -* -* Description: -* -*/ - - -PRJ_MMPFILES - -HarvesterServer.mmp - diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/group/harvesterserver.mmp --- a/harvester/harvesterserver/group/harvesterserver.mmp Fri Jun 11 14:43:47 2010 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -/* -* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). -* All rights reserved. -* This component and the accompanying materials are made available -* under the terms of "Eclipse Public License v1.0" -* which accompanies this distribution, and is available -* at the URL "http://www.eclipse.org/legal/epl-v10.html". -* -* Initial Contributors: -* Nokia Corporation - initial contribution. 
-* -* Contributors: -* -* Description: -* -*/ - - -TARGET CPixHarvesterServer.exe -TARGETTYPE exe -UID 0x0 0x2001F6FB -EPOCSTACKSIZE 0x5000 -EPOCHEAPSIZE 0x50000 0x300000 // From about 65KB to about 3 MB - -SOURCEPATH ../data -START RESOURCE 2001F6FB.rss -TARGETPATH /private/101f875a/import -END - -USERINCLUDE ../inc -USERINCLUDE ../../../searchengine/cpix/cpix/inc/public -USERINCLUDE ../traces - -MW_LAYER_SYSTEMINCLUDE - -SOURCEPATH ../src -SOURCE CHarvesterServer.cpp -SOURCE CHarvesterServerSession.cpp -SOURCE CIndexingManager.cpp -SOURCE CBlacklistMgr.cpp -SOURCE CBlacklistDb.cpp -SOURCE ccontentinfodb.cpp -SOURCE contentinfomgr.cpp -SOURCE ccontentinfo.cpp - -LIBRARY euser.lib -LIBRARY estor.lib -LIBRARY efsrv.lib -LIBRARY CPixSearchClient.lib -LIBRARY CPixHarvesterPluginInterface.lib -LIBRARY edbms.lib -LIBRARY sqldb.lib -LIBRARY centralrepository.lib - -// Logging -LIBRARY flogger.lib -VENDORID 0 - -CAPABILITY ALL -TCB -DRM -AllFiles - -// End of File diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/harvester.pro --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/harvester/harvesterserver/harvester.pro Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,75 @@ +# +# Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +# All rights reserved. +# This component and the accompanying materials are made available +# under the terms of "Eclipse Public License v1.0" +# which accompanies this distribution, and is available +# at the URL "http://www.eclipse.org/legal/epl-v10.html". +# +# Initial Contributors: +# Nokia Corporation - initial contribution. 
+# +# Contributors: +# +# Description: +# + +TEMPLATE = app +TARGET = cpixharvesterserver + +QT += core +QT -= gui + +symbian:TARGET.UID3 = 0x2001f6fb +symbian:TARGET.CAPABILITY = ALL -TCB -DRM -AllFiles + +symbian{ + INCLUDEPATH += $$MW_LAYER_SYSTEMINCLUDE + + INCLUDEPATH += ../../searchengine/cpix/cpix/inc/public + + HEADERS += inc/CHarvesterServer.h + HEADERS += inc/CHarvesterServerSession.h + HEADERS += inc/CIndexingManager.h + HEADERS += inc/CBlacklistMgr.h + HEADERS += inc/CBlacklistDb.h + HEADERS += inc/ccotentinfodb.h + HEADERS += inc/contentinfomgr.h + HEADERS += inc/ccontentinfo.h + HEADERS += inc/harvesterserver.pan + HEADERS += inc/harvesterservercommons.h + HEADERS += inc/qtmythread.h + HEADERS += traces/CBlacklistDbTraces.h + HEADERS += traces/CBlacklistMgrTraces.h + HEADERS += traces/ccontentinfodbTraces.h + HEADERS += traces/ccontentinfoTraces.h + HEADERS += traces/CIndexingManagerTraces.h + HEADERS += traces/contentinfomgrTraces.h + HEADERS += traces/OstTraceDefinitions.h + HEADERS += traces/fixed_id.definitions + + SOURCES += src/main.cpp + SOURCES += src/CHarvesterServer.cpp + SOURCES += src/CHarvesterServerSession.cpp + SOURCES += src/CIndexingManager.cpp + SOURCES += src/CBlacklistMgr.cpp + SOURCES += src/CBlacklistDb.cpp + SOURCES += src/ccontentinfodb.cpp + SOURCES += src/contentinfomgr.cpp + SOURCES += src/ccontentinfo.cpp + + resourceTargetBlock = \ + "SOURCEPATH data" \ + "START RESOURCE 2001F6FB.rss" \ + "TARGETPATH /private/101f875a/import"\ + "END" \ + + MMP_RULES += resourceTargetBlock + + MMP_RULES += "EPOCSTACKSIZE 0x5000" + MMP_RULES += "EPOCHEAPSIZE 0x50000 0x300000" #From about 65KB to about 3 MB + + LIBS += -leuser -lestor -lCPixSearchClient -lCPixHarvesterPluginInterface -ledbms -lsqldb -lcentralrepository -lflogger + LIBS += -lefsrv + +} diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/inc/cblacklistdb.h --- a/harvester/harvesterserver/inc/cblacklistdb.h Fri Jun 11 14:43:47 2010 +0300 +++ 
b/harvester/harvesterserver/inc/cblacklistdb.h Mon Jun 28 10:34:53 2010 +0530 @@ -98,27 +98,27 @@ TBool FindL(TInt32 aPluginUid); /* - * @description Adds the given uid of a plugin to the unloadlist table. + * @description Adds the given uid of a plugin to the dontloadlist table. * @param aPluginUid Uid of the plugin - * @return sysmbian error code + * @return Symbian error code * Leaves in case of errors. */ - TInt AddtoUnloadListL( TInt32 aPluginUid ); + TInt AddtoDontloadListL( TInt32 aPluginUid ); /* - * @description remove the given uid of a plugin to the unloadlist table. + * @description Removes the given uid of a plugin from the dontloadlist table. * @param aPluginUid Uid of the plugin * Leaves in case of errors. */ - void RemoveFromUnloadListL( TInt32 aPluginUid ); + void RemoveFromDontloadListL( TInt32 aPluginUid ); /* - * @description Find the given uid of a plugin to the unloadlist table. + * @description Finds the given uid of a plugin in the dontloadlist table. * @param aPluginUid Uid of the plugin * @return ETrue if exists else returns EFalse * Leaves in case of errors. */ - TBool FindFromUnloadListL( TInt32 aPluginUid ); + TBool FindInDontloadListL( TInt32 aPluginUid ); private : /* @@ -132,14 +132,14 @@ * @return CDbColSet database column set * Leaves in case of errors. */ - CDbColSet* CreateColumnSetLC(); + CDbColSet* CreateBlacklistColumnSetLC(); /* - * @description Creates Column set for unload table. + * @description Creates Column set for dontload table. * @return CDbColSet database column set * Leaves in case of errors. 
*/ - CDbColSet* CreateUnloadColumnSetLC(); + CDbColSet* CreateDontloadColumnSetLC(); private: /* * A handle to a file server session.Owned diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/inc/cblacklistmgr.h --- a/harvester/harvesterserver/inc/cblacklistmgr.h Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/inc/cblacklistmgr.h Mon Jun 28 10:34:53 2010 +0530 @@ -88,27 +88,27 @@ TBool FindL(TUid aPluginUid , TInt aVersion); /* - * @description Adds the given uid of a plugin to the unload list of Blacklist DB. + * @description Adds the given uid of a plugin to the dontload list of Blacklist DB. * @param aPluginUid Uid of the plugin * @return sysmbian error code * Leaves in case of errors. */ - TInt AddtoUnloadListL( TUid aPluginUid ); + TInt AddtoDontloadListL( TUid aPluginUid ); /* - * @description removes the given uid of a plugin from the unload list of Blacklist DB. + * @description Removes the given uid of a plugin from the dontload list of Blacklist DB. * @param aPluginUid Uid of the plugin * Leaves in case of errors. */ - void RemoveFromUnloadListL( TUid aPluginUid ); + void RemoveFromDontloadListL( TUid aPluginUid ); /* - * @description Checks wether the plugin uid is exists in the database unload list or not. + * @description Checks whether the plugin uid exists in the database dontload list or not. * @param aPluginUid Uid of the plugin * @return ETrue if uid exists else returns EFalse * Leaves in case of errors. 
*/ - TBool FindfromUnloadListL(TUid aPluginUid ); + TBool FindInDontloadListL(TUid aPluginUid ); private: /* diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/inc/cindexingmanager.h --- a/harvester/harvesterserver/inc/cindexingmanager.h Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/inc/cindexingmanager.h Mon Jun 28 10:34:53 2010 +0530 @@ -26,6 +26,7 @@ //Forward Declaration class CBlacklistMgr; class CContentInfoMgr; +class CContentInfo; class CIndexingManager : public CActive, public MIndexingService { @@ -101,13 +102,28 @@ */ void SaveL(); /** - * Update content info Db with the plugin details + * Add an entry to the content info Db with the plugin details.If an entry with the given + * plugin name is already available in contentinfo db then the blacklist status of the plugin + * is updated with KEnable. */ - void UpdateContentInfoDbL( const TDesC& aPluginName); + void UpdateContentInfoDbL( const TDesC& aPluginName, CContentInfo* aContentinfo); /** - * Update the unload list in a separate table in blacklist database + * Update the dontload list in a separate table in blacklist database. + * If any error occurs in reading Uid values from centrep, then the dontload list + * is ignored. */ - void UpdateUnloadListL(); + void UpdateDontloadListL(); + /** + * Returns the load status of the plugin. This method will check both tables in + * Blacklist database and return the status. + * returns ETrue if uid is found in any table of blacklist database else returns EFalse. 
+ */ + TBool GetPluginLoadStatusL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName); + + /** + * Loads the Harvesterplugin with given plugin uid + */ + void LoadHarvesterpluginL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName); private: CIndexingManager(); diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/inc/qtmythread.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/harvester/harvesterserver/inc/qtmythread.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,31 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: QThread subclass for the harvester server. +* +*/ + +#ifndef QTMYTHREAD_H_ +#define QTMYTHREAD_H_ + +#include <QThread> + +class HarvesterThread : public QThread + { + Q_OBJECT + +protected: + void run(); + }; + +#endif /* QTMYTHREAD_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/src/cblacklistdb.cpp --- a/harvester/harvesterserver/src/cblacklistdb.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/src/cblacklistdb.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -41,14 +41,14 @@ //SQL query to fetch all the records in database _LIT(KBlistSqlFormatAll , "SELECT * FROM table"); _LIT(KDriveC, "c:"); -//Unload plugins Table name in blacklist database -_LIT( KBLUnloadTableName , "unloadtable" ); -//SQL query to fetch all the records in unload table -_LIT(KUnloadlistSqlFormatAll , "SELECT * FROM unloadtable"); -//SQL query to delete the records with given uid in unload table -_LIT(KunloadlistSqlDelete, "DELETE FROM unloadtable WHERE uid=%d"); -//SQL query to fetch the records with given uid from unload table 
-_LIT(KUnloadlistSqlFormatSeek , "SELECT * FROM unloadtable WHERE uid=%d"); +//dontload plugins Table name in blacklist database +_LIT( KBLdontloadTableName , "dontloadtable" ); +//SQL query to fetch all the records in dontload table +_LIT(KdontloadlistSqlFormatAll , "SELECT * FROM dontloadtable"); +//SQL query to delete the records with given uid in dontload table +_LIT(KdontloadlistSqlDelete, "DELETE FROM dontloadtable WHERE uid=%d"); +//SQL query to fetch the records with given uid from dontload table +_LIT(KdontloadlistSqlFormatSeek , "SELECT * FROM dontloadtable WHERE uid=%d"); // ----------------------------------------------------------------------------- // CBlacklistDb::NewL() // ----------------------------------------------------------------------------- @@ -378,11 +378,11 @@ //create the database User::LeaveIfError( iDatabase.Create( iFs , datafile ) ); - CDbColSet* columns = CreateColumnSetLC();//creates the columns and push to cleanupstack + CDbColSet* columns = CreateBlacklistColumnSetLC();//creates the columns and push to cleanupstack User::LeaveIfError( iDatabase.CreateTable( KBlacklistTableName , *columns ) ); - //Add table to store the unload plugins - CDbColSet* unloadcolumns = CreateUnloadColumnSetLC(); //creates the columns and push to cleanupstack - User::LeaveIfError( iDatabase.CreateTable( KBLUnloadTableName , *unloadcolumns ) ); + //Add table to store the dontload plugins + CDbColSet* dontloadcolumns = CreateDontloadColumnSetLC(); //creates the columns and push to cleanupstack + User::LeaveIfError( iDatabase.CreateTable( KBLdontloadTableName , *dontloadcolumns ) ); //clean up of variables (columns and dataFile) CleanupStack::PopAndDestroy( 2 ); @@ -391,12 +391,12 @@ } // ----------------------------------------------------------------------------- -// CBlacklistDb::CreateColumnSetLC +// CBlacklistDb::CreateBlacklistColumnSetLC // ----------------------------------------------------------------------------- // -CDbColSet* 
CBlacklistDb::CreateColumnSetLC() +CDbColSet* CBlacklistDb::CreateBlacklistColumnSetLC() { - OstTraceFunctionEntry0( CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY ); + OstTraceFunctionEntry0( CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY ); CPIXLOGSTRING("CBlacklistDb::CreateColumnSetLC(): Enter"); CDbColSet* columns = CDbColSet::NewLC(); @@ -414,17 +414,17 @@ CPIXLOGSTRING("CBlacklistDb::CreateColumnSetLC(): Exit"); - OstTraceFunctionExit0( CBLACKLISTDB_CREATECOLUMNSETLC_EXIT ); + OstTraceFunctionExit0( CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT ); return columns; // columns stays on CleanupStack } // ----------------------------------------------------------------------------- -// CBlacklistDb::CreateUnloadColumnSetLC +// CBlacklistDb::CreateDontloadColumnSetLC // ----------------------------------------------------------------------------- // -CDbColSet* CBlacklistDb::CreateUnloadColumnSetLC() +CDbColSet* CBlacklistDb::CreateDontloadColumnSetLC() { - OstTraceFunctionEntry0( CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY ); + OstTraceFunctionEntry0( CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY ); CDbColSet* columns = CDbColSet::NewLC(); //Add uid column @@ -432,36 +432,39 @@ col.iAttributes = TDbCol::ENotNull ; columns->AddL( col ); - OstTraceFunctionExit0( CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT ); + OstTraceFunctionExit0( CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT ); return columns; // columns stays on CleanupStack } // ----------------------------------------------------------------------------- -// CBlacklistDb::AddtoUnloadListL +// CBlacklistDb::AddtoDontloadListL // ----------------------------------------------------------------------------- // -TInt CBlacklistDb::AddtoUnloadListL( TInt32 aPluginUid ) +TInt CBlacklistDb::AddtoDontloadListL( TInt32 aPluginUid ) { - OstTraceFunctionEntry0( CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY ); + OstTraceFunctionEntry0( CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY ); if ( !iOpened ) return KErrNotReady; TInt err; - //Prepare the view + 
//Prepare the view with all the rows in the dontload table RDbView dbView; CleanupClosePushL( dbView ); - err = dbView.Prepare( iDatabase , TDbQuery( KUnloadlistSqlFormatAll ) ) ; + err = dbView.Prepare( iDatabase , TDbQuery( KdontloadlistSqlFormatAll ) ) ; if ( err == KErrNone ) { TRAP( err , dbView.InsertL() ); - CDbColSet* colSet = dbView.ColSetL(); - TDbColNo uidcolno = colSet->ColNo( Kuid ); - dbView.SetColL( uidcolno , aPluginUid ); - dbView.PutL(); + if ( err == KErrNone ) + { + CDbColSet* colSet = dbView.ColSetL(); + TDbColNo uidcolno = colSet->ColNo( Kuid ); + dbView.SetColL( uidcolno , aPluginUid ); + dbView.PutL(); + } //If addition failed, rollback - if(err != KErrNone) + else { iDatabase.Rollback(); } @@ -469,50 +472,42 @@ CleanupStack::PopAndDestroy( &dbView ); // dbView/ User::LeaveIfError( iDatabase.Compact() ); - OstTraceFunctionExit0( CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT ); + OstTraceFunctionExit0( CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT ); return err; } // ----------------------------------------------------------------------------- -// CBlacklistDb::RemoveFromUnloadListL +// CBlacklistDb::RemoveFromDontloadListL // ----------------------------------------------------------------------------- // -void CBlacklistDb::RemoveFromUnloadListL( TInt32 aPluginUid ) +void CBlacklistDb::RemoveFromDontloadListL( TInt32 aPluginUid ) { - OstTraceFunctionEntry0( CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY ); + OstTraceFunctionEntry0( CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY ); if ( !iOpened ) return ; //Remove the item record to database // Create the sql statement. KBlistSqlDelete TBuf sql; - sql.Format( KunloadlistSqlDelete , aPluginUid ); + sql.Format( KdontloadlistSqlDelete , aPluginUid ); //delete the row. 
TInt rowCount( iDatabase.Execute(sql) ); - if(rowCount > 0) - { - OstTrace0( TRACE_NORMAL, CBLACKLISTDB_REMOVEFROMUNLOADLISTL, "CBlacklistDb::RemoveFromUnloadListL :: removed UID succesfully" ); - CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): Removed UID succesfully"); - } - else - { - OstTrace0( TRACE_NORMAL, DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL, "CBlacklistDb::RemoveFromUnloadListL:: UID not found" ); - CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): UID not found"); - } - CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): Exit"); + OstTrace1( TRACE_NORMAL, DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL, "No. of rows removed succesfully is ;RowCount=%d", rowCount ); - OstTraceFunctionExit0( CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT ); + CPIXLOGSTRING("CBlacklistDb::RemoveFromDontloadListL(): Exit"); + + OstTraceFunctionExit0( CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT ); return ; } // ----------------------------------------------------------------------------- -// CBlacklistDb::FindFromUnloadListL +// CBlacklistDb::FindInDontloadListL // ----------------------------------------------------------------------------- // -TBool CBlacklistDb::FindFromUnloadListL( TInt32 aPluginUid ) +TBool CBlacklistDb::FindInDontloadListL( TInt32 aPluginUid ) { - OstTraceFunctionEntry0( CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY ); - CPIXLOGSTRING2("CBlacklistDb::FindFromUnloadListL(): Uid = %x " , aPluginUid ); + OstTraceFunctionEntry0( CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY ); + CPIXLOGSTRING2("CBlacklistDb::FindInDontloadListL(): Uid = %x " , aPluginUid ); if ( !iOpened ) return EFalse; @@ -520,9 +515,9 @@ //Check if the item is available in database //Prepare the sql TBuf sql; - sql.Format( KUnloadlistSqlFormatSeek , aPluginUid ); + sql.Format( KdontloadlistSqlFormatSeek , aPluginUid ); TBool found = EFalse; - //Prepare the view + //Prepare the view to get the list of rows which has the given Uid RDbView dbView; CleanupClosePushL( dbView ); @@ -533,11 +528,11 @@ if ( 
isAtRow ) { - OstTrace0( TRACE_NORMAL, CBLACKLISTDB_FINDFROMUNLOADLISTL, "CBlacklistDb::FindFromUnloadListL::UID found" ); - CPIXLOGSTRING("CBlacklistDb::FindFromUnloadListL(): UID found"); + OstTrace0( TRACE_NORMAL, CBLACKLISTDB_FINDFROMDONTLOADLISTL, "CBlacklistDb::FindFromDontloadListL::UID found" ); + CPIXLOGSTRING("CBlacklistDb::FindFromDontloadListL(): UID found"); found = ETrue; } CleanupStack::PopAndDestroy( &dbView ); // dbView/ - OstTraceFunctionExit0( CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT ); + OstTraceFunctionExit0( CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT ); return found; } diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/src/cblacklistmgr.cpp --- a/harvester/harvesterserver/src/cblacklistmgr.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/src/cblacklistmgr.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -153,52 +153,52 @@ } // ----------------------------------------------------------------------------- -// CBlacklistMgr::AddtoUnloadListL() +// CBlacklistMgr::AddtoDontloadListL() // ----------------------------------------------------------------------------- // -TInt CBlacklistMgr::AddtoUnloadListL( TUid aPluginUid ) +TInt CBlacklistMgr::AddtoDontloadListL( TUid aPluginUid ) { OstTraceFunctionEntry0( CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY ); - OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_ADDTOUNLOADLISTL, "CBlacklistMgr::AddtoUnloadListL;Uid=%x", aPluginUid.iUid ); - CPIXLOGSTRING2("CBlacklistMgr::AddtoUnloadListL(): Uid = %x " , aPluginUid.iUid ); + OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_ADDTODONTLOADLISTL, "CBlacklistMgr::AddtoDontloadListL;Uid=%x", aPluginUid.iUid ); + CPIXLOGSTRING2("CBlacklistMgr::AddtoDontloadListL(): Uid = %x " , aPluginUid.iUid ); //Check if the record with given plugin uid is already available in database or not //If available just ignore the addition //If there is no record found in database with given uid, add new record with given uid TInt err = KErrNone; - if( !(iBlacklistDb->FindFromUnloadListL( 
aPluginUid.iUid )) ) + if( !(iBlacklistDb->FindInDontloadListL( aPluginUid.iUid )) ) { - err = iBlacklistDb->AddtoUnloadListL( aPluginUid.iUid ); + err = iBlacklistDb->AddtoDontloadListL( aPluginUid.iUid ); } - CPIXLOGSTRING("CBlacklistMgr::AddtoUnloadListL(): Exit"); - OstTraceFunctionExit0( CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT ); + CPIXLOGSTRING("CBlacklistMgr::AddtoDontloadListL(): Exit"); + OstTraceFunctionExit0( CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT ); return err; } // ----------------------------------------------------------------------------- -// CBlacklistMgr::RemoveFromUnloadListL() +// CBlacklistMgr::RemoveFromDontloadListL() // ----------------------------------------------------------------------------- // -void CBlacklistMgr::RemoveFromUnloadListL( TUid aPluginUid ) +void CBlacklistMgr::RemoveFromDontloadListL( TUid aPluginUid ) { OstTraceFunctionEntry0( CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY ); - OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_REMOVEFROMUNLOADLISTL, "CBlacklistMgr::RemoveFromUnloadListL;Uid=%x", aPluginUid.iUid ); - CPIXLOGSTRING2("CBlacklistMgr::RemoveFromUnloadListL(): Uid = %x " , aPluginUid.iUid ); + OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_REMOVEFROMUNLOADLISTL, "CBlacklistMgr::RemoveFromDontloadListL;Uid=%x", aPluginUid.iUid ); + CPIXLOGSTRING2("CBlacklistMgr::RemoveFromDontloadListL(): Uid = %x " , aPluginUid.iUid ); //Remove the item record to database - iBlacklistDb->RemoveFromUnloadListL( aPluginUid.iUid ); + iBlacklistDb->RemoveFromDontloadListL( aPluginUid.iUid ); - CPIXLOGSTRING("CBlacklistMgr::RemoveFromUnloadListL(): Exit"); - OstTraceFunctionExit0( CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT ); + CPIXLOGSTRING("CBlacklistMgr::RemoveFromDontloadListL(): Exit"); + OstTraceFunctionExit0( CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT ); } // ----------------------------------------------------------------------------- -// CBlacklistMgr::FindfromUnloadListL() +// CBlacklistMgr::FindInDontloadListL() // 
----------------------------------------------------------------------------- // -TBool CBlacklistMgr::FindfromUnloadListL(TUid aPluginUid ) +TBool CBlacklistMgr::FindInDontloadListL(TUid aPluginUid ) { - CPIXLOGSTRING2("CBlacklistMgr::FindfromUnloadListL(): Uid = %x " , aPluginUid.iUid ); - OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_FINDFROMUNLOADLISTL, "CBlacklistMgr::FindfromUnloadListL;Uid=%x", aPluginUid.iUid ); - return ( iBlacklistDb->FindFromUnloadListL( aPluginUid.iUid ) ); + CPIXLOGSTRING2("CBlacklistMgr::FindInDontloadListL(): Uid = %x " , aPluginUid.iUid ); + OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_FINDINDONTLOADLISTL, "CBlacklistMgr::FindInDontloadListL;Uid=%x", aPluginUid.iUid ); + return ( iBlacklistDb->FindInDontloadListL( aPluginUid.iUid ) ); } diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/src/charvesterserver.cpp --- a/harvester/harvesterserver/src/charvesterserver.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/src/charvesterserver.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -129,12 +129,15 @@ // void CHarvesterServer::ThreadFunctionL() { + //QCoreApplication installs one ActiveScheduler. So removing the Active scheduler install + //from ThreadFunctionL + // Construct active scheduler - CActiveScheduler* activeScheduler = new ( ELeave ) CActiveScheduler; - CleanupStack::PushL(activeScheduler); + //CActiveScheduler* activeScheduler = new ( ELeave ) CActiveScheduler; + //CleanupStack::PushL(activeScheduler); // Install active scheduler - CActiveScheduler::Install(activeScheduler); + //CActiveScheduler::Install(activeScheduler); // Construct server CHarvesterServer* server = CHarvesterServer::NewLC(); @@ -173,7 +176,7 @@ // Cleanup CleanupStack::PopAndDestroy( server ); - CleanupStack::PopAndDestroy( activeScheduler ); + //CleanupStack::PopAndDestroy( activeScheduler ); } // ----------------------------------------------------------------------------- @@ -207,10 +210,10 @@ // Returns the address of the function to be called. 
// ----------------------------------------------------------------------------- // -TInt E32Main() - { - CHarvesterServer::ThreadFunction(); - return KErrNone; - } +//TInt E32Main() +// { +// CHarvesterServer::ThreadFunction(); +// return KErrNone; +// } // End of File diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/src/cindexingmanager.cpp --- a/harvester/harvesterserver/src/cindexingmanager.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/src/cindexingmanager.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -49,11 +49,11 @@ //constants for enable and disable status const TInt KEnable = 1; const TInt KDisable = 0; - +//Uid of Harvester server central repository database const TUid KCPIXHSrepoUidMenu = {0x2001f6fb}; //Length of uid string in cenrep -const TInt KuidStringLength = 8; +const TInt KCenrepUidLength = 8; // ----------------------------------------------------------------------------- // CHarvesterServer::NewL() // ----------------------------------------------------------------------------- @@ -141,7 +141,7 @@ //Instantiate Contentinfo manager iContentInfoMgr = CContentInfoMgr::NewL(); - UpdateUnloadListL(); + UpdateDontloadListL(); // Load plugins LoadPluginsL(); @@ -308,6 +308,7 @@ // void CIndexingManager::LoadPluginsL() { + OstTraceFunctionEntry0( CINDEXINGMANAGER_LOADPLUGINSL_ENTRY ); RImplInfoPtrArray infoArray; TCleanupItem cleanupItem( CPixSearchECom::CleanupEComArray, &infoArray ); CleanupStack::PushL( cleanupItem ); @@ -316,69 +317,30 @@ TInt count( 0 ); count = infoArray.Count(); - //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: PLUGINS COUNT %d", count ); - CIndexingPlugin* plugin = NULL; - TInt contentcount(iContentInfoMgr->GetContentCountL() ); // If the content count in the content info DB is not equal to the plugin count, reset the content info DB if ( contentcount != count) iContentInfoMgr->ResetL(); + CContentInfo* contentinfo = CContentInfo::NewL(); + for ( TInt i = 0; i < count; i++ ) { TUid uid = 
infoArray[i]->ImplementationUid(); // Create the plug-ins TInt version = infoArray[i]->Version(); - //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: PLUGINS UID %x", uid ); - plugin = NULL; + //Update the details of the plugin in Contentinfo DB + UpdateContentInfoDbL( infoArray[i]->DisplayName(), contentinfo ); + //Get the load status of the plugin. + TBool pluginloadstatus = GetPluginLoadStatusL ( uid, version, infoArray[i]->DisplayName() ); - UpdateContentInfoDbL( infoArray[i]->DisplayName() ); - TBool loadplugin = ETrue; - //status of plugin in blacklist table - TBool pluginblacklisted = iBlacklistMgr->FindL( uid, version); - //status of plugin in unload table - TBool loadstatus = iBlacklistMgr->FindfromUnloadListL( uid ); - //Check the Uid in both the tables of the blacklist db - if ( loadstatus || pluginblacklisted ) - loadplugin = EFalse; - - if ( loadstatus ) - { - //Found in unload list.Update the indexing and blacklist status in contentinfo DB - iContentInfoMgr->UpdatePluginIndexStatusL( infoArray[i]->DisplayName() , KDisable ); - iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KDisable ); - } - if ( pluginblacklisted ) - //Update the blacklist status in content info db - iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KEnable ); - - if ( loadplugin ) + if ( pluginloadstatus ) { - // Plugin is not black listed. Add it to database and try to load the plugin - iBlacklistMgr->AddL( uid , version ); - OstTrace1( TRACE_NORMAL, CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is added to DB", uid.iUid ); - CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is added to database", uid.iUid); - TRAPD( err, plugin = CIndexingPlugin::NewL( uid ) ); - //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: ERROR %d", err ); - if ( err == KErrNone ) - { - // Plugin loaded succesfully. 
Remove it from the database - iBlacklistMgr->Remove(uid); - OstTrace1( TRACE_NORMAL, DUP1_CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is removed from DB", uid.iUid ); - CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is removed from database", uid.iUid); - iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KDisable ); - CleanupStack::PushL( plugin ); - plugin->SetObserver( *this ); - plugin->SetSearchSession( iSearchSession ); - iPluginArray.AppendL( plugin ); // and add them to array - CleanupStack::Pop( plugin ); - OstTrace1( TRACE_NORMAL, DUP2_CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is loaded successfully", uid.iUid ); - CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is loaded succesfully", uid.iUid); - } + LoadHarvesterpluginL (uid, version, infoArray[i]->DisplayName() );//Load the harvester plugin } - } - CleanupStack::PopAndDestroy( &infoArray ); // infoArray, results in a call to CleanupEComArray - //FFLOGSTRING( "CFastFindHarvesterPluginControl::LoadPluginsL() plugin!" 
); - + } + delete contentinfo; + CleanupStack::PopAndDestroy( &infoArray ); // infoArray, results in a call to CleanupEComArray + OstTraceFunctionExit0( CINDEXINGMANAGER_LOADPLUGINSL_EXIT ); } // ----------------------------------------------------------------------------- @@ -626,7 +588,7 @@ // CIndexingManager::UpdateContentInfoDbL() // ----------------------------------------------------------------------------- // -void CIndexingManager::UpdateContentInfoDbL( const TDesC& aPluginName) +void CIndexingManager::UpdateContentInfoDbL( const TDesC& aPluginName, CContentInfo* aContentinfo) { OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY ); TBool iscontentfound = iContentInfoMgr->FindL( aPluginName ); @@ -634,12 +596,11 @@ if( !iscontentfound ) { //Add the content details to database - CContentInfo* contentinfo = CContentInfo::NewL(); - contentinfo->SetNameL( aPluginName ); - contentinfo->SetBlacklistStatus( KEnable ); - contentinfo->SetIndexStatus( KEnable ); - iContentInfoMgr->AddL( contentinfo ); - delete contentinfo; + aContentinfo->SetNameL( aPluginName ); + aContentinfo->SetBlacklistStatus( KEnable ); + aContentinfo->SetIndexStatus( KEnable ); + iContentInfoMgr->AddL( aContentinfo ); + } else { @@ -649,13 +610,13 @@ } // ----------------------------------------------------------------------------- -// CIndexingManager::UpdateUnloadList() +// CIndexingManager::UpdateDontloadListL() // ----------------------------------------------------------------------------- // -void CIndexingManager::UpdateUnloadListL() +void CIndexingManager::UpdateDontloadListL() { - OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY ); - CPIXLOGSTRING("CIndexingManager::UpdateUnloadList : Start"); + OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY ); + CPIXLOGSTRING("CIndexingManager::UpdateDontloadList : Start"); //Read the list of Uid's from the cenrep and update blacklist database //Open the unload list common repository CRepository* 
unloadrepo = NULL; @@ -664,10 +625,13 @@ return; RArray uidlist; //Read all the key list + //Matches occur whenever (key & mask) == (partialKey & mask). + //The partial key is guaranteed to be masked before use + // To fetch all the keys we have done masking with '0' TInt error = unloadrepo->FindL( 0, 0, uidlist); if ( error == KErrNone ) { - TBuf temp; + TBuf temp; //get the Uid of each and every plugin and add it to blacklist database TInt count = uidlist.Count(); for (int i = 0; i < count; i++ ) @@ -678,9 +642,69 @@ TLex uidvalue(temp); TInt xerr = uidvalue.Val( value,EHex ); uid.iUid = value; - (void)iBlacklistMgr->AddtoUnloadListL( uid ); + (void)iBlacklistMgr->AddtoDontloadListL( uid ); } } - CPIXLOGSTRING("CIndexingManager::UpdateUnloadList : End"); - OstTraceFunctionExit0( CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT ); + CPIXLOGSTRING("CIndexingManager::UpdateDontloadList : End"); + OstTraceFunctionExit0( CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT ); + } + +// ----------------------------------------------------------------------------- +// CIndexingManager::GetPluginLoadStatus() +// ----------------------------------------------------------------------------- +// +TBool CIndexingManager::GetPluginLoadStatusL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName) + { + OstTraceFunctionEntry0( DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY ); + //status of plugin in blacklist table + TBool pluginblacklisted = iBlacklistMgr->FindL( aPluginUid, aVersion); + //status of plugin in unload table + TBool loadstatus = iBlacklistMgr->FindInDontloadListL( aPluginUid ); + //Check the Uid in both the tables of the blacklist db +// if ( loadstatus || pluginblacklisted ) +// loadplugin = EFalse; + + if ( loadstatus ) + { + //Found in unload list.Update the indexing and blacklist status in contentinfo DB + iContentInfoMgr->UpdatePluginIndexStatusL( aPluginName , KDisable ); + iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KDisable ); + } + if ( pluginblacklisted ) 
+ //Update the blacklist status in content info db + iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KEnable ); + + return (! (loadstatus | pluginblacklisted)); } + +// ----------------------------------------------------------------------------- +// CIndexingManager::LoadHarvesterpluginL() +// ----------------------------------------------------------------------------- +// +void CIndexingManager::LoadHarvesterpluginL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName) + { + OstTraceFunctionEntry0( CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY ); + CIndexingPlugin* plugin = NULL; + // Plugin is not black listed. Add it to blacklist database + iBlacklistMgr->AddL( aPluginUid , aVersion ); + OstTrace1( TRACE_NORMAL, DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is added to DB", aPluginUid.iUid ); + CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is added to database", aPluginUid.iUid); + //try to load the plugin + TRAPD( err, plugin = CIndexingPlugin::NewL( aPluginUid ) ); + if ( err == KErrNone ) + { + // Plugin loaded successfully. 
Remove it from the blacklist database + iBlacklistMgr->Remove(aPluginUid); + OstTrace1( TRACE_NORMAL, CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is removed from DB", aPluginUid.iUid ); + CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is removed from database", aPluginUid.iUid); + iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KDisable ); + CleanupStack::PushL( plugin ); + plugin->SetObserver( *this ); + plugin->SetSearchSession( iSearchSession ); + iPluginArray.AppendL( plugin ); // and add them to array + CleanupStack::Pop( plugin ); + OstTrace1( TRACE_NORMAL, DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is loaded successfully", aPluginUid.iUid ); + CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is loaded succesfully", aPluginUid.iUid); + } + OstTraceFunctionExit0( CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT ); + } diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/src/main.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/harvester/harvesterserver/src/main.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,35 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. 
+* +* Contributors: +* +* Description: +* +*/ +#include +#include +#include "charvesterserver.h" +#include "qtmythread.h" + +int main(int argc, char *argv[]) + { + QCoreApplication a( argc , argv); + HarvesterThread mythread; + mythread.start(); + return a.exec(); + } + +void HarvesterThread::run() + { + //Trapping the error is handled inside the ThreadFunction + CHarvesterServer::ThreadFunction(); + exec(); + } diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/traces/CBlacklistDbTraces.h --- a/harvester/harvesterserver/traces/CBlacklistDbTraces.h Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/traces/CBlacklistDbTraces.h Mon Jun 28 10:34:53 2010 +0530 @@ -18,16 +18,16 @@ #define CBLACKLISTDB_UPDATEL_EXIT 0x8a000a #define CBLACKLISTDB_CREATEDBL_ENTRY 0x8a000b #define CBLACKLISTDB_CREATEDBL_EXIT 0x8a000c -#define CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY 0x8a000d -#define CBLACKLISTDB_CREATECOLUMNSETLC_EXIT 0x8a000e -#define CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY 0x8a003b -#define CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT 0x8a003c -#define CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY 0x8a003d -#define CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT 0x8a003e -#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY 0x8a003f -#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT 0x8a0040 -#define CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY 0x8a0041 -#define CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT 0x8a0042 +#define CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY 0x8a004b +#define CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT 0x8a004c +#define CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY 0x8a004d +#define CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT 0x8a004e +#define CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY 0x8a004f +#define CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT 0x8a0050 +#define CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY 0x8a0051 +#define CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT 0x8a0052 +#define CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY 0x8a0053 +#define CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT 
0x8a0054 #define CBLACKLISTDB_CONSTRUCTL 0x860001 #define CBLACKLISTDB_ADDL 0x860002 #define CBLACKLISTDB_REMOVE 0x860003 @@ -38,9 +38,8 @@ #define CBLACKLISTDB_UPDATEL 0x860008 #define CBLACKLISTDB_FINDL 0x860009 #define DUP1_CBLACKLISTDB_FINDL 0x86000a -#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL 0x860027 -#define DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL 0x860028 -#define CBLACKLISTDB_FINDFROMUNLOADLISTL 0x860029 +#define DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL 0x86002d +#define CBLACKLISTDB_FINDFROMDONTLOADLISTL 0x86002e inline TBool OstTraceGen2( TUint32 aTraceID, TUint aParam1, TInt aParam2 ) diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/traces/CBlacklistMgrTraces.h --- a/harvester/harvesterserver/traces/CBlacklistMgrTraces.h Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/traces/CBlacklistMgrTraces.h Mon Jun 28 10:34:53 2010 +0530 @@ -15,17 +15,17 @@ #define CBLACKLISTMGR_REMOVE_ENTRY 0x8a0015 #define CBLACKLISTMGR_REMOVE_EXIT 0x8a0016 #define CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY 0x8a0043 -#define CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT 0x8a0044 #define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY 0x8a0045 -#define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT 0x8a0046 +#define CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT 0x8a0055 +#define CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT 0x8a0056 #define CBLACKLISTMGR_ADDL 0x86000b #define CBLACKLISTMGR_REMOVE 0x86000c #define CBLACKLISTMGR_FINDL 0x86000d #define DUP1_CBLACKLISTMGR_FINDL 0x86000e #define DUP2_CBLACKLISTMGR_FINDL 0x86000f -#define CBLACKLISTMGR_ADDTOUNLOADLISTL 0x86002a #define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL 0x86002b -#define CBLACKLISTMGR_FINDFROMUNLOADLISTL 0x86002c +#define CBLACKLISTMGR_ADDTODONTLOADLISTL 0x86002f +#define CBLACKLISTMGR_FINDINDONTLOADLISTL 0x860030 inline TBool OstTraceGen2( TUint32 aTraceID, TUint aParam1, TInt aParam2 ) diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/traces/CIndexingManagerTraces.h --- 
a/harvester/harvesterserver/traces/CIndexingManagerTraces.h Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/traces/CIndexingManagerTraces.h Mon Jun 28 10:34:53 2010 +0530 @@ -8,8 +8,13 @@ #define CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY 0x8a0047 #define CINDEXINGMANAGER_UPDATECONTENTINFODBL_EXIT 0x8a0048 -#define CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY 0x8a0049 -#define CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT 0x8a004a +#define CINDEXINGMANAGER_LOADPLUGINSL_ENTRY 0x8a0057 +#define CINDEXINGMANAGER_LOADPLUGINSL_EXIT 0x8a0058 +#define CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY 0x8a0059 +#define CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT 0x8a005a +#define DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY 0x8a005b +#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY 0x8a005c +#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT 0x8a005d #define CINDEXINGMANAGER_RUNL 0x860010 #define DUP1_CINDEXINGMANAGER_RUNL 0x860011 #define DUP2_CINDEXINGMANAGER_RUNL 0x860012 @@ -20,9 +25,6 @@ #define DUP7_CINDEXINGMANAGER_RUNL 0x860017 #define DUP8_CINDEXINGMANAGER_RUNL 0x860018 #define DUP9_CINDEXINGMANAGER_RUNL 0x860019 -#define CINDEXINGMANAGER_LOADPLUGINSL 0x86001a -#define DUP1_CINDEXINGMANAGER_LOADPLUGINSL 0x86001b -#define DUP2_CINDEXINGMANAGER_LOADPLUGINSL 0x86001c #define CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001d #define DUP1_CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001e #define DUP2_CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001f @@ -33,6 +35,9 @@ #define CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860024 #define DUP1_CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860025 #define DUP2_CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860026 +#define DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860031 +#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860032 +#define DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860033 #ifndef __KERNEL_MODE__ diff -r a5fbfefd615f -r 6547bf8ca13a harvester/harvesterserver/traces/fixed_id.definitions --- 
a/harvester/harvesterserver/traces/fixed_id.definitions Fri Jun 11 14:43:47 2010 +0300 +++ b/harvester/harvesterserver/traces/fixed_id.definitions Mon Jun 28 10:34:53 2010 +0530 @@ -3,36 +3,36 @@ [GROUP]TRACE_NORMAL=0x86 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDL_ENTRY=0x5 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDL_EXIT=0x6 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY=0x3d -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT=0x3e +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY=0x4f +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT=0x50 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CONSTRUCTL_ENTRY=0x3 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CONSTRUCTL_EXIT=0x4 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY=0xd -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_EXIT=0xe +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY=0x4b +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT=0x4c [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDBL_ENTRY=0xb [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDBL_EXIT=0xc -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY=0x3b -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT=0x3c -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY=0x41 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT=0x42 +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY=0x4d +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT=0x4e +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY=0x53 +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT=0x54 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_NEWL_ENTRY=0x1 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_NEWL_EXIT=0x2 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY=0x3f -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT=0x40 +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY=0x51 
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT=0x52 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVE_ENTRY=0x7 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVE_EXIT=0x8 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_UPDATEL_ENTRY=0x9 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_UPDATEL_EXIT=0xa [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDL_ENTRY=0x13 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDL_EXIT=0x14 +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT=0x55 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY=0x43 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT=0x44 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_CONSTRUCTL_ENTRY=0x11 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_CONSTRUCTL_EXIT=0x12 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_NEWL_ENTRY=0xf [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_NEWL_EXIT=0x10 +[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT=0x56 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY=0x45 -[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT=0x46 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVE_ENTRY=0x15 [TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVE_EXIT=0x16 [TRACE]TRACE_FLOW[0x8A]_CCONTENTINFODB_ADDL_ENTRY=0x25 @@ -71,46 +71,50 @@ [TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_NEWL_EXIT=0x18 [TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_SETNAMEL_ENTRY=0x1d [TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_SETNAMEL_EXIT=0x1e +[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY=0x5c +[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT=0x5d +[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADPLUGINSL_ENTRY=0x57 +[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADPLUGINSL_EXIT=0x58 [TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY=0x47 [TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATECONTENTINFODBL_EXIT=0x48 -[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY=0x49 -[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT=0x4a 
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY=0x59 +[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT=0x5a +[TRACE]TRACE_FLOW[0x8A]_DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY=0x5b [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_ADDL=0x2 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_CONSTRUCTL=0x1 -[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMUNLOADLISTL=0x29 +[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMDONTLOADLISTL=0x2e [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDL=0x9 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDWITHVERSIONL=0x6 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVE=0x3 -[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x27 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_UPDATEL=0x8 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDL=0xb -[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTOUNLOADLISTL=0x2a -[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDFROMUNLOADLISTL=0x2c +[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTODONTLOADLISTL=0x2f +[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDINDONTLOADLISTL=0x30 [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDL=0xd [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_REMOVE=0xc [TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL=0x2b [TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1d [TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x24 -[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADPLUGINSL=0x1a +[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x32 [TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x21 [TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_RUNL=0x10 [TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_FINDL=0xa [TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_FINDWITHVERSIONL=0x7 [TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVE=0x4 -[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x28 [TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTMGR_FINDL=0xe [TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1e 
[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x25 -[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADPLUGINSL=0x1b +[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x31 [TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x22 [TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_RUNL=0x11 [TRACE]TRACE_NORMAL[0x86]_DUP2_CBLACKLISTDB_REMOVE=0x5 [TRACE]TRACE_NORMAL[0x86]_DUP2_CBLACKLISTMGR_FINDL=0xf [TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1f [TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x26 -[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADPLUGINSL=0x1c +[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x33 [TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x23 [TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_RUNL=0x12 +[TRACE]TRACE_NORMAL[0x86]_DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL=0x2d [TRACE]TRACE_NORMAL[0x86]_DUP3_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x20 [TRACE]TRACE_NORMAL[0x86]_DUP3_CINDEXINGMANAGER_RUNL=0x13 [TRACE]TRACE_NORMAL[0x86]_DUP4_CINDEXINGMANAGER_RUNL=0x14 @@ -119,3 +123,25 @@ [TRACE]TRACE_NORMAL[0x86]_DUP7_CINDEXINGMANAGER_RUNL=0x17 [TRACE]TRACE_NORMAL[0x86]_DUP8_CINDEXINGMANAGER_RUNL=0x18 [TRACE]TRACE_NORMAL[0x86]_DUP9_CINDEXINGMANAGER_RUNL=0x19 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY=0x3d +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT=0x3e +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY=0xd +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_EXIT=0xe +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY=0x3b +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT=0x3c +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY=0x41 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT=0x42 
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY=0x3f +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT=0x40 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT=0x44 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT=0x46 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY=0x49 +[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT=0x4a +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMUNLOADLISTL=0x29 +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x27 +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTOUNLOADLISTL=0x2a +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDFROMUNLOADLISTL=0x2c +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADPLUGINSL=0x1a +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x28 +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADPLUGINSL=0x1b +[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADPLUGINSL=0x1c diff -r a5fbfefd615f -r 6547bf8ca13a layers.sysdef.xml --- a/layers.sysdef.xml Fri Jun 11 14:43:47 2010 +0300 +++ b/layers.sysdef.xml Mon Jun 28 10:34:53 2010 +0530 @@ -7,7 +7,8 @@ - + + diff -r a5fbfefd615f -r 6547bf8ca13a qcpix/qcpixsearchclient.pro --- a/qcpix/qcpixsearchclient.pro Fri Jun 11 14:43:47 2010 +0300 +++ b/qcpix/qcpixsearchclient.pro Mon Jun 28 10:34:53 2010 +0530 @@ -41,6 +41,8 @@ VERSION = 1.0.0 LIBS += -leuser -lcpixsearchclient + INCLUDEPATH += $$APP_LAYER_SYSTEMINCLUDE + HEADERS += src/platform/s60/inc/qcpixdocumentprivate.h \ src/platform/s60/inc/qcpixdocumentfieldprivate.h \ src/platform/s60/inc/qcpixsearcherprivate.h \ diff -r a5fbfefd615f -r 6547bf8ca13a qcpix/tsrc/qtcpixunittests/qtcpixunittests.pro --- a/qcpix/tsrc/qtcpixunittests/qtcpixunittests.pro Fri Jun 11 14:43:47 2010 +0300 +++ b/qcpix/tsrc/qtcpixunittests/qtcpixunittests.pro Mon Jun 28 
10:34:53 2010 +0530 @@ -39,7 +39,9 @@ "data/segments \epoc32\winscw\c\private\2001f6f7\indexing\indexdb\root\contact\_0\segments" \ "data/cpixreg.txt \epoc32\winscw\c\private\2001f6f7\cpixreg.txt" \ "data/config.ini \epoc32\winscw\c\system\data\config.ini" - + + INCLUDEPATH += $$APP_LAYER_SYSTEMINCLUDE + TARGET.CAPABILITY = ALL -TCB -DRM TARGET.UID3 = 0xE76C2AE7 LIBS += -lqcpixsearchclient diff -r a5fbfefd615f -r 6547bf8ca13a rom/cpix_mw.iby --- a/rom/cpix_mw.iby Fri Jun 11 14:43:47 2010 +0300 +++ b/rom/cpix_mw.iby Mon Jun 28 10:34:53 2010 +0530 @@ -54,6 +54,8 @@ data=DATAZ_\PRIVATE\101f875a\import\20029ab8.rsc private\101f875a\import\20029ab8.rsc +data=DATAZ_\resource\cpix\analyzer.loc \resource\cpix\analyzer.loc +data=DATAZ_\resource\cpix\thaidict.sm \resource\cpix\thaidict.sm data=ZSYSTEM\install\cpixsearch_stub.sis System\Install\cpixsearch_stub.sis data=DATAZ_\private\10202be9\2001f6fb.cre private\10202be9\2001f6fb.cre diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/data/resource/analyzer.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/data/resource/analyzer.loc Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,92 @@ +/** + * This file defines the default analyzer used for CPix. + * The default analyzer should change its behaviour depending on the + * locale in use. + * + * The codes that are used in switch should follow the ISO 639-1 standard, + * or the ISO 639-2 standard when 2-letter codes are not available. + * + * WARNING: It is not guaranteed that on the Symbian platform the language + * codes are translated to ISO codes. Symbian platform codes of the form + * 's29' for Taiwan Chinese or 's1' for English are also supported. + * + * Refer to the CPix documentation for this file's syntax. + */ + +config_switch { + + /** + * The prefiltering is done e.g. when searching "$cat", "$cat.cal" etc. 
+ */ + case 'prefix': + locale_switch { + + // French + case 'fr': stdtokens>stdfilter>lowercase>elision(fr); + + // Default + default: stdtokens>stdfilter>lowercase; + }; + + /** + * Analyzers used for queries and indexing + */ + default: + + locale_switch { + + // French + case 'fr': stdtokens>stdfilter>lowercase>elision(fr)>stop(fr); + + // Hebrew + case 'he': + config_switch { + case 'query': // do not use prefix filter when searching + stdtokens>stdfilter>lowercase>stop(en); + default: // use prefix filter only when indexing + stdtokens>stdfilter>lowercase>prefix(he)>stop(en); + }; + + // English + case 'en': stdtokens>stdfilter>lowercase>stop(en); + + // Thai + case 'th': stdtokens>stdfilter>lowercase>thai>stop(en); + + /* + * Far East Asian languages + * + * note: Hong Kong and Taiwanese are not differentiated. + * What are their language codes? + * + * note: Should we also include ISO 639-2 codes in here? + * + * note: Japanese is no longer supported. Let's use ngram for it anyway + */ + case 'jp', 'zh', 'ch': + ngram(1)>lowercase>stop(en); + + /** + * Korean + * + * note: Because of special optimizations, different analyzers + * are used for queries and indexing + * + * + * WARNING: Korean analyzer is not properly tested (!) + */ + case 'ko': + config_switch { + case 'query': koreanquery>lowercase>stop(en); + default: korean>lowercase>stop(en); + }; + + /** + * Default option; used for most languages and should work 'ok' + * for most alphabetic writing systems. + * + * note: Should we include english stop word list? 
+ */ + default: stdtokens>stdfilter>lowercase; + }; +} diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/data/resource/thaidict.sm Binary file searchengine/cpix/cpix/data/resource/thaidict.sm has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/group/bld.inf --- a/searchengine/cpix/cpix/group/bld.inf Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/group/bld.inf Mon Jun 28 10:34:53 2010 +0530 @@ -22,3 +22,10 @@ PRJ_MMPFILES cpix.mmp +PRJ_EXPORTS + +../data/resource/analyzer.loc /epoc32/data/z/resource/cpix/analyzer.loc +../data/resource/thaidict.sm /epoc32/data/z/resource/cpix/thaidict.sm +../data/resource/analyzer.loc /epoc32/release/winscw/udeb/z/resource/cpix/analyzer.loc +../data/resource/thaidict.sm /epoc32/release/winscw/udeb/z/resource/cpix/thaidict.sm + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/group/cpix.mmp --- a/searchengine/cpix/cpix/group/cpix.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/group/cpix.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -54,6 +54,7 @@ SOURCE common/cloners.cpp SOURCE ifieldfilter.cpp SOURCE prefixopt.cpp +SOURCE localization.cpp SOURCE filters/quadfilter.cpp SOURCE fileparser/fileparser.cpp SOURCE fileparser/textfileparser.cpp @@ -73,12 +74,23 @@ SOURCE spi/s60/audiometadata.cpp USERINCLUDE ../../../../searchsrv_plat/cpix_utility_api/inc +SOURCEPATH ../src +SOURCE customanalyzer.cpp +SOURCE prefixqueryparser.cpp +SOURCE queryparser.cpp +SOURCEPATH ../src/spi/s60 +SOURCE s60locale.cpp +SOURCEPATH ../src/spi +SOURCE locale.cpp +SOURCEPATH ../src/common + USERINCLUDE ../inc/public USERINCLUDE ../inc/private USERINCLUDE ../../../oss/cl/clucene/src USERINCLUDE ../../../oss/sb/snowball/include USERINCLUDE ../../../util/cpixtools/inc/public USERINCLUDE ../../../oss/cl/clucene/src/CLucene +USERINCLUDE ../../../oss/loc/analysis/inc/public USERINCLUDE ../../../cpix/cpix/src MW_LAYER_SYSTEMINCLUDE @@ -94,8 +106,9 @@ //LIBRARY libm.lib //LIBRARY euser.lib -//STATICLIBRARY 
libclucene.lib -//STATICLIBRARY libstemmer.lib +STATICLIBRARY libclucene.lib +STATICLIBRARY libstemmer.lib +STATICLIBRARY libanalysis.lib // For SPI //LIBRARY efsrv.lib diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/analyzer.h --- a/searchengine/cpix/cpix/inc/private/analyzer.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/analyzer.h Mon Jun 28 10:34:53 2010 +0530 @@ -32,6 +32,7 @@ namespace lucene { namespace analysis { class TokenStream; + class Analyzer; } namespace util { class Reader; @@ -40,9 +41,8 @@ namespace Cpix { - namespace AnalyzerExp { - class Piping; - } + class InitParams; + struct TokenizerClassEntry; struct FilterClassEntry; @@ -50,13 +50,80 @@ class Document; class Field; - class DocumentFieldIterator; + class DocumentFieldIterator; + + class LocaleSwitchStreamFactory; + class CustomAnalyzer; + + namespace AnalyzerExp { + class LocaleSwitch; + class Piping; + } } // Class definitions namespace Cpix { + class Analysis { + + public: + + /** + * Initializes the Analysis. Uses init parameters' resource dir + * to locate & load analysis & localization related resources. + * + * NOTE: The init is made to work in a fault-tolerant fashion. + * If needed resource files are not found, a warning is logged + * (if logging is enabled) and some meaningful default is used instead. + * If logging is not enabled, init _may fail silently_. + */ + static void init(InitParams& ip); + + /** + * Releases all resources that are used by analysis. + */ + static void shutdown(); + + /** + * Returns the default analyzer. This analyzer is likely localized + * and will analyze differently depending on what locale is currently + * active. + */ + static lucene::analysis::Analyzer& getDefaultAnalyzer(); + + /** + * Returns the query analyzer. This analyzer is likely localized + * and will analyze differently depending on what locale is currently + * active. 
+ */ + static lucene::analysis::Analyzer& getQueryAnalyzer(); + + /** + * Returns the query filter analyzer. This analyzer is likely localized + * and will analyze differently depending of what locale is currently + * active. + */ + static lucene::analysis::Analyzer& getPrefixAnalyzer(); + + private: + + Analysis(InitParams& ip); + + std::auto_ptr parse(std::string path); + + static Analysis* theInstance_; + + std::auto_ptr defaultAnalyzer_; + + std::auto_ptr queryAnalyzer_; + + std::auto_ptr prefixAnalyzer_; + + }; + + + /** * This is a special filter that is used to generate prefixes * of the searched words. @@ -167,68 +234,7 @@ lucene::analysis::Analyzer* analyzer_; }; - - - /** - * Forms a series of analyzers, tokenizers and filters based on textual - * analyzer definition. - */ - class CustomAnalyzer : public lucene::analysis::Analyzer - { - public: - - /** - * Constructs a custom analyzer based on given definition string. - * See CPix documentation to see, how proper analyzer definition - * strings ought to be formed. - * - * Throws on failure, e.g. if definition parsing fails, if - * declared identifiers are not found and if parameters are wrong. - */ - CustomAnalyzer(const wchar_t* definition); - - /** - * For internal usage only. Constructs analyzer from a parsed - * definition string or from a fragment of a parsed definition - * string. - */ - CustomAnalyzer(const Cpix::AnalyzerExp::Piping& definition); - - virtual ~CustomAnalyzer(); - - /** - * Token stream is based on the analyzer definition string - */ - lucene::analysis::TokenStream* - tokenStream(const wchar_t * fieldName, - lucene::util::Reader * reader); - - private: - - /** - * Setups the TokenStream factory based on the analyzer - * definition stored in the piping - */ - void setup(const Cpix::AnalyzerExp::Piping& definition); - - /** - * Return TokenizerClassEntry, which matches the given - * identifier. 
- */ - static TokenizerClassEntry& - CustomAnalyzer::getTokenizerEntry(std::wstring id); - - /** - * Return FilterClassEntry, which matches the given - * identifier. - */ - static FilterClassEntry& - CustomAnalyzer::getFilterEntry(std::wstring id); - - private: - - std::auto_ptr factory_; - }; + std::auto_ptr CreateDefaultAnalyzer(); } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/analyzerexp.h --- a/searchengine/cpix/cpix/inc/private/analyzerexp.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/analyzerexp.h Mon Jun 28 10:34:53 2010 +0530 @@ -43,20 +43,21 @@ namespace AnalyzerExp { /** Identifiers for the tokens. Extends the list present in the cpixparsetools.h */ - enum TokenType { - TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED, // 8 - TOKEN_RIGHT_BRACKET, - TOKEN_COMMA, // 10 - TOKEN_PIPE, - TOKEN_SWITCH, - TOKEN_CASE, - TOKEN_DEFAULT, - TOKEN_LEFT_BRACE, // 15 - TOKEN_RIGHT_BRACE, - TOKEN_COLON, - TOKEN_TERMINATOR - }; - + + extern const wchar_t TOKEN_LEFT_BRACKET[]; + extern const wchar_t TOKEN_RIGHT_BRACKET[]; + extern const wchar_t TOKEN_COMMA[]; + extern const wchar_t TOKEN_PIPE[]; + extern const wchar_t TOKEN_SWITCH[]; + extern const wchar_t TOKEN_LOCALE_SWITCH[]; + extern const wchar_t TOKEN_CONFIG_SWITCH[]; + extern const wchar_t TOKEN_CASE[]; + extern const wchar_t TOKEN_DEFAULT[]; + extern const wchar_t TOKEN_LEFT_BRACE[]; + extern const wchar_t TOKEN_RIGHT_BRACE[]; + extern const wchar_t TOKEN_COLON[]; + extern const wchar_t TOKEN_TERMINATOR[]; + /** * Tokenizer used for analyzer definition strings' lexical analysis */ @@ -70,6 +71,8 @@ virtual Cpt::Lex::TokenizerState consume(const wchar_t* cursor); private: // data Cpt::Lex::WhitespaceTokenizer ws_; + Cpt::Lex::LineCommentTokenizer lcomment_; + Cpt::Lex::SectionCommentTokenizer scomment_; Cpt::Lex::IdTokenizer ids_; Cpt::Lex::StrLitTokenizer strlits_; Cpt::Lex::IntLitTokenizer intlits_; @@ -79,6 +82,8 @@ Cpt::Lex::SymbolTokenizer cm_; // comma 
Cpt::Lex::SymbolTokenizer pp_; // pipe symbol '>' Cpt::Lex::SymbolTokenizer sw_; // switch + Cpt::Lex::SymbolTokenizer lsw_; // locale switch + Cpt::Lex::SymbolTokenizer csw_; // config switch Cpt::Lex::SymbolTokenizer cs_; // case Cpt::Lex::SymbolTokenizer df_; // default Cpt::Lex::SymbolTokenizer lbc_; // left brace @@ -198,18 +203,18 @@ }; /** - * A case of switch statement. Of form: "case 'field': + * A case of switch statement. Of form: "case 'case': * tokenizer>filter>filter;" */ class Case : public Exp { public: - Case(const std::vector & fields, + Case(const std::vector & cases, std::auto_ptr piping); virtual ~Case(); - const std::vector& fields() const; + const std::vector& cases() const; const Piping& piping() const; private: - std::vector fields_; + std::vector cases_; std::auto_ptr piping_; }; @@ -228,7 +233,38 @@ Cpt::auto_vector cases_; std::auto_ptr def_; }; - std::auto_ptr ParsePiping(Cpt::Parser::Lexer& lexer); + + /** + * LocaleSwitch expression + */ + class LocaleSwitch : public Exp { + public: + LocaleSwitch(Cpt::auto_vector & cases, + std::auto_ptr def); + virtual ~LocaleSwitch(); + const std::vector& cases() const; + const Piping& def() const; + public: + Cpt::auto_vector cases_; + std::auto_ptr def_; + }; + + /** + * ConfigSwitch expression + */ + class ConfigSwitch : public Exp { + public: + ConfigSwitch(Cpt::auto_vector & cases, + std::auto_ptr def); + virtual ~ConfigSwitch(); + const std::vector& cases() const; + const Piping& def() const; + public: + Cpt::auto_vector cases_; + std::auto_ptr def_; + }; + + std::auto_ptr ParsePiping(const wchar_t* definition); } } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/cluceneext.h --- a/searchengine/cpix/cpix/inc/private/cluceneext.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/cluceneext.h Mon Jun 28 10:34:53 2010 +0530 @@ -91,6 +91,68 @@ { namespace util { + /** + * Frees one reference out of Clucene object without desctroying it + * Used to 
pass newly created Terms for queries. Queries are allowed + * to take full ownership of them. + */ + template + inline T* freeref(T* t) { + t->__cl_decref(); + return t; + } + template + class auto_ref { + + public: + /** + * NOTE: Constructing auto_ref does not increase the referred + * item's reference count. + */ + auto_ref(T* ref) : ref_( ref ) {} + + auto_ref(auto_ptr ref) : ref_( ref.release() ) {} + + + void reset(auto_ptr ref) { + _CLDECDELETE( ref_ ); + ref_ = ref.release(); + } + + void reset(T* ref) { + _CLDECDELETE( ref_ ); + ref_ = ref; + } + + operator auto_ptr () { + return auto_ptr(release()); + } + + T* release() { + T* ret = ref_; + ref_ = 0; + return ret; + } + + /** + * Decreases referred item's reference count + */ + ~auto_ref() { + _CLDECDELETE( ref_ ); + } + + T* get() { + return ref_; + } + + T* operator->() { + return ref_; + } + + private: + + T* ref_; + }; /** * This class is almost like clucene::util::FileReader, diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/customanalyzer.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/inc/private/customanalyzer.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,119 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. 
+* +* Contributors: +* +* Description: +* +*/ + +#ifndef CUSTOMANALYZER_H_ +#define CUSTOMANALYZER_H_ + +// Forward declarations +namespace Cpt { + namespace Parser { + class Lexer; + } +} +namespace Cpix { + namespace AnalyzerExp { + class Piping; + class LocaleSwitch; + class ConfigSwitch; + } + struct TokenizerClassEntry; + struct FilterClassEntry; +} + + +namespace Cpix { + + /** + * Creates token stream for the given reader and fieldName. + * This class is in many ways similar to CLucene analyzer class + * definition. + */ + class TokenStreamFactory { + public: + virtual ~TokenStreamFactory(); + virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName, + lucene::util::Reader * reader) = 0; + }; + + /** + * Forms a series of analyzers, tokenizers and filters based on textual + * analyzer definition. + */ + class CustomAnalyzer : public lucene::analysis::Analyzer, public TokenStreamFactory + { + public: + + /** + * Constructs a custom analyzer based on given definition string. + * See CPix documentation to see, how proper analyzer definition + * strings ought to be formed. + * + * Throws on failure, e.g. if definition parsing fails, if + * declared identifiers are not found and if parameters are wrong. + */ + CustomAnalyzer(const wchar_t* definition, const wchar_t* config = NULL); + + /** + * For internal usage only. Constructs analyzer from a parsed + * definition string or from a fragment of a parsed definition + * string. 
+ */ + CustomAnalyzer(const Cpix::AnalyzerExp::Piping& definition, const wchar_t* config = NULL); + + virtual ~CustomAnalyzer(); + + /** + * Token stream is based on the analyzer definition string + */ + lucene::analysis::TokenStream* + tokenStream(const wchar_t * fieldName, + lucene::util::Reader * reader); + + private: + + /** + * Sets up the TokenStream factory based on the analyzer + * definition stored in the piping + */ + void setup(const Cpix::AnalyzerExp::Piping& definition, const wchar_t* config); + + /** + * Resolves the given config switch expression for the given + * config. + */ + static std::auto_ptr resolveConfigSwitch(const Cpix::AnalyzerExp::ConfigSwitch& csw, const wchar_t* config); + + /** + * Return TokenizerClassEntry, which matches the given + * identifier. + */ + static TokenizerClassEntry& getTokenizerEntry(std::wstring id); + + /** + * Return FilterClassEntry, which matches the given + * identifier. + */ + static FilterClassEntry& getFilterEntry(std::wstring id); + + private: + + std::auto_ptr factory_; + }; + +} + +#endif /* CUSTOMANALYZER_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/initparams.h --- a/searchengine/cpix/cpix/inc/private/initparams.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/initparams.h Mon Jun 28 10:34:53 2010 +0530 @@ -340,6 +340,8 @@ // for cpixreg.txt and automatic index paths std::string cpixDir_; + // for localization information + std::string resourceDir_; // log related parameters std::string logFileBase_; @@ -402,6 +404,10 @@ void setCpixDir(const char * value); + const char * getResourceDir() const; + void setResourceDir(const char * value); + + const char * getLogFileBase() const; void setLogFileBase(const char * value); diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/iqrytype.h --- a/searchengine/cpix/cpix/inc/private/iqrytype.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/iqrytype.h Mon Jun 28 
10:34:53 2010 +0530 @@ -23,7 +23,10 @@ #include #include +#include "cpixtools.h" +#include "cpixexc.h" #include "common/refcountedbase.h" +#include "cpixparsetools.h" namespace lucene { @@ -267,16 +270,6 @@ class QryCall { private: - - enum TokenType - { - DOLLAR = Cpt::Lex::TOKEN_LAST_RESERVED, - LESSTHAN, - GREATERTHAN, - COMMA, - LEFTPARENTHESIS, - RIGHTPARENTHESIS - }; /** * This static member (tokenizer_) has const-usage @@ -296,7 +289,7 @@ * parse(). */ typedef int State; - typedef int Symbol; + typedef Cpt::Lex::token_type_t Symbol; typedef std::pair StateSymbolPair; typedef std::map TransitionTable; static TransitionTable * transitions_; diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/localization.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/inc/private/localization.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,94 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#ifndef LOCALIZATION_H_ +#define LOCALIZATION_H_ + +#include +#include +#include "cpixsynctools.h" + +namespace Cpix { + + /** + * Class that is used for storing and retrieving used locale + */ + class Localization { + + public: + + /** + * Returns a list of language names. + * + * MT safe + */ + std::vector getLanguageNames(); + + /** + * Sets the used locale, if locale is set to be "auto", + * underlying mechanism will consult environment for + * maintaining correct locale. 
+ * + * MT safe + */ + void setLocale(const char* locale); + + /** + * Sets the used locale, if locale is set to be "auto", + * underlying mechanism will consult environment for + * maintaining correct locale. + * + * MT safe + */ + void setLocale(const wchar_t* locale); + + public: // static API + + /** + * Accessor for the localization singleton instance + * + * NOTE: Should be called during init. Otherwise, if two threads try + * to access localization instance at the same time, memory + * leak may result. In this case two singleton instances may + * be constructed. + */ + static Localization& instance(); + + /** + * Shuts down localization. + */ + static void shutdown(); + + private: + + Localization(); + + static Localization* theInstance_; + + private: + + Cpt::Mutex mutex_; + + bool auto_; + + std::vector languageNames_; + + }; + +} +#endif /* LOCALIZATION_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/prefixqueryparser.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/inc/private/prefixqueryparser.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,81 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. 
+ * + * Potential parameters + * + * * Target Field (makes sense) + * * QueryAnalyzer (doesn't make sense) + * * PrefixAnalyzer (doesn't make sense) + */ + class PrefixQueryParser : public IQueryParser { + + public: + + PrefixQueryParser(const wchar_t* field = LCPIX_DEFAULT_FIELD); + + virtual ~PrefixQueryParser(); + + virtual std::auto_ptr parse(const wchar_t* query); + + virtual const wchar_t* getField() const; + + virtual void setDefaultOperator(cpix_QP_Operator op); + + private: + + std::auto_ptr toQuery(Cpt::Lex::Token word); + + bool usePrefixFor(lucene::analysis::Token& token); + + private: + + std::wstring field_; + + }; + +} + +#endif /* PREFIXQUERYPARSER_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/queryparser.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/inc/private/queryparser.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,130 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. 
+* +* Contributors: +* +* Description: +* +*/ + + +#ifndef QUERYPARSER_H_ +#define QUERYPARSER_H_ + +#include + +#include "CLucene.h" +#include "CLucene/queryParser/MultiFieldQueryParser.h" + +#include "prefixopt.h" + +#include "cpixsearch.h" + +namespace Cpix { + + class IQueryParser { + + public: + + virtual ~IQueryParser(); + + virtual std::auto_ptr parse(const wchar_t* query) = 0; + + // Not really a fundamental property of a query parser: + virtual const wchar_t* getField() const = 0; + + virtual void setDefaultOperator(cpix_QP_Operator op) = 0; + }; + + class CLuceneQueryParser : public IQueryParser { + + public: + + ~CLuceneQueryParser(); + + CLuceneQueryParser(const wchar_t* field, lucene::analysis::Analyzer& analyzer); + + virtual std::auto_ptr parse(const wchar_t* query); + + virtual const wchar_t* getField() const; + + virtual void setDefaultOperator(cpix_QP_Operator op); + + private: + + std::auto_ptr parser_; + + }; + + class CLuceneMultiFieldQueryParser : public IQueryParser { + + public: + + ~CLuceneMultiFieldQueryParser(); + + CLuceneMultiFieldQueryParser(const wchar_t** fields, + lucene::analysis::Analyzer& analyzer, + lucene::queryParser::BoostMap& boostMap); + + virtual std::auto_ptr parse(const wchar_t* query); + + virtual const wchar_t* getField() const; + + virtual void setDefaultOperator(cpix_QP_Operator op); + + private: + + std::auto_ptr parser_; + + }; + + + class PrefixOptQueryParser : public IQueryParser { + + public: + + PrefixOptQueryParser(std::auto_ptr parser); + + ~PrefixOptQueryParser(); + + virtual std::auto_ptr parse(const wchar_t* query); + + virtual const wchar_t* getField() const; + + virtual void setDefaultOperator(cpix_QP_Operator op); + + private: + + PrefixOptQueryRewriter prefixOpt_; + + std::auto_ptr parser_; + + }; + + // + // Following factory methods apply necessary optimization wraps + // over the query parsers. 
+ // + + + IQueryParser* CreateCLuceneQueryParser(const wchar_t* defaultField, + lucene::analysis::Analyzer* analyzer); + + IQueryParser* CreateCLuceneMultiFieldQueryParser( + const wchar_t* fields[], + lucene::analysis::Analyzer* analyzer, + lucene::queryParser::BoostMap* boostMap); + + IQueryParser* CreatePrefixQueryParser(const wchar_t* field); + +} + +#endif /* QUERYPARSER_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/spi/locale.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/inc/private/spi/locale.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,41 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#ifndef LOCALE_H_ +#define LOCALE_H_ + +#include +#include + +namespace Cpix { + + namespace Spi { + + extern const wchar_t* SymbianLanguageCodePrefix; + + /** + * Returns a vector containing a list of language names that is + * ordered by priority. 
+ */ + std::vector GetLanguageNames(); + + } +} + + +#endif /* LOCALE_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/private/wrappertraitsdb.h --- a/searchengine/cpix/cpix/inc/private/wrappertraitsdb.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/private/wrappertraitsdb.h Mon Jun 28 10:34:53 2010 +0530 @@ -106,6 +106,7 @@ namespace Cpix { class CustomAnalyzer; class SystemAnalyzer; + class IQueryParser; } @@ -195,24 +196,12 @@ typedef Cpix::SystemAnalyzer NativeClass; }; - -struct cpix_MultiFieldQueryParser : public cpix_QueryParser { }; - - template<> struct WrapperTraits { - typedef lucene::queryParser::QueryParser NativeClass; + typedef Cpix::IQueryParser NativeClass; }; - -template<> -struct WrapperTraits -{ - typedef lucene::queryParser::MultiFieldQueryParser NativeClass; -}; - - template<> struct WrapperTraits { diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/public/appclass-hierarchy.txt --- a/searchengine/cpix/cpix/inc/public/appclass-hierarchy.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/public/appclass-hierarchy.txt Mon Jun 28 10:34:53 2010 +0530 @@ -23,124 +23,150 @@ | [ _mimetype (opt) ] {EStoreYes | EIndexNo} | +-- msg - | [ To ] {EStoreYes | EIndexTokenized} {ExcerptYes, if present} - | [ From ] {EStoreYes | EIndexTokenized} {ExcerptYes, if present} - | [ Body ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Folder ] {EStoreYes | EIndexNo} {ExcerptNA} - | [ Subject ] {EStoreYes | EIndexTokenized} {ExcperptNo} - | + | [ To ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes, if present} + | [ From ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes, if present} + | [ Body ] {EStoreYes | EIndexTokenized } {ExcerptYes} + | [ Folder ] {EStoreYes | EIndexNo} {ExcerptNA} + | [ Subject ] {EStoreYes | EIndexTokenized} {ExcperptNo} + | [ Attachment ] {EStoreYes | EIndexTokenized} {ExcperptNo} | +-- file | | - | | + | | | +-- content - | | [ Contents ] 
{EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | | [ BaseName ] {EStoreNo | EIndexTokenized} {ExcerptNo} - | | [ Extension ] {EStoreNo | EIndexTokenized} {ExcerptNo} - | | - | +-- folder - | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ Extension ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} + | | [ Contents ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | | [ BaseName ] {EStoreNo | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | | [ Extension ] {EStoreNo | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | | + | +-- folder + | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | [ Extension ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} | | +-- media | | | | | +-- audio - | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} - | | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} - | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ Album ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ AlbumArtist ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ OriginalArtist ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ Composer ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ Artist ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | + | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} + | | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} + | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | | [ Artist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ Album ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ AlbumArtist ] 
{EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ OriginalArtist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ Composer ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ Author ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Legal ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Track ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | | [ CaptureDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | | [ Duration ] {EStoreYes | EIndexTokenized} {ExcerptNo} | | | +-- image - | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} - | | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} - | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | | [ DateTimeOrignal] {EStoreYes | EIndexUnTokenized} {ExcerptNA} - | | + | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} + | | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} + | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ Legal ] {EStoreYes | 
EIndexTokenized} {ExcerptYes} + | | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | | [ DateTimeOrignal ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} | | | +-- video - | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} - | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} - | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Artist ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Author ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | + | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo} + | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | [ Extension ] {EStoreYes | EIndexTokenized } {ExcerptYes} + | [ Artist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Author ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ ResolutionUnit ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Legal ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Track ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ CaptureDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes} + | [ Duration ] {EStoreYes | EIndexTokenized} {ExcerptNo} | +-- 
contact /* The order of fields in excerpt is as below. The order in this case * is the order of fields shown when you 'Edit' the contact. */ - | [ GivenName ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ FamilyName ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ PhoneNumber ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ EMail ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ SIPID ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ CompanyName ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ JobTitle ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Note ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ GivenName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | [ FamilyName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | [ PhoneNumber ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ EMail ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ SIPID ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ CompanyName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ JobTitle ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Note ] {EStoreYes | EIndexTokenized} {ExcerptYes} /* The following fields are not displayed when 'Edit'-ing the contact. * The order here is arbitrary. 
*/ - | [ Address ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ SecondName ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Suffix ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ URL ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ PostOffice ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ ExtendedAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Locality ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Region ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ PostCode ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Country ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Spouse ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Children ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Class ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Prefix ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ AdditionalName ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Fax ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ GivenNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ FamilyNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ CompanyNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Address ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ SecondName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Suffix ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ URL ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ PostOffice ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ ExtendedAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Locality ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Region ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ PostCode ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Country ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Spouse ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Children ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Class ] {EStoreYes | EIndexTokenized} 
{ExcerptYes} + | [ Prefix ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ AdditionalName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Fax ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Assistant ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ Department ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes} + | [ IMAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ ServiceProvider ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Birthday ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ Anniversary ] {EStoreYes | EIndexTokenized} {ExcerptNo} | | +-- calendar - | [ Summary ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ Location ] {EStoreYes | EIndexTokenized} {ExcerptYes} - | [ StartTime ] (YYYYMMDDHHSS) (TimeZone UTC) {EStoreYes | EIndexUnTokenized} {ExcerptNA} - | [ EndTime ] (YYYYMMDDHHSS) (TimeZone UTC) {EStoreYes | EIndexUnTokenized} {ExcerptNA} + | [ Summary ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Location ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ StartTime ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} (YYYY MM DD HH SS) (TimeZone UTC) + | [ EndTime ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} (YYYY MM DD HH SS) (TimeZone UTC) + | [ Priority ] {EStoreYes | EIndexTokenized} {ExcerptNo} | | +-- bookmark - | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ Url ] {EStoreYes | EIndexTokenized} {ExcerptYes} + | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ Url ] {EStoreYes | EIndexTokenized} {ExcerptYes} | | +-- applications - | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo} - | [ Uid ] {EStoreYes | EIndexTokenized | ENoAggregate} {ExcerptNo} - | [ Path ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo} + | [ Uid ] {EStoreYes | EIndexTokenized | ENoAggregate} 
{ExcerptNo} + | [ Path ] {EStoreYes | EIndexTokenized} {ExcerptNo} | | +-- notes - | [ Date ] (YYYYMMDDHHSS) {EStoreYes | EIndexUnTokenized} {ExcerptNA} - | [ Memo ] {EStoreYes | EIndexTokenized} {ExcerptNo} + | [ Date ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} + | [ Memo ] {EStoreYes | EIndexTokenized} {ExcerptNo} For instance, a document for an email message should have the diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/public/cpixidxdb.h --- a/searchengine/cpix/cpix/inc/public/cpixidxdb.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/public/cpixidxdb.h Mon Jun 28 10:34:53 2010 +0530 @@ -132,8 +132,21 @@ * Initializes an excerpt processor internal state instance. */ void cpix_init_EPIState(cpix_EPIState * state); - - + + extern const char* cpix_LOCALE_AUTO; + + /** + * Sets the locale used by CPix. Locale is used in indexing and + * searching for text's lexical analysis. Text of different languages + * may be treated differently. At this point, the locale + * should only hold a language code following ISO 639-1 two letter + * format or ISO 639-2 three letter format, if two letter format is + * not available. If cpix_LOCALE_AUTO is given, locale is left to + * be determined automatically by cpix. + * + * @param locale the new locale. Should be an ISO 639-1 language code (ISO 639-2 if no two letter code exists) or cpix_LOCALE_AUTO + */ + void cpix_SetLocale(cpix_Result* result, const char* locale); /** * A simple utility function getting the first couple of words of diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/public/cpixinit.h --- a/searchengine/cpix/cpix/inc/public/cpixinit.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/public/cpixinit.h Mon Jun 28 10:34:53 2010 +0530 @@ -102,7 +102,20 @@ void cpix_InitParams_setCpixDir(cpix_InitParams * thisIp, const char * value); - + /** + * Gets / sets property "resourceDir". 
+ * + * ResourceDir is the path to the directory where cpix should look for: + * + * (a) Localization data + * + * Must not be NULL or empty string. + * + * Default value is cf DEFAULT_CPIX_DIR in cfg/indevicecfg.h. + */ + const char * cpix_InitParams_getResourceDir(cpix_InitParams * thisIp); + void cpix_InitParams_setResourceDir(cpix_InitParams * thisIp, + const char * value); /** * Gets / sets property "logFileBase". diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/inc/public/cpixsearch.h --- a/searchengine/cpix/cpix/inc/public/cpixsearch.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/inc/public/cpixsearch.h Mon Jun 28 10:34:53 2010 +0530 @@ -148,7 +148,10 @@ const wchar_t * fieldName, cpix_Analyzer * analyzer); - + + cpix_QueryParser * + cpix_CreatePrefixQueryParser(cpix_Result * result, + const wchar_t * fieldName); /** * Constructs a special type of query parser, a multi-field query diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/analyzer.cpp --- a/searchengine/cpix/cpix/src/analyzer.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/analyzer.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -15,30 +15,36 @@ * */ - -#include "CLucene.h" -#include "CLucene/analysis/AnalysisHeader.h" -#include "CLucene/analysis/Analyzers.h" - -#include "analyzer.h" -#include "analyzerexp.h" -#include "cpixanalyzer.h" -#include "cluceneext.h" - -#include "cpixexc.h" -#include "cpixparsetools.h" - +// general utilities #include "wchar.h" #include #include #include #include +#include +#include -#include "document.h" +// clucene +#include "CLucene.h" + +// support +#include "cpixparsetools.h" +#include "cpixfstools.h" -#include "indevicecfg.h" +// internal +#include "analyzer.h" +#include "cpixanalyzer.h" +#include "cpixexc.h" +#include "document.h" +#include "cluceneext.h" +#include "indevicecfg.h" +#include "initparams.h" +#include "thaianalysis.h" -#include "initparams.h" +#include "analyzerexp.h" +#include "customanalyzer.h" 
+#include "common/cpixlog.h" + namespace { const char AGGR_NONFILEREADERPROXY_ERR[] @@ -46,11 +52,111 @@ const char AGGR_STREAMREADER_ERR[] = "Aggregating streamValue-fields not implemented"; + + const char THAI_LANGUAGE_FILE[] + = "thaidict.sm"; + + const char ANALYZER_FILE[] + = "analyzer.loc"; + + const wchar_t DEFAULT_ANALYZER_CONFIG[] + = L"default"; + + const wchar_t QUERY_ANALYZER_CONFIG[] + = L"query"; + + const wchar_t PREFIX_ANALYZER_CONFIG[] + = L"prefix"; + +// const wchar_t CPIX_ANALYZER_FALLBACK[] +// = CPIX_ANALYZER_STANDARD; +// +// const wchar_t CPIX_PREFIX_ANALYZER_FALLBACK[] +// = CPIX_TOKENIZER_LETTER L">" CPIX_FILTER_LOWERCASE; + + } namespace Cpix { + +Analysis* Analysis::theInstance_ = NULL; + + void Analysis::init(InitParams& ip) { + // Init thai analysis with thai dictionary + std::string thai( Cpt::appendpath(ip.getResourceDir(), + THAI_LANGUAGE_FILE) ); + + if ( Cpt::filesize( thai.c_str() ) ) { + analysis::InitThaiAnalysis(thai.c_str()); + } else { + logMsg(CPIX_LL_WARNING, + "Thai dictionary could not be found. 
Thai analysis will NOT work."); + } + + // Setup the analysis instance + theInstance_ = new Analysis(ip); + } + + Analysis::Analysis(InitParams& ip) + : defaultAnalyzer_(), + queryAnalyzer_(), + prefixAnalyzer_() { + + auto_ptr p = parse( Cpt::appendpath( ip.getResourceDir(), ANALYZER_FILE ) ); + + defaultAnalyzer_.reset( new CustomAnalyzer( *p, DEFAULT_ANALYZER_CONFIG ) ); + queryAnalyzer_.reset( new CustomAnalyzer( *p, QUERY_ANALYZER_CONFIG ) ); + prefixAnalyzer_.reset( new CustomAnalyzer( *p, PREFIX_ANALYZER_CONFIG ) ); + } + + auto_ptr Analysis::parse(std::string path) { + std::wifstream in(path.c_str()); + auto_ptr ret; + if ( in ) { + + // Reserve constant size buffer and populate it with definition + // + int filesize = Cpt::filesize(path.c_str()); + Cpt::auto_array buf( new wchar_t[filesize+1] ); + in.read(buf.get(), filesize); + buf.get()[filesize] = '\0'; + if ( !in.fail() ) { + try { + ret = AnalyzerExp::ParsePiping( buf.get() ); + } catch (...) {} + } + in.close(); + } + + if ( !ret.get() ) { + THROW_CPIXEXC("Analyzer definition not found. %s could not be opened. 
", path.c_str()); + } + return ret; + } + + void Analysis::shutdown() { + analysis::ShutdownThaiAnalysis(); + delete theInstance_; + theInstance_ = NULL; + } + + lucene::analysis::Analyzer& Analysis::getDefaultAnalyzer() { + // TODO: Assert( theInstance_ ); + return *theInstance_->defaultAnalyzer_; + } + + lucene::analysis::Analyzer& Analysis::getQueryAnalyzer() { + // TODO: Assert( theInstance_ ); + return *theInstance_->queryAnalyzer_; + } + + lucene::analysis::Analyzer& Analysis::getPrefixAnalyzer() { + // TODO: Assert( theInstance_ ); + return *theInstance_->prefixAnalyzer_; + } + PrefixGenerator::PrefixGenerator( lucene::analysis::TokenStream* in, bool deleteTS, @@ -221,488 +327,5 @@ return analyzer_->tokenStream( fieldName, reader ); } } - - // - // Following sections provide the glue code for connecting the - // analyzer definition syntax with analyzer, tokenizers and filter - // implementations. - // - // The glue code is template heavy with the indent of providing - // automation for associating specific keywords with specific - // analyzers, tokenizers and filters implementing corresponding - // CLucene abstractions. Additional classes are needed only if - // filters, tokenizers, etc. accept parameters. - // - // NOTE: To understand the analyzers, it is sufficient to understand - // that an analyzer transforms characters stream into specific token streams - // (e.g. character stream 'foobarmetawords' can be transformed into token - // stream 'foo', 'bar' 'meta' 'words'). Analysis consist of two main - // parts which are tokenization and filtering. Tokenization converts - // the character stream into token stream (e.g. 'FoO bAr' -> 'FoO' 'bAr') - // and filtering modifies the tokens (e.g. lowercase filtering 'FoO' -> - // 'foo', 'bAr' -> 'bar'). Analyzer as an object is responsible for - // constructing a tokenizer and a sequence of filters to perform - // these required tasks. 
- // - // See the documentation around TokenizerClassEntries and - // FilterClassEntries to see how implementations not taking parameters - // can be easily added. - // - - using namespace Cpix::AnalyzerExp; - - /** - * Creates token stream for the given reader and fieldName. - * This class in in many ways similar to CLucene analyzer class - * definition. - */ - class TokenStreamFactory { - public: - virtual ~TokenStreamFactory(); - virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName, - lucene::util::Reader * reader) = 0; - }; - - TokenStreamFactory::~TokenStreamFactory() {}; - - /** - * Template class used to create CLucene tokenizers. Template - * parameter T must implement lucene::analysis::Tokenizer abstraction. - */ - template - class TokenizerFactory : public TokenStreamFactory - { - public: - TokenizerFactory(const Invokation& invokation) { - if (invokation.params().size() > 0) { - THROW_CPIXEXC(L"Tokenizer %S does not accept parameters", - invokation.id().c_str()); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/, - lucene::util::Reader * reader) { - return _CLNEW T(reader); - } - }; - - /** - * Template class wrapping CLucene analyzers. Template parameter T must - * implement lucene::analysis::Analyzer abstraction. - */ - template - class AnalyzerWrap : public TokenStreamFactory - { - public: - AnalyzerWrap(const Invokation& invokation) : analyzer_() { - if (invokation.params().size() > 0) { - THROW_CPIXEXC(L"Tokenizer %S does not accept parameters", - invokation.id().c_str()); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return analyzer_.tokenStream(fieldName, reader); - } - private: - T analyzer_; - }; - - /** - * Template class associated with CLucene filter and a TokenStreamFactory. 
- * Uses TokenStreamFactory to transform given character stream into tokenstream - * and then applies the given Clucene filter to the token stream. - * The template parameter T must implement lucene::analysis::Filter abstraction. - */ - template - class FilterFactory : public TokenStreamFactory - { - public: - FilterFactory(const Invokation& invokation, auto_ptr factory) : factory_(factory) { - if (invokation.params().size() > 0) { - THROW_CPIXEXC(L"Filter %S does not accept parameters", - invokation.id().c_str()); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return _CLNEW T(factory_->tokenStream(fieldName, reader), true); - } - private: - std::auto_ptr factory_; - }; - - /** - * Specialized Analyzer wrap for CLucene's PerFieldAnalyzer. Specialized - * template is needed because perfield analyzer accepts parameters - * (specific analyzers for different field plus default analyzer) - */ - template<> - class AnalyzerWrap : public TokenStreamFactory { - public: - AnalyzerWrap(const Switch& sw) : analyzer_(0) { - using namespace Cpt::Parser; - using namespace lucene::analysis; - - analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def())); - - for (int i = 0; i < sw.cases().size(); i++) { - const Case& cs = *sw.cases()[i]; - for (int j = 0; j < cs.fields().size(); j++) { - analyzer_->addAnalyzer( cs.fields()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping() ) ); - } - } - } - virtual ~AnalyzerWrap() { - _CLDELETE(analyzer_); - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return analyzer_->tokenStream(fieldName, reader); - } - private: - lucene::analysis::PerFieldAnalyzerWrapper* analyzer_; - }; - - - - /** - * Specialized StopFilter factory. 
Specialized filter is needed - * because StopFilter needs parameters (stop word list or a language) - */ - template<> - class FilterFactory : public TokenStreamFactory - { - public: - FilterFactory(const Invokation& invokation, - auto_ptr factory) - :words_(0), ownWords_(0), factory_(factory) { - using namespace Cpt::Parser; - if (invokation.params().size() == 1 && dynamic_cast(invokation.params()[0])) { - Identifier* id = dynamic_cast(invokation.params()[0]); - //cpix_LangCode lang; - if (id->id() == CPIX_WLANG_EN) { - words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS; - } else { - THROW_CPIXEXC(L"No prepared stopword list for language code '%S'", - id->id().c_str()); - } - } else { - ownWords_ = new wchar_t*[invokation.params().size()+1]; - memset(ownWords_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); - // FIXE: args may leak - for (int i = 0; i < invokation.params().size(); i++) { - StringLit* lit = dynamic_cast(invokation.params()[i]); - if (lit) { - const wstring& str = lit->text(); - ownWords_[i] = new wchar_t[str.length()+1]; - wcscpy(ownWords_[i], str.c_str()); - } else { - THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters."); - } - } - } - - } - virtual ~FilterFactory() { - if (ownWords_) { - for (int i = 0; ownWords_[i]; i++) { - delete[] ownWords_[i]; - } - delete[] ownWords_; - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, ownWords_ ? const_cast(ownWords_) : words_); - } - private: - const wchar_t **words_; - wchar_t **ownWords_; // owned - std::auto_ptr factory_; - }; - - /** - * Specialized SnowballFilter factory is needed, because SnowballFilter - * accepts parameters (the language). 
- */ - template<> - class FilterFactory : public TokenStreamFactory - { - public: - FilterFactory(const Invokation& invokation, - auto_ptr factory) - : factory_(factory) { - using namespace Cpt::Parser; - if (invokation.params().size() != 1 || !dynamic_cast(invokation.params()[0])) { - THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." ); - } - Identifier* id = dynamic_cast(invokation.params()[0]); - if (id->id() == CPIX_WLANG_EN) { - lang_ = cpix_LANG_EN; - } else { - THROW_CPIXEXC(L"Language identifier %S is not supported for stemming", - id->id().c_str()); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_); - } - private: - cpix_LangCode lang_; - std::auto_ptr factory_; - }; - - /** - * Specialized LengthFilter factory is needed, because length filter - * accepts parameters (minimum length and maximum length) - */ - template<> - class FilterFactory : public TokenStreamFactory - { - public: - FilterFactory(const Invokation& invokation, - auto_ptr factory) - : factory_(factory) { - using namespace Cpt::Parser; - if (!(invokation.params().empty())) { - if (invokation.params().size() != 2 || - !dynamic_cast(invokation.params()[0]) || - !dynamic_cast(invokation.params()[1])) { - THROW_CPIXEXC("Length filter takes exactly two integer parameters"); - } - min_ = dynamic_cast(invokation.params()[0])->value(); - max_ = dynamic_cast(invokation.params()[1])->value(); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ ); - } - private: - int min_, max_; - std::auto_ptr factory_; - }; - - /** - * Specialized PrefixGenerator factory is needed, because PrefixGenerator - * requires the max prefix size. 
- */ - template<> - class FilterFactory : public TokenStreamFactory - { - public: - FilterFactory(const Invokation& invokation, - auto_ptr factory) - : factory_(factory) { - using namespace Cpt::Parser; - if (invokation.params().empty()) { - if (invokation.params().size() != 1 || - !dynamic_cast(invokation.params()[0])) { - THROW_CPIXEXC("Prefix generator takes exactly one integer parameter"); - } - maxPrefixLength_ = dynamic_cast(invokation.params()[0])->value(); - } - } - virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, - lucene::util::Reader * reader) { - return _CLNEW PrefixGenerator(factory_->tokenStream(fieldName, reader), true, maxPrefixLength_ ); - } - private: - int maxPrefixLength_; - std::auto_ptr factory_; - }; - - - typedef auto_ptr (*TokenizerFactoryCreator)(const Invokation& invokation); - typedef auto_ptr (*FilterFactoryCreator)(const Invokation& invokation, - auto_ptr factory); - /** - * Sets up a tokenizer factory with given invokation parameters - */ - template - struct TokenizerFactoryCtor - { - static auto_ptr create(const Invokation& invokation) { - return auto_ptr(new TokenizerFactory(invokation)); - } - }; - - /** - * Sets up an analyzer wrap with given invokation parameters - */ - template - struct AnalyzerWrapCtor - { - static auto_ptr create(const Invokation& invokation) { - return auto_ptr(new AnalyzerWrap(invokation)); - } - }; - - /** - * Sets up a filter factory with given invokation parameters - */ - template - struct FilterFactoryCtor - { - static auto_ptr create(const Invokation& invokation, - auto_ptr factory) { - return auto_ptr(new FilterFactory(invokation, factory)); - } - }; - - struct TokenizerClassEntry { - const wchar_t *id_; - TokenizerFactoryCreator createFactory_; - }; - - // - // Following TokenizerClassEntries and FilterClassEntries contain - // the mapping from tokenizer/analyzer/filter names into glue code - // templates providing the implementations. 
- // - - TokenizerClassEntry TokenizerClassEntries[] = { - {CPIX_TOKENIZER_STANDARD, TokenizerFactoryCtor::create}, - {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor::create}, - {CPIX_TOKENIZER_LETTER, TokenizerFactoryCtor::create}, - {CPIX_TOKENIZER_KEYWORD, TokenizerFactoryCtor::create}, - {CPIX_ANALYZER_STANDARD, AnalyzerWrapCtor::create}, - -// TODO: Add more Tokenizers/Analyzers - -// Example tokenizer (works as such if tokenizers don't take parameters) -// {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor::create}, - -// Example analyzer (works as such if analyzer don't take parameters) -// {CPIX_ANALYZER_MYANALYZER, AnalyzerWrapCtor::create}, - - {0, 0} - }; - - struct FilterClassEntry { - const wchar_t *id_; - FilterFactoryCreator createFactory_; - }; - - FilterClassEntry FilterClassEntries[] = { - {CPIX_FILTER_STANDARD, FilterFactoryCtor::create}, - {CPIX_FILTER_LOWERCASE, FilterFactoryCtor::create}, - {CPIX_FILTER_ACCENT, FilterFactoryCtor::create}, - {CPIX_FILTER_STOP, FilterFactoryCtor::create}, - {CPIX_FILTER_STEM, FilterFactoryCtor::create}, - {CPIX_FILTER_LENGTH, FilterFactoryCtor::create}, - {CPIX_FILTER_PREFIXES, FilterFactoryCtor::create}, - -// TODO: Add more Filters - -// Example filter (works as such if analyzer don't take parameters) -// {CPIX_FILTER_MYFILTER, FilterFactoryCtor::create}, - - {0, 0} - }; - - CustomAnalyzer::CustomAnalyzer(const wchar_t* definition) - { - using namespace Cpt::Lex; - using namespace Cpt::Parser; - - - try - { - // 1. Setup an tokenizer - Cpix::AnalyzerExp::Tokenizer - tokenizer; - StdLexer - lexer(tokenizer, definition); - - // 2. Parse - std::auto_ptr - def = ParsePiping(lexer); - lexer.eatEof(); - - // 3. 
Setup this item based on parsed definition - setup(*def); - } - catch (Cpt::ITxtCtxtExc & exc) - { - // provide addition info for thrown exception - exc.setContext(definition); - - // throw it fwd - throw; - } - } - - CustomAnalyzer::CustomAnalyzer(const Piping& definition) - { - setup(definition); - } - using namespace Cpt::Parser; - - void CustomAnalyzer::setup(const Piping& piping) { - - // If the first item is invokation, create corresponding analyzer/tokenizer - if (dynamic_cast(&piping.tokenizer())) - { - const Invokation& tokenizer = dynamic_cast(piping.tokenizer()); - TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() ); - factory_ = tokenizerEntry.createFactory_( tokenizer ); - } else { - // If the first item is switch statement, create per-field analyzer - const Switch& tokenizer = dynamic_cast(piping.tokenizer()); - factory_ = new AnalyzerWrap( tokenizer ); - } - - // Add filters - const std::vector& filters = piping.filters(); - for (int i = 0; i < filters.size(); i++) { - FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() ); - factory_ = filterEntry.createFactory_( *filters[i], factory_ ); - } - } - - TokenizerClassEntry& CustomAnalyzer::getTokenizerEntry(std::wstring id) { - - // Looks for a match in the TokenizerClassEntries. After finding - // a match it returns a proper tokenizer/analyzer implementation provider - // - for (int i = 0; TokenizerClassEntries[i].id_; i++) { - if (id == std::wstring(TokenizerClassEntries[i].id_)) { - return TokenizerClassEntries[i]; - } - } - - THROW_CPIXEXC(L"Unknown tokenizer '%S'.", - id.c_str()); - } - - FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) { - - // Looks for a match in the FilterClassEntries. 
After finding - // a match it returns a proper tokenizer/analyzer implementation - // provider - // - for (int i = 0; FilterClassEntries[i].id_; i++) { - if (id == std::wstring(FilterClassEntries[i].id_)) { - return FilterClassEntries[i]; - } - } - - THROW_CPIXEXC(L"Unknown filter '%S'.", - id.c_str()); - } - - CustomAnalyzer::~CustomAnalyzer() {} - - lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t * fieldName, - lucene::util::Reader * reader) { - // Utilizes the the token stream factory to form token stream. - // token stream factory is prepared during custom analyzer construction - // and based on the analyzer definition string. - - return factory_->tokenStream(fieldName, reader); - } - } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/analyzerexp.cpp --- a/searchengine/cpix/cpix/src/analyzerexp.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/analyzerexp.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -16,7 +16,6 @@ */ - #include "analyzerexp.h" #include "indevicecfg.h" @@ -25,8 +24,28 @@ namespace AnalyzerExp { + const wchar_t TOKEN_LEFT_BRACKET[] = L"("; + const wchar_t TOKEN_RIGHT_BRACKET[] = L")"; + const wchar_t TOKEN_COMMA[] = L"comma"; + const wchar_t TOKEN_PIPE[] = L">"; + const wchar_t TOKEN_SWITCH[] = L"switch"; + const wchar_t TOKEN_LOCALE_SWITCH[] = L"locale_switch"; + const wchar_t TOKEN_CONFIG_SWITCH[] = L"config_switch"; + const wchar_t TOKEN_CASE[] = L"case"; + const wchar_t TOKEN_DEFAULT[] = L"default"; + const wchar_t TOKEN_LEFT_BRACE[] = L"{"; + const wchar_t TOKEN_RIGHT_BRACE[] = L"}"; + const wchar_t TOKEN_COLON[] = L":"; + const wchar_t TOKEN_TERMINATOR[] = L";"; + + + std::auto_ptr ParsePiping(Cpt::Parser::Lexer& lexer); + + Tokenizer::Tokenizer() : ws_(), + lcomment_(), + scomment_(), ids_(), strlits_('\''), intlits_(), @@ -36,6 +55,8 @@ cm_(TOKEN_COMMA, L","), pp_(TOKEN_PIPE, CPIX_PIPE), sw_(TOKEN_SWITCH, CPIX_SWITCH), + lsw_(TOKEN_LOCALE_SWITCH, CPIX_LOCALE_SWITCH), + 
csw_(TOKEN_CONFIG_SWITCH, CPIX_CONFIG_SWITCH), cs_(TOKEN_CASE, CPIX_CASE), df_(TOKEN_DEFAULT, CPIX_DEFAULT), lbc_(TOKEN_LEFT_BRACE, L"{"), @@ -43,24 +64,29 @@ cl_(TOKEN_COLON, L":"), tr_(TOKEN_TERMINATOR, L";") { - tokenizers_ = new Cpt::Lex::Tokenizer*[17]; - tokenizers_[0] = &ws_; - tokenizers_[1] = &lb_; - tokenizers_[2] = &rb_; - tokenizers_[3] = &cm_; - tokenizers_[4] = &pp_; - tokenizers_[5] = &sw_; - tokenizers_[6] = &cs_; - tokenizers_[7] = &df_; - tokenizers_[8] = &lbc_; - tokenizers_[9] = &rbc_; - tokenizers_[10] = &cl_; - tokenizers_[11] = &tr_; - tokenizers_[12] = &ids_; - tokenizers_[13] = &strlits_; - tokenizers_[14] = &intlits_; - tokenizers_[15] = &reallits_; - tokenizers_[16] = 0; + int i = 0; + tokenizers_ = new Cpt::Lex::Tokenizer*[21]; + tokenizers_[i++] = &ws_; + tokenizers_[i++] = &lcomment_; + tokenizers_[i++] = &scomment_; + tokenizers_[i++] = &lb_; + tokenizers_[i++] = &rb_; + tokenizers_[i++] = &cm_; + tokenizers_[i++] = &pp_; + tokenizers_[i++] = &sw_; + tokenizers_[i++] = &lsw_; + tokenizers_[i++] = &csw_; + tokenizers_[i++] = &cs_; + tokenizers_[i++] = &df_; + tokenizers_[i++] = &lbc_; + tokenizers_[i++] = &rbc_; + tokenizers_[i++] = &cl_; + tokenizers_[i++] = &tr_; + tokenizers_[i++] = &ids_; + tokenizers_[i++] = &strlits_; + tokenizers_[i++] = &intlits_; + tokenizers_[i++] = &reallits_; + tokenizers_[i++] = 0; tokenizer_.reset( new Cpt::Lex::MultiTokenizer(tokenizers_) ); } @@ -137,12 +163,11 @@ return filters_; } - Case::Case(const std::vector& fields, std::auto_ptr piping) - : fields_(fields), piping_(piping) { - } + Case::Case(const std::vector& cases, std::auto_ptr piping) + : cases_(cases), piping_(piping) {} Case::~Case() {}; - const std::vector& Case::fields() const { return fields_; } - const Piping& Case::piping() const { return *piping_; } + const std::vector& Case::cases() const { return cases_; } + const Piping& Case::piping() const { return *piping_; } Switch::Switch(Cpt::auto_vector& cases, std::auto_ptr def) : 
cases_(cases), def_(def) { @@ -153,7 +178,67 @@ const std::vector& Switch::cases() const { return cases_; } const Piping& Switch::def() const { return *def_; } - + + LocaleSwitch::LocaleSwitch(Cpt::auto_vector& cases, std::auto_ptr def) + : cases_(cases), def_(def) { + } + + LocaleSwitch::~LocaleSwitch() { + } + + const std::vector& LocaleSwitch::cases() const { return cases_; } + const Piping& LocaleSwitch::def() const { return *def_; } + + ConfigSwitch::ConfigSwitch(Cpt::auto_vector& cases, std::auto_ptr def) + : cases_(cases), def_(def) { + } + + ConfigSwitch::~ConfigSwitch() { + } + + const std::vector& ConfigSwitch::cases() const { return cases_; } + const Piping& ConfigSwitch::def() const { return *def_; } + + // + // Parsing methods + // --------------- + // + + // + // How is the parsing implemented? + // -- + // + // Parsing uses the Lexer object from the Cpt::Parser package. + // The basic way the lexer operates is that it + // converts a source stream of characters lazily into a + // stream of tokens. If the lexer object fails at tokenizing + // the character stream because of a syntax error, LexException + // is thrown. + // + // The produced stream of tokens can be iterated + // with 'eat' methods. Typically one moves forward in the + // token stream by 'eating' specific tokens, e.g. by the + // command lexer.eat(TOKEN_LEFT_BRACKET). If the 'eaten' + // token is not of the specified type, a parse exception is + // raised. In cases where a token can be of a number of types, + // use of lexer.peek() is advised. + // + + // + // Example code of using lexer for parsing syntax '(ID[, STRING])': + // + // lexer.eat(TOKEN_LEFT_BRACKET); + // std::string id = lexer.parseId(); + // if (lexer.peek().type() == TOKEN_COMMA) { + // lexer.eat(TOKEN_COMMA); + // std::string str = lexer.parseString(); + // } + // lexer.eat(TOKEN_RIGHT_BRACKET); + // + + // Atomic expressions, e.g. 
"'foo'", "4", "4.5", "id" + // + std::auto_ptr ParseString(Cpt::Parser::Lexer& lexer) { return std::auto_ptr(new StringLit(lexer.eatString())); @@ -176,13 +261,12 @@ std::auto_ptr ParseParameter(Cpt::Parser::Lexer& lexer) { - switch (lexer.peek().type()) { - case Cpt::Lex::TOKEN_ID: return std::auto_ptr( ParseIdentifier(lexer).release() ); - case Cpt::Lex::TOKEN_STRLIT: return std::auto_ptr( ParseString(lexer).release() ); - case Cpt::Lex::TOKEN_INTLIT: return std::auto_ptr( ParseInteger(lexer).release() ); - case Cpt::Lex::TOKEN_REALLIT: return std::auto_ptr( ParseReal(lexer).release() ); - default: throw Cpt::Parser::ParseException(L"Expected literal . ", lexer.peek()); - } + Cpt::Lex::token_type_t type = lexer.peek().type(); + if (type == Cpt::Lex::TOKEN_ID) return std::auto_ptr( ParseIdentifier(lexer).release() ); + if (type == Cpt::Lex::TOKEN_STRLIT) return std::auto_ptr( ParseString(lexer).release() ); + if (type == Cpt::Lex::TOKEN_INTLIT) return std::auto_ptr( ParseInteger(lexer).release() ); + if (type == Cpt::Lex::TOKEN_REALLIT)return std::auto_ptr( ParseReal(lexer).release() ); + throw Cpt::Parser::ParseException(L"Expected literal. 
", lexer.peek()); } std::auto_ptr ParseParameters(Cpt::Parser::Lexer& lexer) @@ -255,10 +339,47 @@ return std::auto_ptr(new Switch(cases, def)); } + + std::auto_ptr ParseLocaleSwitch(Cpt::Parser::Lexer& lexer) + { + lexer.eat(TOKEN_LOCALE_SWITCH); + lexer.eat(TOKEN_LEFT_BRACE); + Cpt::auto_vector cases; + while (lexer && lexer.peek().type() == TOKEN_CASE) { + cases.donate_back(ParseCase(lexer)); + } + std::auto_ptr def = ParseDefault(lexer); + lexer.eat(TOKEN_RIGHT_BRACE); + return std::auto_ptr(new LocaleSwitch(cases, def)); + } + + std::auto_ptr ParseConfigSwitch(Cpt::Parser::Lexer& lexer) + { + lexer.eat(TOKEN_CONFIG_SWITCH); + lexer.eat(TOKEN_LEFT_BRACE); + Cpt::auto_vector cases; + while (lexer && lexer.peek().type() == TOKEN_CASE) { + cases.donate_back(ParseCase(lexer)); + } + std::auto_ptr def = ParseDefault(lexer); + lexer.eat(TOKEN_RIGHT_BRACE); + + return std::auto_ptr(new ConfigSwitch(cases, def)); + } + + + // Tokenizer can be either in Invocation form or switch-case + // structure + // + std::auto_ptr ParseTokenizer(Cpt::Parser::Lexer& lexer) { if (lexer.peek().type() == TOKEN_SWITCH) { return std::auto_ptr(ParseSwitch(lexer).release()); + } else if (lexer.peek().type() == TOKEN_LOCALE_SWITCH) { + return std::auto_ptr(ParseLocaleSwitch(lexer).release()); + } else if (lexer.peek().type() == TOKEN_CONFIG_SWITCH) { + return std::auto_ptr(ParseConfigSwitch(lexer).release()); } else { return std::auto_ptr(ParseRelaxedInvokation(lexer).release()); } @@ -275,6 +396,32 @@ } return std::auto_ptr(new Piping(tokenizer, filters)); } + + std::auto_ptr ParsePiping(const wchar_t* definition) { + using namespace Cpt::Lex; + using namespace Cpt::Parser; + + try { + // 1. Setup an tokenizer + Cpix::AnalyzerExp::Tokenizer + tokenizer; + StdLexer + lexer(tokenizer, definition); + + // 2. 
Parse + std::auto_ptr + def = ParsePiping(lexer); + lexer.eatEof(); + + return def; + } catch (Cpt::ITxtCtxtExc & exc) { + // provide addition info for thrown exception + exc.setContext(definition); + + // throw it fwd + throw; + } + } } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/cpixanalyzer.cpp --- a/searchengine/cpix/cpix/src/cpixanalyzer.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/cpixanalyzer.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -35,6 +35,7 @@ // CPix impl details #include "cluceneext.h" #include "analyzer.h" +#include "customanalyzer.h" #include "cpixsearch.h" #include "cpixidxdb.h" #include "idxdb.h" @@ -141,9 +142,9 @@ // the wrapper custom must be released in any case, as // it was just a first step in the construction - // sequence + // sequence + cpix_Analyzer_destroy(custom); } - cpix_Analyzer_destroy(custom); return system; } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/cpixinit.cpp --- a/searchengine/cpix/cpix/src/cpixinit.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/cpixinit.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -83,6 +83,27 @@ value)); } +const char * cpix_InitParams_getResourceDir(cpix_InitParams * thisIp) +{ + using namespace Cpix; + + return XlateExc(thisIp, + Caller(thisIp, + &InitParams::getResourceDir)); +} + + +void cpix_InitParams_setResourceDir(cpix_InitParams * thisIp, + const char * value) +{ + using namespace Cpix; + + XlateExc(thisIp, + Caller(thisIp, + &InitParams::setResourceDir, + value)); +} + const char * cpix_InitParams_getLogFileBase(cpix_InitParams * thisIp) { diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/cpixsearch.cpp --- a/searchengine/cpix/cpix/src/cpixsearch.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/cpixsearch.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -39,6 +39,7 @@ #include "iqrytype.h" #include "document.h" #include "analyzer.h" +#include "queryparser.h" 
/*********************************************************** @@ -56,22 +57,76 @@ const wchar_t * fieldName, cpix_Analyzer * analyzer) { - using namespace lucene::analysis; - - Cpix::SystemAnalyzer + lucene::analysis::Analyzer * a = Cast2Native(analyzer); cpix_QueryParser * rv = NULL; - rv = Create(result, - CallCtor(rv, - fieldName, - static_cast(a))); - + Cpix::IQueryParser* parser = + XlateExc( + result, + CallFreeFunc( + &Cpix::CreateCLuceneQueryParser, + fieldName, + a)); + + if ( cpix_Succeeded( result ) ) { + CreateWrapper(parser, result, rv); + } return rv; } +cpix_QueryParser * + cpix_CreatePrefixQueryParser(cpix_Result * result, + const wchar_t * fieldName) +{ + using namespace lucene::analysis; + + cpix_QueryParser + * rv = NULL; + + Cpix::IQueryParser* parser = + XlateExc( + result, + CallFreeFunc( + &Cpix::CreatePrefixQueryParser, + fieldName)); + + if ( cpix_Succeeded( result ) ) { + CreateWrapper(parser, result, rv); + } + return rv; +} + + + +cpix_QueryParser * + cpix_CreateMultiFieldQueryParser(cpix_Result * result, + const wchar_t * fieldNames[], + cpix_Analyzer * analyzer, + cpix_BoostMap * boosts) +{ + cpix_QueryParser + * rv = NULL; + + lucene::analysis::Analyzer + * a = Cast2Native(analyzer); + + Cpix::IQueryParser* parser = + XlateExc( + result, + CallFreeFunc( + &Cpix::CreateCLuceneMultiFieldQueryParser, + fieldNames, + a, + Cast2Native(boosts))); + + if ( cpix_Succeeded( result ) ) { + CreateWrapper(parser, result, rv); + } + return rv; +} cpix_BoostMap * @@ -121,39 +176,16 @@ DestroyWrapper(thisMap); } - -cpix_QueryParser * -cpix_CreateMultiFieldQueryParser(cpix_Result * result, - const wchar_t * fieldNames[], - cpix_Analyzer * analyzer, - cpix_BoostMap * boosts) -{ - cpix_MultiFieldQueryParser - * rv = NULL; - - rv = Create(result, - CallCtor(rv, - fieldNames, - Cast2Native(analyzer), - Cast2Native(boosts))); - - return rv; -} - - -void +void cpix_QueryParser_setDefaultOperator(cpix_QueryParser * thisQueryParser, cpix_QP_Operator op) { - 
using namespace lucene::queryParser; - XlateExc(thisQueryParser, Caller(thisQueryParser, - &QueryParser::setDefaultOperator, - static_cast(op))); + &Cpix::IQueryParser::setDefaultOperator, + op)); } - cpix_Query * cpix_QueryParser_parse(cpix_QueryParser * thisQueryParser, const wchar_t * queryStr) diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/customanalyzer.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/customanalyzer.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,797 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +// system library +#include "wchar.h" +#include +#include +#include +#include +#include + +// clucene +#include "CLucene.h" +#include "CLucene/analysis/AnalysisHeader.h" +#include "CLucene/analysis/Analyzers.h" + +// local libary +#include "thaianalysis.h" +#include "ngram.h" +#include "koreananalyzer.h" +#include "cjkanalyzer.h" +#include "cpixparsetools.h" +#include "prefixfilter.h" + +// cpix internal +#include "customanalyzer.h" +#include "cpixanalyzer.h" +#include "analyzer.h" +#include "cluceneext.h" +#include "analyzerexp.h" +#include "indevicecfg.h" +#include "cpixexc.h" +#include "localization.h" + +namespace Cpix { + + // + // Following sections provide the glue code for connecting the + // analyzer definition syntax with analyzer, tokenizers and filter + // implementations. 
+ // + // The glue code is template-heavy with the intent of providing + // automation for associating specific keywords with specific + // analyzers, tokenizers and filters implementing corresponding + // CLucene abstractions. Additional classes are needed only if + // filters, tokenizers, etc. accept parameters. + // + // NOTE: To understand the analyzers, it is sufficient to understand + // that an analyzer transforms a character stream into a specific token stream + // (e.g. character stream 'foobarmetawords' can be transformed into token + // stream 'foo', 'bar', 'meta', 'words'). Analysis consists of two main + // parts, which are tokenization and filtering. Tokenization converts + // the character stream into a token stream (e.g. 'FoO bAr' -> 'FoO' 'bAr') + // and filtering modifies the tokens (e.g. lowercase filtering 'FoO' -> + // 'foo', 'bAr' -> 'bar'). An analyzer object is responsible for + // constructing a tokenizer and a sequence of filters to perform + // these required tasks. + // + // See the documentation around TokenizerClassEntries and + // FilterClassEntries to see how implementations not taking parameters + // can be easily added. 
+ // + + using namespace Cpix::AnalyzerExp; + +// Safe assumption +#define MAX_LANGCODE_LENGTH 256 + + class LocaleSwitchStreamFactory : public TokenStreamFactory { + public: + + LocaleSwitchStreamFactory(const AnalyzerExp::LocaleSwitch& sw, const wchar_t* config); + + ~LocaleSwitchStreamFactory(); + + virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName, + lucene::util::Reader * reader); + + lucene::analysis::TokenStream* tokenStream(std::vector& languages, + const wchar_t * fieldName, + lucene::util::Reader * reader); + + private: + std::map analyzers_; + std::auto_ptr default_; + }; + + + TokenStreamFactory::~TokenStreamFactory() {} + + LocaleSwitchStreamFactory::LocaleSwitchStreamFactory(const LocaleSwitch& sw, const wchar_t* config) { + for (int i = 0; i < sw.cases().size(); i++) { + const Case& cs = *sw.cases()[i]; + for (int j = 0; j < cs.cases().size(); j++) { + std::wstring c = cs.cases()[j]; + if (analyzers_.count(c)) delete analyzers_[c]; // a later case for the same language replaces the earlier one + analyzers_[c] = new CustomAnalyzer(cs.piping(), config); + } + } + default_.reset(new CustomAnalyzer(sw.def(), config)); // default branch receives the same config as the cases + } + + LocaleSwitchStreamFactory::~LocaleSwitchStreamFactory() { + typedef std::map::iterator iter; + for (iter i = analyzers_.begin(); i != analyzers_.end(); i++) { + delete i->second; + } + } + + lucene::analysis::TokenStream* + LocaleSwitchStreamFactory::tokenStream(const wchar_t * fieldName, + lucene::util::Reader * reader) { + std::vector languages = + Localization::instance().getLanguageNames(); + + return tokenStream(languages, fieldName, reader); + } + + lucene::analysis::TokenStream* + LocaleSwitchStreamFactory::tokenStream(std::vector& languages, + const wchar_t * fieldName, + lucene::util::Reader * reader) { + for (int i = 0; i < languages.size(); i++) { // first listed language with a registered analyzer wins + if ( analyzers_.count(languages[i]) ) { + return analyzers_[languages[i]]->tokenStream( fieldName, reader ); + } + } + return default_->tokenStream( fieldName, reader ); + } + + class DefaultTokenStreamFactory : public 
TokenStreamFactory { + public: + + enum Target { + NORMAL, + INDEXING, + QUERY, + PREFIX + }; + + DefaultTokenStreamFactory(const Invokation& invokation) { + if (invokation.params().size() == 1) { + const Identifier* id = dynamic_cast( invokation.params()[0] ); + if ( id ) { + if ( id->id() == CPIX_ID_INDEXING ) { + target_ = INDEXING; + } else if ( id->id() == CPIX_ID_QUERY ) { + target_ = QUERY; + } else if ( id->id() == CPIX_ID_PREFIX ) { + target_ = PREFIX; + } else { + THROW_CPIXEXC(L"Default analyzer does not accept %S for parameter", id->id().c_str()); + } + } else { + THROW_CPIXEXC(L"Default accepts only identifier as a parameter."); + } + } else if (invokation.params().size() > 1) { + THROW_CPIXEXC(L"Default analyzer does not accept more than one parameter"); + } else { + target_ = NORMAL; + } + } + + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + switch (target_) { + case QUERY: + return Analysis::getQueryAnalyzer().tokenStream( fieldName, reader ); + case PREFIX: + return Analysis::getPrefixAnalyzer().tokenStream( fieldName, reader ); + } + return Analysis::getDefaultAnalyzer().tokenStream( fieldName, reader ); + } + + private: + + Target target_; + + }; + + /** + * Template class used to create CLucene tokenizers. Template + * parameter T must implement lucene::analysis::Tokenizer abstraction. 
+ */ + template + class TokenizerFactory : public TokenStreamFactory + { + public: + TokenizerFactory(const Invokation& invokation) { + if (invokation.params().size() > 0) { + THROW_CPIXEXC(L"Tokenizer %S does not accept parameters", + invokation.id().c_str()); + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/, + lucene::util::Reader * reader) { + return _CLNEW T(reader); + } + }; + + template<> + class TokenizerFactory : public TokenStreamFactory + { + public: + static const int DefaultNgramSize = 1; // n-gram size used when the definition gives no parameter + TokenizerFactory(const Invokation& invokation) { + using namespace Cpix::AnalyzerExp; + if (invokation.params().size() > 1) { + THROW_CPIXEXC(L"Cjk Ngram tokenizer does not accept more than one parameter", + invokation.id().c_str()); + } + if (invokation.params().size() == 1) { // exactly one parameter: the explicit n-gram size + IntegerLit* ngramSize = dynamic_cast(invokation.params()[0]); + if ( ngramSize ) { + ngramSize_ = ngramSize->value(); + } else { + THROW_CPIXEXC(L"Cjk Ngram tokenizer parameter must be an integer"); + } + } else { + ngramSize_ = DefaultNgramSize; + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/, + lucene::util::Reader * reader) { + return _CLNEW analysis::CjkNGramTokenizer(reader, ngramSize_); + } + + private: + + int ngramSize_; + }; + + + /** + * Template class wrapping CLucene analyzers. Template parameter T must + * implement lucene::analysis::Analyzer abstraction. 
+ */ + template + class AnalyzerWrap : public TokenStreamFactory + { + public: + AnalyzerWrap(const Invokation& invokation) : analyzer_() { + if (invokation.params().size() > 0) { // wrapped analyzers are default-constructed, so no parameters are allowed + THROW_CPIXEXC(L"Analyzer %S does not accept parameters", + invokation.id().c_str()); + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return analyzer_.tokenStream(fieldName, reader); + } + private: + T analyzer_; + }; + + /** + * Template class associated with CLucene filter and a TokenStreamFactory. + * Uses TokenStreamFactory to transform given character stream into tokenstream + * and then applies the given Clucene filter to the token stream. + * The template parameter T must implement lucene::analysis::Filter abstraction. + */ + template + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, auto_ptr factory) : factory_(factory) { + if (invokation.params().size() > 0) { + THROW_CPIXEXC(L"Filter %S does not accept parameters", + invokation.id().c_str()); + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW T(factory_->tokenStream(fieldName, reader), true); + } + private: + std::auto_ptr factory_; + }; + + /** + * Specialized Analyzer wrap for CLucene's PerFieldAnalyzer. 
Specialized + * template is needed because perfield analyzer accepts parameters + * (specific analyzers for different field plus default analyzer) + */ + template<> + class AnalyzerWrap : public TokenStreamFactory { + public: + AnalyzerWrap(const Switch& sw, const wchar_t* config) : analyzer_(0) { + using namespace Cpt::Parser; + using namespace lucene::analysis; + + analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def(), config)); // default analyzer receives the same config as the per-field cases + + for (int i = 0; i < sw.cases().size(); i++) { + const Case& cs = *sw.cases()[i]; + for (int j = 0; j < cs.cases().size(); j++) { + analyzer_->addAnalyzer( cs.cases()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping(), config ) ); + } + } + } + virtual ~AnalyzerWrap() { + _CLDELETE(analyzer_); + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return analyzer_->tokenStream(fieldName, reader); + } + private: + lucene::analysis::PerFieldAnalyzerWrapper* analyzer_; + }; + + + + /** + * Specialized StopFilter factory. 
Specialized filter is needed + * because StopFilter needs parameters (stop word list or a language) + */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + :words_(0), ownWords_(0), factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() == 1 && dynamic_cast(invokation.params()[0])) { + Identifier* id = dynamic_cast(invokation.params()[0]); + //cpix_LangCode lang; + if (id->id() == CPIX_WLANG_EN) { + words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS; + } else if (id->id() == CPIX_WLANG_FR) { + words_ = analysis::NonEnglishStopWords::FRENCH_STOP_WORDS; + } else { + THROW_CPIXEXC(L"No prepared stopword list for language code '%S'", + id->id().c_str()); + } + } else { + ownWords_ = new wchar_t*[invokation.params().size()+1]; + memset(ownWords_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); + // FIXE: args may leak + for (int i = 0; i < invokation.params().size(); i++) { + StringLit* lit = dynamic_cast(invokation.params()[i]); + if (lit) { + const wstring& str = lit->text(); + ownWords_[i] = new wchar_t[str.length()+1]; + wcscpy(ownWords_[i], str.c_str()); + } else { + THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters."); + } + } + } + + } + virtual ~FilterFactory() { + if (ownWords_) { + for (int i = 0; ownWords_[i]; i++) { + delete[] ownWords_[i]; + } + delete[] ownWords_; + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, ownWords_ ? const_cast(ownWords_) : words_); + } + private: + const wchar_t **words_; + wchar_t **ownWords_; // owned + std::auto_ptr factory_; + }; + + /** + * Specialized SnowballFilter factory is needed, because SnowballFilter + * accepts parameters (the language). 
+ */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + : factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() != 1 || !dynamic_cast(invokation.params()[0])) { + THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." ); + } + Identifier* id = dynamic_cast(invokation.params()[0]); + if (id->id() == CPIX_WLANG_EN) { + lang_ = cpix_LANG_EN; + } else { + THROW_CPIXEXC(L"Language identifier %S is not supported for stemming", + id->id().c_str()); + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_); + } + private: + cpix_LangCode lang_; + std::auto_ptr factory_; + }; + + /** + * Specialized LengthFilter factory is needed, because length filter + * accepts parameters (minimum length and maximum length) + */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + : factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() != 2 || + !dynamic_cast(invokation.params()[0]) || + !dynamic_cast(invokation.params()[1])) { + THROW_CPIXEXC("Length filter takes exactly two integer parameters"); + } + min_ = dynamic_cast(invokation.params()[0])->value(); + max_ = dynamic_cast(invokation.params()[1])->value(); + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ ); + } + private: + int min_, max_; + std::auto_ptr factory_; + }; + + /** + * Specialized PrefixGenerator factory is needed, because PrefixGenerator + * requires the max prefix size. 
+ */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + : factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() != 1 || + !dynamic_cast(invokation.params()[0])) { + THROW_CPIXEXC("Prefix generator takes exactly one integer parameter"); + } + maxPrefixLength_ = dynamic_cast(invokation.params()[0])->value(); + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW PrefixGenerator(factory_->tokenStream(fieldName, reader), true, maxPrefixLength_ ); + } + private: + int maxPrefixLength_; + std::auto_ptr factory_; + }; + + /** + * Specialized PrefixFilter factory is needed, because prefix filter + * accepts parameters (language set or prefixes) + */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + : prefixes_(0), ownPrefixes_(0), factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() == 1 && + dynamic_cast(invokation.params()[0])) { + Identifier* id = dynamic_cast(invokation.params()[0]); + //cpix_LangCode lang; + if (id->id() == CPIX_WLANG_HE) { + prefixes_ = analysis::HebrewPrefixes; + } else { + THROW_CPIXEXC(L"No prepared prefix list for language code '%S'", + id->id().c_str()); + } + } else { + ownPrefixes_ = new wchar_t*[invokation.params().size()+1]; + memset(ownPrefixes_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); + // FIXE: args may leak + for (int i = 0; i < invokation.params().size(); i++) { + StringLit* lit = dynamic_cast(invokation.params()[i]); + if (lit) { + const wstring& str = lit->text(); + ownPrefixes_[i] = new wchar_t[str.length()+1]; + wcscpy(ownPrefixes_[i], str.c_str()); + } else { + THROW_CPIXEXC(L"PrefixFilter accepts only language identifer or list of strings as a parameters."); + } + } + } + } + virtual 
~FilterFactory() { + if (ownPrefixes_) { + for (int i = 0; ownPrefixes_[i]; i++) { + delete[] ownPrefixes_[i]; + } + delete[] ownPrefixes_; + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName, + lucene::util::Reader * reader) { + return _CLNEW analysis::PrefixFilter(factory_->tokenStream(fieldName, reader), true, ownPrefixes_ ? const_cast(ownPrefixes_) : prefixes_); + } + private: + const wchar_t **prefixes_; + wchar_t **ownPrefixes_; // owned + std::auto_ptr factory_; + }; + + /** + * Specialized ElisionFilter factory is needed, because elision filter + * accepts parameters (language set or articles) + */ + template<> + class FilterFactory : public TokenStreamFactory + { + public: + FilterFactory(const Invokation& invokation, + auto_ptr factory) + : articles_(0), ownArticles_(0), factory_(factory) { + using namespace Cpt::Parser; + if (invokation.params().size() == 1 && + dynamic_cast(invokation.params()[0])) { + Identifier* id = dynamic_cast(invokation.params()[0]); + //cpix_LangCode lang; + if (id->id() == CPIX_WLANG_FR) { + articles_ = analysis::FrenchArticles; + } else { + THROW_CPIXEXC(L"No prepared article list for language code '%S'", + id->id().c_str()); + } + } else { + ownArticles_ = new wchar_t*[invokation.params().size()+1]; + memset(ownArticles_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); + // FIXME: ownArticles_ leaks if the constructor throws below (the destructor does not run) + for (int i = 0; i < invokation.params().size(); i++) { + StringLit* lit = dynamic_cast(invokation.params()[i]); + if (lit) { + const wstring& str = lit->text(); + ownArticles_[i] = new wchar_t[str.length()+1]; + wcscpy(ownArticles_[i], str.c_str()); + } else { + THROW_CPIXEXC(L"ElisionFilter accepts only language identifier or list of strings as parameters."); + } + } + } + } + virtual ~FilterFactory() { + if (ownArticles_) { + for (int i = 0; ownArticles_[i]; i++) { + delete[] ownArticles_[i]; + } + delete[] ownArticles_; + } + } + virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * 
fieldName, + lucene::util::Reader * reader) { + return _CLNEW analysis::ElisionFilter(factory_->tokenStream(fieldName, reader), true, ownArticles_ ? const_cast(ownArticles_) : articles_); + } + private: + const wchar_t **articles_; + wchar_t **ownArticles_; // owned + std::auto_ptr factory_; + }; + + typedef auto_ptr (*TokenizerFactoryCreator)(const Invokation& invokation); + typedef auto_ptr (*FilterFactoryCreator)(const Invokation& invokation, + auto_ptr factory); + + template + struct TokenStreamFactoryCtor + { + static auto_ptr create(const Invokation& invokation) { + return auto_ptr(new T(invokation)); + } + }; + + /** + * Sets up a tokenizer factory with given invokation parameters + */ + template + struct TokenizerFactoryCtor + { + static auto_ptr create(const Invokation& invokation) { + return auto_ptr(new TokenizerFactory(invokation)); + } + }; + + /** + * Sets up an analyzer wrap with given invokation parameters + */ + template + struct AnalyzerWrapCtor + { + static auto_ptr create(const Invokation& invokation) { + return auto_ptr(new AnalyzerWrap(invokation)); + } + }; + + /** + * Sets up a filter factory with given invokation parameters + */ + template + struct FilterFactoryCtor + { + static auto_ptr create(const Invokation& invokation, + auto_ptr factory) { + return auto_ptr(new FilterFactory(invokation, factory)); + } + }; + + struct TokenizerClassEntry { + const wchar_t *id_; + TokenizerFactoryCreator createFactory_; + }; + + // + // Following TokenizerClassEntries and FilterClassEntries contain + // the mapping from tokenizer/analyzer/filter names into glue code + // templates providing the implementations. 
+ // + + TokenizerClassEntry TokenizerClassEntries[] = { + {CPIX_TOKENIZER_STANDARD, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_LETTER, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_KEYWORD, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_CJK, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_NGRAM, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_KOREAN, TokenizerFactoryCtor::create}, + {CPIX_TOKENIZER_KOREAN_QUERY,TokenizerFactoryCtor::create}, + + {CPIX_ANALYZER_STANDARD, AnalyzerWrapCtor::create}, + {CPIX_ANALYZER_DEFAULT, TokenStreamFactoryCtor::create}, + + // TODO: Add more Tokenizers/Analyzers + + // Example tokenizer (works as such if tokenizers don't take parameters) + // {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor::create}, + + // Example analyzer (works as such if analyzer don't take parameters) + // {CPIX_ANALYZER_MYANALYZER, AnalyzerWrapCtor::create}, + + {0, 0} + }; + + struct FilterClassEntry { + const wchar_t *id_; + FilterFactoryCreator createFactory_; + }; + + FilterClassEntry FilterClassEntries[] = { + {CPIX_FILTER_STANDARD, FilterFactoryCtor::create}, + {CPIX_FILTER_LOWERCASE, FilterFactoryCtor::create}, + {CPIX_FILTER_ACCENT, FilterFactoryCtor::create}, + {CPIX_FILTER_STOP, FilterFactoryCtor::create}, + {CPIX_FILTER_STEM, FilterFactoryCtor::create}, + {CPIX_FILTER_LENGTH, FilterFactoryCtor::create}, + {CPIX_FILTER_PREFIXES, FilterFactoryCtor::create}, + {CPIX_FILTER_THAI, FilterFactoryCtor::create}, + {CPIX_FILTER_PREFIX, FilterFactoryCtor::create}, + {CPIX_FILTER_ELISION, FilterFactoryCtor::create}, + + // TODO: Add more Filters + + // Example filter (works as such if filter don't take parameters) + // {CPIX_FILTER_MYFILTER, FilterFactoryCtor::create}, + + {0, 0} + }; + + CustomAnalyzer::CustomAnalyzer(const wchar_t* definition, const wchar_t* config) { + std::auto_ptr piping = AnalyzerExp::ParsePiping( definition ); + setup( *piping, config ); + } + + 
CustomAnalyzer::CustomAnalyzer(const Piping& definition, const wchar_t* config) { + setup(definition, config); + } + + using namespace Cpt::Parser; + + void CustomAnalyzer::setup(const Piping& piping, const wchar_t* config) { + + // If the first item is invokation, create corresponding analyzer/tokenizer + if (dynamic_cast(&piping.tokenizer())) { + const Invokation& tokenizer = dynamic_cast(piping.tokenizer()); + TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() ); + factory_ = tokenizerEntry.createFactory_( tokenizer ); + } else if (dynamic_cast(&piping.tokenizer())) { + // If the first item is switch statement, create per-field analyzer + const Switch& tokenizer = dynamic_cast(piping.tokenizer()); + factory_.reset( new AnalyzerWrap( tokenizer, config ) ); // reset(): auto_ptr has no standard assignment from a raw pointer + } else if (dynamic_cast(&piping.tokenizer())) { + const LocaleSwitch& tokenizer = dynamic_cast(piping.tokenizer()); + factory_.reset( new LocaleSwitchStreamFactory( tokenizer, config ) ); + } else if (dynamic_cast(&piping.tokenizer())) { + const ConfigSwitch& tokenizer = dynamic_cast(piping.tokenizer()); + factory_ = resolveConfigSwitch( tokenizer, config ); + } else { + THROW_CPIXEXC(L"Analyzer definition syntax did not begin with valid tokenizer"); + } + + // Add filters: each filter factory takes ownership of the factory built so far + const std::vector& filters = piping.filters(); + for (int i = 0; i < filters.size(); i++) { + FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() ); + factory_ = filterEntry.createFactory_( *filters[i], factory_ ); + } + } + + std::auto_ptr CustomAnalyzer::resolveConfigSwitch(const ConfigSwitch& csw, const wchar_t* config) { + if (config) { + for (int i = 0; i < csw.cases().size(); i++) { + const Case& cs = *csw.cases()[i]; + for (int j = 0; j < cs.cases().size(); j++) { + if (wcscmp(config, cs.cases()[j].c_str()) == 0) { + return std::auto_ptr( + new CustomAnalyzer(cs.piping(), config)); + } + } + } + } + return std::auto_ptr(new CustomAnalyzer(csw.def(), config)); + } + + TokenizerClassEntry& 
CustomAnalyzer::getTokenizerEntry(std::wstring id) { + + // Looks for a match in the TokenizerClassEntries. After finding + // a match it returns a proper tokenizer/analyzer implementation provider + // + for (int i = 0; TokenizerClassEntries[i].id_; i++) { + if (id == std::wstring(TokenizerClassEntries[i].id_)) { + return TokenizerClassEntries[i]; + } + } + + THROW_CPIXEXC(L"Unknown tokenizer '%S'.", + id.c_str()); + } + + FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) { + + // Looks for a match in the FilterClassEntries. After finding + // a match it returns a proper tokenizer/analyzer implementation + // provider + // + for (int i = 0; FilterClassEntries[i].id_; i++) { + if (id == std::wstring(FilterClassEntries[i].id_)) { + return FilterClassEntries[i]; + } + } + + THROW_CPIXEXC(L"Unknown filter '%S'.", + id.c_str()); + } + + CustomAnalyzer::~CustomAnalyzer() {} + + lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t * fieldName, + lucene::util::Reader * reader) { + // Utilizes the the token stream factory to form token stream. + // token stream factory is prepared during custom analyzer construction + // and based on the analyzer definition string. 
+ + return factory_->tokenStream(fieldName, reader); + } + + std::auto_ptr CreateDefaultAnalyzer() + { + return + std::auto_ptr( + new SystemAnalyzer(_CLNEW lucene::analysis::standard::StandardAnalyzer())); + } + +} diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/fileparser/fileparser.cpp --- a/searchengine/cpix/cpix/src/fileparser/fileparser.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/fileparser/fileparser.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -162,7 +162,8 @@ wFullName.c_str(), cpix_STORE_YES | cpix_INDEX_TOKENIZED - | cpix_AGGREGATE_YES)); + | cpix_AGGREGATE_YES + | cpix_FREE_TEXT)); doc->add(newField.get()); newField.release(); @@ -172,7 +173,8 @@ wBaseName.c_str(), cpix_STORE_NO | cpix_INDEX_TOKENIZED - | cpix_AGGREGATE_YES)); + | cpix_AGGREGATE_YES + | cpix_FREE_TEXT)); doc->add(newField.get()); newField.release(); @@ -180,7 +182,8 @@ wExtension.c_str(), cpix_STORE_NO | cpix_INDEX_TOKENIZED - | cpix_AGGREGATE_YES)); + | cpix_AGGREGATE_YES + | cpix_FREE_TEXT)); doc->add(newField.get()); newField.release(); } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/iidxdb.cpp --- a/searchengine/cpix/cpix/src/iidxdb.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/iidxdb.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -25,6 +25,9 @@ #include "iqrytype.h" // 'unnecessary' dependency for static instance releasing #include "ifieldfilter.h" // for static instance releasing +#include "analyzer.h" +#include "localization.h" + namespace Cpix { @@ -184,6 +187,9 @@ cleanupClLockDir(); // TODO init lucene (??? operation not provided) + + Localization::instance(); + Analysis::init(*ip); IdxDbMgr::init(*ip); ShutdownSentry @@ -277,6 +283,26 @@ } try + { + Analysis::shutdown(); + } + catch (...) + { + logMsg(CPIX_LL_ERROR, + "Cpix shutdownAll: FAILED. Analysis::shutdown ########\n"); + } + + try + { + Localization::shutdown(); + } + catch (...) + { + logMsg(CPIX_LL_ERROR, + "Cpix shutdownAll: FAILED. 
Localization::shutdown ########\n"); + } + + try { _lucene_shutdown(); } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/initparams.cpp --- a/searchengine/cpix/cpix/src/initparams.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/initparams.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -36,7 +36,8 @@ InitParams::InitParams() - : cpixDir_(DEFAULT_CPIX_DIR) + : cpixDir_(DEFAULT_CPIX_DIR), + resourceDir_(DEFAULT_RESOURCE_DIR) { setenv("LUCENE_LOCK_DIR_ENV_1",DEFAULT_CLUCENE_LOCK_DIR,1); } @@ -58,6 +59,22 @@ cpixDir_ = value; } + const char * InitParams::getResourceDir() const + { + return resourceDir_.c_str(); + } + + void InitParams::setResourceDir(const char * value) + { + if (value == NULL + || strlen(value) == 0) + { + THROW_CPIXEXC("Value for property resourceDir cannot be NULL or empty string"); + } + + resourceDir_ = value; + } + const char * InitParams::getCluceneLockDir() const { return getenv("LUCENE_LOCK_DIR_ENV_1"); diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/iqrytype.cpp --- a/searchengine/cpix/cpix/src/iqrytype.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/iqrytype.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -309,6 +309,17 @@ } +namespace { + + const wchar_t DOLLAR[] = L"$"; + const wchar_t LESSTHAN[] = L"<"; + const wchar_t GREATERTHAN[] = L">"; + const wchar_t COMMA[] = L","; + const wchar_t LEFTPARENTHESIS[] = L"("; + const wchar_t RIGHTPARENTHESIS[] = L")"; +} + + namespace Cpix { @@ -569,7 +580,7 @@ Tokens source(tokenizer(), qryStr); - WhiteSpaceFilter + StdFilter tokens(source); State @@ -761,8 +772,7 @@ IQryType * IQryType::parseQry(cpix_QueryParser * queryParser, const wchar_t * qryStr) { - IQryType - * rv = NULL; + auto_ptr rv( NULL ); QryCall qryCall(qryStr); @@ -783,13 +793,13 @@ qryCall.qryTypeId_.c_str()); } - rv = qti->factory_(); + rv.reset( qti->factory_() ); rv->setUp(queryParser, qryCall.args_, qryCall.innerQryStr_.c_str()); - return rv; + return rv.release(); } diff -r 
a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/localization.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/localization.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,112 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + +#include "CLucene.h" +#include "CLucene/queryParser/Multifieldqueryparser.h" + +#include "cpixidxdb.h" + +#include "cpixstrtools.h" +#include "cpixhits.h" +#include "cpixsearch.h" +#include "iidxdb.h" +#include "cpixutil.h" +#include "localization.h" + +#include "spi/locale.h" + +#include "glib.h" + +const char* cpix_LOCALE_AUTO = "auto"; +const wchar_t* cpix_WIDE_LOCALE_AUTO = L"auto"; + +namespace Cpix { + + Localization* Localization::theInstance_ = NULL; + + Localization& Localization::instance() { + if ( !theInstance_ ) { + theInstance_ = new Localization(); + } + return *theInstance_; + } + + void Localization::shutdown() { + delete theInstance_; + theInstance_ = NULL; // pointer reset, so a later instance() call creates a fresh object + } + + Localization::Localization() + : mutex_(), + auto_( true ), + languageNames_(){} + + + void Localization::setLocale(const wchar_t* locale) { + Cpt::SyncRegion lock( mutex_ ); + + languageNames_.resize(0); + if ( wcscmp( locale, cpix_WIDE_LOCALE_AUTO ) == 0 ) { + auto_ = true; + } else { + auto_ = false; + languageNames_.push_back(locale); + } + } + + void Localization::setLocale(const char* locale) { + Cpt::auto_array wlocale(locale, strlen(locale)); + setLocale(wlocale.get()); + } + + + std::vector Localization::getLanguageNames() { + Cpt::SyncRegion lock( mutex_ ); + + if ( auto_ ) { + // might be slow + return 
Spi::GetLanguageNames(); + } else { + return languageNames_; + } + } + +} + +class SetLocaleFunctor +{ + private: + + const char* locale_; + + public: + + typedef void result_type; // returns nothing + + SetLocaleFunctor(const char * locale) + : locale_(locale) {} + + void operator()() { + Cpix::Localization::instance().setLocale( locale_ ); + } +}; + +void cpix_SetLocale(cpix_Result* result, const char* locale) { + XlateExc(result, + SetLocaleFunctor(locale)); +} + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/prefixopt.cpp --- a/searchengine/cpix/cpix/src/prefixopt.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/prefixopt.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -23,6 +23,8 @@ #include "cpixstrtools.h" +#include "cluceneext.h" + namespace Cpix { using namespace lucene::search; @@ -60,7 +62,7 @@ // rewrite term std::wstring text( term->text() ); text = text.substr(0, length); - term = new Term(prefixField_.c_str(), text.c_str(), true); + term = lucene::util::freeref( _CLNEW Term(prefixField_.c_str(), text.c_str()) ); // rewrite query std::auto_ptr ret( new TermQuery( term ) ); @@ -75,7 +77,9 @@ if ( boolq ) { // Just modify the query - Cpt::auto_array clauses( boolq->getClauses() ); + Cpt::auto_array clauses( new BooleanClause*[boolq->getClauseCount() + 1]); + + boolq->getClauses( clauses.get() ); for ( int i = 0; i < boolq->getClauseCount(); i++ ) { diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/prefixqueryparser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/prefixqueryparser.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,201 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". 
+* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "CLucene.h" + +#include "cpixmaindefs.h" + +// internal libs +#include "cpixparsetools.h" + +// internal +#include "analyzer.h" + +#include "prefixqueryparser.h" + +#include "cpixanalyzer.h" +#include "cluceneext.h" + +#include "tinyunicode.h" + +#include "cpixexc.h" + +namespace Cpix { + + using namespace lucene::analysis; + using namespace lucene::search; + using namespace lucene::document; + using namespace lucene::util; + using lucene::index::Term; + using namespace std; + + namespace { + + /** + * Small optimization to avoid creating extra boolean queries + */ + class QueryConstructor { + + public: + QueryConstructor() : q_(), bq_(0) {} + + auto_ptr operator()() { /* Hands the accumulated query to the caller; q_ gives up ownership here, bq_ is left as it was. */ + return q_; + } + void add(auto_ptr q) { /* Null queries are ignored. The first query is stored as-is; the second one replaces q_ with a BooleanQuery holding both; later ones are appended to that BooleanQuery. */ + if ( q.get() ) { + if ( bq_ ) { + bq_->add( q.release(), true, true, false ); + } else { + if ( q_.get() ) { + auto_ptr bq( new BooleanQuery() ); + bq_ = bq.get(); + bq_->add( q_.release(), true, true, false ); + bq_->add( q.release(), true, true, false ); + q_.reset( bq.release() ); + } else { + q_ = q; + } + } + } + } + inline void add(Query* q) { /* Raw-pointer convenience overload; wraps q in an auto_ptr and forwards. */ + add( auto_ptr( q ) ); + } + + private: + + auto_ptr q_; + BooleanQuery* bq_; + + }; + + /** + * TokenStream interface with one modification: + * * Ability to check if returned token was last one in the stream + */ + class HasNextTokenStream { + + public: + + HasNextTokenStream(TokenStream* tokens) + : i_(true), + next_(), + buf_(), + tokens_( tokens ){ + next_ = tokens_->next(&buf_[0]); /* read one token ahead so that hasNext() is meaningful before the first next() */ + } + + inline Token& next() { /* Reads the new lookahead token into buf_[i_], flips i_, and returns the slot that held the previous lookahead. */ + next_ = tokens_->next(&buf_[i_]); + i_ = !i_; + return buf_[i_]; + } + + inline bool hasNext() { /* Result of the most recent read from the wrapped stream. */ + return next_; + } + + private: + bool i_, next_; + Token buf_[2]; + auto_ptr tokens_; + }; + + + } + + PrefixQueryParser::PrefixQueryParser(const wchar_t* field) + : field_(field) {} + + PrefixQueryParser::~PrefixQueryParser() {} + + auto_ptr
PrefixQueryParser::parse(const wchar_t* query) { /* Splits the query on whitespace, converts every word with toQuery() and combines the results through QueryConstructor. */ + Cpt::Lex::WhitespaceSplitter split(query); + QueryConstructor ret; + while ( split ) { + ret.add( toQuery( split++ ) ); + } + return ret(); + } + + const wchar_t* PrefixQueryParser::getField() const { + return field_.c_str(); + } + + void PrefixQueryParser::setDefaultOperator(cpix_QP_Operator op) { /* Always throws; 'op' is not used. */ + THROW_CPIXEXC("Prefix query parser does not support setting the default operator."); + } + + bool PrefixQueryParser::usePrefixFor(lucene::analysis::Token& token) { /* False when the first character of the token text is CJK, true otherwise. */ + return !analysis::unicode::IsCjk(token.termText()[0]); + } + + auto_ptr + PrefixQueryParser::toQuery(Cpt::Lex::Token word) { /* Converts one whitespace-delimited word into a query; the result is empty when the prefix analyzer yields no token. */ + Analyzer& preAnalyzer( Analysis::getPrefixAnalyzer() ); + StringReader reader( word.begin(), word.length() ); + HasNextTokenStream tokens( + preAnalyzer.tokenStream( field_.c_str(), + &reader ) ); + + QueryConstructor ret; + + while ( tokens.hasNext() ) { + lucene::analysis::Token& token = tokens.next(); + + if ( usePrefixFor(token) ) { + if (!tokens.hasNext()) { + // Turn only last token of this word into prefix query + ret.add( + _CLNEW PrefixQuery( freeref( _CLNEW Term( field_.c_str(), + token.termText() ) ) ) ); + } else { + // Other tokens can be normal term queries + ret.add( + _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(), + token.termText() ) ) ) ); + } + } else { + Analyzer& termAnalyzer = Analysis::getQueryAnalyzer(); + StringReader reader( token.termText(), token.termTextLength() ); /* this 'reader' and the 'tokens' declared next shadow the outer ones inside this branch */ + HasNextTokenStream tokens( + termAnalyzer.tokenStream( field_.c_str(), + &reader ) ); + + Token& first = tokens.next(); /* NOTE(review): not guarded by hasNext(); if the query analyzer emits nothing for this text, 'first' is a slot that was never filled - confirm this cannot happen */ + if (tokens.hasNext()) { // more than one + auto_ptr phrase( _CLNEW PhraseQuery() ); + phrase->add( freeref( _CLNEW Term( field_.c_str(), + first.termText() ) ) ); + while (tokens.hasNext()) { + phrase->add( freeref( _CLNEW Term( field_.c_str(), + tokens.next().termText() ) ) ); + } + ret.add( std::auto_ptr( phrase.release() ) ); + } else { + ret.add( + _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(),
first.termText() ) ) ) ); + } + } + } + return ret(); + } + +} diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/qrytypes/cluceneqrytype.cpp --- a/searchengine/cpix/cpix/src/qrytypes/cluceneqrytype.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/qrytypes/cluceneqrytype.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -34,7 +34,8 @@ //Introduced for prefix optimization. #include "prefixopt.h" #include "cpixmaindefs.h" -#include "iqrytype.h" + +#include "queryparser.h" namespace Cpix { @@ -48,7 +49,6 @@ // // private members // - lucene::queryParser::QueryParser * clQueryParser_; lucene::search::Query * clQuery_; public: @@ -60,8 +60,7 @@ // lifetime management // LuceneQryType() - : clQueryParser_(NULL), - clQuery_(NULL) + : clQuery_(NULL) { ; } @@ -81,40 +80,34 @@ const std::list & args, const wchar_t * qryStr) { - clQueryParser_ = Cast2Native(queryParser); - - if (args.size() > 0) + if (args.size() > 0) { THROW_CPIXEXC(PL_ERROR "No arguments needed here"); } + + IQueryParser* qp = Cast2Native(queryParser); + clQuery_ = qp->parse(qryStr).release(); - //Can we do get rid of this parse here?
- clQuery_ = clQueryParser_->parse(qryStr); - PrefixOptQueryRewriter prefixOpt_(OPTIMIZED_PREFIX_MAX_LENGTH, - LCPIX_DEFAULT_FIELD, - LCPIX_DEFAULT_PREFIX_FIELD ); - //Switch query ownership to stack and back - std::auto_ptr q( clQuery_ ); clQuery_ = NULL; - clQuery_ = prefixOpt_.rewrite( q ).release(); - - if (clQuery_ == NULL) - { + if (clQuery_ == NULL) + { THROW_CPIXEXC("Query reduced to empty query."); - } + } } virtual cpix_Hits * search(cpix_IdxSearcher * idxSearcher) { return CLuceneSearchIdx(idxSearcher, - clQuery_); + clQuery_ + ); } virtual cpix_Hits * search(cpix_IdxDb * idxDb) { return CLuceneSearchIdx(idxDb, - clQuery_); + clQuery_ + ); } private: diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/qrytypes/dumpqrytype.cpp --- a/searchengine/cpix/cpix/src/qrytypes/dumpqrytype.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/qrytypes/dumpqrytype.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -50,6 +50,10 @@ namespace { + const wchar_t STAR[] = L"*"; + const wchar_t AND1[] = L"and1"; + const wchar_t AND2[] = L"and2"; + /** * Parses the syntax "* ( ( AND | && ) QRY )?", setting the member * clQryStr_ (clucene query) to QRY, if any. 
@@ -58,19 +62,12 @@ { private: - enum TokenType - { - STAR = Cpt::Lex::TOKEN_LAST_RESERVED, - AND1, - AND2, - }; - Cpt::Lex::MultiTokenizer * tokenizer_; // for the transition table definition, see comments for parse() typedef int State; - typedef int Symbol; + typedef Cpt::Lex::token_type_t Symbol; typedef std::pair StateSymbolPair; typedef std::map TransitionTable; TransitionTable transitions_; @@ -220,7 +217,7 @@ Tokens source(*tokenizer_, qryStr); - WhiteSpaceFilter + StdFilter tokens(source); State diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/qrytypes/prefixqrytype.cpp --- a/searchengine/cpix/cpix/src/qrytypes/prefixqrytype.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/qrytypes/prefixqrytype.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -32,8 +32,10 @@ #include "cpixutil.h" #include "iqrytype.h" #include "analyzer.h" +#include "customanalyzer.h" #include "cpixmaindefs.h" +#include "queryparser.h" namespace Cpix @@ -48,7 +50,7 @@ // // private members // - lucene::queryParser::QueryParser * clQueryParser_; + IQueryParser * clQueryParser_; lucene::search::Query * clQuery_; public: @@ -82,7 +84,8 @@ getAnalyzedString(qryStr, mQryStr ); clQueryParser_ = Cast2Native(queryParser); - clQuery_ = clQueryParser_->parse((const wchar_t *)mQryStr); + clQuery_ = clQueryParser_->parse((const wchar_t *)mQryStr).release(); + free(mQryStr); diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/qrytypes/termsqrytype.cpp --- a/searchengine/cpix/cpix/src/qrytypes/termsqrytype.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/qrytypes/termsqrytype.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -32,6 +32,8 @@ #include "cpixutil.h" #include "iqrytype.h" +#include "queryparser.h" + namespace Cpix { @@ -73,7 +75,7 @@ THROW_CPIXEXC("Too many arguments for terms search"); } - lucene::queryParser::QueryParser + IQueryParser * qp = Cast2Native(queryParser); fieldName_ = qp->getField(); diff -r a5fbfefd615f -r 6547bf8ca13a 
searchengine/cpix/cpix/src/queryparser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/queryparser.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,130 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "queryparser.h" + +#include "cpixmaindefs.h" + +#include "initparams.h" + +#include "prefixqueryparser.h" + +#include "cpixexc.h" + +namespace Cpix { + + IQueryParser::~IQueryParser() {} /* out-of-line definition of the interface destructor */ + + CLuceneQueryParser::CLuceneQueryParser(const wchar_t* defaultField, + lucene::analysis::Analyzer& analyzer) + : parser_() { /* Wraps a CLucene QueryParser created for defaultField; the analyzer is handed over by address. */ + parser_.reset(_CLNEW lucene::queryParser::QueryParser(defaultField, &analyzer)); + } + + CLuceneQueryParser::~CLuceneQueryParser() {} + + std::auto_ptr CLuceneQueryParser::parse(const wchar_t* query) { /* Parses with the wrapped CLucene parser and returns the result in an auto_ptr. */ + return std::auto_ptr( parser_->parse(query) ); + } + + const wchar_t* CLuceneQueryParser::getField() const { + return parser_->getField(); + } + + void CLuceneQueryParser::setDefaultOperator(cpix_QP_Operator op) { /* Forwards the operator, cast to the wrapped parser's own type. */ + parser_->setDefaultOperator(static_cast(op)); + } + + PrefixOptQueryParser::PrefixOptQueryParser(std::auto_ptr parser) + : + prefixOpt_(OPTIMIZED_PREFIX_MAX_LENGTH, + LCPIX_DEFAULT_FIELD, + LCPIX_DEFAULT_PREFIX_FIELD ), + parser_( parser ) + {} + + PrefixOptQueryParser::~PrefixOptQueryParser() {} + + std::auto_ptr PrefixOptQueryParser::parse(const wchar_t* query) { /* Parses with the wrapped parser, then passes the result through the prefix rewriter prefixOpt_. */ + return prefixOpt_.rewrite( parser_->parse(query) ); + } + + const wchar_t* PrefixOptQueryParser::getField() const { + return parser_->getField(); + } + + void PrefixOptQueryParser::setDefaultOperator(cpix_QP_Operator op) { +
parser_->setDefaultOperator(op); + } + + CLuceneMultiFieldQueryParser::CLuceneMultiFieldQueryParser( + const wchar_t** fields, + lucene::analysis::Analyzer& analyzer, + lucene::queryParser::BoostMap& boostMap) + : parser_() { /* Wraps a CLucene MultiFieldQueryParser; the analyzer and the boost map are handed over by address. */ + parser_.reset( + _CLNEW lucene::queryParser::MultiFieldQueryParser( fields, &analyzer, &boostMap )); + + } + + CLuceneMultiFieldQueryParser::~CLuceneMultiFieldQueryParser() {} + + std::auto_ptr + CLuceneMultiFieldQueryParser::parse(const wchar_t* query) { + return std::auto_ptr( parser_->parse( query ) ); + + } + + const wchar_t* CLuceneMultiFieldQueryParser::getField() const { /* Always throws; this parser does not support getField. */ + THROW_CPIXEXC("Multi field query parser does not support getField operation"); + } + + void CLuceneMultiFieldQueryParser::setDefaultOperator(cpix_QP_Operator op) { + parser_->setDefaultOperator(static_cast(op)); + } + + IQueryParser* CreateCLuceneQueryParser(const wchar_t* defaultField, + lucene::analysis::Analyzer* analyzer) { /* Returns a new PrefixOptQueryParser wrapping a CLuceneQueryParser; 'analyzer' is dereferenced without a null check. */ + return + new PrefixOptQueryParser( + std::auto_ptr( + new CLuceneQueryParser(defaultField, *analyzer))); + } + + IQueryParser* CreateCLuceneMultiFieldQueryParser( + const wchar_t* fields[], + lucene::analysis::Analyzer* analyzer, + lucene::queryParser::BoostMap* boostMap) { /* Returns a new PrefixOptQueryParser wrapping a CLuceneMultiFieldQueryParser; 'analyzer' and 'boostMap' are dereferenced without a null check. */ + return + new PrefixOptQueryParser( + std::auto_ptr( + new CLuceneMultiFieldQueryParser(fields, + *analyzer, + *boostMap))); + + } + + IQueryParser* CreatePrefixQueryParser(const wchar_t* field) { /* Returns a new PrefixOptQueryParser wrapping a PrefixQueryParser for 'field'. */ + return + new PrefixOptQueryParser( + std::auto_ptr( + new PrefixQueryParser(field))); + } + + +} diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/rotlogger.cpp --- a/searchengine/cpix/cpix/src/rotlogger.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/cpix/src/rotlogger.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -93,8 +93,12 @@ THROW_CPIXEXC("Cannot dup() STDERR_FILENO"); } - Cpt_EINTR_RETRY_SP( close(STDOUT_FILENO) ); - Cpt_EINTR_RETRY_SP( close(STDERR_FILENO) ); + int + result; + Cpt_EINTR_RETRY(result,
+ close(STDERR_FILENO)); redirectStdOutErr(); diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/spi/locale.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/spi/locale.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,27 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "spi/locale.h" + +namespace Cpix { + + namespace Spi { + + const wchar_t* SymbianLanguageCodePrefix = L"s"; /* NOTE(review): presumably prepended to a numeric Symbian language id to form a language name - the use site is not visible here, confirm */ + } +} diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/cpix/src/spi/s60/s60locale.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/cpix/src/spi/s60/s60locale.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,74 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution.
+* +* Contributors: +* +* Description: +* +*/ + +#include +#include + +#include "spi/locale.h" + +namespace Cpix { + + namespace Spi { + + struct LangCodeTranslationEntry { /* One row of the table below: a Symbian language id and the short language code used for it. */ + int symbianCode_; + const wchar_t* isoCode_; + }; + + LangCodeTranslationEntry LangCodeTranslations[] = { /* The last row {ELangNone, 0} looks like an end-of-table marker; the lookup code is not visible here - confirm. */ + {ELangEnglish, L"en"}, + {ELangCanadianEnglish, L"en"}, + {ELangInternationalEnglish, L"en"}, + {ELangSouthAfricanEnglish, L"en"}, + + {ELangFrench, L"fr"}, + {ELangSwissFrench, L"fr"}, + {ELangBelgianFrench, L"fr"}, + {ELangInternationalFrench, L"fr"}, + {ELangCanadianFrench, L"fr"}, + + {ELangHebrew, L"he"}, + + {ELangTaiwanChinese, L"ch"}, /* NOTE(review): the field is named isoCode_, but ISO 639-1 spells Chinese "zh"; confirm "ch" is the identifier the analyzer configuration expects */ + {ELangHongKongChinese, L"ch"}, + {ELangPrcChinese, L"ch"}, + {ELangThai, L"th"}, + {ELangJapanese, L"jp"}, /* NOTE(review): ISO 639-1 spells Japanese "ja"; confirm "jp" is intentional */ + {ELangKorean, L"ko"}, + + {ELangNone, 0} + }; + + std::vector GetLanguageNames() { + TLanguage lang = User::Language(); + + std::vector ret; + std::wostringstream code; + code<lowercase>stop(en) Creating analyzer letter>lowercase>stop('a', 'an', 'the') Creating analyzer letter>*here**here*>lowercase -Analyzer creation failed with Expected identifier instead of token '>' of type 11 at: "letter>*here*>*here*lowercase" +Analyzer creation failed with Expected identifier instead of token '>' of type > at: +"letter>*here*>*here*lowercase" Creating analyzer >letter>>lowercase lowercase -Analyzer creation failed with Expected identifier instead of token '>' of type 11 at: "*here*>*here*letter>>lowercase lowercase" +Analyzer creation failed with Expected identifier instead of token '>' of type > at: +"*here*>*here*letter>>lowercase lowercase" Creating analyzer letter lowercase -Analyzer creation failed with Expected EOF instead of 'lowercase' of type 3 at: "letter *here*lowercase*here*" +Analyzer creation failed with Expected EOF instead of 'lowercase' of type identifier at: +"letter *here*lowercase*here*" diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/usage_exp_out.txt ---
a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/usage_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/usage_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,4 +1,6 @@ -Indexing and searching with whitespace +using analyzer "whitespace" + +indexing: Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt @@ -8,12 +10,33 @@ Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt +Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt + +Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt + Indexed empty item. + +searching: + +search "happy": Number of hits: 0 + +search "happiness": Number of hits: 0 + +search "happening": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here? -Indexing and searching with letter>lowercase + +search "ดาวตà¸": +Number of hits: 0 + +search "มี": +Number of hits: 0 + +using analyzer "letter>lowercase" + +indexing: Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt @@ -23,14 +46,36 @@ Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt +Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt + +Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt + Indexed empty item. + +searching: + +search "happy": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy. + +search "happiness": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness! + +search "happening": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here? 
-Indexing and searching with stdtokens>lowercase>stem(en) + +search "ดาวตà¸": +Number of hits: 0 + +search "มี": +Number of hits: 1 +DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 + +using analyzer "stdtokens>lowercase>accent" + +indexing: Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt @@ -40,16 +85,35 @@ Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt +Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt + +Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt + Indexed empty item. -Number of hits: 2 -DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness! + +searching: + +search "happy": +Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy. -Number of hits: 2 + +search "happiness": +Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness! -DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy. + +search "happening": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here? -Indexing and searching with letter>lowercase>stop(en) + +search "ดาวตà¸": +Number of hits: 0 + +search "มี": +Number of hits: 0 + +using analyzer "letter>lowercase>stop(en)" + +indexing: Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt @@ -59,14 +123,36 @@ Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt +Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt + +Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt + Indexed empty item. + +searching: + +search "happy": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy. + +search "happiness": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness! + +search "happening": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here? 
-Indexing and searching with letter>lowercase>stop('a', 'an', 'the') + +search "ดาวตà¸": +Number of hits: 0 + +search "มี": +Number of hits: 1 +DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 + +using analyzer "letter>lowercase>stop('a', 'an', 'the')" + +indexing: Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt @@ -76,10 +162,30 @@ Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt +Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt + +Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt + Indexed empty item. + +searching: + +search "happy": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy. + +search "happiness": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness! + +search "happening": Number of hits: 1 DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here? 
+ +search "ดาวตà¸": +Number of hits: 0 + +search "มี": +Number of hits: 1 +DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt --- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -5,10 +5,17 @@ 'Oh' 'happiness' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'Nothing' 'important' 'in' 'here' 'So' 'don't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What' 'is' 'happening' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon' 'nyt' 'teetä' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + Analyzer "whitespace": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'I' 'am' 'happy.' @@ -16,10 +23,17 @@ 'Oh' 'happiness!' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' 
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What' 'is' 'happening' 'here?' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon' 'nyt' 'teetä.' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee' 'näin!' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + Analyzer "whitespace>lowercase": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'i' 'am' 'happy.' @@ -27,10 +41,17 @@ 'oh' 'happiness!' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'nothing' 'important' 'in' 'here.' 'so' 'don't' 'even' 'look.' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'is' 'happening' 'here?' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'juon' 'nyt' 'teetä.' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'tee' 'näin!' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + Analyzer "whitespace>accent": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'I' 'am' 'happy.' @@ -38,10 +59,17 @@ 'Oh' 'happiness!' 
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What' 'is' 'happening' 'here?' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon' 'nyt' 'teeta.' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee' 'nain!' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + Analyzer "letter": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'I' 'am' 'happy' @@ -49,10 +77,17 @@ 'Oh' 'happiness' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'Nothing' 'important' 'in' 'here' 'So' 'don' 't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What' 'is' 'happening' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon' 'nyt' 'teetä' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™' + Analyzer "letter>lowercase": File 
!:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'i' 'am' 'happy' @@ -60,10 +95,17 @@ 'oh' 'happiness' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'is' 'happening' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'juon' 'nyt' 'teetä' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™' + Analyzer "keyword": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'I am happy. @@ -83,10 +125,21 @@ nothing whatsoever. ' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What is happening here? + +' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon nyt teetä.' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee näin! ' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 +' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552 +' + Analyzer "keyword>lowercase": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'i am happy. @@ -106,43 +159,57 @@ nothing whatsoever. 
' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what is happening here? + +' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'juon nyt teetä.' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'tee näin! ' -Analyzer "stdtokens>lowercase>accent>stem(en)": -File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: - 'i' 'am' 'happi' -File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: - 'oh' 'happi' -File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: - 'noth' 'import' 'in' 'here' 'so' 'don't' 'even' 'look' 'becaus' 'you' 'shall' 'find' 'noth' 'whatsoev' -File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: - 'juon' 'nyt' 'teeta' -File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: - 'tee' 'nain' -Analyzer "letter>lowercase>accent>stop(en)": +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 +' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552 +' + +Analyzer "letter>lowercase>stop(en)": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'i' 'am' 'happy' File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: 'oh' 'happiness' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: - 'juon' 'nyt' 'teeta' + 'juon' 'nyt' 'teetä' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: - 'tee' 'nain' -Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'näin')": + 'tee' 'näin' +File 
!:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™' + +Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'n�in')": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'am' 'happy' File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: 'happiness' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'is' 'happening' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'juon' 'teetä' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: - 'tee' + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™' + Analyzer "letter>length(2, 4)": File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 'am' @@ -150,7 +217,140 @@ 'Oh' File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: 'in' 'here' 'So' 'don' 'even' 'look' 'you' 'find' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'What' 'is' 'here' File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: 'Juon' 'nyt' File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: 'Tee' 'näin' 
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ยน' 'ทย' 'นว' 'ณฑ' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'าวว' 'พฤศจ' 'à¸à¸²à¸¢à¸™' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'คร' 'งสำค' 'à¸à¸—' 'วเม' 'อป' 'อในค' 'นว' 'นท' 'อเน' 'องว' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™' + +Analyzer "standard>prefixes(1)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'a' 'h' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'o' 'h' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'n' 'i' 'h' 's' 'd' 'e' 'l' 'b' 'y' 's' 'f' 'n' 'w' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'w' 'h' 'h' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'j' 'n' 't' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 't' 'n' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ป' '7' '-' 'พ' 'ย' 'น' 'ต' 'ป' '1' '-' 'พ' '2' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จ' '2' '-' 'ค' '1' 'ต' '1' 'พ' '2' + +Analyzer "standard>prefixes(2)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am'|'a' 'ha'|'h' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh'|'o' 'ha'|'h' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'no'|'n' 'im'|'i' 'he'|'h' 'so'|'s' 'do'|'d' 'ev'|'e' 'lo'|'l' 'be'|'b' 'yo'|'y' 'sh'|'s' 'fi'|'f' 'no'|'n' 'wh'|'w' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'wh'|'w' 'ha'|'h' 'he'|'h' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'ju'|'j' 'ny'|'n' 'te'|'t' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'te'|'t' 'nä'|'n' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปร'|'ป' '7' '-1'|'-' 'พ' 'ยน'|'ย' 'นา'|'น' 'ตั'|'ต' 'ปร'|'ป' '17'|'1' '-1'|'-' 'พฤ'|'พ' '25'|'2' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะ'|'จ' '25'|'2' '-2'|'-' 'คื'|'ค' '17'|'1' 'ต่'|'ต' '18'|'1' 'พฤ'|'พ' '25'|'2' + +Analyzer "standard>prefixes(3)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: 
+ 'i' 'am'|'a' 'hap'|'ha'|'h' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh'|'o' 'hap'|'ha'|'h' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'not'|'no'|'n' 'imp'|'im'|'i' 'her'|'he'|'h' 'so'|'s' 'don'|'do'|'d' 'eve'|'ev'|'e' 'loo'|'lo'|'l' 'bec'|'be'|'b' 'you'|'yo'|'y' 'sha'|'sh'|'s' 'fin'|'fi'|'f' 'not'|'no'|'n' 'wha'|'wh'|'w' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'wha'|'wh'|'w' 'hap'|'ha'|'h' 'her'|'he'|'h' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juo'|'ju'|'j' 'nyt'|'ny'|'n' 'tee'|'te'|'t' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee'|'te'|'t' 'näi'|'nä'|'n' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปรา'|'ปร'|'ป' '7' '-18'|'-1'|'-' 'พ' 'ยนี'|'ยน'|'ย' 'นาย'|'นา'|'น' 'ตัน'|'ตั'|'ต' 'ปรา'|'ปร'|'ป' '17'|'1' '-18'|'-1'|'-' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม'|'จะ'|'จ' '254'|'25'|'2' '-25'|'-2'|'-' 'คือ'|'คื'|'ค' '17'|'1' 'ต่อ'|'ต่'|'ต' '18'|'1' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2' + +Analyzer "stdtokens>stdfilter>lowercase>thai>stop(en)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File 
!:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + +Analyzer "cjk>stop(en)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปร' 'รา' 'าà¸' 'à¸à¸' 'à¸à¸' 'à¸à¸²' 'าร' 'รณ' 'à¸à¸™' 'นด' 'ดา' 'าว' 'วต' 'ตà¸' '17' '18' 'พ' 'ยน' 'นา' 'าย' 'ยว' 'วร' 'รว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปร' 'รา' 'าช' 'ชà¸' 'ภ' 'ม' 'ป' 'à¸à¸' 'à¸à¸²' 'าท' 'อง' 'งถ' 'นด' 'าน' 'นด' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'ไท' 'ทย' 'ยà¸' 'à¸à¸¥' 'าว' 'วว' 'า' '17' '18' 'พฤ' 'ฤศ' 'ศจ' 'à¸à¸²' 'าย' 'ยน' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะ' 'ะม' 'ปร' 'รา' 'าà¸' 'à¸à¸' 'à¸à¸' 'à¸à¸²' 'าร' 'รณ' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'คร' 'งส' 'สำ' 'ำค' 'à¸à¸—' 'ชา' 'าว' 'วไ' 'ไท' 'ทย' 'ยเ' 'เค' 'คย' 'ยป' 'ปร' 'ระ' 'ะท' 'บใ' 'ใจ' 'จม' 'มา' 'าà¹' 'à¹à¸¥' 'วเ' 'เม' 'อป' '2541' '2544' 'ค' 'อใ' 'ใน' 'นค' 'นว' 'นท' '17' 'ต' 'อเ' 'เน' 'อง' 'งว' 'นท' '18' 'พฤ' 'ฤศ' 'ศจ' 'à¸à¸²' 'าย' 'ยน' '2552' + +Analyzer "ngram(1)>lowercase>stop(en)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 
'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' '17' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552' + +Analyzer "ngram(2)>lowercase>stop(en)": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' '17' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552' + diff -r a5fbfefd615f -r 
6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,44 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' + +Analyzer "natural": +File !:\data\cpixunittestcorpus\loc\ch_hk\1.txt tokenized: + '陶' 'å‚‘' '(' '1958' 'å¹´' '8' '月' '17' 'æ—¥' ')' ',' +File !:\data\cpixunittestcorpus\loc\ch_hk\2.txt tokenized: + '原' 'å' 'ä¿‚' '曹' 'æ·' ',' '香' '港' 'å°ˆ' '欄' '作' '家' 'åŠ' '傳' '媒' 'å·¥' '作' '者' ',' +File !:\data\cpixunittestcorpus\loc\ch_hk\3.txt tokenized: + '有' '香' '江' '第' '一' 'æ‰' 'å­' '嘅' '稱' '號' ',' +File !:\data\cpixunittestcorpus\loc\ch_hk\4.txt tokenized: + '以' 'æ–‡' 'ç­†' 'è¾›' 'è¾£' 'ç«‹' 'å ´' '親' '西' 'æ–¹' '(' 'å°¤' 'å…¶' '是' '英' '國' ')' '見' '稱' +File !:\data\cpixunittestcorpus\loc\ch_prc\1.txt tokenized: + '美' '国' '总' '统' '奥' 'å·´' '马' '星' '期' '一' '(' '11' '月' '16' 'æ—¥' ')' '在' '上' 'æµ·' '与' '中' '国' 'é’' 'å¹´' '对' 'è¯' ',' +File !:\data\cpixunittestcorpus\loc\ch_prc\2.txt tokenized: + 'ä»–' '回' 'ç­”' '了' '现' '场' 'å¬' 'ä¼—' 'å’Œ' '网' 'æ°‘' 'çš„' 'æ' 'é—®' ',' +File !:\data\cpixunittestcorpus\loc\ch_prc\3.txt tokenized: + '内' '容' '涉' 'åŠ' 'å°' 'æ¹¾' '中' '美' 'è´¸' '易' '以' 'åŠ' '环' '境' 'é—®' '题' 'ç­‰' +File !:\data\cpixunittestcorpus\loc\ch_prc\4.txt tokenized: + '奥' 'å·´' '马' '在' '上' 'æµ·' '科' '技' '馆' 'é¢' '对' '500' 'ä½™' 'å' '上' 'æµ·' 'é’' 'å¹´' +File !:\data\cpixunittestcorpus\loc\ch_simple\1.txt tokenized: + '奥' 'å·´' '马' '在' '上' 'æµ·' '与' '中' '国' 'é’' 
'å¹´' '人' '对' 'è¯' +File !:\data\cpixunittestcorpus\loc\ch_simple\2.txt tokenized: + '美' '国' '总' '统' '奥' 'å·´' '马' '星' '期' '一' '(' '11' '月' '16' 'æ—¥' ')' '在' '上' 'æµ·' '与' '中' '国' 'é’' 'å¹´' '对' 'è¯' ',' 'ä»–' '回' 'ç­”' '了' '现' '场' 'å¬' 'ä¼—' 'å’Œ' '网' 'æ°‘' 'çš„' 'æ' 'é—®' ',' '内' '容' '涉' 'åŠ' 'å°' 'æ¹¾' '中' '美' 'è´¸' '易' '以' 'åŠ' '环' '境' 'é—®' '题' 'ç­‰' +File !:\data\cpixunittestcorpus\loc\ch_simple\3.txt tokenized: + '奥' 'å·´' '马' '在' '上' 'æµ·' '科' '技' '馆' 'é¢' '对' '500' 'ä½™' 'å' '上' 'æµ·' 'é’' 'å¹´' '以' 'åŠ' 'æ•°' '以' '万' '计' 'çš„' '中' '国' '互' 'è”' '网' '使' '用' '者' 'è¿›' 'è¡Œ' '了' '一' '场' 'é—®' 'ç­”' '会' +File !:\data\cpixunittestcorpus\loc\ch_simple\4.txt tokenized: + 'è¿™' '次' '对' 'è¯' 'ç”±' 'å¤' 'æ—¦' '大' 'å­¦' 'æ ¡' 'é•¿' 'æ¨' '玉' '良' '主' 'æŒ' '美' '国' 'é©»' '中' '国' '大' '使' 'æ´ª' 'åš' '培' '致' '辞' +File !:\data\cpixunittestcorpus\loc\ch_tw\1.txt tokenized: + '拈' '花' '惹' 'è‰' '趣' 'å—' '投' '花' 'å‰' '嘉' 'å¹´' 'è¯' 'ç’€' 'ç’¨' 'è¿Ž' '賓' +File !:\data\cpixunittestcorpus\loc\ch_tw\2.txt tokenized: + '2009' 'å—' '投' '花' 'å‰' '嘉' 'å¹´' 'è¯' '花' 'ç¾' '幸' 'ç¦' '暢' 'éŠ' 'å—' '投' +File !:\data\cpixunittestcorpus\loc\ch_tw\3.txt tokenized: + 'æ–¼' 'æ—¥' 'å‰' '11' '15' 'å‡' 'å—' '投' '縣' '埔' '里' '鎮' '埔' '里' '花' 'å‰' '物' 'æµ' '中' '心' '隆' 'é‡' 'ç™»' 'å ´' ',' +File !:\data\cpixunittestcorpus\loc\ch_tw\4.txt tokenized: + '在' '為' '期' '五' 'å' '天' 'çš„' 'æ´»' 'å‹•' '中' ',' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/currentlocale_C_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/currentlocale_C_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,18 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 
'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,20 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' + +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 
'happening' 'here' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,28 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' + +Analyzer "natural": +File !:\data\cpixunittestcorpus\loc\jp\1.txt tokenized: + '国' 'éš›' '交' 'æµ' '基' '金' '(' 'ジャパンファウンデ' 'ション' ')' 'ã¯' '主' 'ã«' 'æ–‡' '化' '芸' 'è¡“' '交' 'æµ' 'æµ·' '外' 'ã«' 'ãŠ' 'ã‘' 'ã‚‹' 'æ—¥' '本' '語' 'æ•™' '育' +File !:\data\cpixunittestcorpus\loc\jp\2.txt tokenized: + 'æ—¥' '本' 'ç ”' '究' '知' 'çš„' '交' 'æµ' 'ã®' '3' 'ã¤' 'ã®' '分' '野' 'ã«' 'ãŠ' 'ã„' 'ã¦' '事' '業' 'ã‚’' '実' 'æ–½' 'ã—' 'ã¦' 'ãŠ' 'ã‚Š' +File !:\data\cpixunittestcorpus\loc\jp\3.txt tokenized: + 'ã' 'ã‚Œ' 'ãž' 'ã‚Œ' 'ã®' '分' '野' 'ã§' 'å…¬' 'å‹Ÿ' 'プログラム' 'ã«' 'よ' 'ã‚Š' '国' 'éš›' '交' 'æµ' '事' '業' 'ã‚’' 'ä¼' 'ç”»' 'ã™' 'ã‚‹' '個' '人' 'ã‚„' +File !:\data\cpixunittestcorpus\loc\jp\4.txt tokenized: + '団' '体' 'ã«' '対' 'ã—' 'ã¦' '助' 'æˆ' '金' 'ç ”' '究' '奨' 'å­¦' '金' 'ç­‰' 'ã‚’' 'æ' 'ä¾›' 'ã—' 'ã¦' 'ã„' 'ã¾' 'ã™' +File !:\data\cpixunittestcorpus\loc\jp_old\1.txt tokenized: + '国' 'éš›' '交' 'æµ' '基' '金' '(' 'ジャパンファウンデ' 'ション' ')' 'ã¯' '主' 'ã«' 'æ–‡' '化' '芸' 'è¡“' '交' 'æµ' 'æµ·' '外' 'ã«' 'ãŠ' 'ã‘' 'ã‚‹' 'æ—¥' '本' '語' 'æ•™' '育' +File !:\data\cpixunittestcorpus\loc\jp_old\2.txt tokenized: + 'æ—¥' '本' 'ç ”' '究' '知' 'çš„' '交' 'æµ' 'ã®' '3' 'ã¤' 'ã®' '分' '野' 'ã«' 'ãŠ' 'ã„' 'ã¦' '事' '業' 'ã‚’' '実' 'æ–½' 'ã—' 'ã¦' 'ãŠ' 'ã‚Š' +File 
!:\data\cpixunittestcorpus\loc\jp_old\3.txt tokenized: + 'ã' 'ã‚Œ' 'ãž' 'ã‚Œ' 'ã®' '分' '野' 'ã§' 'å…¬' 'å‹Ÿ' 'プログラム' 'ã«' 'よ' 'ã‚Š' '国' 'éš›' '交' 'æµ' '事' '業' 'ã‚’' 'ä¼' 'ç”»' 'ã™' 'ã‚‹' '個' '人' 'ã‚„' +File !:\data\cpixunittestcorpus\loc\jp_old\4.txt tokenized: + '団' '体' 'ã«' '対' 'ã—' 'ã¦' '助' 'æˆ' '金' 'ç ”' '究' '奨' 'å­¦' '金' 'ç­‰' 'ã‚’' 'æ' 'ä¾›' 'ã—' 'ã¦' 'ã„' 'ã¾' 'ã™' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,20 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' + +Analyzer "natural": +File !:\data\cpixunittestcorpus\loc\ko\1.txt tokenized: + 'ì œ'|'제'|'á„Œ' 'ê°€'|'가'|'á„€' 'ë…¼' '스'|'스'|'ᄉ' '톱'|'á„ᅩᆸ'|'á„á…©'|'á„' 'ì„'|'을'|'á„‹á…³'|'á„‹' 'ë³´' '는'|'는'|'á„‚á…³'|'á„‚' 'ë°'|'데'|'ᄃ' 'ìš”'|'á„‹á…­'|'á„‹' 'ê¹€' '지'|'지'|'á„Œ' 'ìš°'|'á„‹á…®'|'á„‹' 'ê°€'|'가'|'á„€' '스' '토'|'á„á…©'|'á„' '리'|'á„…á…µ'|'á„…' 'ìƒ'|'상'|'사'|'ᄉ' '으'|'á„‹á…³'|'á„‹' 'ë¡œ'|'á„…á…©'|'á„…' 'ì¼' '본'|'본'|'보'|'ᄇ' '으'|'á„‹á…³'|'á„‹' 'ë¡œ'|'á„…á…©'|'á„…' 'ê°„' '다'|'다'|'ᄃ' '구'|'구'|'á„€' '하' '네'|'á„‚á…¦'|'á„‚' 'ìš”'|'á„‹á…­'|'á„‹' +File !:\data\cpixunittestcorpus\loc\ko\2.txt tokenized: + 'ê·¸'|'그'|'á„€' '러'|'á„…á…¥'|'á„…' 'ê³ '|'고'|'á„€' '나'|'á„‚á…¡'|'á„‚' 'ì„œ'|'서'|'ᄉ' '다' 'ìŒ'|'음'|'á„‹á…³'|'á„‹' 'ì´' '야'|'á„‹á…£'|'á„‹' '기'|'기'|'á„€' '예' 'ê³ '|'고'|'á„€' '는'|'는'|'á„‚á…³'|'á„‚' '안' '나'|'á„‚á…¡'|'á„‚' '오'|'á„‹á…©'|'á„‹' 'ê³ '|'고'|'á„€' 'ê¹€' '지'|'지'|'á„Œ' 
'ìš°'|'á„‹á…®'|'á„‹' 'ì˜'|'á„‹á…´'|'á„‹' '첨' '부'|'부'|'ᄇ' 'í„°'|'á„á…¥'|'á„' 'ì—¬' '태'|'á„á…¢'|'á„' '까'|'á„á…¡'|'á„' '지'|'지'|'á„Œ' 'ì˜'|'á„‹á…´'|'á„‹' 'ì´' '미'|'미'|'ᄆ' '지'|'지'|'á„Œ' '만'|'만'|'마'|'ᄆ' 'ë³´' 'ì—¬'|'á„‹á…§'|'á„‹' '주'|'주'|'á„Œ' 'ê³ '|'고'|'á„€' +File !:\data\cpixunittestcorpus\loc\ko\3.txt tokenized: + 'ê·¸'|'그'|'á„€' '냥'|'냥'|'á„‚á…£'|'á„‚' 'ë' '냈'|'냈'|'á„‚á…¢'|'á„‚' '는'|'는'|'á„‚á…³'|'á„‚' 'ë°'|'데'|'ᄃ' 'ìš”'|'á„‹á…­'|'á„‹' 'ì´' 'ê±°'|'거'|'á„€' '진' '짜'|'á„á…¡'|'á„' 'ì´' '별'|'별'|'벼'|'ᄇ' '하'|'á„’á…¡'|'á„’' '는'|'는'|'á„‚á…³'|'á„‚' 'ê±°'|'거'|'á„€' 'ê°™' '다'|'다'|'ᄃ' '는'|'는'|'á„‚á…³'|'á„‚' 'ìƒ' 'ê°'|'각'|'가'|'á„€' 'ì´'|'á„‹á…µ'|'á„‹' +File !:\data\cpixunittestcorpus\loc\ko\4.txt tokenized: + 'ê·¸'|'그'|'á„€' 'ë•Œ'|'á„„á…¢'|'á„„' 'ì—¬' '승'|'승'|'스'|'ᄉ' 'í˜'|'혁'|'á„’á…§'|'á„’' '처'|'á„Žá…¥'|'á„Ž' '럼'|'럼'|'á„…á…¥'|'á„…' 'ë…¼' '스'|'스'|'ᄉ' '톱'|'á„ᅩᆸ'|'á„á…©'|'á„' 'ê·¹' '중'|'중'|'주'|'á„Œ' 'ì¸' '물'|'물'|'무'|'ᄆ' 'ì—'|'á„‹á…¦'|'á„‹' 'ì„œ'|'서'|'ᄉ' 'ë¹ ' '트'|'á„á…³'|'á„' '리'|'á„…á…µ'|'á„…' '는'|'는'|'á„‚á…³'|'á„‚' 'ê±´'|'건'|'거'|'á„€' 'ê°€'|'가'|'á„€' 'ìš”'|'á„‹á…­'|'á„‹' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,20 @@ +Analyzer "natural": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' + +Analyzer "natural": +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 
'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\3.txt tokenized: + 'จะ' 'เà¸à¸´à¸”' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' 'จาà¸' 'à¸à¸¥à¸¸à¹ˆà¸¡' 'ดาว' 'สิงโต' 'หรือ' 'à¸à¸™' 'ดาวตà¸' 'เลโอ' 'นิคส์' 'ที่' 'นัà¸' 'ดาราศาสตร์' 'ทั้ง' 'หลาย' 'คาด' 'ว่า' 'จะ' 'มี' 'ประมาณ' '100' '-150' 'ดวง' 'ต่อ' 'ชั่วโมง' +File !:\data\cpixunittestcorpus\loc\th\4.txt tokenized: + 'เมื่อ' 'วัน' 'ที่' '8' 'มีนา' 'ที่' 'ผ่าน' 'มา' 'ผม' 'ได้' 'ไป' 'งาน' 'ที่' 'โรงเรียน' 'เหมือน' 'เช่น' 'ทุà¸' 'ปี' 'ตอน' 'à¸à¸¥à¸±à¸š' 'เดิน' 'มา' 'ตาม' 'ตึà¸' 'ยาว' 'เพื่อ' 'จะ' 'à¸à¸¥à¸±à¸š' 'มาท' 'า' 'งป' 'ระตู' 'ด้าน' 'เพาะ' 'ช่าง' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,103 @@ +locale=en: +Analyzer " +locale_switch { + case 'en': stdtokens>stdfilter>lowercase>stop(en); + case 'th': stdtokens>stdfilter>lowercase>thai>stop(en); + case 'ca': stdtokens>stdfilter>lowercase>accent; + default: stdtokens>stdfilter>lowercase; +}": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt 
tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + + +locale=th: +Analyzer " +locale_switch { + case 'en': stdtokens>stdfilter>lowercase>stop(en); + case 'th': stdtokens>stdfilter>lowercase>thai>stop(en); + case 'ca': stdtokens>stdfilter>lowercase>accent; + default: stdtokens>stdfilter>lowercase; +}": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 
'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + + +locale=ca: +Analyzer " +locale_switch { + case 'en': stdtokens>stdfilter>lowercase>stop(en); + case 'th': stdtokens>stdfilter>lowercase>thai>stop(en); + case 'ca': stdtokens>stdfilter>lowercase>accent; + default: stdtokens>stdfilter>lowercase; +}": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'is' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teeta' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'nain' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + + +default locale: +Analyzer " +locale_switch { + case 'en': stdtokens>stdfilter>lowercase>stop(en); + case 'th': stdtokens>stdfilter>lowercase>thai>stop(en); + case 'ca': stdtokens>stdfilter>lowercase>accent; + default: stdtokens>stdfilter>lowercase; +}": +File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: + 'i' 'am' 'happy' +File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: + 'oh' 'happiness' +File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: + 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 
'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' +File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: + 'what' 'is' 'happening' 'here' +File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: + 'juon' 'nyt' 'teetä' +File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: + 'tee' 'näin' +File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: + 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' +File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: + 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552' + diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt --- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,4 +1,8 @@ -ParseException: Unexpected EOF at: "foobar(zap, foo, 'bar', 'raf', do, *here*" -LexException: Unrecognized syntax: ''a, raboof)' at: "foobar(zap, foo, 'bar', *here*'a, raboof)" -LexException: Unrecognized syntax: '!' at: "foobar(*here*!zap, foo, 'bar', 'a', raboof)" -ParseException: Expected token of type 10 instead of token 'raboof' of type 3 at: "foobar(zap, foo, 'bar', 'a' *here*raboof*here*)" +ParseException: Unexpected EOF at: +"foobar(zap, foo, 'bar', 'raf', do, *here*" +LexException: Unrecognized syntax: ''a, raboof)' at: +"foobar(zap, foo, 'bar', *here*'a, raboof)" +LexException: Unrecognized syntax: '!' 
at: +"foobar(*here*!zap, foo, 'bar', 'a', raboof)" +ParseException: Expected comma instead of token 'raboof' of type identifier at: +"foobar(zap, foo, 'bar', 'a' *here*raboof*here*)" diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/optimized_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/optimized_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1010 @@ +Started indexing. +Indexed 200 documents. +Search with b* found 59 hits +Search with r* found 48 hits +Search with n* found 62 hits +Search with u* found 100 hits +Search with e* found 47 hits +Search with u* found 100 hits +Search with m* found 73 hits +Search with h* found 92 hits +Search with g* found 75 hits +Search with u* found 100 hits +Search with m* found 73 hits +Search with d* found 77 hits +Search with p* found 43 hits +Search with v* found 8 hits +Search with q* found 3 hits +Search with v* found 8 hits +Search with w* found 107 hits +Search with m* found 73 hits +Search with l* found 93 hits +Search with k* found 22 hits +Search with y* found 33 hits +Search with b* found 59 hits +Search with n* found 62 hits +Search with w* found 107 hits +Search with y* found 33 hits +Search with b* found 59 hits +Search with q* found 3 hits +Search with c* found 97 hits +Search with w* found 107 hits +Search with c* found 97 hits +Search with k* found 22 hits +Search with x* found 5 hits +Search with v* found 8 hits +Search with y* found 33 hits +Search with t* found 81 hits +Search with b* found 59 hits +Search with v* found 8 hits +Search with g* found 75 hits +Search with i* found 98 hits +Search with e* found 47 hits +Search with e* found 47 hits +Search with w* found 107 hits +Search with h* found 92 hits +Search with t* found 81 hits +Search with v* found 8 hits +Search with a* found 67 hits +Search with p* found 43 hits +Search with u* found 100 hits +Search with o* found 
63 hits +Search with d* found 77 hits +Search with f* found 42 hits +Search with o* found 63 hits +Search with g* found 75 hits +Search with u* found 100 hits +Search with n* found 62 hits +Search with g* found 75 hits +Search with v* found 8 hits +Search with e* found 47 hits +Search with k* found 22 hits +Search with v* found 8 hits +Search with j* found 23 hits +Search with v* found 8 hits +Search with t* found 81 hits +Search with f* found 42 hits +Search with w* found 107 hits +Search with p* found 43 hits +Search with i* found 98 hits +Search with s* found 85 hits +Search with y* found 33 hits +Search with s* found 85 hits +Search with w* found 107 hits +Search with d* found 77 hits +Search with s* found 85 hits +Search with g* found 75 hits +Search with w* found 107 hits +Search with o* found 63 hits +Search with i* found 98 hits +Search with o* found 63 hits +Search with l* found 93 hits +Search with x* found 5 hits +Search with t* found 81 hits +Search with q* found 3 hits +Search with o* found 63 hits +Search with a* found 67 hits +Search with m* found 73 hits +Search with c* found 97 hits +Search with i* found 98 hits +Search with k* found 22 hits +Search with g* found 75 hits +Search with t* found 81 hits +Search with g* found 75 hits +Search with r* found 48 hits +Search with r* found 48 hits +Search with d* found 77 hits +Search with a* found 67 hits +Search with o* found 63 hits +Search with s* found 85 hits +Search with i* found 98 hits +Search with j* found 23 hits +Search with t* found 81 hits +Search with ci* found 2 hits +Search with wu* found 0 hits +Search with qv* found 0 hits +Search with ly* found 0 hits +Search with mw* found 0 hits +Search with wg* found 0 hits +Search with pn* found 0 hits +Search with je* found 1 hits +Search with pt* found 0 hits +Search with ry* found 0 hits +Search with pa* found 9 hits +Search with qh* found 0 hits +Search with dq* found 0 hits +Search with xy* found 4 hits +Search with cj* found 0 hits +Search with 
se* found 7 hits +Search with rs* found 0 hits +Search with bk* found 2 hits +Search with qo* found 0 hits +Search with ke* found 2 hits +Search with nj* found 0 hits +Search with mf* found 0 hits +Search with xv* found 0 hits +Search with kn* found 1 hits +Search with sc* found 4 hits +Search with oi* found 0 hits +Search with eg* found 0 hits +Search with si* found 7 hits +Search with ys* found 0 hits +Search with jb* found 0 hits +Search with de* found 30 hits +Search with hw* found 1 hits +Search with yk* found 0 hits +Search with iq* found 0 hits +Search with yu* found 2 hits +Search with uo* found 0 hits +Search with eh* found 3 hits +Search with ue* found 0 hits +Search with gf* found 0 hits +Search with uy* found 0 hits +Search with jj* found 0 hits +Search with jn* found 0 hits +Search with pc* found 0 hits +Search with xp* found 0 hits +Search with vh* found 0 hits +Search with sb* found 0 hits +Search with ob* found 0 hits +Search with xn* found 0 hits +Search with li* found 22 hits +Search with en* found 11 hits +Search with db* found 0 hits +Search with ck* found 0 hits +Search with lw* found 0 hits +Search with ot* found 3 hits +Search with cj* found 0 hits +Search with sn* found 0 hits +Search with tc* found 0 hits +Search with cl* found 6 hits +Search with hc* found 0 hits +Search with df* found 0 hits +Search with mw* found 0 hits +Search with gb* found 0 hits +Search with xe* found 0 hits +Search with ql* found 0 hits +Search with px* found 0 hits +Search with ys* found 0 hits +Search with yc* found 0 hits +Search with dm* found 0 hits +Search with ys* found 0 hits +Search with ge* found 8 hits +Search with cb* found 0 hits +Search with tx* found 1 hits +Search with gx* found 0 hits +Search with kn* found 1 hits +Search with bn* found 0 hits +Search with un* found 4 hits +Search with mb* found 0 hits +Search with qk* found 0 hits +Search with hh* found 0 hits +Search with yw* found 0 hits +Search with hx* found 0 hits +Search with sj* found 0 hits 
+Search with cv* found 0 hits +Search with vc* found 0 hits +Search with qf* found 0 hits +Search with iv* found 0 hits +Search with gc* found 0 hits +Search with tm* found 6 hits +Search with bg* found 0 hits +Search with ce* found 1 hits +Search with uw* found 0 hits +Search with rh* found 0 hits +Search with xk* found 0 hits +Search with ui* found 0 hits +Search with tt* found 1 hits +Search with hc* found 0 hits +Search with ta* found 13 hits +Search with lw* found 0 hits +Search with wj* found 0 hits +Search with bp* found 0 hits +Started indexing. +Indexed 200 documents. +Search with o* found 129 hits +Search with j* found 43 hits +Search with l* found 193 hits +Search with v* found 19 hits +Search with n* found 128 hits +Search with i* found 192 hits +Search with k* found 40 hits +Search with q* found 6 hits +Search with o* found 129 hits +Search with n* found 128 hits +Search with u* found 203 hits +Search with l* found 193 hits +Search with k* found 40 hits +Search with o* found 129 hits +Search with v* found 19 hits +Search with l* found 193 hits +Search with y* found 69 hits +Search with q* found 6 hits +Search with v* found 19 hits +Search with v* found 19 hits +Search with m* found 153 hits +Search with d* found 163 hits +Search with x* found 13 hits +Search with g* found 143 hits +Search with e* found 105 hits +Search with l* found 193 hits +Search with f* found 78 hits +Search with d* found 163 hits +Search with u* found 203 hits +Search with g* found 143 hits +Search with s* found 180 hits +Search with k* found 40 hits +Search with r* found 86 hits +Search with h* found 167 hits +Search with i* found 192 hits +Search with i* found 192 hits +Search with p* found 76 hits +Search with s* found 180 hits +Search with y* found 69 hits +Search with g* found 143 hits +Search with g* found 143 hits +Search with u* found 203 hits +Search with s* found 180 hits +Search with t* found 176 hits +Search with l* found 193 hits +Search with q* found 6 hits +Search 
with f* found 78 hits +Search with l* found 193 hits +Search with h* found 167 hits +Search with b* found 113 hits +Search with j* found 43 hits +Search with v* found 19 hits +Search with g* found 143 hits +Search with h* found 167 hits +Search with e* found 105 hits +Search with k* found 40 hits +Search with u* found 203 hits +Search with k* found 40 hits +Search with n* found 128 hits +Search with p* found 76 hits +Search with s* found 180 hits +Search with j* found 43 hits +Search with c* found 173 hits +Search with n* found 128 hits +Search with q* found 6 hits +Search with m* found 153 hits +Search with v* found 19 hits +Search with i* found 192 hits +Search with g* found 143 hits +Search with u* found 203 hits +Search with q* found 6 hits +Search with o* found 129 hits +Search with r* found 86 hits +Search with l* found 193 hits +Search with i* found 192 hits +Search with e* found 105 hits +Search with c* found 173 hits +Search with p* found 76 hits +Search with r* found 86 hits +Search with l* found 193 hits +Search with s* found 180 hits +Search with b* found 113 hits +Search with h* found 167 hits +Search with a* found 141 hits +Search with k* found 40 hits +Search with o* found 129 hits +Search with k* found 40 hits +Search with f* found 78 hits +Search with b* found 113 hits +Search with b* found 113 hits +Search with w* found 187 hits +Search with t* found 176 hits +Search with m* found 153 hits +Search with a* found 141 hits +Search with h* found 167 hits +Search with f* found 78 hits +Search with m* found 153 hits +Search with d* found 163 hits +Search with p* found 76 hits +Search with u* found 203 hits +Search with bg* found 0 hits +Search with ks* found 0 hits +Search with su* found 12 hits +Search with wu* found 1 hits +Search with lq* found 0 hits +Search with jg* found 0 hits +Search with rq* found 0 hits +Search with gc* found 0 hits +Search with ht* found 0 hits +Search with hi* found 4 hits +Search with wh* found 47 hits +Search with fj* 
found 0 hits +Search with hm* found 8 hits +Search with ov* found 4 hits +Search with se* found 20 hits +Search with rt* found 0 hits +Search with me* found 65 hits +Search with oh* found 12 hits +Search with yo* found 22 hits +Search with dm* found 0 hits +Search with fo* found 13 hits +Search with tx* found 1 hits +Search with hc* found 0 hits +Search with cp* found 0 hits +Search with vm* found 1 hits +Search with xs* found 0 hits +Search with td* found 9 hits +Search with ed* found 0 hits +Search with su* found 12 hits +Search with yn* found 0 hits +Search with bs* found 0 hits +Search with jo* found 5 hits +Search with wx* found 0 hits +Search with vy* found 0 hits +Search with mb* found 0 hits +Search with ls* found 0 hits +Search with pf* found 0 hits +Search with sx* found 0 hits +Search with iw* found 0 hits +Search with pg* found 0 hits +Search with jp* found 0 hits +Search with cf* found 0 hits +Search with vi* found 1 hits +Search with io* found 0 hits +Search with ek* found 0 hits +Search with ef* found 0 hits +Search with dn* found 0 hits +Search with vd* found 0 hits +Search with ls* found 0 hits +Search with cb* found 0 hits +Search with vp* found 0 hits +Search with vo* found 1 hits +Search with vq* found 0 hits +Search with mg* found 0 hits +Search with ne* found 36 hits +Search with oa* found 0 hits +Search with tq* found 0 hits +Search with fp* found 0 hits +Search with co* found 72 hits +Search with eg* found 0 hits +Search with bk* found 3 hits +Search with le* found 71 hits +Search with xk* found 0 hits +Search with hm* found 8 hits +Search with dl* found 0 hits +Search with pb* found 0 hits +Search with cl* found 14 hits +Search with pa* found 14 hits +Search with ce* found 1 hits +Search with ir* found 0 hits +Search with iw* found 0 hits +Search with rd* found 0 hits +Search with qa* found 0 hits +Search with ss* found 0 hits +Search with qa* found 0 hits +Search with yr* found 3 hits +Search with km* found 0 hits +Search with vl* found 0 
hits +Search with wg* found 0 hits +Search with xc* found 0 hits +Search with rn* found 0 hits +Search with ev* found 3 hits +Search with bv* found 0 hits +Search with vf* found 0 hits +Search with be* found 17 hits +Search with yj* found 0 hits +Search with cr* found 9 hits +Search with mu* found 16 hits +Search with ti* found 50 hits +Search with nk* found 0 hits +Search with io* found 0 hits +Search with cs* found 0 hits +Search with da* found 23 hits +Search with gd* found 4 hits +Search with ge* found 13 hits +Search with hb* found 0 hits +Search with tn* found 0 hits +Search with ww* found 1 hits +Search with kt* found 0 hits +Search with cl* found 14 hits +Started indexing. +Indexed 200 documents. +Search with a* found 238 hits +Search with c* found 265 hits +Search with u* found 316 hits +Search with c* found 265 hits +Search with v* found 32 hits +Search with k* found 53 hits +Search with y* found 93 hits +Search with p* found 133 hits +Search with s* found 303 hits +Search with m* found 268 hits +Search with a* found 238 hits +Search with d* found 248 hits +Search with d* found 248 hits +Search with e* found 151 hits +Search with x* found 17 hits +Search with j* found 63 hits +Search with e* found 151 hits +Search with f* found 134 hits +Search with m* found 268 hits +Search with n* found 197 hits +Search with j* found 63 hits +Search with t* found 267 hits +Search with o* found 193 hits +Search with g* found 236 hits +Search with i* found 282 hits +Search with n* found 197 hits +Search with d* found 248 hits +Search with s* found 303 hits +Search with j* found 63 hits +Search with g* found 236 hits +Search with g* found 236 hits +Search with k* found 53 hits +Search with k* found 53 hits +Search with d* found 248 hits +Search with o* found 193 hits +Search with g* found 236 hits +Search with n* found 197 hits +Search with o* found 193 hits +Search with v* found 32 hits +Search with i* found 282 hits +Search with d* found 248 hits +Search with x* found 17 
hits +Search with l* found 292 hits +Search with j* found 63 hits +Search with c* found 265 hits +Search with k* found 53 hits +Search with s* found 303 hits +Search with i* found 282 hits +Search with p* found 133 hits +Search with h* found 259 hits +Search with v* found 32 hits +Search with y* found 93 hits +Search with b* found 173 hits +Search with m* found 268 hits +Search with h* found 259 hits +Search with l* found 292 hits +Search with b* found 173 hits +Search with l* found 292 hits +Search with g* found 236 hits +Search with m* found 268 hits +Search with r* found 123 hits +Search with m* found 268 hits +Search with w* found 297 hits +Search with e* found 151 hits +Search with s* found 303 hits +Search with o* found 193 hits +Search with k* found 53 hits +Search with g* found 236 hits +Search with d* found 248 hits +Search with i* found 282 hits +Search with p* found 133 hits +Search with i* found 282 hits +Search with i* found 282 hits +Search with d* found 248 hits +Search with r* found 123 hits +Search with m* found 268 hits +Search with n* found 197 hits +Search with m* found 268 hits +Search with u* found 316 hits +Search with d* found 248 hits +Search with t* found 267 hits +Search with t* found 267 hits +Search with f* found 134 hits +Search with x* found 17 hits +Search with g* found 236 hits +Search with m* found 268 hits +Search with j* found 63 hits +Search with j* found 63 hits +Search with x* found 17 hits +Search with s* found 303 hits +Search with w* found 297 hits +Search with r* found 123 hits +Search with h* found 259 hits +Search with t* found 267 hits +Search with x* found 17 hits +Search with a* found 238 hits +Search with i* found 282 hits +Search with k* found 53 hits +Search with h* found 259 hits +Search with n* found 197 hits +Search with sy* found 1 hits +Search with wd* found 0 hits +Search with cq* found 0 hits +Search with ps* found 0 hits +Search with gn* found 0 hits +Search with va* found 0 hits +Search with hd* found 0 
hits +Search with xn* found 0 hits +Search with qk* found 0 hits +Search with aq* found 0 hits +Search with dw* found 0 hits +Search with lk* found 0 hits +Search with sj* found 0 hits +Search with nc* found 0 hits +Search with uu* found 0 hits +Search with rp* found 0 hits +Search with vq* found 0 hits +Search with va* found 0 hits +Search with kn* found 11 hits +Search with sq* found 2 hits +Search with br* found 23 hits +Search with qk* found 0 hits +Search with ur* found 52 hits +Search with bn* found 0 hits +Search with cb* found 0 hits +Search with hf* found 0 hits +Search with xs* found 0 hits +Search with rq* found 0 hits +Search with ef* found 0 hits +Search with ub* found 0 hits +Search with cn* found 0 hits +Search with ta* found 27 hits +Search with gp* found 0 hits +Search with bq* found 0 hits +Search with dv* found 1 hits +Search with hh* found 0 hits +Search with ny* found 0 hits +Search with rj* found 0 hits +Search with qs* found 0 hits +Search with yu* found 25 hits +Search with tg* found 0 hits +Search with ct* found 1 hits +Search with yt* found 0 hits +Search with lg* found 0 hits +Search with cg* found 0 hits +Search with hg* found 0 hits +Search with vd* found 0 hits +Search with hd* found 0 hits +Search with uk* found 0 hits +Search with tb* found 0 hits +Search with ge* found 19 hits +Search with iu* found 0 hits +Search with fc* found 0 hits +Search with gx* found 0 hits +Search with vf* found 0 hits +Search with sr* found 1 hits +Search with mu* found 26 hits +Search with mn* found 0 hits +Search with qx* found 0 hits +Search with vs* found 0 hits +Search with gg* found 0 hits +Search with af* found 16 hits +Search with jj* found 0 hits +Search with if* found 0 hits +Search with te* found 25 hits +Search with ga* found 7 hits +Search with io* found 0 hits +Search with wn* found 0 hits +Search with sd* found 0 hits +Search with lo* found 113 hits +Search with le* found 89 hits +Search with jx* found 0 hits +Search with bv* found 0 hits 
+Search with ns* found 0 hits +Search with vk* found 0 hits +Search with nc* found 0 hits +Search with qp* found 0 hits +Search with ha* found 109 hits +Search with yr* found 3 hits +Search with hu* found 14 hits +Search with wn* found 0 hits +Search with xh* found 0 hits +Search with eu* found 0 hits +Search with vx* found 0 hits +Search with bj* found 0 hits +Search with om* found 0 hits +Search with ox* found 0 hits +Search with mr* found 6 hits +Search with ta* found 27 hits +Search with kr* found 0 hits +Search with kb* found 1 hits +Search with wd* found 0 hits +Search with qe* found 0 hits +Search with gs* found 0 hits +Search with yn* found 0 hits +Search with nx* found 0 hits +Search with cl* found 17 hits +Search with fi* found 41 hits +Search with hb* found 0 hits +Search with ik* found 2 hits +Started indexing. +Indexed 200 documents. +Search with l* found 402 hits +Search with x* found 26 hits +Search with w* found 401 hits +Search with c* found 365 hits +Search with x* found 26 hits +Search with j* found 81 hits +Search with t* found 365 hits +Search with u* found 410 hits +Search with l* found 402 hits +Search with h* found 376 hits +Search with m* found 371 hits +Search with y* found 136 hits +Search with i* found 357 hits +Search with j* found 81 hits +Search with c* found 365 hits +Search with b* found 241 hits +Search with q* found 26 hits +Search with i* found 357 hits +Search with t* found 365 hits +Search with p* found 179 hits +Search with w* found 401 hits +Search with i* found 357 hits +Search with n* found 258 hits +Search with b* found 241 hits +Search with v* found 44 hits +Search with s* found 416 hits +Search with l* found 402 hits +Search with f* found 187 hits +Search with w* found 401 hits +Search with u* found 410 hits +Search with p* found 179 hits +Search with k* found 73 hits +Search with u* found 410 hits +Search with m* found 371 hits +Search with m* found 371 hits +Search with s* found 416 hits +Search with x* found 26 hits 
+Search with i* found 357 hits +Search with n* found 258 hits +Search with k* found 73 hits +Search with r* found 170 hits +Search with b* found 241 hits +Search with j* found 81 hits +Search with c* found 365 hits +Search with m* found 371 hits +Search with l* found 402 hits +Search with e* found 210 hits +Search with d* found 338 hits +Search with w* found 401 hits +Search with x* found 26 hits +Search with s* found 416 hits +Search with v* found 44 hits +Search with i* found 357 hits +Search with i* found 357 hits +Search with w* found 401 hits +Search with h* found 376 hits +Search with e* found 210 hits +Search with i* found 357 hits +Search with m* found 371 hits +Search with b* found 241 hits +Search with f* found 187 hits +Search with d* found 338 hits +Search with n* found 258 hits +Search with a* found 314 hits +Search with r* found 170 hits +Search with c* found 365 hits +Search with v* found 44 hits +Search with q* found 26 hits +Search with k* found 73 hits +Search with l* found 402 hits +Search with d* found 338 hits +Search with f* found 187 hits +Search with o* found 283 hits +Search with m* found 371 hits +Search with h* found 376 hits +Search with c* found 365 hits +Search with a* found 314 hits +Search with l* found 402 hits +Search with f* found 187 hits +Search with y* found 136 hits +Search with m* found 371 hits +Search with b* found 241 hits +Search with u* found 410 hits +Search with w* found 401 hits +Search with l* found 402 hits +Search with r* found 170 hits +Search with e* found 210 hits +Search with p* found 179 hits +Search with d* found 338 hits +Search with r* found 170 hits +Search with s* found 416 hits +Search with k* found 73 hits +Search with w* found 401 hits +Search with g* found 325 hits +Search with n* found 258 hits +Search with o* found 283 hits +Search with i* found 357 hits +Search with j* found 81 hits +Search with h* found 376 hits +Search with v* found 44 hits +Search with uk* found 0 hits +Search with bm* found 0 
hits +Search with yk* found 0 hits +Search with oa* found 0 hits +Search with yv* found 0 hits +Search with bl* found 5 hits +Search with yw* found 0 hits +Search with kl* found 0 hits +Search with qp* found 0 hits +Search with bt* found 0 hits +Search with jw* found 0 hits +Search with hg* found 0 hits +Search with du* found 89 hits +Search with wm* found 0 hits +Search with ef* found 4 hits +Search with kb* found 1 hits +Search with rn* found 0 hits +Search with nr* found 0 hits +Search with xe* found 0 hits +Search with tw* found 5 hits +Search with bu* found 40 hits +Search with ka* found 12 hits +Search with rv* found 0 hits +Search with nj* found 0 hits +Search with lp* found 0 hits +Search with fu* found 10 hits +Search with om* found 0 hits +Search with br* found 28 hits +Search with ha* found 154 hits +Search with gn* found 0 hits +Search with fq* found 0 hits +Search with rx* found 0 hits +Search with ef* found 4 hits +Search with rf* found 0 hits +Search with ml* found 0 hits +Search with cn* found 0 hits +Search with gn* found 0 hits +Search with pb* found 0 hits +Search with je* found 1 hits +Search with ku* found 0 hits +Search with vr* found 0 hits +Search with rk* found 0 hits +Search with hu* found 21 hits +Search with co* found 159 hits +Search with ul* found 0 hits +Search with fd* found 0 hits +Search with ew* found 0 hits +Search with dl* found 0 hits +Search with eu* found 0 hits +Search with qq* found 0 hits +Search with gs* found 0 hits +Search with go* found 268 hits +Search with iw* found 0 hits +Search with pr* found 46 hits +Search with bc* found 0 hits +Search with ow* found 0 hits +Search with ui* found 0 hits +Search with je* found 1 hits +Search with dl* found 0 hits +Search with ub* found 0 hits +Search with ya* found 29 hits +Search with ee* found 0 hits +Search with yh* found 0 hits +Search with pg* found 0 hits +Search with cg* found 0 hits +Search with wk* found 8 hits +Search with bg* found 0 hits +Search with yk* found 0 hits 
+Search with dp* found 0 hits +Search with cg* found 0 hits +Search with tt* found 13 hits +Search with fq* found 0 hits +Search with co* found 159 hits +Search with ug* found 0 hits +Search with cq* found 0 hits +Search with hc* found 0 hits +Search with sn* found 1 hits +Search with is* found 1 hits +Search with ux* found 0 hits +Search with yy* found 0 hits +Search with gx* found 0 hits +Search with jh* found 0 hits +Search with gi* found 17 hits +Search with rj* found 0 hits +Search with aw* found 4 hits +Search with rw* found 0 hits +Search with qy* found 0 hits +Search with nu* found 6 hits +Search with pl* found 22 hits +Search with bs* found 0 hits +Search with ck* found 0 hits +Search with ww* found 1 hits +Search with xf* found 1 hits +Search with pu* found 9 hits +Search with fq* found 0 hits +Search with tl* found 0 hits +Search with rf* found 0 hits +Search with sx* found 0 hits +Search with ql* found 0 hits +Search with ks* found 0 hits +Started indexing. +Indexed 200 documents. 
+Search with k* found 89 hits +Search with e* found 279 hits +Search with p* found 215 hits +Search with b* found 306 hits +Search with e* found 279 hits +Search with g* found 408 hits +Search with v* found 50 hits +Search with t* found 462 hits +Search with r* found 213 hits +Search with a* found 394 hits +Search with o* found 373 hits +Search with v* found 50 hits +Search with m* found 478 hits +Search with n* found 315 hits +Search with s* found 530 hits +Search with n* found 315 hits +Search with u* found 513 hits +Search with l* found 527 hits +Search with i* found 453 hits +Search with a* found 394 hits +Search with e* found 279 hits +Search with f* found 244 hits +Search with n* found 315 hits +Search with v* found 50 hits +Search with m* found 478 hits +Search with h* found 497 hits +Search with u* found 513 hits +Search with d* found 427 hits +Search with u* found 513 hits +Search with h* found 497 hits +Search with y* found 169 hits +Search with f* found 244 hits +Search with l* found 527 hits +Search with o* found 373 hits +Search with i* found 453 hits +Search with p* found 215 hits +Search with w* found 503 hits +Search with g* found 408 hits +Search with m* found 478 hits +Search with q* found 34 hits +Search with i* found 453 hits +Search with d* found 427 hits +Search with m* found 478 hits +Search with v* found 50 hits +Search with r* found 213 hits +Search with i* found 453 hits +Search with j* found 106 hits +Search with o* found 373 hits +Search with t* found 462 hits +Search with t* found 462 hits +Search with p* found 215 hits +Search with a* found 394 hits +Search with b* found 306 hits +Search with f* found 244 hits +Search with w* found 503 hits +Search with o* found 373 hits +Search with m* found 478 hits +Search with t* found 462 hits +Search with t* found 462 hits +Search with j* found 106 hits +Search with b* found 306 hits +Search with s* found 530 hits +Search with q* found 34 hits +Search with m* found 478 hits +Search with k* found 
89 hits +Search with b* found 306 hits +Search with f* found 244 hits +Search with h* found 497 hits +Search with i* found 453 hits +Search with t* found 462 hits +Search with y* found 169 hits +Search with q* found 34 hits +Search with w* found 503 hits +Search with n* found 315 hits +Search with o* found 373 hits +Search with q* found 34 hits +Search with v* found 50 hits +Search with a* found 394 hits +Search with g* found 408 hits +Search with r* found 213 hits +Search with w* found 503 hits +Search with x* found 39 hits +Search with s* found 530 hits +Search with x* found 39 hits +Search with d* found 427 hits +Search with r* found 213 hits +Search with o* found 373 hits +Search with s* found 530 hits +Search with l* found 527 hits +Search with j* found 106 hits +Search with c* found 473 hits +Search with o* found 373 hits +Search with e* found 279 hits +Search with v* found 50 hits +Search with d* found 427 hits +Search with o* found 373 hits +Search with w* found 503 hits +Search with i* found 453 hits +Search with y* found 169 hits +Search with h* found 497 hits +Search with ea* found 66 hits +Search with bc* found 0 hits +Search with nr* found 0 hits +Search with um* found 0 hits +Search with sb* found 0 hits +Search with ep* found 0 hits +Search with yy* found 0 hits +Search with nf* found 0 hits +Search with qf* found 0 hits +Search with xe* found 0 hits +Search with qc* found 0 hits +Search with su* found 33 hits +Search with xw* found 0 hits +Search with mx* found 0 hits +Search with fn* found 0 hits +Search with fm* found 0 hits +Search with ni* found 23 hits +Search with ob* found 1 hits +Search with bl* found 7 hits +Search with pt* found 0 hits +Search with nu* found 9 hits +Search with lo* found 222 hits +Search with vb* found 0 hits +Search with tn* found 0 hits +Search with gr* found 15 hits +Search with rw* found 0 hits +Search with ul* found 0 hits +Search with uu* found 0 hits +Search with kj* found 0 hits +Search with sp* found 12 hits 
+Search with wb* found 0 hits +Search with ek* found 0 hits +Search with ls* found 1 hits +Search with nm* found 0 hits +Search with he* found 171 hits +Search with iu* found 0 hits +Search with bt* found 2 hits +Search with lw* found 0 hits +Search with vg* found 0 hits +Search with mc* found 2 hits +Search with ah* found 66 hits +Search with cw* found 1 hits +Search with sy* found 1 hits +Search with sd* found 0 hits +Search with in* found 28 hits +Search with uf* found 0 hits +Search with qa* found 0 hits +Search with rd* found 1 hits +Search with uf* found 0 hits +Search with pc* found 0 hits +Search with lb* found 0 hits +Search with ym* found 0 hits +Search with ul* found 0 hits +Search with ms* found 19 hits +Search with ty* found 4 hits +Search with xt* found 0 hits +Search with ga* found 15 hits +Search with tb* found 0 hits +Search with yo* found 26 hits +Search with gh* found 1 hits +Search with ce* found 6 hits +Search with ov* found 10 hits +Search with gg* found 0 hits +Search with yd* found 0 hits +Search with no* found 157 hits +Search with ia* found 0 hits +Search with rh* found 0 hits +Search with on* found 81 hits +Search with ub* found 0 hits +Search with gq* found 0 hits +Search with de* found 135 hits +Search with nl* found 0 hits +Search with gh* found 1 hits +Search with pf* found 0 hits +Search with vx* found 0 hits +Search with oa* found 0 hits +Search with ed* found 2 hits +Search with vk* found 0 hits +Search with ju* found 64 hits +Search with qa* found 0 hits +Search with my* found 125 hits +Search with ae* found 0 hits +Search with io* found 0 hits +Search with tg* found 0 hits +Search with sb* found 0 hits +Search with wx* found 0 hits +Search with hk* found 0 hits +Search with lo* found 222 hits +Search with tb* found 0 hits +Search with vr* found 0 hits +Search with cn* found 0 hits +Search with sg* found 1 hits +Search with qq* found 0 hits +Search with td* found 9 hits +Search with ok* found 125 hits +Search with dd* found 0 hits 
+Search with lf* found 0 hits +Search with jt* found 0 hits +Search with ve* found 17 hits +Search with aq* found 0 hits diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/unoptimized_exp_out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/unoptimized_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1010 @@ +Started indexing. +Indexed 200 documents. +Search with b* found 59 hits +Search with r* found 48 hits +Search with n* found 62 hits +Search with u* found 100 hits +Search with e* found 47 hits +Search with u* found 100 hits +Search with m* found 73 hits +Search with h* found 92 hits +Search with g* found 75 hits +Search with u* found 100 hits +Search with m* found 73 hits +Search with d* found 77 hits +Search with p* found 43 hits +Search with v* found 8 hits +Search with q* found 3 hits +Search with v* found 8 hits +Search with w* found 107 hits +Search with m* found 73 hits +Search with l* found 93 hits +Search with k* found 22 hits +Search with y* found 33 hits +Search with b* found 59 hits +Search with n* found 62 hits +Search with w* found 107 hits +Search with y* found 33 hits +Search with b* found 59 hits +Search with q* found 3 hits +Search with c* found 97 hits +Search with w* found 107 hits +Search with c* found 97 hits +Search with k* found 22 hits +Search with x* found 5 hits +Search with v* found 8 hits +Search with y* found 33 hits +Search with t* found 81 hits +Search with b* found 59 hits +Search with v* found 8 hits +Search with g* found 75 hits +Search with i* found 98 hits +Search with e* found 47 hits +Search with e* found 47 hits +Search with w* found 107 hits +Search with h* found 92 hits +Search with t* found 81 hits +Search with v* found 8 hits +Search with a* found 67 hits +Search with p* found 43 hits +Search with u* found 100 hits +Search with o* found 63 hits +Search with d* found 77 hits +Search with f* found 
42 hits +Search with o* found 63 hits +Search with g* found 75 hits +Search with u* found 100 hits +Search with n* found 62 hits +Search with g* found 75 hits +Search with v* found 8 hits +Search with e* found 47 hits +Search with k* found 22 hits +Search with v* found 8 hits +Search with j* found 23 hits +Search with v* found 8 hits +Search with t* found 81 hits +Search with f* found 42 hits +Search with w* found 107 hits +Search with p* found 43 hits +Search with i* found 98 hits +Search with s* found 85 hits +Search with y* found 33 hits +Search with s* found 85 hits +Search with w* found 107 hits +Search with d* found 77 hits +Search with s* found 85 hits +Search with g* found 75 hits +Search with w* found 107 hits +Search with o* found 63 hits +Search with i* found 98 hits +Search with o* found 63 hits +Search with l* found 93 hits +Search with x* found 5 hits +Search with t* found 81 hits +Search with q* found 3 hits +Search with o* found 63 hits +Search with a* found 67 hits +Search with m* found 73 hits +Search with c* found 97 hits +Search with i* found 98 hits +Search with k* found 22 hits +Search with g* found 75 hits +Search with t* found 81 hits +Search with g* found 75 hits +Search with r* found 48 hits +Search with r* found 48 hits +Search with d* found 77 hits +Search with a* found 67 hits +Search with o* found 63 hits +Search with s* found 85 hits +Search with i* found 98 hits +Search with j* found 23 hits +Search with t* found 81 hits +Search with ci* found 2 hits +Search with wu* found 0 hits +Search with qv* found 0 hits +Search with ly* found 0 hits +Search with mw* found 0 hits +Search with wg* found 0 hits +Search with pn* found 0 hits +Search with je* found 1 hits +Search with pt* found 0 hits +Search with ry* found 0 hits +Search with pa* found 9 hits +Search with qh* found 0 hits +Search with dq* found 0 hits +Search with xy* found 4 hits +Search with cj* found 0 hits +Search with se* found 7 hits +Search with rs* found 0 hits +Search with 
bk* found 2 hits +Search with qo* found 0 hits +Search with ke* found 2 hits +Search with nj* found 0 hits +Search with mf* found 0 hits +Search with xv* found 0 hits +Search with kn* found 1 hits +Search with sc* found 4 hits +Search with oi* found 0 hits +Search with eg* found 0 hits +Search with si* found 7 hits +Search with ys* found 0 hits +Search with jb* found 0 hits +Search with de* found 30 hits +Search with hw* found 1 hits +Search with yk* found 0 hits +Search with iq* found 0 hits +Search with yu* found 2 hits +Search with uo* found 0 hits +Search with eh* found 3 hits +Search with ue* found 0 hits +Search with gf* found 0 hits +Search with uy* found 0 hits +Search with jj* found 0 hits +Search with jn* found 0 hits +Search with pc* found 0 hits +Search with xp* found 0 hits +Search with vh* found 0 hits +Search with sb* found 0 hits +Search with ob* found 0 hits +Search with xn* found 0 hits +Search with li* found 22 hits +Search with en* found 11 hits +Search with db* found 0 hits +Search with ck* found 0 hits +Search with lw* found 0 hits +Search with ot* found 3 hits +Search with cj* found 0 hits +Search with sn* found 0 hits +Search with tc* found 0 hits +Search with cl* found 6 hits +Search with hc* found 0 hits +Search with df* found 0 hits +Search with mw* found 0 hits +Search with gb* found 0 hits +Search with xe* found 0 hits +Search with ql* found 0 hits +Search with px* found 0 hits +Search with ys* found 0 hits +Search with yc* found 0 hits +Search with dm* found 0 hits +Search with ys* found 0 hits +Search with ge* found 8 hits +Search with cb* found 0 hits +Search with tx* found 1 hits +Search with gx* found 0 hits +Search with kn* found 1 hits +Search with bn* found 0 hits +Search with un* found 4 hits +Search with mb* found 0 hits +Search with qk* found 0 hits +Search with hh* found 0 hits +Search with yw* found 0 hits +Search with hx* found 0 hits +Search with sj* found 0 hits +Search with cv* found 0 hits +Search with vc* found 0 hits 
+Search with qf* found 0 hits +Search with iv* found 0 hits +Search with gc* found 0 hits +Search with tm* found 6 hits +Search with bg* found 0 hits +Search with ce* found 1 hits +Search with uw* found 0 hits +Search with rh* found 0 hits +Search with xk* found 0 hits +Search with ui* found 0 hits +Search with tt* found 1 hits +Search with hc* found 0 hits +Search with ta* found 13 hits +Search with lw* found 0 hits +Search with wj* found 0 hits +Search with bp* found 0 hits +Started indexing. +Indexed 200 documents. +Search with o* found 129 hits +Search with j* found 43 hits +Search with l* found 193 hits +Search with v* found 19 hits +Search with n* found 128 hits +Search with i* found 192 hits +Search with k* found 40 hits +Search with q* found 6 hits +Search with o* found 129 hits +Search with n* found 128 hits +Search with u* found 203 hits +Search with l* found 193 hits +Search with k* found 40 hits +Search with o* found 129 hits +Search with v* found 19 hits +Search with l* found 193 hits +Search with y* found 69 hits +Search with q* found 6 hits +Search with v* found 19 hits +Search with v* found 19 hits +Search with m* found 153 hits +Search with d* found 163 hits +Search with x* found 13 hits +Search with g* found 143 hits +Search with e* found 105 hits +Search with l* found 193 hits +Search with f* found 78 hits +Search with d* found 163 hits +Search with u* found 203 hits +Search with g* found 143 hits +Search with s* found 180 hits +Search with k* found 40 hits +Search with r* found 86 hits +Search with h* found 167 hits +Search with i* found 192 hits +Search with i* found 192 hits +Search with p* found 76 hits +Search with s* found 180 hits +Search with y* found 69 hits +Search with g* found 143 hits +Search with g* found 143 hits +Search with u* found 203 hits +Search with s* found 180 hits +Search with t* found 176 hits +Search with l* found 193 hits +Search with q* found 6 hits +Search with f* found 78 hits +Search with l* found 193 hits +Search 
with h* found 167 hits +Search with b* found 113 hits +Search with j* found 43 hits +Search with v* found 19 hits +Search with g* found 143 hits +Search with h* found 167 hits +Search with e* found 105 hits +Search with k* found 40 hits +Search with u* found 203 hits +Search with k* found 40 hits +Search with n* found 128 hits +Search with p* found 76 hits +Search with s* found 180 hits +Search with j* found 43 hits +Search with c* found 173 hits +Search with n* found 128 hits +Search with q* found 6 hits +Search with m* found 153 hits +Search with v* found 19 hits +Search with i* found 192 hits +Search with g* found 143 hits +Search with u* found 203 hits +Search with q* found 6 hits +Search with o* found 129 hits +Search with r* found 86 hits +Search with l* found 193 hits +Search with i* found 192 hits +Search with e* found 105 hits +Search with c* found 173 hits +Search with p* found 76 hits +Search with r* found 86 hits +Search with l* found 193 hits +Search with s* found 180 hits +Search with b* found 113 hits +Search with h* found 167 hits +Search with a* found 141 hits +Search with k* found 40 hits +Search with o* found 129 hits +Search with k* found 40 hits +Search with f* found 78 hits +Search with b* found 113 hits +Search with b* found 113 hits +Search with w* found 187 hits +Search with t* found 176 hits +Search with m* found 153 hits +Search with a* found 141 hits +Search with h* found 167 hits +Search with f* found 78 hits +Search with m* found 153 hits +Search with d* found 163 hits +Search with p* found 76 hits +Search with u* found 203 hits +Search with bg* found 0 hits +Search with ks* found 0 hits +Search with su* found 12 hits +Search with wu* found 1 hits +Search with lq* found 0 hits +Search with jg* found 0 hits +Search with rq* found 0 hits +Search with gc* found 0 hits +Search with ht* found 0 hits +Search with hi* found 4 hits +Search with wh* found 47 hits +Search with fj* found 0 hits +Search with hm* found 8 hits +Search with ov* found 
4 hits +Search with se* found 20 hits +Search with rt* found 0 hits +Search with me* found 65 hits +Search with oh* found 12 hits +Search with yo* found 22 hits +Search with dm* found 0 hits +Search with fo* found 13 hits +Search with tx* found 1 hits +Search with hc* found 0 hits +Search with cp* found 0 hits +Search with vm* found 1 hits +Search with xs* found 0 hits +Search with td* found 9 hits +Search with ed* found 0 hits +Search with su* found 12 hits +Search with yn* found 0 hits +Search with bs* found 0 hits +Search with jo* found 5 hits +Search with wx* found 0 hits +Search with vy* found 0 hits +Search with mb* found 0 hits +Search with ls* found 0 hits +Search with pf* found 0 hits +Search with sx* found 0 hits +Search with iw* found 0 hits +Search with pg* found 0 hits +Search with jp* found 0 hits +Search with cf* found 0 hits +Search with vi* found 1 hits +Search with io* found 0 hits +Search with ek* found 0 hits +Search with ef* found 0 hits +Search with dn* found 0 hits +Search with vd* found 0 hits +Search with ls* found 0 hits +Search with cb* found 0 hits +Search with vp* found 0 hits +Search with vo* found 1 hits +Search with vq* found 0 hits +Search with mg* found 0 hits +Search with ne* found 36 hits +Search with oa* found 0 hits +Search with tq* found 0 hits +Search with fp* found 0 hits +Search with co* found 72 hits +Search with eg* found 0 hits +Search with bk* found 3 hits +Search with le* found 71 hits +Search with xk* found 0 hits +Search with hm* found 8 hits +Search with dl* found 0 hits +Search with pb* found 0 hits +Search with cl* found 14 hits +Search with pa* found 14 hits +Search with ce* found 1 hits +Search with ir* found 0 hits +Search with iw* found 0 hits +Search with rd* found 0 hits +Search with qa* found 0 hits +Search with ss* found 0 hits +Search with qa* found 0 hits +Search with yr* found 3 hits +Search with km* found 0 hits +Search with vl* found 0 hits +Search with wg* found 0 hits +Search with xc* found 0 hits 
+Search with rn* found 0 hits +Search with ev* found 3 hits +Search with bv* found 0 hits +Search with vf* found 0 hits +Search with be* found 17 hits +Search with yj* found 0 hits +Search with cr* found 9 hits +Search with mu* found 16 hits +Search with ti* found 50 hits +Search with nk* found 0 hits +Search with io* found 0 hits +Search with cs* found 0 hits +Search with da* found 23 hits +Search with gd* found 4 hits +Search with ge* found 13 hits +Search with hb* found 0 hits +Search with tn* found 0 hits +Search with ww* found 1 hits +Search with kt* found 0 hits +Search with cl* found 14 hits +Started indexing. +Indexed 200 documents. +Search with a* found 238 hits +Search with c* found 265 hits +Search with u* found 316 hits +Search with c* found 265 hits +Search with v* found 32 hits +Search with k* found 53 hits +Search with y* found 93 hits +Search with p* found 133 hits +Search with s* found 303 hits +Search with m* found 268 hits +Search with a* found 238 hits +Search with d* found 248 hits +Search with d* found 248 hits +Search with e* found 151 hits +Search with x* found 17 hits +Search with j* found 63 hits +Search with e* found 151 hits +Search with f* found 134 hits +Search with m* found 268 hits +Search with n* found 197 hits +Search with j* found 63 hits +Search with t* found 267 hits +Search with o* found 193 hits +Search with g* found 236 hits +Search with i* found 282 hits +Search with n* found 197 hits +Search with d* found 248 hits +Search with s* found 303 hits +Search with j* found 63 hits +Search with g* found 236 hits +Search with g* found 236 hits +Search with k* found 53 hits +Search with k* found 53 hits +Search with d* found 248 hits +Search with o* found 193 hits +Search with g* found 236 hits +Search with n* found 197 hits +Search with o* found 193 hits +Search with v* found 32 hits +Search with i* found 282 hits +Search with d* found 248 hits +Search with x* found 17 hits +Search with l* found 292 hits +Search with j* found 63 
hits +Search with c* found 265 hits +Search with k* found 53 hits +Search with s* found 303 hits +Search with i* found 282 hits +Search with p* found 133 hits +Search with h* found 259 hits +Search with v* found 32 hits +Search with y* found 93 hits +Search with b* found 173 hits +Search with m* found 268 hits +Search with h* found 259 hits +Search with l* found 292 hits +Search with b* found 173 hits +Search with l* found 292 hits +Search with g* found 236 hits +Search with m* found 268 hits +Search with r* found 123 hits +Search with m* found 268 hits +Search with w* found 297 hits +Search with e* found 151 hits +Search with s* found 303 hits +Search with o* found 193 hits +Search with k* found 53 hits +Search with g* found 236 hits +Search with d* found 248 hits +Search with i* found 282 hits +Search with p* found 133 hits +Search with i* found 282 hits +Search with i* found 282 hits +Search with d* found 248 hits +Search with r* found 123 hits +Search with m* found 268 hits +Search with n* found 197 hits +Search with m* found 268 hits +Search with u* found 316 hits +Search with d* found 248 hits +Search with t* found 267 hits +Search with t* found 267 hits +Search with f* found 134 hits +Search with x* found 17 hits +Search with g* found 236 hits +Search with m* found 268 hits +Search with j* found 63 hits +Search with j* found 63 hits +Search with x* found 17 hits +Search with s* found 303 hits +Search with w* found 297 hits +Search with r* found 123 hits +Search with h* found 259 hits +Search with t* found 267 hits +Search with x* found 17 hits +Search with a* found 238 hits +Search with i* found 282 hits +Search with k* found 53 hits +Search with h* found 259 hits +Search with n* found 197 hits +Search with sy* found 1 hits +Search with wd* found 0 hits +Search with cq* found 0 hits +Search with ps* found 0 hits +Search with gn* found 0 hits +Search with va* found 0 hits +Search with hd* found 0 hits +Search with xn* found 0 hits +Search with qk* found 0 
hits +Search with aq* found 0 hits +Search with dw* found 0 hits +Search with lk* found 0 hits +Search with sj* found 0 hits +Search with nc* found 0 hits +Search with uu* found 0 hits +Search with rp* found 0 hits +Search with vq* found 0 hits +Search with va* found 0 hits +Search with kn* found 11 hits +Search with sq* found 2 hits +Search with br* found 23 hits +Search with qk* found 0 hits +Search with ur* found 52 hits +Search with bn* found 0 hits +Search with cb* found 0 hits +Search with hf* found 0 hits +Search with xs* found 0 hits +Search with rq* found 0 hits +Search with ef* found 0 hits +Search with ub* found 0 hits +Search with cn* found 0 hits +Search with ta* found 27 hits +Search with gp* found 0 hits +Search with bq* found 0 hits +Search with dv* found 1 hits +Search with hh* found 0 hits +Search with ny* found 0 hits +Search with rj* found 0 hits +Search with qs* found 0 hits +Search with yu* found 25 hits +Search with tg* found 0 hits +Search with ct* found 1 hits +Search with yt* found 0 hits +Search with lg* found 0 hits +Search with cg* found 0 hits +Search with hg* found 0 hits +Search with vd* found 0 hits +Search with hd* found 0 hits +Search with uk* found 0 hits +Search with tb* found 0 hits +Search with ge* found 19 hits +Search with iu* found 0 hits +Search with fc* found 0 hits +Search with gx* found 0 hits +Search with vf* found 0 hits +Search with sr* found 1 hits +Search with mu* found 26 hits +Search with mn* found 0 hits +Search with qx* found 0 hits +Search with vs* found 0 hits +Search with gg* found 0 hits +Search with af* found 16 hits +Search with jj* found 0 hits +Search with if* found 0 hits +Search with te* found 25 hits +Search with ga* found 7 hits +Search with io* found 0 hits +Search with wn* found 0 hits +Search with sd* found 0 hits +Search with lo* found 113 hits +Search with le* found 89 hits +Search with jx* found 0 hits +Search with bv* found 0 hits +Search with ns* found 0 hits +Search with vk* found 0 hits 
+Search with nc* found 0 hits +Search with qp* found 0 hits +Search with ha* found 109 hits +Search with yr* found 3 hits +Search with hu* found 14 hits +Search with wn* found 0 hits +Search with xh* found 0 hits +Search with eu* found 0 hits +Search with vx* found 0 hits +Search with bj* found 0 hits +Search with om* found 0 hits +Search with ox* found 0 hits +Search with mr* found 6 hits +Search with ta* found 27 hits +Search with kr* found 0 hits +Search with kb* found 1 hits +Search with wd* found 0 hits +Search with qe* found 0 hits +Search with gs* found 0 hits +Search with yn* found 0 hits +Search with nx* found 0 hits +Search with cl* found 17 hits +Search with fi* found 41 hits +Search with hb* found 0 hits +Search with ik* found 2 hits +Started indexing. +Indexed 200 documents. +Search with l* found 402 hits +Search with x* found 26 hits +Search with w* found 401 hits +Search with c* found 365 hits +Search with x* found 26 hits +Search with j* found 81 hits +Search with t* found 365 hits +Search with u* found 410 hits +Search with l* found 402 hits +Search with h* found 376 hits +Search with m* found 371 hits +Search with y* found 136 hits +Search with i* found 357 hits +Search with j* found 81 hits +Search with c* found 365 hits +Search with b* found 241 hits +Search with q* found 26 hits +Search with i* found 357 hits +Search with t* found 365 hits +Search with p* found 179 hits +Search with w* found 401 hits +Search with i* found 357 hits +Search with n* found 258 hits +Search with b* found 241 hits +Search with v* found 44 hits +Search with s* found 416 hits +Search with l* found 402 hits +Search with f* found 187 hits +Search with w* found 401 hits +Search with u* found 410 hits +Search with p* found 179 hits +Search with k* found 73 hits +Search with u* found 410 hits +Search with m* found 371 hits +Search with m* found 371 hits +Search with s* found 416 hits +Search with x* found 26 hits +Search with i* found 357 hits +Search with n* found 258 hits 
+Search with k* found 73 hits +Search with r* found 170 hits +Search with b* found 241 hits +Search with j* found 81 hits +Search with c* found 365 hits +Search with m* found 371 hits +Search with l* found 402 hits +Search with e* found 210 hits +Search with d* found 338 hits +Search with w* found 401 hits +Search with x* found 26 hits +Search with s* found 416 hits +Search with v* found 44 hits +Search with i* found 357 hits +Search with i* found 357 hits +Search with w* found 401 hits +Search with h* found 376 hits +Search with e* found 210 hits +Search with i* found 357 hits +Search with m* found 371 hits +Search with b* found 241 hits +Search with f* found 187 hits +Search with d* found 338 hits +Search with n* found 258 hits +Search with a* found 314 hits +Search with r* found 170 hits +Search with c* found 365 hits +Search with v* found 44 hits +Search with q* found 26 hits +Search with k* found 73 hits +Search with l* found 402 hits +Search with d* found 338 hits +Search with f* found 187 hits +Search with o* found 283 hits +Search with m* found 371 hits +Search with h* found 376 hits +Search with c* found 365 hits +Search with a* found 314 hits +Search with l* found 402 hits +Search with f* found 187 hits +Search with y* found 136 hits +Search with m* found 371 hits +Search with b* found 241 hits +Search with u* found 410 hits +Search with w* found 401 hits +Search with l* found 402 hits +Search with r* found 170 hits +Search with e* found 210 hits +Search with p* found 179 hits +Search with d* found 338 hits +Search with r* found 170 hits +Search with s* found 416 hits +Search with k* found 73 hits +Search with w* found 401 hits +Search with g* found 325 hits +Search with n* found 258 hits +Search with o* found 283 hits +Search with i* found 357 hits +Search with j* found 81 hits +Search with h* found 376 hits +Search with v* found 44 hits +Search with uk* found 0 hits +Search with bm* found 0 hits +Search with yk* found 0 hits +Search with oa* found 0 
hits +Search with yv* found 0 hits +Search with bl* found 5 hits +Search with yw* found 0 hits +Search with kl* found 0 hits +Search with qp* found 0 hits +Search with bt* found 0 hits +Search with jw* found 0 hits +Search with hg* found 0 hits +Search with du* found 89 hits +Search with wm* found 0 hits +Search with ef* found 4 hits +Search with kb* found 1 hits +Search with rn* found 0 hits +Search with nr* found 0 hits +Search with xe* found 0 hits +Search with tw* found 5 hits +Search with bu* found 40 hits +Search with ka* found 12 hits +Search with rv* found 0 hits +Search with nj* found 0 hits +Search with lp* found 0 hits +Search with fu* found 10 hits +Search with om* found 0 hits +Search with br* found 28 hits +Search with ha* found 154 hits +Search with gn* found 0 hits +Search with fq* found 0 hits +Search with rx* found 0 hits +Search with ef* found 4 hits +Search with rf* found 0 hits +Search with ml* found 0 hits +Search with cn* found 0 hits +Search with gn* found 0 hits +Search with pb* found 0 hits +Search with je* found 1 hits +Search with ku* found 0 hits +Search with vr* found 0 hits +Search with rk* found 0 hits +Search with hu* found 21 hits +Search with co* found 159 hits +Search with ul* found 0 hits +Search with fd* found 0 hits +Search with ew* found 0 hits +Search with dl* found 0 hits +Search with eu* found 0 hits +Search with qq* found 0 hits +Search with gs* found 0 hits +Search with go* found 268 hits +Search with iw* found 0 hits +Search with pr* found 46 hits +Search with bc* found 0 hits +Search with ow* found 0 hits +Search with ui* found 0 hits +Search with je* found 1 hits +Search with dl* found 0 hits +Search with ub* found 0 hits +Search with ya* found 29 hits +Search with ee* found 0 hits +Search with yh* found 0 hits +Search with pg* found 0 hits +Search with cg* found 0 hits +Search with wk* found 8 hits +Search with bg* found 0 hits +Search with yk* found 0 hits +Search with dp* found 0 hits +Search with cg* found 0 hits 
+Search with tt* found 13 hits +Search with fq* found 0 hits +Search with co* found 159 hits +Search with ug* found 0 hits +Search with cq* found 0 hits +Search with hc* found 0 hits +Search with sn* found 1 hits +Search with is* found 1 hits +Search with ux* found 0 hits +Search with yy* found 0 hits +Search with gx* found 0 hits +Search with jh* found 0 hits +Search with gi* found 17 hits +Search with rj* found 0 hits +Search with aw* found 4 hits +Search with rw* found 0 hits +Search with qy* found 0 hits +Search with nu* found 6 hits +Search with pl* found 22 hits +Search with bs* found 0 hits +Search with ck* found 0 hits +Search with ww* found 1 hits +Search with xf* found 1 hits +Search with pu* found 9 hits +Search with fq* found 0 hits +Search with tl* found 0 hits +Search with rf* found 0 hits +Search with sx* found 0 hits +Search with ql* found 0 hits +Search with ks* found 0 hits +Started indexing. +Indexed 200 documents. +Search with k* found 89 hits +Search with e* found 279 hits +Search with p* found 215 hits +Search with b* found 306 hits +Search with e* found 279 hits +Search with g* found 408 hits +Search with v* found 50 hits +Search with t* found 462 hits +Search with r* found 213 hits +Search with a* found 394 hits +Search with o* found 373 hits +Search with v* found 50 hits +Search with m* found 478 hits +Search with n* found 315 hits +Search with s* found 530 hits +Search with n* found 315 hits +Search with u* found 513 hits +Search with l* found 527 hits +Search with i* found 453 hits +Search with a* found 394 hits +Search with e* found 279 hits +Search with f* found 244 hits +Search with n* found 315 hits +Search with v* found 50 hits +Search with m* found 478 hits +Search with h* found 497 hits +Search with u* found 513 hits +Search with d* found 427 hits +Search with u* found 513 hits +Search with h* found 497 hits +Search with y* found 169 hits +Search with f* found 244 hits +Search with l* found 527 hits +Search with o* found 373 hits 
+Search with i* found 453 hits +Search with p* found 215 hits +Search with w* found 503 hits +Search with g* found 408 hits +Search with m* found 478 hits +Search with q* found 34 hits +Search with i* found 453 hits +Search with d* found 427 hits +Search with m* found 478 hits +Search with v* found 50 hits +Search with r* found 213 hits +Search with i* found 453 hits +Search with j* found 106 hits +Search with o* found 373 hits +Search with t* found 462 hits +Search with t* found 462 hits +Search with p* found 215 hits +Search with a* found 394 hits +Search with b* found 306 hits +Search with f* found 244 hits +Search with w* found 503 hits +Search with o* found 373 hits +Search with m* found 478 hits +Search with t* found 462 hits +Search with t* found 462 hits +Search with j* found 106 hits +Search with b* found 306 hits +Search with s* found 530 hits +Search with q* found 34 hits +Search with m* found 478 hits +Search with k* found 89 hits +Search with b* found 306 hits +Search with f* found 244 hits +Search with h* found 497 hits +Search with i* found 453 hits +Search with t* found 462 hits +Search with y* found 169 hits +Search with q* found 34 hits +Search with w* found 503 hits +Search with n* found 315 hits +Search with o* found 373 hits +Search with q* found 34 hits +Search with v* found 50 hits +Search with a* found 394 hits +Search with g* found 408 hits +Search with r* found 213 hits +Search with w* found 503 hits +Search with x* found 39 hits +Search with s* found 530 hits +Search with x* found 39 hits +Search with d* found 427 hits +Search with r* found 213 hits +Search with o* found 373 hits +Search with s* found 530 hits +Search with l* found 527 hits +Search with j* found 106 hits +Search with c* found 473 hits +Search with o* found 373 hits +Search with e* found 279 hits +Search with v* found 50 hits +Search with d* found 427 hits +Search with o* found 373 hits +Search with w* found 503 hits +Search with i* found 453 hits +Search with y* found 169 
hits +Search with h* found 497 hits +Search with ea* found 66 hits +Search with bc* found 0 hits +Search with nr* found 0 hits +Search with um* found 0 hits +Search with sb* found 0 hits +Search with ep* found 0 hits +Search with yy* found 0 hits +Search with nf* found 0 hits +Search with qf* found 0 hits +Search with xe* found 0 hits +Search with qc* found 0 hits +Search with su* found 33 hits +Search with xw* found 0 hits +Search with mx* found 0 hits +Search with fn* found 0 hits +Search with fm* found 0 hits +Search with ni* found 23 hits +Search with ob* found 1 hits +Search with bl* found 7 hits +Search with pt* found 0 hits +Search with nu* found 9 hits +Search with lo* found 222 hits +Search with vb* found 0 hits +Search with tn* found 0 hits +Search with gr* found 15 hits +Search with rw* found 0 hits +Search with ul* found 0 hits +Search with uu* found 0 hits +Search with kj* found 0 hits +Search with sp* found 12 hits +Search with wb* found 0 hits +Search with ek* found 0 hits +Search with ls* found 1 hits +Search with nm* found 0 hits +Search with he* found 171 hits +Search with iu* found 0 hits +Search with bt* found 2 hits +Search with lw* found 0 hits +Search with vg* found 0 hits +Search with mc* found 2 hits +Search with ah* found 66 hits +Search with cw* found 1 hits +Search with sy* found 1 hits +Search with sd* found 0 hits +Search with in* found 28 hits +Search with uf* found 0 hits +Search with qa* found 0 hits +Search with rd* found 1 hits +Search with uf* found 0 hits +Search with pc* found 0 hits +Search with lb* found 0 hits +Search with ym* found 0 hits +Search with ul* found 0 hits +Search with ms* found 19 hits +Search with ty* found 4 hits +Search with xt* found 0 hits +Search with ga* found 15 hits +Search with tb* found 0 hits +Search with yo* found 26 hits +Search with gh* found 1 hits +Search with ce* found 6 hits +Search with ov* found 10 hits +Search with gg* found 0 hits +Search with yd* found 0 hits +Search with no* found 157 
hits +Search with ia* found 0 hits +Search with rh* found 0 hits +Search with on* found 81 hits +Search with ub* found 0 hits +Search with gq* found 0 hits +Search with de* found 135 hits +Search with nl* found 0 hits +Search with gh* found 1 hits +Search with pf* found 0 hits +Search with vx* found 0 hits +Search with oa* found 0 hits +Search with ed* found 2 hits +Search with vk* found 0 hits +Search with ju* found 64 hits +Search with qa* found 0 hits +Search with my* found 125 hits +Search with ae* found 0 hits +Search with io* found 0 hits +Search with tg* found 0 hits +Search with sb* found 0 hits +Search with wx* found 0 hits +Search with hk* found 0 hits +Search with lo* found 222 hits +Search with tb* found 0 hits +Search with vr* found 0 hits +Search with cn* found 0 hits +Search with sg* found 1 hits +Search with qq* found 0 hits +Search with td* found 9 hits +Search with ok* found 125 hits +Search with dd* found 0 hits +Search with lf* found 0 hits +Search with jt* found 0 hits +Search with ve* found 17 hits +Search with aq* found 0 hits diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +陶傑(1958å¹´8月17日), \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +原å係曹æ·ï¼Œé¦™æ¸¯å°ˆæ¬„作家åŠå‚³åª’工作者, \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/3.txt Mon Jun 
28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +有「香江第一æ‰å­ã€å˜…稱號, \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +以文筆辛辣ã€ç«‹å ´è¦ªè¥¿æ–¹ï¼ˆå°¤å…¶æ˜¯è‹±åœ‹ï¼‰è¦‹ç¨±ã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +美国总统奥巴马星期一(11月16日)在上海与中国é’年对è¯ï¼Œ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +他回答了现场å¬ä¼—和网民的æ问, \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +内容涉åŠå°æ¹¾ã€ä¸­ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç­‰ã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +奥巴马在上海科技馆é¢å¯¹500ä½™å上海é’å¹´ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/1.txt --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +奥巴马在上海与中国é’å¹´äººå¯¹è¯ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +美国总统奥巴马星期一(11月16日)在上海与中国é’年对è¯ï¼Œä»–回答了现场å¬ä¼—和网民的æ问,内容涉åŠå°æ¹¾ã€ä¸­ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç­‰ã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +奥巴马在上海科技馆é¢å¯¹500ä½™å上海é’年以åŠæ•°ä»¥ä¸‡è®¡çš„中国互è”网使用者进行了一场问答会。 \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +这次对è¯ç”±å¤æ—¦å¤§å­¦æ ¡é•¿æ¨çŽ‰è‰¯ä¸»æŒã€‚美国驻中国大使洪åšåŸ¹è‡´è¾žã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +拈花惹è‰è¶£ å—投花å‰å˜‰å¹´è¯ç’€ç’¨è¿Žè³“ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/2.txt Mon 
Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +2009å—投花å‰å˜‰å¹´è¯â”€ã€ŒèŠ±ç¾å¹¸ç¦â€§æš¢éŠå—投〠\ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +於日å‰(11/15)å‡å—投縣埔里鎮埔里花å‰ç‰©æµä¸­å¿ƒéš†é‡ç™»å ´ï¼Œ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +在為期五å天的活動中, \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +Un approvisionnement sûr et durable \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +L'énergie est au cÅ“ur de nos vies. 
Nous dépendons d'elle pour \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +nous déplacer, pour chauffer nos maisons ou les rafraîchir, \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +pour faire fonctionner nos usines, nos exploitations agricoles et nos \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +שפעת החזירי×: תושבת נצרת עילית בת 51 ×œ×œ× ×ž×—×œ×•×ª רקע מתה diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +בבית ×”×—×•×œ×™× "העמק".החולה טופלה ×תמול בתרופה הניסיונית diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +"פרהמיביר",××•×œ× ×ž×¦×‘×” הוסיף להידרדר \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +,והבוקר נקבע מותה.החולה, תושבת נצרת עילית \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育〠\ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +日本研究・知的交æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥­ã‚’実施ã—ã¦ãŠã‚Šã€ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ログラムã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥­ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +団体ã«å¯¾ã—ã¦åŠ©æˆé‡‘ã€ç ”究奨学金等をæä¾›ã—ã¦ã„ã¾ã™ã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ 
+国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育〠\ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +日本研究・知的交æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥­ã‚’実施ã—ã¦ãŠã‚Šã€ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ログラムã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥­ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +団体ã«å¯¾ã—ã¦åŠ©æˆé‡‘ã€ç ”究奨学金等をæä¾›ã—ã¦ã„ã¾ã™ã€‚ \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +제가 ë…¼ìŠ¤í†±ì„ ë³´ëŠ”ë°ìš”, 김지우가 스토리ìƒìœ¼ë¡œ ì¼ë³¸ìœ¼ë¡œ 간다구 하네요 \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +그러고나서 ë‹¤ìŒ ì´ì•¼ê¸° 예고는 안나오고 ê¹€ì§€ìš°ì˜ ì²¨ë¶€í„° ì—¬íƒœê¹Œì§€ì˜ ì´ë¯¸ì§€ë§Œ 보여주고 \ No newline at end of 
file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +그냥 ë냈는ë°ìš”, ì´ê±° 진짜 ì´ë³„하는거 같다는 ìƒê°ì´. \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +그때 여승í˜ì²˜ëŸ¼ 논스톱 극중 ì¸ë¬¼ì—ì„œ 빠트리는건가요? \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/1.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ à¸¹à¸¡à¸´à¸›à¸±à¸à¸à¸²à¸—้องถิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552 diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/2.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸­à¸›à¸µ 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552 diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/3.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ 
+จะเà¸à¸´à¸”ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸à¸ˆà¸²à¸à¸à¸¥à¸¸à¹ˆà¸¡à¸”าวสิงโตหรือà¸à¸™à¸”าวตà¸à¹€à¸¥à¹‚อนิคส์ที่นัà¸à¸”าราศาสตร์ทั้งหลายคาดว่าจะมีประมาณ 100-150 ดวงต่อชั่วโมง diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +เมื่อวันที่ 8 มีนา ที่ผ่านมาผมได้ไปงานที่โรงเรียน เหมือนเช่นทุà¸à¸›à¸µà¸•à¸­à¸™à¸à¸¥à¸±à¸šà¹€à¸”ินมา ตามตึà¸à¸¢à¸²à¸§à¹€à¸žà¸·à¹ˆà¸­à¸ˆà¸°à¸à¸¥à¸±à¸šà¸¡à¸²à¸—างประตูด้านเพาะช่าง \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/Albert Einstein, E mc 2.mp3 Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/Albert Einstein, E mc 2.mp3 has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/One Step For Man.mp3 Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/One Step For Man.mp3 has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/The Eagle Has Landed.mp3 Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/The Eagle Has Landed.mp3 has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/DCTDecode.pdf Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/DCTDecode.pdf has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/geology.pdf Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/geology.pdf has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/samplepdf.pdf Binary file 
searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/samplepdf.pdf has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/windjack.pdf Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/windjack.pdf has changed diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/group/4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/group/4.txt Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,1 @@ +,והבוקר נקבע מותה.החולה, תושבת נצרת עילית \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/group/bld.inf --- a/searchengine/cpix/tsrc/cpixunittest/group/bld.inf Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/group/bld.inf Mon Jun 28 10:34:53 2010 +0530 @@ -17,6 +17,13 @@ PRJ_TESTEXPORTS // +// CPix Localization Resources +// + +../../../cpix/data/resource/analyzer.loc /epoc32/winscw/c/Data/cpixunittestcorpus/resource/analyzer.loc +../../../cpix/data/resource/thaidict.sm /epoc32/winscw/c/Data/cpixunittestcorpus/resource/thaidict.sm + +// // Test Definition Files (For ITK IOCapture testing) // @@ -224,18 +231,32 @@ ../data/cpixunittest/whitebox/delta/wrn-complete/startStage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/whitebox/delta/wrn-complete/startStage_exp_out.txt ../data/cpixunittest/whitebox/delta/wrn-complete/recoveredStage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/whitebox/delta/wrn-complete/recoveredStage_exp_out.txt - - ../data/cpixunittest/analysis/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/parsing_exp_out.txt ../data/cpixunittest/analysis/switchParsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/switchParsing_exp_out.txt ../data/cpixunittest/analysis/usage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/usage_exp_out.txt + +../data/cpixunittest/analysis/loc/currentLocale_C_exp_out.txt 
/epoc32/winscw/c/Data/cpixunittest/analysis/loc/currentLocale_C_exp_out.txt +../data/cpixunittest/analysis/loc/en_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/en_exp_out.txt +../data/cpixunittest/analysis/loc/th_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/th_exp_out.txt +../data/cpixunittest/analysis/loc/ch_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/ch_exp_out.txt +../data/cpixunittest/analysis/loc/ko_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/ko_exp_out.txt +../data/cpixunittest/analysis/loc/jp_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/jp_exp_out.txt + ../data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt -../data/cpixunittest/analysis/whitebox/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing_exp_out.txt -../data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt +../data/cpixunittest/analysis/whitebox/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing_exp_out.txt +../data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt ../data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt ../data/cpixunittest/analysis/whitebox/switchAnalyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/switchAnalyzer_exp_out.txt +../data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt ../data/cpixunittest/analysis/whitebox/tokenization_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/tokenization_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/currentLocale_C_exp_out.txt 
/epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/currentLocale_C_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt +../data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt + ../data/cpixunittest/utf8/utf8_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8/utf8_exp_out.txt ../data/cpixunittest/maps/searchMaps_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/maps/searchMaps_exp_out.txt @@ -285,6 +306,12 @@ ../data/cpixunittest/utf8path/harvest_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8path/harvest_exp_out.txt ../data/cpixunittest/utf8path/search_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8path/search_exp_out.txt +../data/cpixunittest/prefixopt/unoptimized_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/prefixopt/unoptimized_exp_out.txt +../data/cpixunittest/prefixopt/optimized_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/prefixopt/optimized_exp_out.txt + + + + // // Corpus files @@ -360,11 +387,45 @@ "../data/cpixunittestcorpus/query/query8.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/query/query8.txt" "../data/cpixunittestcorpus/query/query9.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/query/query9.txt" -"../data/cpixunittestcorpus/pdf/ctutor.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/ctutor.pdf" -"../data/cpixunittestcorpus/pdf/Empty.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/Empty.pdf" -"../data/cpixunittestcorpus/pdf/geology.pdf"
"/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/geology.pdf" -"../data/cpixunittestcorpus/pdf/samplepdf.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/samplepdf.pdf" -"../data/cpixunittestcorpus/pdf/windjack.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/windjack.PDF" +"../data/cpixunittestcorpus/loc/fr/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/1.txt" +"../data/cpixunittestcorpus/loc/fr/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/2.txt" +"../data/cpixunittestcorpus/loc/fr/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/3.txt" +"../data/cpixunittestcorpus/loc/fr/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/4.txt" + +"../data/cpixunittestcorpus/loc/he/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/1.txt" +"../data/cpixunittestcorpus/loc/he/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/2.txt" +"../data/cpixunittestcorpus/loc/he/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/3.txt" +"../data/cpixunittestcorpus/loc/he/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/4.txt" + +"../data/cpixunittestcorpus/loc/th/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/1.txt" +"../data/cpixunittestcorpus/loc/th/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/2.txt" +"../data/cpixunittestcorpus/loc/th/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/3.txt" +"../data/cpixunittestcorpus/loc/th/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/4.txt" + +"../data/cpixunittestcorpus/loc/ch_prc/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/1.txt" +"../data/cpixunittestcorpus/loc/ch_prc/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/2.txt" +"../data/cpixunittestcorpus/loc/ch_prc/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/3.txt" +"../data/cpixunittestcorpus/loc/ch_prc/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/4.txt" + +"../data/cpixunittestcorpus/loc/ch_simple/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/1.txt"
+"../data/cpixunittestcorpus/loc/ch_simple/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/2.txt" +"../data/cpixunittestcorpus/loc/ch_simple/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/3.txt" +"../data/cpixunittestcorpus/loc/ch_simple/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/4.txt" + +"../data/cpixunittestcorpus/loc/ch_hk/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/1.txt" +"../data/cpixunittestcorpus/loc/ch_hk/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/2.txt" +"../data/cpixunittestcorpus/loc/ch_hk/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/3.txt" +"../data/cpixunittestcorpus/loc/ch_hk/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/4.txt" + +"../data/cpixunittestcorpus/loc/ch_tw/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/1.txt" +"../data/cpixunittestcorpus/loc/ch_tw/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/2.txt" +"../data/cpixunittestcorpus/loc/ch_tw/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/3.txt" +"../data/cpixunittestcorpus/loc/ch_tw/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/4.txt" + +"../data/cpixunittestcorpus/loc/jp/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/1.txt" +"../data/cpixunittestcorpus/loc/jp/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/2.txt" +"../data/cpixunittestcorpus/loc/jp/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/3.txt" +"../data/cpixunittestcorpus/loc/jp/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/4.txt" PRJ_TESTMMPFILES cpixunittest.mmp diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/group/cpixunittest.mmp --- a/searchengine/cpix/tsrc/cpixunittest/group/cpixunittest.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/group/cpixunittest.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -71,7 +71,8 @@ SOURCE config.cpp SOURCE clq/uxqry.cpp SOURCE clq/clqry.cpp -SOURCE 
pdftests.cpp querytest.cpp std_log_result.cpp misc.cpp +SOURCE pdftests.cpp querytest.cpp +SOURCE localetestinfos.cpp misc.cpp std_log_result.cpp USERINCLUDE ../../../../../searchsrv_plat/cpix_utility_api/inc USERINCLUDE ../inc @@ -94,6 +95,7 @@ STATICLIBRARY libstemmer.lib STATICLIBRARY libitk.lib STATICLIBRARY libcpixtools.lib +STATICLIBRARY libanalysis.lib // For SPI LIBRARY efsrv.lib diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/inc/config.h --- a/searchengine/cpix/tsrc/cpixunittest/inc/config.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/inc/config.h Mon Jun 28 10:34:53 2010 +0530 @@ -25,9 +25,13 @@ // #ifdef __WINS__ +#define RESOURCE_PATH "c:\\data\\cpixunittestcorpus\\resource\\" #define CORPUS_PATH "c:\\data\\cpixunittestcorpus" +#define LCORPUS_PATH L"c:\\data\\cpixunittestcorpus" #else // __ARMv5__, assume memory card +#define RESOURCE_PATH "e:\\data\\cpixunittestcorpus\\resource\\" #define CORPUS_PATH "e:\\data\\cpixunittestcorpus" +#define LCORPUS_PATH L"e:\\data\\cpixunittestcorpus" #endif #define SMS_TEST_CORPUS_PATH CORPUS_PATH "\\sms\\SMS_corpus.txt" @@ -35,6 +39,9 @@ #define JPG_TEST_CORPUS_PATH CORPUS_PATH "\\jpgs" #define MAPS_TEST_CORPUS_PATH CORPUS_PATH "\\maps" #define MEDIA_TEST_CORPUS_PATH CORPUS_PATH "\\media" +#define LOC_TEST_CORPUS_PATH CORPUS_PATH "\\loc" + +#define LJPG_TEST_CORPUS_PATH LCORPUS_PATH L"\\jpgs" // TODO: Obsolete these #define FILE_TEST_CORPUS_PATH STEM_TEST_CORPUS_PATH @@ -46,6 +53,8 @@ #define CPIX_FILE_IDXDB "\\root\\file" +#define CPIXUNITTEST_DIRECTORY "c:\\data\\cpixunittest" + /** * Zero terminated array of file names */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/inc/localetestinfos.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/inc/localetestinfos.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,51 @@ +#ifndef LOCALETESTINFOS_H_ +#define LOCALETESTINFOS_H_ + +/** + * Information around 
testing various locales. + */ + +struct EnglishLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct FrenchLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct HebrewLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct ThaiLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct ChineseLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct KoreanLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + +struct JapaneseLocale { + static const char* LOCALE; + static const char* FILES[]; + static const wchar_t* QUERIES[]; +}; + + +#endif /* LOCALETESTINFOS_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp --- a/searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -1,24 +1,6 @@ -/* -* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). -* All rights reserved. -* This component and the accompanying materials are made available -* under the terms of "Eclipse Public License v1.0" -* which accompanies this distribution, and is available -* at the URL "http://www.eclipse.org/legal/epl-v10.html". -* -* Initial Contributors: -* Nokia Corporation - initial contribution. 
-* -* Contributors: -* -* Description: -* -*/ - #include #include - #include #include "cpixidxdb.h" @@ -28,215 +10,202 @@ #include "config.h" #include "testutils.h" -#include "std_log_result.h" - // For testing custom analyzer #include "CLucene.h" #include "CLucene\analysis\AnalysisHeader.h" #include "CLucene\util\stringreader.h" -#include "analyzer.h" #include "analyzerexp.h" +#include "customanalyzer.h" + +#include "localetestinfos.h" + +#include "spi/locale.h" +#include "cpixstrtools.h" using namespace Cpt::Lex; using namespace Cpt::Parser; using namespace Cpix::AnalyzerExp; void PrintToken(Cpt::Lex::Token token) { - switch (token.type()) { - case TOKEN_WS: printf("space"); break; - case TOKEN_ID: printf("id"); break; - case TOKEN_LIT: printf("lit"); break; - case TOKEN_STRLIT: printf("str-lit"); break; - case TOKEN_REALLIT: printf("real-lit"); break; - case TOKEN_INTLIT: printf("int-lit"); break; - case TOKEN_LEFT_BRACKET: printf("lbr"); break; - case TOKEN_RIGHT_BRACKET: printf("rbr"); break; - case TOKEN_COMMA: printf("comma"); break; - case TOKEN_PIPE: printf("pipe"); break; - case TOKEN_SWITCH : printf("sw"); break; - case TOKEN_CASE : printf("case"); break; - case TOKEN_DEFAULT : printf("default"); break; - case TOKEN_LEFT_BRACE : printf("lbc"); break; - case TOKEN_RIGHT_BRACE : printf("rbc"); break; - case TOKEN_COLON : printf("cl"); break; - case TOKEN_TERMINATOR : printf("tr"); break; - - default: printf("unknown"); break; - } - printf("('%S')", (token.text()).c_str()); + printf("%S('%S')", token.type(), token.text()); } -void TestTokenization6(Itk::TestMgr * ) +void TestTokenization6(Itk::TestMgr * testMgr) { - char *xml_file = (char*)__FUNCTION__; - assert_failed = 0; - Cpix::AnalyzerExp::Tokenizer tokenizer; + Cpix::AnalyzerExp::Tokenizer tokenizer; Tokens source(tokenizer, L"switch { " L"case '_docuid', '_mimetype': keywords;" L"case '_baseappclass': whitespace>lowercase;" L"default: natural(en); " L"}"); - WhiteSpaceFilter + StdFilter 
tokens(source); - while (tokens) PrintToken(tokens++); - testResultXml(xml_file); + while (tokens) PrintToken(tokens++); } -void TestParsing(Itk::TestMgr* ) +void TestParsing(Itk::TestMgr* mgr) { Cpix::AnalyzerExp::Tokenizer tokenizer; - char *xml_file = (char*)__FUNCTION__; - assert_failed = 0; + Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)"); - WhiteSpaceFilter tokens(source); + StdFilter tokens(source); Lexer lexer(tokens); - - Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) "); - WhiteSpaceFilter tokens2(source2); - Lexer lexer2(tokens2); + + const wchar_t* text = L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) "; Tokens source3(tokenizer, L"foobar(zap, 0, 0.0045, 4, 'a', 9223.031)"); - WhiteSpaceFilter tokens3(source3); + StdFilter tokens3(source3); Lexer lexer3(tokens3); try { auto_ptr invoke = ParseInvokation(lexer); lexer.eatEof(); - printf("Invoke identifier: %S\n", (invoke->id()).c_str()); + printf("Invoke identifier: %S\n", invoke->id()); printf("%d parameters\n", invoke->params().size()); - auto_ptr piping = ParsePiping(lexer2); - lexer2.eatEof(); + auto_ptr piping = ParsePiping(text); printf("piping done.\n"); if (dynamic_cast(&piping->tokenizer())) { - printf("Tokenizer: %S\n", dynamic_cast(piping->tokenizer()).id().c_str()); + printf("Tokenizer: %S\n", dynamic_cast(piping->tokenizer()).id()); } printf("%d filters\n", piping->filters().size()); invoke = ParseInvokation(lexer3); lexer3.eatEof(); - printf("Invoke identifier: %S\n", (invoke->id()).c_str()); + printf("Invoke identifier: %S\n", invoke->id()); printf("%d parameters\n", invoke->params().size()); } catch (ParseException& e) { - assert_failed = 1; printf("ParseException: %S\n", e.wWhat()); } catch (LexException& e) { - assert_failed = 1; printf("LexException: %S\n", e.wWhat()); } - testResultXml(xml_file); } -void TestSwitch(Itk::TestMgr* ) +void TestSwitch(Itk::TestMgr* mgr) { 
Cpix::AnalyzerExp::Tokenizer tokenizer; - char *xml_file = (char*)__FUNCTION__; - assert_failed = 0; - const wchar_t* text; - Tokens source(tokenizer, text = + + const wchar_t* text = L"switch { " L"case '_docuid', '_mimetype': keywords;" L"case '_baseappclass': whitespace>lowercase;" L"default: natural(en); " - L"}"); - WhiteSpaceFilter tokens(source); - Lexer lexer(tokens); + L"}"; try { - auto_ptr sw = ParsePiping(lexer); - lexer.eatEof(); + auto_ptr sw = ParsePiping(text); if (dynamic_cast(&sw->tokenizer())) { const Switch* s = dynamic_cast(&sw->tokenizer()); for (int i = 0; i < s->cases().size(); i++) { const Case* c = s->cases()[i]; printf("case "); - for (int j = 0; j < c->fields().size(); j++) { - printf("%S", (c->fields()[j]).c_str()); + for (int j = 0; j < c->cases().size(); j++) { + printf("%S", c->cases()[j]); } printf(": ...\n"); -// wcout<def().tokenizer().id(); + // wcout<def().tokenizer().id(); + } + printf("default: ...\n");//<def().tokenizer().id()<<"...;"; + } + } catch (ParseException& e) { + // OBS wcout< sw = ParsePiping(text); + if (dynamic_cast(&sw->tokenizer())) { + const ConfigSwitch* s = dynamic_cast(&sw->tokenizer()); + for (int i = 0; i < s->cases().size(); i++) { + const Case* c = s->cases()[i]; + printf("case "); + for (int j = 0; j < c->cases().size(); j++) { + printf("%S", c->cases()[j]); + } + printf(": ...\n"); + // wcout<def().tokenizer().id(); } printf("default: ...\n");//<def().tokenizer().id()<<"...;"; } } catch (ParseException& e) { // OBS wcout<close(); _CLDELETE( stream ); } + printf("\n"); +} + +void TestCustomAnalyzer(Itk::TestMgr * testMgr, const wchar_t* definition) { + TestCustomAnalyzer(testMgr, CustomAnalyzerTestDocs, definition); } void TestCustomAnalyzers(Itk::TestMgr * testMgr) { - char *xml_file = (char*)__FUNCTION__; - assert_failed = 0; TestCustomAnalyzer(testMgr, L"stdtokens"); TestCustomAnalyzer(testMgr, L"whitespace"); TestCustomAnalyzer(testMgr, L"whitespace>lowercase"); @@ -291,14 +265,68 @@ 
TestCustomAnalyzer(testMgr, L"letter>lowercase"); TestCustomAnalyzer(testMgr, L"keyword"); TestCustomAnalyzer(testMgr, L"keyword>lowercase"); - TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)"); - TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)"); - TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')"); +// TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>stem(en)"); // Does not work with NON-ASCII + TestCustomAnalyzer(testMgr, L"letter>lowercase>stop(en)"); + TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'n\xe4in')"); TestCustomAnalyzer(testMgr, L"letter>length(2, 4)"); - testResultXml(xml_file); + TestCustomAnalyzer(testMgr, L"standard>prefixes(1)"); + TestCustomAnalyzer(testMgr, L"standard>prefixes(2)"); + TestCustomAnalyzer(testMgr, L"standard>prefixes(3)"); + TestCustomAnalyzer(testMgr, L"stdtokens>stdfilter>lowercase>thai>stop(en)"); + TestCustomAnalyzer(testMgr, L"cjk>stop(en)"); + TestCustomAnalyzer(testMgr, L"ngram(1)>lowercase>stop(en)"); + TestCustomAnalyzer(testMgr, L"ngram(2)>lowercase>stop(en)"); } -void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field) +void TestTokenizationWithLocales(Itk::TestMgr * testMgr) { + printf("locale=en\n"); + cpix_Result result; + cpix_SetLocale( &result, "en" ); + TestCustomAnalyzer(testMgr, L"natural"); + + printf("locale=th\n"); + cpix_SetLocale( &result, "th" ); + TestCustomAnalyzer(testMgr, L"natural"); + + printf("locale=ko\n"); + cpix_SetLocale( &result, "ko" ); + TestCustomAnalyzer(testMgr, L"natural"); + + printf("locale=zh\n"); + cpix_SetLocale( &result, "zh" ); + TestCustomAnalyzer(testMgr, L"natural"); + + printf("locale=jp\n"); + cpix_SetLocale( &result, "jp" ); + TestCustomAnalyzer(testMgr, L"natural"); + + cpix_SetLocale( &result, cpix_LOCALE_AUTO ); +} + +template +void TestTokenizationWithLocale(Itk::TestMgr * testMgr) { + cpix_Result result; + cpix_SetLocale( &result, T::LOCALE ); +
TestCustomAnalyzer(testMgr, EnglishLocale::FILES, L"natural"); + TestCustomAnalyzer(testMgr, T::FILES, L"natural"); + cpix_SetLocale( &result, cpix_LOCALE_AUTO ); +} + + +template +void AddTokenizationWithLocaleTest(Itk::SuiteTester* suite) { + suite->add(T::LOCALE, + &TestTokenizationWithLocale, + T::LOCALE); +} + +void TestTokenizationWithCurrentLocale(Itk::TestMgr * testMgr) { + cpix_Result result; + cpix_SetLocale( &result, cpix_LOCALE_AUTO ); + TestCustomAnalyzer(testMgr, L"natural"); +} + +void TestAnalyzerWithField(Itk::TestMgr * testMgr, const wchar_t* definition, const wchar_t* field) { using namespace lucene::analysis; using namespace lucene::util; @@ -317,9 +345,7 @@ void TestSwitchAnalyzers(Itk::TestMgr * testMgr) { - char *xml_file = (char*)__FUNCTION__; - assert_failed = 0; - const wchar_t* sw = L"\n" + const wchar_t* sw = L"\n" L"switch {\n" L" case '_docuid': keyword;\n" L" case '_appclass': whitespace>lowercase;\n" @@ -331,23 +357,80 @@ TestAnalyzerWithField(testMgr, sw, L"Title"); TestAnalyzerWithField(testMgr, sw, L"message"); TestAnalyzerWithField(testMgr, sw, L"field"); - testResultXml(xml_file); } +void TestLocaleSwitchAnalyzers(Itk::TestMgr * testMgr) +{ + const wchar_t* sw = L"\n" + L"locale_switch {\n" + L" case 'en': stdtokens>stdfilter>lowercase>stop(en);\n" + L" case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);\n" + L" case 'ca': stdtokens>stdfilter>lowercase>accent;\n" + L" default: stdtokens>stdfilter>lowercase;\n" + L"}"; + cpix_Result result; + printf("locale=en:\n"); + cpix_SetLocale( &result, "en" ); + TestCustomAnalyzer(testMgr, sw); + printf("\n"); + printf("locale=th:\n"); + cpix_SetLocale( &result, "th" ); + TestCustomAnalyzer(testMgr, sw); + printf("\n"); + printf("locale=ca:\n"); + cpix_SetLocale( &result, "ca" ); + TestCustomAnalyzer(testMgr, sw); + printf("\n"); + printf("default locale:\n"); + cpix_SetLocale( &result, "fail" ); + TestCustomAnalyzer(testMgr, sw); + cpix_SetLocale( &result, cpix_LOCALE_AUTO ); +} + + 
+Itk::TesterBase * CreateAnalysisWhiteBoxLocalizationTests() { + using namespace Itk; + + SuiteTester + * tests = new SuiteTester("loc"); + + std::string locale; + locale = "currentlocale_"; + + Cpt::auto_array name( Cpix::Spi::GetLanguageNames()[0].c_str() ); + locale += name.get(); + + tests->add(locale.c_str(), + &TestTokenizationWithCurrentLocale, + locale.c_str()); + + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + AddTokenizationWithLocaleTest(tests); + + return tests; +} Itk::TesterBase * CreateAnalysisWhiteBoxTests() { using namespace Itk; SuiteTester - * analysisTests = new SuiteTester("analysiswhitebox"); + * analysisTests = new SuiteTester("whitebox"); analysisTests->add("analyzer", &TestCustomAnalyzers, "analyzer"); - analysisTests->add("switchanalyzer", + analysisTests->add("switchAnalyzer", &TestSwitchAnalyzers, - "switchanalyzer"); + "switchAnalyzer"); + analysisTests->add("localeSwitchAnalyzer", + &TestLocaleSwitchAnalyzers, + "localeSwitchAnalyzer"); analysisTests->add("tokenization", TestTokenization6, "tokenization"); @@ -357,10 +440,14 @@ analysisTests->add("parsing2", TestSwitch, "parsing2"); + analysisTests->add("parsing3", + TestConfigSwitch, + "parsing3"); analysisTests->add("parsingerrors", TestParsingErrors, "parsingerrors"); - + + analysisTests->add(CreateAnalysisWhiteBoxLocalizationTests()); return analysisTests; } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/cpixunittest/src/localetestinfos.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/cpix/tsrc/cpixunittest/src/localetestinfos.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,140 @@ +/* + * localetestinfos.cpp + * + * Created on: Mar 25, 2010 + * Author: admin + */ + +#include "localetestinfos.h" +#include "config.h" + +const char* EnglishLocale::LOCALE = "en";
+const char* EnglishLocale::FILES[] = { + STEM_TEST_CORPUS_PATH "\\en\\1.txt", + STEM_TEST_CORPUS_PATH "\\en\\2.txt", + STEM_TEST_CORPUS_PATH "\\en\\3.txt", + STEM_TEST_CORPUS_PATH "\\en\\4.txt", + 0 +}; +const wchar_t* EnglishLocale::QUERIES[] = { + L"happy", + L"happiness", + L"happening", + 0 +}; + + +const char* FrenchLocale::LOCALE = "fr"; + +const char* FrenchLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\fr\\1.txt", + LOC_TEST_CORPUS_PATH "\\fr\\2.txt", + LOC_TEST_CORPUS_PATH "\\fr\\3.txt", + LOC_TEST_CORPUS_PATH "\\fr\\4.txt", + 0 +}; + +const wchar_t* FrenchLocale::QUERIES[] = { + L"d'\xe9nergie", + L"\xe9nergie", + L"elle", + 0 +}; + +const char* HebrewLocale::LOCALE = "he"; + +const char* HebrewLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\he\\1.txt", + LOC_TEST_CORPUS_PATH "\\he\\2.txt", + LOC_TEST_CORPUS_PATH "\\he\\3.txt", + LOC_TEST_CORPUS_PATH "\\he\\4.txt", + 0 +}; + +const wchar_t* HebrewLocale::QUERIES[] = { + L"\x05e9\x05e4\x05e2\x05ea", // L"שפעת", + L"\x05e4\x05e2\x05ea", // L"פעת", + L"\x05e9\x05e4\x05e2*", // L"שפע*", + L"\x05e4\x05e8*", //L"פר*", + 0 +}; + +const char* ThaiLocale::LOCALE = "th"; +const char* ThaiLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\th\\1.txt", + LOC_TEST_CORPUS_PATH "\\th\\2.txt", + LOC_TEST_CORPUS_PATH "\\th\\3.txt", + LOC_TEST_CORPUS_PATH "\\th\\4.txt", + 0 +}; +const wchar_t* ThaiLocale::QUERIES[] = { + L"\x0E14\x0E32\x0E27\x0E15\x0E01", // a thai word + L"\x0E21\x0E35", // another thai word + 0 +}; + + +const char* ChineseLocale::LOCALE = "ch"; +const char* ChineseLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\ch_hk\\1.txt", + LOC_TEST_CORPUS_PATH "\\ch_hk\\2.txt", + LOC_TEST_CORPUS_PATH "\\ch_hk\\3.txt", + LOC_TEST_CORPUS_PATH "\\ch_hk\\4.txt", + + LOC_TEST_CORPUS_PATH "\\ch_prc\\1.txt", + LOC_TEST_CORPUS_PATH "\\ch_prc\\2.txt", + LOC_TEST_CORPUS_PATH "\\ch_prc\\3.txt", + LOC_TEST_CORPUS_PATH "\\ch_prc\\4.txt", + + LOC_TEST_CORPUS_PATH "\\ch_simple\\1.txt", + LOC_TEST_CORPUS_PATH "\\ch_simple\\2.txt", + 
LOC_TEST_CORPUS_PATH "\\ch_simple\\3.txt", + LOC_TEST_CORPUS_PATH "\\ch_simple\\4.txt", + + LOC_TEST_CORPUS_PATH "\\ch_tw\\1.txt", + LOC_TEST_CORPUS_PATH "\\ch_tw\\2.txt", + LOC_TEST_CORPUS_PATH "\\ch_tw\\3.txt", + LOC_TEST_CORPUS_PATH "\\ch_tw\\4.txt", + + 0 +}; +const wchar_t* ChineseLocale::QUERIES[] = { + L"\x53f0\x6e7e", // a chinese word + L"\x4e2d\x56fd", // another chinese word + 0 +}; + +const char* KoreanLocale::LOCALE = "ko"; +const char* KoreanLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\ko\\1.txt", + LOC_TEST_CORPUS_PATH "\\ko\\2.txt", + LOC_TEST_CORPUS_PATH "\\ko\\3.txt", + LOC_TEST_CORPUS_PATH "\\ko\\4.txt", + + 0 +}; +const wchar_t* KoreanLocale::QUERIES[] = { + L"\xc2a4\xd1a0\xb9ac", // a korean word + L"\xc778\xbb3c", // another korean word + 0 +}; + +const char* JapaneseLocale::LOCALE = "jp"; +const char* JapaneseLocale::FILES[] = { + LOC_TEST_CORPUS_PATH "\\jp\\1.txt", + LOC_TEST_CORPUS_PATH "\\jp\\2.txt", + LOC_TEST_CORPUS_PATH "\\jp\\3.txt", + LOC_TEST_CORPUS_PATH "\\jp\\4.txt", + + LOC_TEST_CORPUS_PATH "\\jp_old\\1.txt", + LOC_TEST_CORPUS_PATH "\\jp_old\\2.txt", + LOC_TEST_CORPUS_PATH "\\jp_old\\3.txt", + LOC_TEST_CORPUS_PATH "\\jp_old\\4.txt", + + 0 +}; +const wchar_t* JapaneseLocale::QUERIES[] = { + L"\x56fd\x969b", // a japanese word + L"\x65e5\x672c", // another japanese word + 0 +}; diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/cpix/tsrc/perfmetrics/group/perfmetrics.mmp --- a/searchengine/cpix/tsrc/perfmetrics/group/perfmetrics.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/cpix/tsrc/perfmetrics/group/perfmetrics.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -60,6 +60,7 @@ STATICLIBRARY libstemmer.lib STATICLIBRARY libitk.lib STATICLIBRARY libcpixtools.lib +STATICLIBRARY libanalysis.lib // For SPI LIBRARY efsrv.lib diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/cpixtools/inc/public/cpixfstools.h --- a/searchengine/util/cpixtools/inc/public/cpixfstools.h Fri Jun 11 14:43:47 2010 +0300 +++ 
b/searchengine/util/cpixtools/inc/public/cpixfstools.h Mon Jun 28 10:34:53 2010 +0530 @@ -28,14 +28,7 @@ // with EINTR which means that they just have to be retried. Now, even // if OpenC does not support this, the code here is supposed to be // platform independent and must work on a true *NIX (like linux). -//_SP is for single parameter -#define Cpt_EINTR_RETRY_SP(op) while ((op == -1) && (errno == EINTR)) { ; /* NOP */ } - -// A lot of POSIX system calls (open, close, read, write) can fail -// with EINTR which means that they just have to be retried. Now, even -// if OpenC does not support this, the code here is supposed to be -// platform independent and must work on a true *NIX (like linux). -#define Cpt_EINTR_RETRY(res,op) while (((res=op) == -1) && (errno == EINTR)) { ; /* NOP */ } +#define Cpt_EINTR_RETRY(res,op) while (((res=op) == -1) && (errno == EINTR)) { res ++; /* To avoid compiler warning: FIXME */ } // Same as Cpt_EINTR_RETRY, but for cases when the return value is not @@ -167,11 +160,22 @@ off_t filesize(int fileDesc); + /** + * Obtain the size of a directory + * + * @param the path of the directory + */ + off_t dirsize(const char * path); /** * Returns when the file was last modified or 0 if some error occurred. */ time_t filemodified(const char * path); + + /** + * Adds delimiter if needed + */ + std::string appendpath(const char* path, const char* item); /** * Reads line to buffer diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/cpixtools/inc/public/cpixparsetools.h --- a/searchengine/util/cpixtools/inc/public/cpixparsetools.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/cpixtools/inc/public/cpixparsetools.h Mon Jun 28 10:34:53 2010 +0530 @@ -68,23 +68,19 @@ * (e.g. 
"file*.tx?") itself is not supported) */ namespace Lex { + + typedef const wchar_t* token_type_t; - /** - * Basic token types - */ - enum TokenType { - TOKEN_UNKNOWN = 0, - TOKEN_EOF = 1, - TOKEN_WS, - TOKEN_ID, - TOKEN_STRLIT, - TOKEN_INTLIT, - TOKEN_REALLIT, - TOKEN_LIT, - - TOKEN_LAST_RESERVED // 8 - }; + extern token_type_t TOKEN_UNKNOWN; + extern token_type_t TOKEN_EOF; + extern token_type_t TOKEN_WS; + extern token_type_t TOKEN_COMMENT; + extern token_type_t TOKEN_ID; + extern token_type_t TOKEN_STRLIT; + extern token_type_t TOKEN_INTLIT; + extern token_type_t TOKEN_REALLIT; + extern token_type_t TOKEN_LIT; class LexException : public ITxtCtxtExc { public: @@ -106,15 +102,15 @@ */ class Token { public: - Token(int type, const wchar_t* begin, const wchar_t* end); + Token(token_type_t type, const wchar_t* begin, const wchar_t* end); Token(); - int type() const; + const wchar_t* type() const; const wchar_t* begin() const; const wchar_t* end() const; int length() const; std::wstring text() const; private: - int type_; + token_type_t type_; const wchar_t* begin_; const wchar_t* end_; }; @@ -221,17 +217,61 @@ class SymbolTokenizer : public Tokenizer { public: - SymbolTokenizer(int tokenType, const wchar_t* symbol); + SymbolTokenizer(const wchar_t* tokenType, const wchar_t* symbol); virtual void reset(); virtual Token get(); virtual TokenizerState consume(const wchar_t* cursor); private: const wchar_t* begin_; const wchar_t* end_; - int tokenType_; + token_type_t tokenType_; const wchar_t* symbol_; }; - + + /** + * C style line comment, e.g. // comment + */ + class LineCommentTokenizer : public Tokenizer { + public: + LineCommentTokenizer(); + virtual void reset(); + virtual Token get(); + virtual TokenizerState consume(const wchar_t* cursor); + private: + enum State { + READY, + SLASH_CONSUMED, + COMMENT, + FINISHED + }; + State state_; + const wchar_t* begin_; + const wchar_t* end_; + }; + + /** + * C++ style section comments. 
Like the one's surrounding this comment + */ + class SectionCommentTokenizer : public Tokenizer { + public: + SectionCommentTokenizer(); + virtual void reset(); + virtual Token get(); + virtual TokenizerState consume(const wchar_t* cursor); + private: + enum State { + READY, + SLASH_CONSUMED, + COMMENT, + STAR_CONSUMED, + FINISH + }; + State state_; + const wchar_t* begin_; + const wchar_t* end_; + + }; + /** * Tokenizes text by using given tokenizers. Text is consumed * until no tokenizer is in hungry state e.g., all tokenizers @@ -303,6 +343,16 @@ virtual ~TokenIterator(); }; + + class WhitespaceSplitter : public TokenIterator { + public: + WhitespaceSplitter(const wchar_t* text); + virtual operator bool(); + virtual Token operator++(int); + public: + const wchar_t* begin_; + const wchar_t* end_; + }; /** * Uses tokenizer for converting given text into token stream @@ -328,9 +378,9 @@ /** * Filters out all tokens of type TOKEN_WS */ - class WhiteSpaceFilter : public TokenIterator { + class StdFilter : public TokenIterator { public: - WhiteSpaceFilter(TokenIterator& tokens); + StdFilter(TokenIterator& tokens); virtual operator bool(); virtual Token operator++(int); private: @@ -425,7 +475,7 @@ Lexer(Lex::TokenIterator& tokens); // throws ParseException instead of LexException on EOF. 
virtual Lex::Token operator++(int); - Lex::Token eat(int tokenType); + Lex::Token eat(Lex::token_type_t tokenType); void eatEof(); std::wstring eatId(); std::wstring eatString(); @@ -441,7 +491,7 @@ StdLexer(Lex::Tokenizer& tokens, const wchar_t* text); private: Lex::Tokens tokens_; - Lex::WhiteSpaceFilter ws_; + Lex::StdFilter filter_; }; } // Parser diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/cpixtools/src/cpixfstools.cpp --- a/searchengine/util/cpixtools/src/cpixfstools.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/cpixtools/src/cpixfstools.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -52,7 +52,7 @@ size_t i = len-1; - while (1)/*(i >= 0)*/ { + while (i > 0) { char c = child[i]; if (c == '\\' || c == '/') { @@ -63,9 +63,8 @@ } if (i+1 >= FILENAME_MAX - || i+1 >= bufSize) - //|| i < 0) - { + || i+1 >= bufSize + || i == 0) { return -1; } @@ -87,7 +86,10 @@ Cpt_EINTR_RETRY_PTR(d, opendir(path)); if (d) { - Cpt_EINTR_RETRY_SP( closedir(d) ); + int + result; + Cpt_EINTR_RETRY(result, closedir(d)); + rv = true; } @@ -205,7 +207,7 @@ if (getparent(parent, sizeof(parent), path) >= 0) { // make the parent - (void)mkdirs(parent, mod); + mkdirs(parent, mod); } return mkdir(path, mod); @@ -225,8 +227,10 @@ mod)); if (fd != -1) { - - Cpt_EINTR_RETRY_SP( close(fd) ); + int + result; + Cpt_EINTR_RETRY(result, + close(fd)); } return fd == -1 ? -1 : 0; @@ -246,7 +250,10 @@ if (rv) { - Cpt_EINTR_RETRY_SP( close(fd) ); + int + result; + Cpt_EINTR_RETRY(result, + close(fd)); } return rv; @@ -318,8 +325,56 @@ return rv; } + + namespace + { + class DirectorySizeCalculator : public IFileVisitor + { + public: + + DirectorySizeCalculator() + : totalSize_(0) + {} + + virtual bool visitFile(const char * path) + { + totalSize_ += filesize(path); + return true; + } + + virtual DirVisitResult visitDirPre(const char * path) + { + //To avoid compiler warning. 
+ std::string ret = path; + + return IFV_CONTINUE; + } + + virtual bool visitDirPost(const char * path) + { + std::string ret = path; + ret.empty(); + return true; + } + + long totalSize() + { + return totalSize_; + } + + private: + + long totalSize_; + + }; + } - + off_t dirsize(const char* path) + { + DirectorySizeCalculator sizeCalculator; + traverse(path, &sizeCalculator); + return sizeCalculator.totalSize(); + } time_t filemodified(const char * path) { @@ -341,6 +396,17 @@ return rv; } + std::string appendpath(const char* path, const char* item) + { + std::string ret; + ret += path; + if (ret[ret.length()-1] != '\\' && ret[ret.length()-1] != '/') { + ret += DIR_SEPARATOR; + } + ret += item; + return ret; + } + bool fgetline(FILE* file, std::string& line) { std::ostringstream buf; @@ -655,7 +721,10 @@ DIRSentry::~DIRSentry() { - Cpt_EINTR_RETRY_SP( closedir(d_) ); + int + result; + + Cpt_EINTR_RETRY(result, closedir(d_)); } @@ -769,7 +838,11 @@ if (fileDesc_ != NULL && *fileDesc_ != -1) { - Cpt_EINTR_RETRY_SP( close(*fileDesc_) ); + int + result; + + Cpt_EINTR_RETRY(result, + close(*fileDesc_)); } } @@ -790,7 +863,10 @@ { if ( file_ != NULL ) { - Cpt_EINTR_RETRY_SP( fclose(file_) ); + int result; + + Cpt_EINTR_RETRY(result, + fclose(file_)); } } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/cpixtools/src/cpixparsetools.cpp --- a/searchengine/util/cpixtools/src/cpixparsetools.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/cpixtools/src/cpixparsetools.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -27,11 +27,67 @@ #include #include #include +#include "wctype.h" + +namespace { + + std::wstring describeException(std::wstring what, const wchar_t* context, const wchar_t* where, const wchar_t* where2) { + std::wstring line; + int l = 0; + bool found = false; + + for (; ; context++) { + if (context == where) { + line += L"*here*"; + found = true; + if (!where2) break; + } + if (context == where2) { + line += L"*here*"; + break; + } + if (!*context) { + line 
+= L"*here*"; + break; + } else if (*context == '\n' && !found) { + l++; + line = L""; + } else { + line += *context; + } + } + for (; *context && *context != '\n' && *context != '\r'; context++) { + line += *context; + } + + std::wostringstream tmp; + tmp<')id('lowercase')pipe('>')id('stopwords')lbr('(')lit(''a'')comma(',')space(' ')lit(''an'')comma(',')lit(''the)>stem('')id('en') -LexException: Unrecognized syntax: '')' at: "stdtokens>lowercase>stopwords('a', 'an','the)>stem('en*here*')" -id('fas')lit('-324')id('we') -LexException: Unrecognized syntax: '?' at: "fas-324we*here*?`213ff3*21(+" +identifier('stdtokens')pipe('>')identifier('lowercase')pipe('>')identifier('stopwords')left bracket('(')literal(''a'')comma(',')whitespace(' ')literal(''an'')comma(',')literal(''the)>stem('')identifier('en') +LexException: Unrecognized syntax: '')' at: +"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en*here*')" +identifier('fas')literal('-324')identifier('we') +LexException: Unrecognized syntax: '?' 
at: +"fas-324we*here*?`213ff3*21(+" diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization1_exp_out.txt --- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization1_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization1_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,1 +1,1 @@ -id('stdtokens')pipe('>')id('lowercase')pipe('>')id('stopwords')lbr('(')lit(''a'')comma(',')lit(''an'')comma(',')lit(''the'')rbr(')')pipe('>')id('stem')lbr('(')lit(''en'')rbr(')') +identifier('stdtokens')pipe('>')identifier('lowercase')pipe('>')identifier('stopwords')left bracket('(')literal(''a'')comma(',')literal(''an'')comma(',')literal(''the'')right bracket(')')pipe('>')identifier('stem')left bracket('(')literal(''en'')right bracket(')') diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization2_exp_out.txt --- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization2_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization2_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,1 +1,1 @@ -lit(''foo'')int-lit('0')int-lit('1')int-lit('-2')lit(''bar'')int-lit('+234')int-lit('-34') +literal(''foo'')integer('0')integer('1')integer('-2')literal(''bar'')integer('+234')integer('-34') diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization3_exp_out.txt --- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization3_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization3_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,1 +1,1 @@ -lit(''hallo'')real-lit('0.0')real-lit('.0')real-lit('.5')real-lit('-1.0')real-lit('-.05')int-lit('45')lit(''bar'')real-lit('+.123')real-lit('+3.1415') +literal(''hallo'')real number('0.0')real number('.0')real 
number('.5')real number('-1.0')real number('-.05')integer('45')literal(''bar'')real number('+.123')real number('+3.1415') diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization4_exp_out.txt --- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization4_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization4_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,2 +1,2 @@ -lit(''\' '')lit(''\\'')lit(''\a'')lit(''\ +literal(''\' '')literal(''\\'')literal(''\a'')literal(''\ '') diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization5_exp_out.txt --- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization5_exp_out.txt Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization5_exp_out.txt Mon Jun 28 10:34:53 2010 +0530 @@ -1,1 +1,1 @@ -id('fo')unknown('for')id('fore')id('forth')id('ofor')id('oforo')id('i')unknown('if')id('ifdom')id('ifer')id('fif')id('fifi')id('forfi')id('fifor') +identifier('fo')for('for')identifier('fore')identifier('forth')identifier('ofor')identifier('oforo')identifier('i')if('if')identifier('ifdom')identifier('ifer')identifier('fif')identifier('fifi')identifier('forfi')identifier('fifor') diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/group/cpixtoolsunittest.mmp --- a/searchengine/util/tsrc/cpixtoolsunittest/group/cpixtoolsunittest.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/group/cpixtoolsunittest.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -69,6 +69,7 @@ LIBRARY libpthread.lib LIBRARY libm.lib LIBRARY euser.lib +LIBRARY libz.lib // No capabilities needed by this application CAPABILITY ReadUserData WriteUserData diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/src/geotests.cpp --- a/searchengine/util/tsrc/cpixtoolsunittest/src/geotests.cpp 
Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/src/geotests.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -283,7 +283,7 @@ } -void printQNrArea(Itk::TestMgr * , +void printQNrArea(Itk::TestMgr * testMgr, const Cpt::QNr & qnr) { using namespace std; diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/src/memorytest.cpp --- a/searchengine/util/tsrc/cpixtoolsunittest/src/memorytest.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/src/memorytest.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -50,7 +50,7 @@ return zoo_iterator( new poly_zoo_iterator_() ); } -void TestAutoIterator(Itk::TestMgr * ) { +void TestAutoIterator(Itk::TestMgr * testMgr) { zoo_iterator i = ZooIterator(); while (i) { std::cout<<"Animal: "<add("tokenization1", TestTokenization1, "tokenization1"); @@ -223,11 +291,19 @@ parsingTests->add("tokenization5", TestTokenization5, "tokenization5"); + + parsingTests->add("tokenization6", + TestTokenization6, + "tokenization6"); parsingTests->add("syntaxerrors", TestTokenizationErrors, "syntaxerrors"); - + + parsingTests->add("whitespace", + TestWhitespaceSplitter, + "whitespace"); + return parsingTests; } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/cpixtoolsunittest/src/pooltests.cpp --- a/searchengine/util/tsrc/cpixtoolsunittest/src/pooltests.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/cpixtoolsunittest/src/pooltests.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -163,9 +163,9 @@ void ItemsArrayDeleter(void * p) { - /*TestPoolItem + TestPoolItem ** items = reinterpret_cast(p); - */ + delete[] p; } @@ -207,7 +207,7 @@ -void testSingleThreadedUse(Itk::TestMgr * ) +void testSingleThreadedUse(Itk::TestMgr * testMgr) { using namespace Cpt; diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/itk/group/itk.mmp --- a/searchengine/util/tsrc/itk/group/itk.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/itk/group/itk.mmp Mon Jun 28 10:34:53 2010 
+0530 @@ -40,8 +40,8 @@ SYSTEMINCLUDE /epoc32/include OS_LAYER_LIBC_SYSTEMINCLUDE OS_LAYER_STDCPP_SYSTEMINCLUDE +OS_LAYER_GLIB_SYSTEMINCLUDE SYSTEMINCLUDE /epoc32/include/stdapis/stlportv5 -OS_LAYER_GLIB_SYSTEMINCLUDE // libc and euser are always needed when using main() entry point LIBRARY libc.lib @@ -49,6 +49,7 @@ LIBRARY libstdcpp.lib LIBRARY libm.lib LIBRARY euser.lib +LIBRARY libz.lib // No capabilities needed by this application CAPABILITY None diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/itk/src/itkimpl.cpp --- a/searchengine/util/tsrc/itk/src/itkimpl.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/itk/src/itkimpl.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -68,6 +68,8 @@ if (isreadable(inFilePath.c_str())) { + int + res; duplicatedStdInFD_ = dup(STDIN_FILENO); if (duplicatedStdInFD_ == -1) @@ -79,7 +81,7 @@ open(inFilePath.c_str(),O_RDONLY)); if (inFileFD_ == -1) { - Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_)); + Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_)); throw IOCaptureExc(inFilePath.c_str()); } int @@ -87,8 +89,8 @@ STDIN_FILENO); if (newStdIn == -1) { - Cpt_EINTR_RETRY_SP(close(inFileFD_)); - Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_)); + Cpt_EINTR_RETRY(res,close(inFileFD_)); + Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_)); throw IOCaptureExc("Can't dup2(infile,stdin)"); } } @@ -106,8 +108,10 @@ // failures here, but they must not go unnoticed assert(fd != -1); - Cpt_EINTR_RETRY_SP(close(inFileFD_)); - Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_)); + int + res; + Cpt_EINTR_RETRY(res,close(inFileFD_)); + Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_)); } } @@ -281,7 +285,7 @@ } Cpt::FileSentry resFsSentry( resFs ); - ssize_t + size_t firstDifferingLine = -1, currentLine = 1; string diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/itk/src/itkobservers.cpp --- a/searchengine/util/tsrc/itk/src/itkobservers.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/itk/src/itkobservers.cpp Mon Jun 28 
10:34:53 2010 +0530 @@ -44,7 +44,7 @@ /***************************************************************** * TestRunConsole */ - void TestRunConsole::beginRun(TestMgr * , + void TestRunConsole::beginRun(TestMgr * testMgr, size_t /* testCount */, const char * /* baseDirPath */) { @@ -53,14 +53,14 @@ } - void TestRunConsole::endRun(TestMgr * ) throw () + void TestRunConsole::endRun(TestMgr * testMgr) throw () { using namespace std; os_ << endl << "TEST RUN COMPLETED." << endl; } - void TestRunConsole::beginTestCase(TestMgr * , + void TestRunConsole::beginTestCase(TestMgr * testMgr, TesterBase * testerBase) { ++indent_; @@ -754,7 +754,9 @@ void ProgressFsDisplayer::deleteCurFile() { - Cpt_EINTR_RETRY_SP( remove(curFilePath_.c_str()) ); + int + success; + Cpt_EINTR_RETRY(success,remove(curFilePath_.c_str())); } diff -r a5fbfefd615f -r 6547bf8ca13a searchengine/util/tsrc/itkdemo/group/itkdemo.mmp --- a/searchengine/util/tsrc/itkdemo/group/itkdemo.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searchengine/util/tsrc/itkdemo/group/itkdemo.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -61,6 +61,7 @@ LIBRARY libm.lib LIBRARY euser.lib LIBRARY libpthread.lib +LIBRARY libz.lib // No capabilities needed by this application CAPABILITY ReadUserData WriteUserData diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchclient/bwins/cpixsearchclientu.def --- a/searcher/searchclient/bwins/cpixsearchclientu.def Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchclient/bwins/cpixsearchclientu.def Mon Jun 28 10:34:53 2010 +0530 @@ -90,4 +90,8 @@ ?ContinueHouseKeeping@RSearchServerSession@@QAEHXZ @ 89 NONAME ; int RSearchServerSession::ContinueHouseKeeping(void) ?ForceHouseKeeping@RSearchServerSession@@QAEHXZ @ 90 NONAME ; int RSearchServerSession::ForceHouseKeeping(void) ?StopHouseKeeping@RSearchServerSession@@QAEHXZ @ 91 NONAME ; int RSearchServerSession::StopHouseKeeping(void) + ?SetQueryParserL@RSearchServerSubSession@@QAEXH@Z @ 92 NONAME ; void RSearchServerSubSession::SetQueryParserL(int) + 
?SetQueryParserL@CCPixSearcher@@QAEXW4TQueryParser@1@@Z @ 93 NONAME ; void CCPixSearcher::SetQueryParserL(enum CCPixSearcher::TQueryParser) + ?SetQueryParserL@CCPixSearcher@@QAEXAAVMCPixSetQueryParserRequestObserver@@W4TQueryParser@1@@Z @ 94 NONAME ; void CCPixSearcher::SetQueryParserL(class MCPixSetQueryParserRequestObserver &, enum CCPixSearcher::TQueryParser) + ?SetQueryParser@RSearchServerSubSession@@QAEXHAAVTRequestStatus@@@Z @ 95 NONAME ; void RSearchServerSubSession::SetQueryParser(int, class TRequestStatus &) diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchclient/eabi/cpixsearchclientu.def --- a/searcher/searchclient/eabi/cpixsearchclientu.def Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchclient/eabi/cpixsearchclientu.def Mon Jun 28 10:34:53 2010 +0530 @@ -107,4 +107,8 @@ _ZN20RSearchServerSession16StopHouseKeepingEv @ 106 NONAME _ZN20RSearchServerSession17ForceHouseKeepingEv @ 107 NONAME _ZN20RSearchServerSession20ContinueHouseKeepingEv @ 108 NONAME + _ZN13CCPixSearcher15SetQueryParserLENS_12TQueryParserE @ 109 NONAME + _ZN13CCPixSearcher15SetQueryParserLER34MCPixSetQueryParserRequestObserverNS_12TQueryParserE @ 110 NONAME + _ZN23RSearchServerSubSession14SetQueryParserEiR14TRequestStatus @ 111 NONAME + _ZN23RSearchServerSubSession15SetQueryParserLEi @ 112 NONAME diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchclient/inc/searchservercommon.h --- a/searcher/searchclient/inc/searchservercommon.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchclient/inc/searchservercommon.h Mon Jun 28 10:34:53 2010 +0530 @@ -80,7 +80,8 @@ // Set analyzer. This must be latest message. 
Following messages // are not supported by server - ESearchServerSetAnalyzer + ESearchServerSetAnalyzer, + ESearchServerSetQueryParser }; /** diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchclient/src/ccpixsearcher.cpp --- a/searcher/searchclient/src/ccpixsearcher.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchclient/src/ccpixsearcher.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -157,6 +157,26 @@ iSubSession.SetAnalyzer( aAnalyzer, iStatus ); SetActive(); } + + EXPORT_C void CCPixSearcher::SetQueryParserL( TQueryParser aQueryParser ) + { + if ( !iIsDatabaseOpen ) User::Leave(KErrNotReady); + if ( IsActive() ) User::Leave(KErrInUse); + + iSubSession.SetQueryParserL( aQueryParser ); + } + +EXPORT_C void CCPixSearcher::SetQueryParserL( MCPixSetQueryParserRequestObserver& aObserver, TQueryParser aQueryParser ) + { + if ( !iIsDatabaseOpen ) User::Leave(KErrNotReady); + if ( IsActive() ) User::Leave(KErrInUse); + + iObserver.iSetQueryParser = &aObserver; + iState = EStateSetQueryParser; + iSubSession.SetQueryParser( aQueryParser, iStatus ); + SetActive(); + } + // CCPixSearcher::FormQueryString() // Suport method for SearchL-methods @@ -288,6 +308,12 @@ } break; + case EStateSetQueryParser: + if ( observer.iSetQueryParser ) { + observer.iSetQueryParser->HandleSetQueryParserResultL( iStatus.Int() ); + } + break; + case EStateSearch: delete iQueryString; iQueryString = NULL; // cleanup diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchclient/src/rsearchserversession.cpp --- a/searcher/searchclient/src/rsearchserversession.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchclient/src/rsearchserversession.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -192,6 +192,24 @@ SendReceive(ESearchServerSetAnalyzer, args, aStatus); } +EXPORT_C void RSearchServerSubSession::SetQueryParserL(TInt aQueryParser) + { + TIpcArgs args(aQueryParser); + + // This call completes immediately, however the server will not + // complete the request until later, so don't pass any local + // 
descriptors as they will be out of scope by the time the server + // attempts to read or write + User::LeaveIfError( SendReceive(ESearchServerSetQueryParser, args) ); + } + +EXPORT_C void RSearchServerSubSession::SetQueryParser(TInt aQueryParser, TRequestStatus& aStatus) + { + TIpcArgs args(aQueryParser); + + SendReceive(ESearchServerSetQueryParser, args, aStatus); + } + // RSearchServerSubSession::Search() EXPORT_C void RSearchServerSubSession::SearchL(const TDesC& aSearchTerms) diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/group/searchserver.mmp --- a/searcher/searchserver/group/searchserver.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/group/searchserver.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -52,7 +52,7 @@ SOURCE CHeartBeatTimer.cpp SOURCE CCPixAsyncronizer.cpp SOURCE CHouseKeepingHandler.cpp -SOURCE CLogPlayerRecorder.cpp +SOURCE CLogPlayerRecorder.cpp CCPixAbstractSearcher.cpp LIBRARY euser.lib LIBRARY efsrv.lib @@ -72,11 +72,13 @@ STATICLIBRARY libstemmer.lib STATICLIBRARY libclucene.lib LIBRARY libpthread.lib +STATICLIBRARY libanalysis.lib // For SPI LIBRARY exiflib.lib LIBRARY MetaDataUtility.lib LIBRARY charconv.lib +LIBRARY lbs.lib // Logging LIBRARY flogger.lib diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/inc/CCPixAbstractSearcher.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searcher/searchserver/inc/CCPixAbstractSearcher.h Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,103 @@ +/* + * CCPixAbstractSearcher.h + * + * Created on: Oct 8, 2009 + * Author: admin + */ + +#ifndef CCPIXABSTRACTSEARCHER_H_ +#define CCPIXABSTRACTSEARCHER_H_ + +#include + +#include "cpixmaindefs.h" +#include "cpixdoc.h" + +// FORWARD DECLARATIONS +class CSearchDocument; +class CCPixAsyncronizer; +class MCPixAsyncronizerObserver; + +_LIT(KCpixDefaultSearchField, CPIX_DEFAULT_FIELD); + +class CCPixAbstractSearcher : public CBase + { +public: + + enum TQueryParser { + EDatabaseQueryParser = 0, + EIncrementalQueryParser = 1 + }; + + 
+public: + + virtual ~CCPixAbstractSearcher(); + + /** + * Cancel any incomplete asyncronous operation + * @param aMessage RMessage2 of CancelAll request + */ + virtual void CancelAll(const RMessage2& aMessage) = 0; + + /** + * Search given terms from the default field. + * @param aSearchTerms Terms to look for. + * @param aObserver Observing object for this asyncronous call + * @param aMessage The requesting message + * @return ETrue if search was commited, EFalse if it was stop word for example and search was not commited + */ + virtual TBool SearchL(const TDesC& aSearchTerms, MCPixAsyncronizerObserver* aObserver, const RMessage2& aMessage) = 0; + + /** + * Complete previous call to SearchL + * @return Count of result documents. + */ + virtual TInt SearchCompleteL() = 0; + + /** + * Gets document from the current search results. + * @param aObserver Observing object for this asyncronous call + * @param aMessage The requesting message + * @parma aIndex index of the requested document + */ + virtual void GetDocumentL(TInt aIndex, MCPixAsyncronizerObserver* aObserver, const RMessage2& aMessage) = 0; + + /** + * Complete the previous GetDocumentL + * @return Document. Ownership is transferred to the caller of this function. + */ + virtual CSearchDocument* GetDocumentCompleteL() = 0; + + /** + * Creates new database (and destroys existing) if path is given. Otherwise, opens existing database. + * @param aDefaultSearchField Default field to which query results are looked from. + * @param aBaseAppClass Application class of this database handle. + * Defines which database this handle connects to. + */ + virtual void OpenDatabaseL(const TDesC& aSearchableId, const TDesC& aDefaultSearchField = KCpixDefaultSearchField) = 0; + + virtual void SetQueryParserL( TInt aQueryParser ) = 0; + + /** + * IsOpen + * @returns ETrue if the database is currently open + */ + virtual TBool IsOpen() = 0; + + /** + * Sets the analyzer for this searcher. 
+ * + * @param aAnalyzer analyzer definition string. See analyzer definition + * syntax in the documentation + */ + virtual void SetAnalyzerL(const TDesC& aAnalyzer) = 0; + +public: + + static CSearchDocument* ConvertDocumentL( cpix_Document* aDocument ); + + }; + + +#endif /* CCPIXABSTRACTSEARCHER_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/inc/ccpixsearch.h --- a/searcher/searchserver/inc/ccpixsearch.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/inc/ccpixsearch.h Mon Jun 28 10:34:53 2010 +0530 @@ -19,6 +19,8 @@ #define CCPIXSEARCH_H_ #include +#include "CCPixAbstractSearcher.h" + #include #include #include @@ -32,12 +34,11 @@ class CCPixAsyncronizer; class MCPixAsyncronizerObserver; -_LIT(KCpixDefaultSearchField, CPIX_DEFAULT_FIELD); /** * Symbian C++ wrapper for OpenC CPixSearch interface. */ -class CCPixSearch : public CBase +class CCPixSearch : public CCPixAbstractSearcher { public: @@ -115,7 +116,11 @@ */ TBool IsOpen(); - void SetAnalyzerL(const TDesC& aAnalyzer); + void SetAnalyzerL(const TDesC& aAnalyzer); + + void SetQueryParserL(TInt aQueryParser); + + void RefreshQueryParserL(); static void InitializeL(); @@ -178,6 +183,7 @@ /** * Query parser. */ + TQueryParser iQueryParserType; cpix_QueryParser* iQueryParser; /** diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/inc/csearchserversubsession.h --- a/searcher/searchserver/inc/csearchserversubsession.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/inc/csearchserversubsession.h Mon Jun 28 10:34:53 2010 +0530 @@ -23,7 +23,7 @@ // FORWARD DECLARATIONS class CSearchServerSession; class CCPixIdxDb; -class CCPixSearch; +class CCPixAbstractSearcher; class CSearchDocument; class CSearchServerSubSession : public CObject, public MCPixAsyncronizerObserver @@ -35,6 +35,20 @@ public: /** + * OpenSearchableL + * Opens CPixSearcher, if aSearchableId identifies search domain, + * or CPixDiscoverer, if aSearchableId identifies discovery + * service. 
+ * + * @param aSearchableId either search domain or discovery service + * @param aDefaultfield in case aSearchableId defines search domain, + * this field is used as searchers default id. + * In case discoverer is opened, this parameter is + * ignored + */ + void OpenSearcherL(const TDesC& aSearchableId, const TDesC& aDefaultField); + + /** * OpenDatabaseL. * Opens database * @param aMessage Message from client. @@ -47,6 +61,12 @@ * @param aMessage Message from client. */ void SetAnalyzerL(const RMessage2& aMessage); + + /** + * SetQueryParserL. + * TODO + */ + void SetQueryParserL(const RMessage2& aMessage); /** * SearchL. @@ -157,7 +177,7 @@ private: CCPixIdxDb* iIndexDb; - CCPixSearch* iSearchDb; + CCPixAbstractSearcher* iSearchDb; CSearchDocument* iNextDocument; CSearchServerSession* iSession; }; diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/CCPixAbstractSearcher.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searcher/searchserver/src/CCPixAbstractSearcher.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -0,0 +1,101 @@ +/* + * CCPixAbstractSearcher.cpp + * + * Created on: Oct 8, 2009 + * Author: admin + */ +#include "CCpixAbstractSearcher.h" + +#include "Common.h" +#include "SearchServerHelper.h" +#include "CSearchDocument.h" + +CCPixAbstractSearcher::~CCPixAbstractSearcher() {} + + +namespace { + +/** + * cpix_DocFieldEnum destroyer for TCleanupItem + * @param aCpixDocFieldEnum CPix document + */ +void CpixDocFieldEnumDestroyer(TAny* aCpixDocFieldEnum) + { + cpix_DocFieldEnum_destroy( static_cast( aCpixDocFieldEnum ) ); + } + +} // namespace + +CSearchDocument* CCPixAbstractSearcher::ConvertDocumentL( cpix_Document* aDocument ) + { + // Read first the system fields that are passed as constructor parameters + // + const wchar_t* documentId = cpix_Document_getFieldValue( aDocument, LCPIX_DOCUID_FIELD); + SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed); + + TPtrC documentIdPtr(KNullDesC); + if (documentId) + 
documentIdPtr.Set(reinterpret_cast(documentId)); + + const wchar_t* documentAppClass = cpix_Document_getFieldValue(aDocument, LCPIX_APPCLASS_FIELD); + SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed); + + TPtrC documentAppClassPtr(KNullDesC); + if (documentAppClass) + documentAppClassPtr.Set(reinterpret_cast(documentAppClass)); + + const wchar_t* documentExcerpt = cpix_Document_getFieldValue(aDocument, LCPIX_EXCERPT_FIELD); + SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed); + + TPtrC documentExcerptPtr(KNullDesC); + if (documentExcerpt) + documentExcerptPtr.Set(reinterpret_cast(documentExcerpt)); + + // Setup the document + // + + CSearchDocument* document = CSearchDocument::NewLC(documentIdPtr, documentAppClassPtr, documentExcerptPtr); + + // Enumerate the field of cpix_Document and add each of them + // into the CSearchDocument object. + // + + cpix_DocFieldEnum* docFieldEnum = cpix_Document_fields(aDocument); + SearchServerHelper::CheckCpixErrorL(aDocument, KErrDocumentAccessFailed); + + CleanupStack::PushL( TCleanupItem(CpixDocFieldEnumDestroyer, docFieldEnum) ); + + cpix_Field field; + while (cpix_DocFieldEnum_hasMore(docFieldEnum)) + { + cpix_DocFieldEnum_next(docFieldEnum, &field); + SearchServerHelper::CheckCpixErrorL(docFieldEnum, KErrDatabaseQueryFailed); + + const wchar_t* name = cpix_Field_name(&field); + SearchServerHelper::CheckCpixErrorL(&field, KErrDatabaseQueryFailed); + + TPtrC namePtr( reinterpret_cast( name ) ); + if ( namePtr == TPtrC( (TUint16*)LCPIX_DOCUID_FIELD ) + || namePtr == TPtrC( (TUint16*)LCPIX_APPCLASS_FIELD ) + || namePtr == TPtrC( (TUint16*)LCPIX_EXCERPT_FIELD ) ) + { + continue; // These fields have already been added + } + + const wchar_t* value = cpix_Field_stringValue(&field); + SearchServerHelper::CheckCpixErrorL(&field, KErrDatabaseQueryFailed); + + TPtrC stringvalue( reinterpret_cast( value ) ); + + // NOTE: Also system fields will be iterated. 
Because + // the field name is not checked, all _appclass, + // _excerpt etc. fields will be overwritten. + document->AddFieldL(namePtr, stringvalue); + } + + CleanupStack::PopAndDestroy(docFieldEnum); + + CleanupStack::Pop(document); + + return document; + } diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/ccpixidxdb.cpp --- a/searcher/searchserver/src/ccpixidxdb.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/src/ccpixidxdb.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -143,6 +143,9 @@ void CCPixIdxDb::InitializeL() { + + const char* KCPixResourceDirectory = "z:\\resource\\cpix"; // FIXME + #ifdef CPIX_LOGGING_ENABLED _LIT(KCPixLogDirectory, "c:\\logs\\CPix\\OpenC\\"); const char* CPIX_LOG_FILE = "c:\\logs\\CPix\\OpenC\\libcpix"; @@ -186,6 +189,11 @@ SearchServerHelper::CheckCpixErrorL(initParams, KErrCPixInitializationFailed); + cpix_InitParams_setResourceDir( initParams, + KCPixResourceDirectory ); + SearchServerHelper::CheckCpixErrorL(initParams, + KErrCPixInitializationFailed); + cpix_InitParams_setMaxIdleSec(initParams, IDXDB_MAXIDLE_SEC); SearchServerHelper::CheckCpixErrorL(initParams, @@ -478,6 +486,7 @@ if (!doc) { SearchServerHelper::LogErrorL(*result.err_); + cpix_ClearError(doc); User::Leave(KErrCannotCreateDocument); } // document created, push to cleanup stack. 
diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/ccpixsearch.cpp --- a/searcher/searchserver/src/ccpixsearch.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/src/ccpixsearch.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -27,18 +27,7 @@ #endif -namespace { -/** - * cpix_DocFieldEnum destroyer for TCleanupItem - * @param aCpixDocFieldEnum CPix document - */ -void CpixDocFieldEnumDestroyer(TAny* aCpixDocFieldEnum) - { - cpix_DocFieldEnum_destroy( static_cast( aCpixDocFieldEnum ) ); - } - -} // namespace CCPixSearch* CCPixSearch::NewL() { @@ -56,7 +45,9 @@ } CCPixSearch::CCPixSearch() - : iPendingTask(EPendingTaskNone) + : iQueryParserType(EIncrementalQueryParser), + iPendingTask(EPendingTaskNone) + { } @@ -238,7 +229,9 @@ cpix_Hits_asyncDocResults(iHits, iPendingJobId); SearchServerHelper::CheckCpixErrorL(iHits, KErrDocumentAccessFailed); - + + return ConvertDocumentL( &iCurrentCpixDocument ); +#if 0 // TODO XXX TIM const wchar_t* documentId = cpix_Document_getFieldValue(&iCurrentCpixDocument, LCPIX_DOCUID_FIELD); SearchServerHelper::CheckCpixErrorL(&iCurrentCpixDocument, KErrDatabaseQueryFailed); @@ -300,6 +293,7 @@ OstTraceFunctionExit0( CCPIXSEARCH_GETDOCUMENTCOMPLETEL_EXIT ); return document; +#endif // 0 } void CCPixSearch::SetAnalyzerL(const TDesC& aAnalyzer) @@ -321,15 +315,43 @@ iAnalyzer = cpix_Analyzer_create(&result, cAnalyzer); SearchServerHelper::CheckCpixErrorL(&result, KErrCannotCreateAnalyzer); - CleanupStack::PopAndDestroy( analyzer ); + CleanupStack::PopAndDestroy( analyzer ); + + RefreshQueryParserL(); + } + +void CCPixSearch::SetQueryParserL(TInt aQueryParser) + { + iQueryParserType = TQueryParser(aQueryParser); + RefreshQueryParserL(); + } - iQueryParser = - cpix_QueryParser_create(&result, - reinterpret_cast(iDefaultSearchFieldZ->Des().PtrZ()), - iAnalyzer); +void CCPixSearch::RefreshQueryParserL() + { + cpix_QueryParser_destroy( iQueryParser ); + iQueryParser = NULL; + cpix_Result result; + + if ( iQueryParserType == 
EDatabaseQueryParser ) + { + iQueryParser = + cpix_QueryParser_create( &result, + reinterpret_cast( + iDefaultSearchFieldZ->Des().PtrZ()), + iAnalyzer ); + } + else if ( iQueryParserType == EIncrementalQueryParser ) + { + iQueryParser = + cpix_CreatePrefixQueryParser( &result, + reinterpret_cast( + iDefaultSearchFieldZ->Des().PtrZ()) ); + + } SearchServerHelper::CheckCpixErrorL(&result, KErrCannotCreateQueryParser); } + void CCPixSearch::OpenDatabaseL(const TDesC& aBaseAppClass, const TDesC& aDefaultSearchField) { // Release data associated with old database diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/csearchserver.cpp --- a/searcher/searchserver/src/csearchserver.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/src/csearchserver.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -51,7 +51,7 @@ ESearchServerAdd, // Indexing related messages ESearchServerCancelAll, // Cancellation ESearchServerDatabaseDefine, // Database define - ESearchServerSetAnalyzer+1 // Unsupported messages + ESearchServerSetQueryParser+1 // Unsupported messages }; // iElementsIndex of TPolicy diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/csearchserversession.cpp --- a/searcher/searchserver/src/csearchserversession.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/src/csearchserversession.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -207,6 +207,12 @@ RECORDED_EXECUTION_END("setAnalyzer") break; + case ESearchServerSetQueryParser: + RECORDED_EXECUTION_BEGIN + subsession->SetQueryParserL(aMessage); + RECORDED_EXECUTION_END("setQueryParser") + break; + case ESearchServerSearch: RECORDED_EXECUTION_BEGIN subsession->SearchL(aMessage); diff -r a5fbfefd615f -r 6547bf8ca13a searcher/searchserver/src/csearchserversubsession.cpp --- a/searcher/searchserver/src/csearchserversubsession.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/searcher/searchserver/src/csearchserversubsession.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -83,7 +83,7 @@ // CSearchServerSession::CancelAll() 
void CSearchServerSubSession::CancelAll(const RMessage2& aMessage) { - if (iSearchDb->IsOpen()) + if (iSearchDb && iSearchDb->IsOpen()) { // Cancel searching iSearchDb->CancelAll(aMessage); @@ -102,6 +102,15 @@ TRAP_IGNORE( LOG_PLAYER_RECORD( CLogPlayerRecorder::LogCancelL( reinterpret_cast( this ) ) ) ); } +void CSearchServerSubSession::OpenSearcherL(const TDesC& aSearchableId, const TDesC& aDefaultField) + { + delete iSearchDb; + iSearchDb = NULL; + + iSearchDb = CCPixSearch::NewL(); + iSearchDb->OpenDatabaseL( aSearchableId, aDefaultField ); + + } void CSearchServerSubSession::OpenDatabaseL(const RMessage2& aMessage) { HBufC* baseAppClass = HBufC::NewLC(aMessage.GetDesLength(0)); @@ -122,7 +131,7 @@ } else { - iSearchDb->OpenDatabaseL(*baseAppClass, *defaultSearchField); + OpenSearcherL( *baseAppClass, *defaultSearchField ); } CleanupStack::PopAndDestroy(defaultSearchField); @@ -134,7 +143,7 @@ void CSearchServerSubSession::SetAnalyzerL(const RMessage2& aMessage) { - if (!iSearchDb->IsOpen() && !iIndexDb->IsOpen()) + if (!(iSearchDb && iSearchDb->IsOpen()) && !iIndexDb->IsOpen()) { iSession->PanicClient(aMessage, EDatabaseNotOpen); return; @@ -144,7 +153,7 @@ TPtr analyzerPtr = analyzer->Des(); aMessage.ReadL(0, analyzerPtr); - if (iSearchDb->IsOpen()) { + if (iSearchDb && iSearchDb->IsOpen()) { iSearchDb->SetAnalyzerL( *analyzer ); } if (iIndexDb->IsOpen()) { @@ -156,13 +165,30 @@ aMessage.Complete(KErrNone); } +void CSearchServerSubSession::SetQueryParserL(const RMessage2& aMessage) + { + if (!(iSearchDb && iSearchDb->IsOpen())) + { + iSession->PanicClient(aMessage, EDatabaseNotOpen); + return; + } + + TInt queryParser = aMessage.Int0(); + + iSearchDb->SetQueryParserL( queryParser ); + + // Complete the request + aMessage.Complete(KErrNone); + } + + void CSearchServerSubSession::SearchL(const RMessage2& aMessage) { OstTraceFunctionEntry0( CSEARCHSERVERSUBSESSION_SEARCHL_ENTRY ); PERFORMANCE_LOG_START("CSearchServerSubSession::SearchL"); // Sanity check - if 
(!iSearchDb->IsOpen()) + if (!(iSearchDb && iSearchDb->IsOpen())) { iSession->PanicClient(aMessage, EDatabaseNotOpen); OstTraceFunctionExit0( CSEARCHSERVERSUBSESSION_SEARCHL_EXIT ); @@ -252,7 +278,7 @@ PERFORMANCE_LOG_START("CSearchServerSubSession::GetDocumentObjectL"); // Sanity check - if (!iSearchDb->IsOpen()) + if ( !iSearchDb || !iSearchDb->IsOpen() ) { iSession->PanicClient(aMessage, EDatabaseNotOpen); OstTraceFunctionExit0( CSEARCHSERVERSUBSESSION_GETDOCUMENTOBJECTL_EXIT ); diff -r a5fbfefd615f -r 6547bf8ca13a searchsrv_plat/cpix_framework_api/inc/ccpixsearcher.h --- a/searchsrv_plat/cpix_framework_api/inc/ccpixsearcher.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchsrv_plat/cpix_framework_api/inc/ccpixsearcher.h Mon Jun 28 10:34:53 2010 +0530 @@ -33,7 +33,7 @@ class MCPixSearchRequestObserver; class MCPixNextDocumentRequestObserver; class MCPixSetAnalyzerRequestObserver; - +class MCPixSetQueryParserRequestObserver; // CLASS DECLARATION /** @@ -46,6 +46,32 @@ */ class CCPixSearcher : public CActive { + public: + + enum TQueryParser { + /** + * Database query parser provides advanced syntax support + * for creating complex and powerful queries. This query parser + * is intended for accessing the CPix databases. It is not + * localized and it should not be used to form queries + * directly from user input. It may simply fail with some + * locales. + */ + EDatabaseQueryParser = 0, + + /** + * Query parser aimed at incremental queries provided directly + * by the user. This query parser is localized and should + * work properly for all locales. The language accepted + * by this parser is always the language specified by + * the current locale. It may behave internally somewhat + * differently depending on the language, but as a rule it + * should always provide meaningful results for direct + * user input. + */ + EIncrementalQueryParser = 1 + }; + public: // Constructors and destructors /** @@ -122,7 +148,26 @@ /** * SetAnalyzer.
Asynchronous version + */ - IMPORT_C void SetAnalyzerL( MCPixSetAnalyzerRequestObserver& aObserver, const TDesC& aAnalyzer ); + IMPORT_C void SetAnalyzerL( MCPixSetAnalyzerRequestObserver& aObserver, const TDesC& aAnalyzer ); + + /** + * SetQueryParser. Synchronous version + * + * The set QueryParser defines the query syntax used for searching. Two + * different query parsers are supported for two main use cases: one is + * powerful access to the database for documents, and the other is + * incremental querying with input provided directly by the user. + * + * @note SetQueryParserL MUST NOT be used when searching using discovery services + */ + IMPORT_C void SetQueryParserL( TQueryParser aQueryParser ); + + /** + * SetQueryParser. Asynchronous version + * + * @note Query parser cannot be set for discovery service + */ + IMPORT_C void SetQueryParserL( MCPixSetQueryParserRequestObserver& aObserver, TQueryParser aQueryParser ); /** * SearchL. Synchronous version. @@ -242,7 +287,8 @@ EStateOpenDatabase, EStateSearch, EStateGetDocument, - EStateSetAnalyzer + EStateSetAnalyzer, + EStateSetQueryParser }; union TObserver @@ -252,6 +298,7 @@ MCPixSearchRequestObserver* iSearch; MCPixNextDocumentRequestObserver* iNextDocument; MCPixSetAnalyzerRequestObserver* iSetAnalyzer; + MCPixSetQueryParserRequestObserver* iSetQueryParser; }; private: diff -r a5fbfefd615f -r 6547bf8ca13a searchsrv_plat/cpix_framework_api/inc/mcpixdatabaseobserver.h --- a/searchsrv_plat/cpix_framework_api/inc/mcpixdatabaseobserver.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchsrv_plat/cpix_framework_api/inc/mcpixdatabaseobserver.h Mon Jun 28 10:34:53 2010 +0530 @@ -64,5 +64,25 @@ virtual void HandleSetAnalyzerResultL( TInt aError ) = 0; }; +/** + * @brief Observes completions of CCPixSearcher's asynchronous SetQueryParserL requests + * @ingroup ClientAPI + * + * Mixin class. + * Observer to handle set query parser operation completion codes.
+ * + * Link against: CPixSearchClient.lib + */ +class MCPixSetQueryParserRequestObserver + { + public: + + /** + * Called when an asynchronous SetQueryParserL request has completed. + * @param aError Completion code of an asynchronous request. KErrNone if operation + * was successful, otherwise system wide error code. + */ + virtual void HandleSetQueryParserResultL( TInt aError ) = 0; + }; #endif /* MCPIXDATABASEOBSERVER_H_ */ diff -r a5fbfefd615f -r 6547bf8ca13a searchsrv_plat/cpix_framework_api/inc/rsearchserversession.h --- a/searchsrv_plat/cpix_framework_api/inc/rsearchserversession.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchsrv_plat/cpix_framework_api/inc/rsearchserversession.h Mon Jun 28 10:34:53 2010 +0530 @@ -165,6 +165,10 @@ IMPORT_C void SetAnalyzerL(const TDesC& aAnalyzer); IMPORT_C void SetAnalyzer(const TDesC& aAnalyzer, TRequestStatus& aStatus); + + IMPORT_C void SetQueryParserL(TInt aQueryParser); + IMPORT_C void SetQueryParser(TInt aQueryParser, TRequestStatus& aStatus); + /** * Search. * Issues a request for a search diff -r a5fbfefd615f -r 6547bf8ca13a searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h --- a/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Mon Jun 28 10:34:53 2010 +0530 @@ -196,6 +196,7 @@ */ #define DEFAULT_CPIX_DIR "c:\\Data\\" #define DEFAULT_CLUCENE_LOCK_DIR "c:\\system\\temp" +#define DEFAULT_RESOURCE_DIR "c:\\Data\\" @@ -223,18 +224,40 @@ *********************************************** */ -#define CPIX_PIPE L">" -#define CPIX_SWITCH L"switch" -#define CPIX_CASE L"case" -#define CPIX_DEFAULT L"default" +#define CPIX_PIPE L">" +#define CPIX_SWITCH L"switch" +#define CPIX_LOCALE_SWITCH L"locale_switch" +#define CPIX_CONFIG_SWITCH L"config_switch" +#define CPIX_CASE L"case" +#define CPIX_DEFAULT L"default" + +// These can be given as parameters for the 'natural' analyzer +#define CPIX_ID_INDEXING L"indexing" +#define CPIX_ID_QUERY L"query" +#define CPIX_ID_PREFIX L"prefix" + +#define
CPIX_ANALYZER_DEFAULT L"natural" + +// Default indexing analyzer +//#define CPIX_ANALYZER_DEFAULT_QUERY L"natural(indexing)" + +// Default term query analyzer +#define CPIX_ANALYZER_DEFAULT_QUERY L"natural(query)" + +// Default prefix analyzer +#define CPIX_ANALYZER_DEFAULT_PREFIX L"natural(prefix)" #define CPIX_ANALYZER_STANDARD L"standard" -#define CPIX_ANALYZER_DEFAULT L"standard" +//#define CPIX_ANALYZER_DEFAULT L"standard" #define CPIX_TOKENIZER_STANDARD L"stdtokens" #define CPIX_TOKENIZER_WHITESPACE L"whitespace" #define CPIX_TOKENIZER_LETTER L"letter" #define CPIX_TOKENIZER_KEYWORD L"keyword" +#define CPIX_TOKENIZER_CJK L"cjk" +#define CPIX_TOKENIZER_NGRAM L"ngram" +#define CPIX_TOKENIZER_KOREAN L"korean" +#define CPIX_TOKENIZER_KOREAN_QUERY L"koreanquery" #define CPIX_FILTER_STANDARD L"stdfilter" #define CPIX_FILTER_LOWERCASE L"lowercase" @@ -243,11 +266,24 @@ #define CPIX_FILTER_STEM L"stem" #define CPIX_FILTER_LENGTH L"length" #define CPIX_FILTER_PREFIXES L"prefixes" +#define CPIX_FILTER_THAI L"thai" +#define CPIX_FILTER_PREFIX L"prefix" +#define CPIX_FILTER_ELISION L"elision" -#define CPIX_WLANG_EN L"en" -#define CPIX_WLANG_FI L"fi" -#define CPIX_WLANG_HU L"hu" -#define CPIX_WLANG_RU L"ru" +#define CPIX_WLANG_EN L"en" // english +#define CPIX_WLANG_FI L"fi" // finnish +#define CPIX_WLANG_HU L"hu" // hungarian +#define CPIX_WLANG_RU L"ru" // russian +#define CPIX_WLANG_AR L"ar" // arabic +#define CPIX_WLANG_CN L"cn" // chinese +#define CPIX_WLANG_CS L"cs" // czech +#define CPIX_WLANG_DE L"de" // deutsch - german +#define CPIX_WLANG_EL L"el" // greek +#define CPIX_WLANG_FA L"fa" // farsi - persian +#define CPIX_WLANG_FR L"fr" // french +#define CPIX_WLANG_HE L"he" // hebrew +#define CPIX_WLANG_NL L"nl" // dutch +#define CPIX_WLANG_BN L"bn" // bangla - bengali #define MAX_EXCERPT_LENGTH 125 //maximum exceprt length diff -r a5fbfefd615f -r 6547bf8ca13a searchsrv_plat/cpix_utility_api/inc/messageharvesterdefs.h ---
a/searchsrv_plat/cpix_utility_api/inc/messageharvesterdefs.h Fri Jun 11 14:43:47 2010 +0300 +++ b/searchsrv_plat/cpix_utility_api/inc/messageharvesterdefs.h Mon Jun 28 10:34:53 2010 +0530 @@ -40,6 +40,7 @@ #define SUBJECT_FIELD "Subject" #define FOLDER_FIELD "Folder" #define BODY_FIELD "Body" +#define ATTACHMENT_FIELD "Attachment" #define LTO_FIELD L"To" #define LCC_FIELD L"Cc" #define LBCC_FIELD L"Bcc" @@ -47,5 +48,6 @@ #define LSUBJECT_FIELD L"Subject" #define LFOLDER_FIELD L"Folder" #define LBODY_FIELD L"Body" +#define LATTACHMENT_FIELD L"Attachment" #endif /*__MESSAGEHARVESTERDEFS_H__*/ diff -r a5fbfefd615f -r 6547bf8ca13a sis/cpixsearch.pkg.source --- a/sis/cpixsearch.pkg.source Fri Jun 11 14:43:47 2010 +0300 +++ b/sis/cpixsearch.pkg.source Mon Jun 28 10:34:53 2010 +0530 @@ -122,3 +122,12 @@ "\epoc32\release\armv5\$(TARGET)\qcpixsearchclient.dll" - "!:\sys\bin\qcpixsearchclient.dll" @"centrep.sisx", (0x10202BE9) +;Email Plugin +"\epoc32\data\z\resource\plugins\cpixemailplugin.rsc" +-"c:\resource\plugins\cpixemailplugin.rsc" +"\epoc32\release\armv5\$(TARGET)\cpixemailplugin.dll" +-"c:\sys\bin\cpixemailplugin.dll" + +;Qt Email fetcher +"\epoc32\release\armv5\$(TARGET)\qtemailfetcher.dll" +-"c:\sys\bin\qtemailfetcher.dll" \ No newline at end of file diff -r a5fbfefd615f -r 6547bf8ca13a tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw.mmp --- a/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -32,20 +32,14 @@ #ifdef SBSV2 #if defined(__S60_) - OS_LAYER_SYSTEMINCLUDE - #else // __S60_ not defined - SYSTEMINCLUDE /epoc32/include - SYSTEMINCLUDE /epoc32/include/internal + OS_LAYER_SYSTEMINCLUDE #endif // __S60_ /* Source files */ SOURCEPATH ../src #else // SBSV2 not defined #if defined(__S60_) - MW_LAYER_SYSTEMINCLUDE - #else // __S60_ not defined - SYSTEMINCLUDE /epoc32/include - SYSTEMINCLUDE /epoc32/include/internal + 
MW_LAYER_SYSTEMINCLUDE #endif // __S60_ /* Source files */ diff -r a5fbfefd615f -r 6547bf8ca13a tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw_nrm.mmp --- a/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw_nrm.mmp Fri Jun 11 14:43:47 2010 +0300 +++ b/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw_nrm.mmp Mon Jun 28 10:34:53 2010 +0530 @@ -33,9 +33,6 @@ #ifdef SBSV2 #if defined(__S60_) OSEXT_LAYER_SYSTEMINCLUDE - #else // __S60_ not defined - SYSTEMINCLUDE /epoc32/include - SYSTEMINCLUDE /epoc32/include/internal #endif // __S60_ /* Source files */ @@ -43,9 +40,6 @@ #else // SBSV2 not defined #if defined(__S60_) MW_LAYER_SYSTEMINCLUDE - #else // __S60_ not defined - SYSTEMINCLUDE /epoc32/include - SYSTEMINCLUDE /epoc32/include/internal #endif // __S60_ /* Source files */ diff -r a5fbfefd615f -r 6547bf8ca13a tsrc/cpixmwtester/src/cpixmwtesterblocks.cpp --- a/tsrc/cpixmwtester/src/cpixmwtesterblocks.cpp Fri Jun 11 14:43:47 2010 +0300 +++ b/tsrc/cpixmwtester/src/cpixmwtesterblocks.cpp Mon Jun 28 10:34:53 2010 +0530 @@ -442,19 +442,19 @@ // CCPixMWTester::TestAddUnloadlistL // ----------------------------------------------------------------------------- // -TInt CCPixMWTester::TestAddUnloadlistL( CStifItemParser& aItem) +TInt CCPixMWTester::TestAddUnloadlistL( CStifItemParser& /*aItem*/) { TInt err = KErrNone; CBlacklistMgr* blacklistmanager = CBlacklistMgr::NewL(); CleanupStack::PushL( blacklistmanager ); //Add an Uid to Blacklist DB - blacklistmanager->AddtoUnloadListL( KTestUid ); + blacklistmanager->AddtoDontloadListL( KTestUid ); //Check if the Uid is added to database or not - TBool found = blacklistmanager->FindfromUnloadListL(KTestUid ); + TBool found = blacklistmanager->FindInDontloadListL(KTestUid ); if(!found) err = KErrNotFound; //clear the UID from the database - blacklistmanager->RemoveFromUnloadListL(KTestUid); + blacklistmanager->RemoveFromDontloadListL(KTestUid); CleanupStack::PopAndDestroy( blacklistmanager ); doLog( iLog, err, KNoErrorString ); 
return err; @@ -464,21 +464,21 @@ // CCPixMWTester::TestRemovefromUnloadlistL // ----------------------------------------------------------------------------- // -TInt CCPixMWTester::TestRemovefromUnloadlistL( CStifItemParser& aItem) +TInt CCPixMWTester::TestRemovefromUnloadlistL( CStifItemParser& /*aItem*/) { TInt err = KErrNotFound; CBlacklistMgr* blacklistmanager = CBlacklistMgr::NewL(); CleanupStack::PushL( blacklistmanager ); //Add an Uid to Blacklist DB - blacklistmanager->AddtoUnloadListL( KTestUid ); + blacklistmanager->AddtoDontloadListL( KTestUid ); //Check if the Uid is added to database or not - TBool found = blacklistmanager->FindfromUnloadListL(KTestUid ); + TBool found = blacklistmanager->FindInDontloadListL(KTestUid ); if(found) { //clear the UID from the database - blacklistmanager->RemoveFromUnloadListL(KTestUid); - found = blacklistmanager->FindfromUnloadListL(KTestUid ); + blacklistmanager->RemoveFromDontloadListL(KTestUid); + found = blacklistmanager->FindInDontloadListL(KTestUid ); if ( !found ) err = KErrNone; } CleanupStack::PopAndDestroy( blacklistmanager );