--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cenrep/readme Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,13 @@
+
+The 2001f6fb.cre file can be updated with the list of plugins to be excluded from indexing.
+Only the UID of each plugin has to be added.
+
+Steps to update the 2001f6fb.cre
+=====================================
+1. Update the list of UIDs in the 2001f6fb.txt file.
+2. Copy the file to the \epoc32\winscw\c folder.
+3. Open a command prompt in the \epoc32\release\winscw\udeb folder.
+4. Execute the command "centrepconv.exe 2001f6fb.txt". This command creates the 2001f6fb.cre file in the \epoc32\winscw\c folder.
+ Before executing this command, make sure the emulator is not running. The command starts the emulator, performs the conversion and then closes the emulator.
+5. Copy the generated 2001f6fb.cre file to the \searchsrv\cenrep folder and build the searchsrv component.
+6. Create the ROM image.
\ No newline at end of file
--- a/harvester/group/bld.inf Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/group/bld.inf Mon Jun 28 10:34:53 2010 +0530
@@ -15,7 +15,6 @@
*
*/
#include "../pluginfw/group/bld.inf"
-#include "../HarvesterServer/group/bld.inf"
PRJ_TESTMMPFILES // Make ARMV5 UREL/UDEB SISX package
//gnumakefile ../sis/makeSis.mk
--- a/harvester/harvesterserver/group/bld.inf Fri Jun 11 14:43:47 2010 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-/*
-* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
-* All rights reserved.
-* This component and the accompanying materials are made available
-* under the terms of "Eclipse Public License v1.0"
-* which accompanies this distribution, and is available
-* at the URL "http://www.eclipse.org/legal/epl-v10.html".
-*
-* Initial Contributors:
-* Nokia Corporation - initial contribution.
-*
-* Contributors:
-*
-* Description:
-*
-*/
-
-
-PRJ_MMPFILES
-
-HarvesterServer.mmp
-
--- a/harvester/harvesterserver/group/harvesterserver.mmp Fri Jun 11 14:43:47 2010 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/*
-* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
-* All rights reserved.
-* This component and the accompanying materials are made available
-* under the terms of "Eclipse Public License v1.0"
-* which accompanies this distribution, and is available
-* at the URL "http://www.eclipse.org/legal/epl-v10.html".
-*
-* Initial Contributors:
-* Nokia Corporation - initial contribution.
-*
-* Contributors:
-*
-* Description:
-*
-*/
-
-
-TARGET CPixHarvesterServer.exe
-TARGETTYPE exe
-UID 0x0 0x2001F6FB
-EPOCSTACKSIZE 0x5000
-EPOCHEAPSIZE 0x50000 0x300000 // From about 65KB to about 3 MB
-
-SOURCEPATH ../data
-START RESOURCE 2001F6FB.rss
-TARGETPATH /private/101f875a/import
-END
-
-USERINCLUDE ../inc
-USERINCLUDE ../../../searchengine/cpix/cpix/inc/public
-USERINCLUDE ../traces
-
-MW_LAYER_SYSTEMINCLUDE
-
-SOURCEPATH ../src
-SOURCE CHarvesterServer.cpp
-SOURCE CHarvesterServerSession.cpp
-SOURCE CIndexingManager.cpp
-SOURCE CBlacklistMgr.cpp
-SOURCE CBlacklistDb.cpp
-SOURCE ccontentinfodb.cpp
-SOURCE contentinfomgr.cpp
-SOURCE ccontentinfo.cpp
-
-LIBRARY euser.lib
-LIBRARY estor.lib
-LIBRARY efsrv.lib
-LIBRARY CPixSearchClient.lib
-LIBRARY CPixHarvesterPluginInterface.lib
-LIBRARY edbms.lib
-LIBRARY sqldb.lib
-LIBRARY centralrepository.lib
-
-// Logging
-LIBRARY flogger.lib
-VENDORID 0
-
-CAPABILITY ALL -TCB -DRM -AllFiles
-
-// End of File
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harvester/harvesterserver/harvester.pro Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,75 @@
+#
+# Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Nokia Corporation - initial contribution.
+#
+# Contributors:
+#
+# Description:
+#
+
+TEMPLATE = app
+TARGET = cpixharvesterserver
+
+QT += core
+QT -= gui
+
+symbian:TARGET.UID3 = 0x2001f6fb
+symbian:TARGET.CAPABILITY = ALL -TCB -DRM -AllFiles
+
+symbian{
+ INCLUDEPATH += $$MW_LAYER_SYSTEMINCLUDE
+
+ INCLUDEPATH += ../../searchengine/cpix/cpix/inc/public
+
+ HEADERS += inc/CHarvesterServer.h
+ HEADERS += inc/CHarvesterServerSession.h
+ HEADERS += inc/CIndexingManager.h
+ HEADERS += inc/CBlacklistMgr.h
+ HEADERS += inc/CBlacklistDb.h
+ HEADERS += inc/ccontentinfodb.h # header matching src/ccontentinfodb.cpp
+ HEADERS += inc/contentinfomgr.h
+ HEADERS += inc/ccontentinfo.h
+ HEADERS += inc/harvesterserver.pan
+ HEADERS += inc/harvesterservercommons.h
+ HEADERS += inc/qtmythread.h
+ HEADERS += traces/CBlacklistDbTraces.h
+ HEADERS += traces/CBlacklistMgrTraces.h
+ HEADERS += traces/ccontentinfodbTraces.h
+ HEADERS += traces/ccontentinfoTraces.h
+ HEADERS += traces/CIndexingManagerTraces.h
+ HEADERS += traces/contentinfomgrTraces.h
+ HEADERS += traces/OstTraceDefinitions.h
+ HEADERS += traces/fixed_id.definitions
+
+ SOURCES += src/main.cpp
+ SOURCES += src/CHarvesterServer.cpp
+ SOURCES += src/CHarvesterServerSession.cpp
+ SOURCES += src/CIndexingManager.cpp
+ SOURCES += src/CBlacklistMgr.cpp
+ SOURCES += src/CBlacklistDb.cpp
+ SOURCES += src/ccontentinfodb.cpp
+ SOURCES += src/contentinfomgr.cpp
+ SOURCES += src/ccontentinfo.cpp
+
+ resourceTargetBlock = \
+ "SOURCEPATH data" \
+ "START RESOURCE 2001F6FB.rss" \
+ "TARGETPATH /private/101f875a/import"\
+ "END"
+
+ MMP_RULES += resourceTargetBlock # emit the START RESOURCE block into the generated MMP file
+
+ MMP_RULES += "EPOCSTACKSIZE 0x5000"
+ MMP_RULES += "EPOCHEAPSIZE 0x50000 0x300000" #From about 65KB to about 3 MB
+
+ LIBS += -leuser -lestor -lCPixSearchClient -lCPixHarvesterPluginInterface -ledbms -lsqldb -lcentralrepository -lflogger
+ LIBS += -lefsrv
+
+}
--- a/harvester/harvesterserver/inc/cblacklistdb.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/inc/cblacklistdb.h Mon Jun 28 10:34:53 2010 +0530
@@ -98,27 +98,27 @@
TBool FindL(TInt32 aPluginUid);
/*
- * @description Adds the given uid of a plugin to the unloadlist table.
+ * @description Adds the given uid of a plugin to the dontloadlist table.
* @param aPluginUid Uid of the plugin
- * @return sysmbian error code
+ * @return symbian error code
* Leaves in case of errors.
*/
- TInt AddtoUnloadListL( TInt32 aPluginUid );
+ TInt AddtoDontloadListL( TInt32 aPluginUid );
/*
- * @description remove the given uid of a plugin to the unloadlist table.
+ * @description Removes the given uid of a plugin from the dontloadlist table.
* @param aPluginUid Uid of the plugin
* Leaves in case of errors.
*/
- void RemoveFromUnloadListL( TInt32 aPluginUid );
+ void RemoveFromDontloadListL( TInt32 aPluginUid );
/*
- * @description Find the given uid of a plugin to the unloadlist table.
+ * @description Finds the given uid of a plugin in the dontloadlist table.
* @param aPluginUid Uid of the plugin
* @return ETrue if exists else returns EFalse
* Leaves in case of errors.
*/
- TBool FindFromUnloadListL( TInt32 aPluginUid );
+ TBool FindInDontloadListL( TInt32 aPluginUid );
private :
/*
@@ -132,14 +132,14 @@
* @return CDbColSet database column set
* Leaves in case of errors.
*/
- CDbColSet* CreateColumnSetLC();
+ CDbColSet* CreateBlacklistColumnSetLC();
/*
- * @description Creates Column set for unload table.
+ * @description Creates Column set for dontload table.
* @return CDbColSet database column set
* Leaves in case of errors.
*/
- CDbColSet* CreateUnloadColumnSetLC();
+ CDbColSet* CreateDontloadColumnSetLC();
private:
/*
* A handle to a file server session.Owned
--- a/harvester/harvesterserver/inc/cblacklistmgr.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/inc/cblacklistmgr.h Mon Jun 28 10:34:53 2010 +0530
@@ -88,27 +88,27 @@
TBool FindL(TUid aPluginUid , TInt aVersion);
/*
- * @description Adds the given uid of a plugin to the unload list of Blacklist DB.
+ * @description Adds the given uid of a plugin to the dontload list of Blacklist DB.
* @param aPluginUid Uid of the plugin
* @return sysmbian error code
* Leaves in case of errors.
*/
- TInt AddtoUnloadListL( TUid aPluginUid );
+ TInt AddtoDontloadListL( TUid aPluginUid );
/*
- * @description removes the given uid of a plugin from the unload list of Blacklist DB.
+ * @description removes the given uid of a plugin from the dontload list of Blacklist DB.
* @param aPluginUid Uid of the plugin
* Leaves in case of errors.
*/
- void RemoveFromUnloadListL( TUid aPluginUid );
+ void RemoveFromDontloadListL( TUid aPluginUid );
/*
- * @description Checks wether the plugin uid is exists in the database unload list or not.
+ * @description Checks whether the plugin uid exists in the database dontload list or not.
* @param aPluginUid Uid of the plugin
* @return ETrue if uid exists else returns EFalse
* Leaves in case of errors.
*/
- TBool FindfromUnloadListL(TUid aPluginUid );
+ TBool FindInDontloadListL(TUid aPluginUid );
private:
/*
--- a/harvester/harvesterserver/inc/cindexingmanager.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/inc/cindexingmanager.h Mon Jun 28 10:34:53 2010 +0530
@@ -26,6 +26,7 @@
//Forward Declaration
class CBlacklistMgr;
class CContentInfoMgr;
+class CContentInfo;
class CIndexingManager : public CActive, public MIndexingService
{
@@ -101,13 +102,28 @@
*/
void SaveL();
/**
- * Update content info Db with the plugin details
+ * Adds an entry with the plugin details to the content info Db. If an entry with the given
+ * plugin name is already available in the contentinfo db, then the blacklist status of the plugin
+ * is updated with KEnable.
*/
- void UpdateContentInfoDbL( const TDesC& aPluginName);
+ void UpdateContentInfoDbL( const TDesC& aPluginName, CContentInfo* aContentinfo);
/**
- * Update the unload list in a separate table in blacklist database
+ * Update the dontload list in a separate table in blacklist database.
+ * If any error occurs in reading Uid values from centrep, then the dontload list
+ * is ignored.
*/
- void UpdateUnloadListL();
+ void UpdateDontloadListL();
+ /**
+ * Returns the load status of the plugin. This method will check both tables in
+ * Blacklist database and return the status.
+ * returns ETrue if uid is found in any table of blacklist database else returns EFalse.
+ */
+ TBool GetPluginLoadStatusL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName);
+
+ /**
+ * Loads the Harvesterplugin with given plugin uid
+ */
+ void LoadHarvesterpluginL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName);
private:
CIndexingManager();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harvester/harvesterserver/inc/qtmythread.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,31 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: Declares HarvesterThread, a QThread subclass for the harvester server.
+*
+*/
+
+#ifndef QTMYTHREAD_H_
+#define QTMYTHREAD_H_
+
+#include <QThread>
+
+class HarvesterThread : public QThread // QThread subclass declared for the harvester server
+ {
+ Q_OBJECT // Qt meta-object macro
+
+protected:
+ void run(); // overrides QThread::run(); only declared here, the body lives outside this header
+ };
+
+#endif /* QTMYTHREAD_H_ */
--- a/harvester/harvesterserver/src/cblacklistdb.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/src/cblacklistdb.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -41,14 +41,14 @@
//SQL query to fetch all the records in database
_LIT(KBlistSqlFormatAll , "SELECT * FROM table");
_LIT(KDriveC, "c:");
-//Unload plugins Table name in blacklist database
-_LIT( KBLUnloadTableName , "unloadtable" );
-//SQL query to fetch all the records in unload table
-_LIT(KUnloadlistSqlFormatAll , "SELECT * FROM unloadtable");
-//SQL query to delete the records with given uid in unload table
-_LIT(KunloadlistSqlDelete, "DELETE FROM unloadtable WHERE uid=%d");
-//SQL query to fetch the records with given uid from unload table
-_LIT(KUnloadlistSqlFormatSeek , "SELECT * FROM unloadtable WHERE uid=%d");
+//dontload plugins Table name in blacklist database
+_LIT( KBLdontloadTableName , "dontloadtable" );
+//SQL query to fetch all the records in dontload table
+_LIT(KdontloadlistSqlFormatAll , "SELECT * FROM dontloadtable");
+//SQL query to delete the records with given uid in dontload table
+_LIT(KdontloadlistSqlDelete, "DELETE FROM dontloadtable WHERE uid=%d");
+//SQL query to fetch the records with given uid from dontload table
+_LIT(KdontloadlistSqlFormatSeek , "SELECT * FROM dontloadtable WHERE uid=%d");
// -----------------------------------------------------------------------------
// CBlacklistDb::NewL()
// -----------------------------------------------------------------------------
@@ -378,11 +378,11 @@
//create the database
User::LeaveIfError( iDatabase.Create( iFs , datafile ) );
- CDbColSet* columns = CreateColumnSetLC();//creates the columns and push to cleanupstack
+ CDbColSet* columns = CreateBlacklistColumnSetLC();//creates the columns and push to cleanupstack
User::LeaveIfError( iDatabase.CreateTable( KBlacklistTableName , *columns ) );
- //Add table to store the unload plugins
- CDbColSet* unloadcolumns = CreateUnloadColumnSetLC(); //creates the columns and push to cleanupstack
- User::LeaveIfError( iDatabase.CreateTable( KBLUnloadTableName , *unloadcolumns ) );
+ //Add table to store the dontload plugins
+ CDbColSet* dontloadcolumns = CreateDontloadColumnSetLC(); //creates the columns and push to cleanupstack
+ User::LeaveIfError( iDatabase.CreateTable( KBLdontloadTableName , *dontloadcolumns ) );
//clean up of variables (columns and dataFile)
CleanupStack::PopAndDestroy( 2 );
@@ -391,12 +391,12 @@
}
// -----------------------------------------------------------------------------
-// CBlacklistDb::CreateColumnSetLC
+// CBlacklistDb::CreateBlacklistColumnSetLC
// -----------------------------------------------------------------------------
//
-CDbColSet* CBlacklistDb::CreateColumnSetLC()
+CDbColSet* CBlacklistDb::CreateBlacklistColumnSetLC()
{
- OstTraceFunctionEntry0( CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY );
+ OstTraceFunctionEntry0( CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY );
CPIXLOGSTRING("CBlacklistDb::CreateColumnSetLC(): Enter");
CDbColSet* columns = CDbColSet::NewLC();
@@ -414,17 +414,17 @@
CPIXLOGSTRING("CBlacklistDb::CreateColumnSetLC(): Exit");
- OstTraceFunctionExit0( CBLACKLISTDB_CREATECOLUMNSETLC_EXIT );
+ OstTraceFunctionExit0( CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT );
return columns; // columns stays on CleanupStack
}
// -----------------------------------------------------------------------------
-// CBlacklistDb::CreateUnloadColumnSetLC
+// CBlacklistDb::CreateDontloadColumnSetLC
// -----------------------------------------------------------------------------
//
-CDbColSet* CBlacklistDb::CreateUnloadColumnSetLC()
+CDbColSet* CBlacklistDb::CreateDontloadColumnSetLC()
{
- OstTraceFunctionEntry0( CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY );
+ OstTraceFunctionEntry0( CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY );
CDbColSet* columns = CDbColSet::NewLC();
//Add uid column
@@ -432,36 +432,39 @@
col.iAttributes = TDbCol::ENotNull ;
columns->AddL( col );
- OstTraceFunctionExit0( CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT );
+ OstTraceFunctionExit0( CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT );
return columns; // columns stays on CleanupStack
}
// -----------------------------------------------------------------------------
-// CBlacklistDb::AddtoUnloadListL
+// CBlacklistDb::AddtoDontloadListL
// -----------------------------------------------------------------------------
//
-TInt CBlacklistDb::AddtoUnloadListL( TInt32 aPluginUid )
+TInt CBlacklistDb::AddtoDontloadListL( TInt32 aPluginUid )
{
- OstTraceFunctionEntry0( CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY );
+ OstTraceFunctionEntry0( CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY );
if ( !iOpened )
return KErrNotReady;
TInt err;
- //Prepare the view
+ //Prepare the view with all the rows in the dontload table
RDbView dbView;
CleanupClosePushL( dbView );
- err = dbView.Prepare( iDatabase , TDbQuery( KUnloadlistSqlFormatAll ) ) ;
+ err = dbView.Prepare( iDatabase , TDbQuery( KdontloadlistSqlFormatAll ) ) ;
if ( err == KErrNone )
{
TRAP( err , dbView.InsertL() );
- CDbColSet* colSet = dbView.ColSetL();
- TDbColNo uidcolno = colSet->ColNo( Kuid );
- dbView.SetColL( uidcolno , aPluginUid );
- dbView.PutL();
+ if ( err == KErrNone )
+ {
+ CDbColSet* colSet = dbView.ColSetL();
+ TDbColNo uidcolno = colSet->ColNo( Kuid );
+ dbView.SetColL( uidcolno , aPluginUid );
+ dbView.PutL();
+ }
//If addition failed, rollback
- if(err != KErrNone)
+ else
{
iDatabase.Rollback();
}
@@ -469,50 +472,42 @@
CleanupStack::PopAndDestroy( &dbView ); // dbView/
User::LeaveIfError( iDatabase.Compact() );
- OstTraceFunctionExit0( CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT );
+ OstTraceFunctionExit0( CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT );
return err;
}
// -----------------------------------------------------------------------------
-// CBlacklistDb::RemoveFromUnloadListL
+// CBlacklistDb::RemoveFromDontloadListL
// -----------------------------------------------------------------------------
//
-void CBlacklistDb::RemoveFromUnloadListL( TInt32 aPluginUid )
+void CBlacklistDb::RemoveFromDontloadListL( TInt32 aPluginUid )
{
- OstTraceFunctionEntry0( CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY );
+ OstTraceFunctionEntry0( CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY );
if ( !iOpened )
return ;
//Remove the item record to database
// Create the sql statement. KBlistSqlDelete
TBuf<KBlistSqlStringMaxLength> sql;
- sql.Format( KunloadlistSqlDelete , aPluginUid );
+ sql.Format( KdontloadlistSqlDelete , aPluginUid );
//delete the row.
TInt rowCount( iDatabase.Execute(sql) );
- if(rowCount > 0)
- {
- OstTrace0( TRACE_NORMAL, CBLACKLISTDB_REMOVEFROMUNLOADLISTL, "CBlacklistDb::RemoveFromUnloadListL :: removed UID succesfully" );
- CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): Removed UID succesfully");
- }
- else
- {
- OstTrace0( TRACE_NORMAL, DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL, "CBlacklistDb::RemoveFromUnloadListL:: UID not found" );
- CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): UID not found");
- }
- CPIXLOGSTRING("CBlacklistDb::RemoveFromUnloadListL(): Exit");
+ OstTrace1( TRACE_NORMAL, DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL, "No. of rows removed succesfully is ;RowCount=%d", rowCount );
- OstTraceFunctionExit0( CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT );
+ CPIXLOGSTRING("CBlacklistDb::RemoveFromDontloadListL(): Exit");
+
+ OstTraceFunctionExit0( CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT );
return ;
}
// -----------------------------------------------------------------------------
-// CBlacklistDb::FindFromUnloadListL
+// CBlacklistDb::FindInDontloadListL
// -----------------------------------------------------------------------------
//
-TBool CBlacklistDb::FindFromUnloadListL( TInt32 aPluginUid )
+TBool CBlacklistDb::FindInDontloadListL( TInt32 aPluginUid )
{
- OstTraceFunctionEntry0( CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY );
- CPIXLOGSTRING2("CBlacklistDb::FindFromUnloadListL(): Uid = %x " , aPluginUid );
+ OstTraceFunctionEntry0( CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY );
+ CPIXLOGSTRING2("CBlacklistDb::FindInDontloadListL(): Uid = %x " , aPluginUid );
if ( !iOpened )
return EFalse;
@@ -520,9 +515,9 @@
//Check if the item is available in database
//Prepare the sql
TBuf<KBlistSqlStringMaxLength> sql;
- sql.Format( KUnloadlistSqlFormatSeek , aPluginUid );
+ sql.Format( KdontloadlistSqlFormatSeek , aPluginUid );
TBool found = EFalse;
- //Prepare the view
+ //Prepare the view to get the list of rows which has the given Uid
RDbView dbView;
CleanupClosePushL( dbView );
@@ -533,11 +528,11 @@
if ( isAtRow )
{
- OstTrace0( TRACE_NORMAL, CBLACKLISTDB_FINDFROMUNLOADLISTL, "CBlacklistDb::FindFromUnloadListL::UID found" );
- CPIXLOGSTRING("CBlacklistDb::FindFromUnloadListL(): UID found");
+ OstTrace0( TRACE_NORMAL, CBLACKLISTDB_FINDFROMDONTLOADLISTL, "CBlacklistDb::FindFromDontloadListL::UID found" );
+ CPIXLOGSTRING("CBlacklistDb::FindFromDontloadListL(): UID found");
found = ETrue;
}
CleanupStack::PopAndDestroy( &dbView ); // dbView/
- OstTraceFunctionExit0( CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT );
+ OstTraceFunctionExit0( CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT );
return found;
}
--- a/harvester/harvesterserver/src/cblacklistmgr.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/src/cblacklistmgr.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -153,52 +153,52 @@
}
// -----------------------------------------------------------------------------
-// CBlacklistMgr::AddtoUnloadListL()
+// CBlacklistMgr::AddtoDontloadListL()
// -----------------------------------------------------------------------------
//
-TInt CBlacklistMgr::AddtoUnloadListL( TUid aPluginUid )
+TInt CBlacklistMgr::AddtoDontloadListL( TUid aPluginUid )
{
OstTraceFunctionEntry0( CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY );
- OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_ADDTOUNLOADLISTL, "CBlacklistMgr::AddtoUnloadListL;Uid=%x", aPluginUid.iUid );
- CPIXLOGSTRING2("CBlacklistMgr::AddtoUnloadListL(): Uid = %x " , aPluginUid.iUid );
+ OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_ADDTODONTLOADLISTL, "CBlacklistMgr::AddtoDontloadListL;Uid=%x", aPluginUid.iUid );
+ CPIXLOGSTRING2("CBlacklistMgr::AddtoDontloadListL(): Uid = %x " , aPluginUid.iUid );
//Check if the record with given plugin uid is already available in database or not
//If available just ignore the addition
//If there is no record found in database with given uid, add new record with given uid
TInt err = KErrNone;
- if( !(iBlacklistDb->FindFromUnloadListL( aPluginUid.iUid )) )
+ if( !(iBlacklistDb->FindInDontloadListL( aPluginUid.iUid )) )
{
- err = iBlacklistDb->AddtoUnloadListL( aPluginUid.iUid );
+ err = iBlacklistDb->AddtoDontloadListL( aPluginUid.iUid );
}
- CPIXLOGSTRING("CBlacklistMgr::AddtoUnloadListL(): Exit");
- OstTraceFunctionExit0( CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT );
+ CPIXLOGSTRING("CBlacklistMgr::AddtoDontloadListL(): Exit");
+ OstTraceFunctionExit0( CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT );
return err;
}
// -----------------------------------------------------------------------------
-// CBlacklistMgr::RemoveFromUnloadListL()
+// CBlacklistMgr::RemoveFromDontloadListL()
// -----------------------------------------------------------------------------
//
-void CBlacklistMgr::RemoveFromUnloadListL( TUid aPluginUid )
+void CBlacklistMgr::RemoveFromDontloadListL( TUid aPluginUid )
{
OstTraceFunctionEntry0( CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY );
- OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_REMOVEFROMUNLOADLISTL, "CBlacklistMgr::RemoveFromUnloadListL;Uid=%x", aPluginUid.iUid );
- CPIXLOGSTRING2("CBlacklistMgr::RemoveFromUnloadListL(): Uid = %x " , aPluginUid.iUid );
+ OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_REMOVEFROMUNLOADLISTL, "CBlacklistMgr::RemoveFromDontloadListL;Uid=%x", aPluginUid.iUid );
+ CPIXLOGSTRING2("CBlacklistMgr::RemoveFromDontloadListL(): Uid = %x " , aPluginUid.iUid );
//Remove the item record to database
- iBlacklistDb->RemoveFromUnloadListL( aPluginUid.iUid );
+ iBlacklistDb->RemoveFromDontloadListL( aPluginUid.iUid );
- CPIXLOGSTRING("CBlacklistMgr::RemoveFromUnloadListL(): Exit");
- OstTraceFunctionExit0( CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT );
+ CPIXLOGSTRING("CBlacklistMgr::RemoveFromDontloadListL(): Exit");
+ OstTraceFunctionExit0( CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT );
}
// -----------------------------------------------------------------------------
-// CBlacklistMgr::FindfromUnloadListL()
+// CBlacklistMgr::FindInDontloadListL()
// -----------------------------------------------------------------------------
//
-TBool CBlacklistMgr::FindfromUnloadListL(TUid aPluginUid )
+TBool CBlacklistMgr::FindInDontloadListL(TUid aPluginUid )
{
- CPIXLOGSTRING2("CBlacklistMgr::FindfromUnloadListL(): Uid = %x " , aPluginUid.iUid );
- OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_FINDFROMUNLOADLISTL, "CBlacklistMgr::FindfromUnloadListL;Uid=%x", aPluginUid.iUid );
- return ( iBlacklistDb->FindFromUnloadListL( aPluginUid.iUid ) );
+ CPIXLOGSTRING2("CBlacklistMgr::FindInDontloadListL(): Uid = %x " , aPluginUid.iUid );
+ OstTrace1( TRACE_NORMAL, CBLACKLISTMGR_FINDINDONTLOADLISTL, "CBlacklistMgr::FindInDontloadListL;Uid=%x", aPluginUid.iUid );
+ return ( iBlacklistDb->FindInDontloadListL( aPluginUid.iUid ) );
}
--- a/harvester/harvesterserver/src/charvesterserver.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/src/charvesterserver.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -129,12 +129,15 @@
//
void CHarvesterServer::ThreadFunctionL()
{
+ //QCoreApplication installs one ActiveScheduler. So removing the Active scheduler install
+ //from ThreadFunctionL
+
// Construct active scheduler
- CActiveScheduler* activeScheduler = new ( ELeave ) CActiveScheduler;
- CleanupStack::PushL(activeScheduler);
+ //CActiveScheduler* activeScheduler = new ( ELeave ) CActiveScheduler;
+ //CleanupStack::PushL(activeScheduler);
// Install active scheduler
- CActiveScheduler::Install(activeScheduler);
+ //CActiveScheduler::Install(activeScheduler);
// Construct server
CHarvesterServer* server = CHarvesterServer::NewLC();
@@ -173,7 +176,7 @@
// Cleanup
CleanupStack::PopAndDestroy( server );
- CleanupStack::PopAndDestroy( activeScheduler );
+ //CleanupStack::PopAndDestroy( activeScheduler );
}
// -----------------------------------------------------------------------------
@@ -207,10 +210,10 @@
// Returns the address of the function to be called.
// -----------------------------------------------------------------------------
//
-TInt E32Main()
- {
- CHarvesterServer::ThreadFunction();
- return KErrNone;
- }
+//TInt E32Main()
+// {
+// CHarvesterServer::ThreadFunction();
+// return KErrNone;
+// }
// End of File
--- a/harvester/harvesterserver/src/cindexingmanager.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/src/cindexingmanager.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -49,11 +49,11 @@
//constants for enable and disable status
const TInt KEnable = 1;
const TInt KDisable = 0;
-
+//Uid of Harvester server central repository database
const TUid KCPIXHSrepoUidMenu = {0x2001f6fb};
//Length of uid string in cenrep
-const TInt KuidStringLength = 8;
+const TInt KCenrepUidLength = 8;
// -----------------------------------------------------------------------------
// CHarvesterServer::NewL()
// -----------------------------------------------------------------------------
@@ -141,7 +141,7 @@
//Instantiate Contentinfo manager
iContentInfoMgr = CContentInfoMgr::NewL();
- UpdateUnloadListL();
+ UpdateDontloadListL();
// Load plugins
LoadPluginsL();
@@ -308,6 +308,7 @@
//
void CIndexingManager::LoadPluginsL()
{
+ OstTraceFunctionEntry0( CINDEXINGMANAGER_LOADPLUGINSL_ENTRY );
RImplInfoPtrArray infoArray;
TCleanupItem cleanupItem( CPixSearchECom::CleanupEComArray, &infoArray );
CleanupStack::PushL( cleanupItem );
@@ -316,69 +317,30 @@
TInt count( 0 );
count = infoArray.Count();
- //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: PLUGINS COUNT %d", count );
- CIndexingPlugin* plugin = NULL;
-
TInt contentcount(iContentInfoMgr->GetContentCountL() );
// If the content count in the content info DB is not equal to the plugin count, reset the content info DB
if ( contentcount != count)
iContentInfoMgr->ResetL();
+ CContentInfo* contentinfo = CContentInfo::NewL();
+
for ( TInt i = 0; i < count; i++ )
{
TUid uid = infoArray[i]->ImplementationUid(); // Create the plug-ins
TInt version = infoArray[i]->Version();
- //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: PLUGINS UID %x", uid );
- plugin = NULL;
+ //Update the details of the plugin in Contentinfo DB
+ UpdateContentInfoDbL( infoArray[i]->DisplayName(), contentinfo );
+ //Get the load status of the plugin.
+ TBool pluginloadstatus = GetPluginLoadStatusL ( uid, version, infoArray[i]->DisplayName() );
- UpdateContentInfoDbL( infoArray[i]->DisplayName() );
- TBool loadplugin = ETrue;
- //status of plugin in blacklist table
- TBool pluginblacklisted = iBlacklistMgr->FindL( uid, version);
- //status of plugin in unload table
- TBool loadstatus = iBlacklistMgr->FindfromUnloadListL( uid );
- //Check the Uid in both the tables of the blacklist db
- if ( loadstatus || pluginblacklisted )
- loadplugin = EFalse;
-
- if ( loadstatus )
- {
- //Found in unload list.Update the indexing and blacklist status in contentinfo DB
- iContentInfoMgr->UpdatePluginIndexStatusL( infoArray[i]->DisplayName() , KDisable );
- iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KDisable );
- }
- if ( pluginblacklisted )
- //Update the blacklist status in content info db
- iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KEnable );
-
- if ( loadplugin )
+ if ( pluginloadstatus )
{
- // Plugin is not black listed. Add it to database and try to load the plugin
- iBlacklistMgr->AddL( uid , version );
- OstTrace1( TRACE_NORMAL, CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is added to DB", uid.iUid );
- CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is added to database", uid.iUid);
- TRAPD( err, plugin = CIndexingPlugin::NewL( uid ) );
- //FFLOGSTRING2( "CFastFindHarvesterPluginControl:: ERROR %d", err );
- if ( err == KErrNone )
- {
- // Plugin loaded succesfully. Remove it from the database
- iBlacklistMgr->Remove(uid);
- OstTrace1( TRACE_NORMAL, DUP1_CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is removed from DB", uid.iUid );
- CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is removed from database", uid.iUid);
- iContentInfoMgr->UpdateBlacklistStatusL( infoArray[i]->DisplayName() , KDisable );
- CleanupStack::PushL( plugin );
- plugin->SetObserver( *this );
- plugin->SetSearchSession( iSearchSession );
- iPluginArray.AppendL( plugin ); // and add them to array
- CleanupStack::Pop( plugin );
- OstTrace1( TRACE_NORMAL, DUP2_CINDEXINGMANAGER_LOADPLUGINSL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is loaded successfully", uid.iUid );
- CPIXLOGSTRING2("CIndexingManager::LoadPluginsL(): Plugin with uid = %x is loaded succesfully", uid.iUid);
- }
+ LoadHarvesterpluginL (uid, version, infoArray[i]->DisplayName() );//Load the harvester plugin
}
- }
- CleanupStack::PopAndDestroy( &infoArray ); // infoArray, results in a call to CleanupEComArray
- //FFLOGSTRING( "CFastFindHarvesterPluginControl::LoadPluginsL() plugin!" );
-
+ }
+ delete contentinfo;
+ CleanupStack::PopAndDestroy( &infoArray ); // infoArray, results in a call to CleanupEComArray
+ OstTraceFunctionExit0( CINDEXINGMANAGER_LOADPLUGINSL_EXIT );
}
// -----------------------------------------------------------------------------
@@ -626,7 +588,7 @@
// CIndexingManager::UpdateContentInfoDbL()
// -----------------------------------------------------------------------------
//
-void CIndexingManager::UpdateContentInfoDbL( const TDesC& aPluginName)
+void CIndexingManager::UpdateContentInfoDbL( const TDesC& aPluginName, CContentInfo* aContentinfo)
{
OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY );
TBool iscontentfound = iContentInfoMgr->FindL( aPluginName );
@@ -634,12 +596,11 @@
if( !iscontentfound )
{
//Add the content details to database
- CContentInfo* contentinfo = CContentInfo::NewL();
- contentinfo->SetNameL( aPluginName );
- contentinfo->SetBlacklistStatus( KEnable );
- contentinfo->SetIndexStatus( KEnable );
- iContentInfoMgr->AddL( contentinfo );
- delete contentinfo;
+ aContentinfo->SetNameL( aPluginName );
+ aContentinfo->SetBlacklistStatus( KEnable );
+ aContentinfo->SetIndexStatus( KEnable );
+ iContentInfoMgr->AddL( aContentinfo );
+
}
else
{
@@ -649,13 +610,13 @@
}
// -----------------------------------------------------------------------------
-// CIndexingManager::UpdateUnloadList()
+// CIndexingManager::UpdateDontloadListL()
// -----------------------------------------------------------------------------
//
-void CIndexingManager::UpdateUnloadListL()
+void CIndexingManager::UpdateDontloadListL()
{
- OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY );
- CPIXLOGSTRING("CIndexingManager::UpdateUnloadList : Start");
+ OstTraceFunctionEntry0( CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY );
+ CPIXLOGSTRING("CIndexingManager::UpdateDontloadList : Start");
//Read the list of Uid's from the cenrep and update blacklist database
//Open the unload list common repository
CRepository* unloadrepo = NULL;
@@ -664,10 +625,13 @@
return;
RArray<TUint32> uidlist;
//Read all the key list
+ //Matches occur whenever (key & mask) == (partialKey & mask).
+ //The partial key is guaranteed to be masked before use
+ // To fetch all the keys we have done masking with '0'
TInt error = unloadrepo->FindL( 0, 0, uidlist);
if ( error == KErrNone )
{
- TBuf<KuidStringLength> temp;
+ TBuf<KCenrepUidLength> temp;
//get the Uid of each and every plugin and add it to blacklist database
TInt count = uidlist.Count();
for (int i = 0; i < count; i++ )
@@ -678,9 +642,69 @@
TLex uidvalue(temp);
TInt xerr = uidvalue.Val( value,EHex );
uid.iUid = value;
- (void)iBlacklistMgr->AddtoUnloadListL( uid );
+ (void)iBlacklistMgr->AddtoDontloadListL( uid );
}
}
- CPIXLOGSTRING("CIndexingManager::UpdateUnloadList : End");
- OstTraceFunctionExit0( CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT );
+ CPIXLOGSTRING("CIndexingManager::UpdateDontloadList : End");
+ OstTraceFunctionExit0( CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT );
+ }
+
+// -----------------------------------------------------------------------------
+// CIndexingManager::GetPluginLoadStatusL()
+// -----------------------------------------------------------------------------
+// Returns ETrue only when both lookups below (blacklist table, dontload list) report EFalse.
+TBool CIndexingManager::GetPluginLoadStatusL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName)
+ {
+ OstTraceFunctionEntry0( DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY ); //NOTE(review): no matching OstTraceFunctionExit0 in this function
+ //status of plugin in blacklist table
+ TBool pluginblacklisted = iBlacklistMgr->FindL( aPluginUid, aVersion);
+ //status of plugin in dontload list (non-zero presumably means "listed" - see the branch below)
+ TBool loadstatus = iBlacklistMgr->FindInDontloadListL( aPluginUid );
+ //Check the Uid in both the tables of the blacklist db
+// if ( loadstatus || pluginblacklisted )
+// loadplugin = EFalse;
+
+ if ( loadstatus )
+ {
+ //Found in dontload list. Update the indexing and blacklist status in contentinfo DB
+ iContentInfoMgr->UpdatePluginIndexStatusL( aPluginName , KDisable );
+ iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KDisable );
+ }
+ if ( pluginblacklisted )
+ //Update the blacklist status in content info db (runs after, and so overrides, the KDisable set above)
+ iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KEnable );
+
+ return (! (loadstatus | pluginblacklisted)); //bitwise | of the two flags; ETrue only if both are zero
}
+
+// -----------------------------------------------------------------------------
+// CIndexingManager::LoadHarvesterpluginL()
+// -----------------------------------------------------------------------------
+// Blacklists the plugin, tries to load it and, on success, un-blacklists it and appends it to iPluginArray.
+void CIndexingManager::LoadHarvesterpluginL (TUid aPluginUid, TInt aVersion, const TDesC& aPluginName)
+    {
+    OstTraceFunctionEntry0( CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY );
+    CIndexingPlugin* plugin = NULL;
+    // Blacklist the plugin before loading it; the entry is removed again below only if the load succeeds
+    iBlacklistMgr->AddL( aPluginUid , aVersion );
+    OstTrace1( TRACE_NORMAL, DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is added to DB", aPluginUid.iUid );
+    CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is added to database", aPluginUid.iUid);
+    // Try to load the plugin; a leave from NewL is trapped, in which case the plugin stays blacklisted
+    TRAPD( err, plugin = CIndexingPlugin::NewL( aPluginUid ) );
+    if ( err == KErrNone )
+        {
+        // Plugin loaded successfully. Own it on the cleanup stack before any call that can leave, then un-blacklist it
+        CleanupStack::PushL( plugin );
+        iBlacklistMgr->Remove(aPluginUid);
+        OstTrace1( TRACE_NORMAL, CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is removed from DB", aPluginUid.iUid );
+        CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is removed from database", aPluginUid.iUid);
+        iContentInfoMgr->UpdateBlacklistStatusL( aPluginName , KDisable );
+        plugin->SetObserver( *this );
+        plugin->SetSearchSession( iSearchSession );
+        iPluginArray.AppendL( plugin ); // and add them to array
+        CleanupStack::Pop( plugin );
+        OstTrace1( TRACE_NORMAL, DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL, "CIndexingManager::LoadPluginsL;Plugin with uid=%x is loaded successfully", aPluginUid.iUid );
+        CPIXLOGSTRING2("CIndexingManager::LoadHarvesterpluginL(): Plugin with uid = %x is loaded succesfully", aPluginUid.iUid);
+        }
+    OstTraceFunctionExit0( CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT );
+    }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harvester/harvesterserver/src/main.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,35 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+#include <QtCore>
+#include <QCoreApplication>
+#include "charvesterserver.h"
+#include "qtmythread.h"
+
+int main(int argc, char *argv[]) // process entry point: starts the harvester thread, then runs the Qt event loop
+ {
+ QCoreApplication a( argc , argv); // Qt application object; its exec() is called below
+ HarvesterThread mythread; // declared in qtmythread.h (presumably a QThread subclass - confirm)
+ mythread.start(); // presumably QThread::start(), which executes HarvesterThread::run() in a new thread - confirm
+ return a.exec(); // the return value of exec() becomes the process exit code
+ }
+
+void HarvesterThread::run()
+ {
+ //Runs the harvester server. Errors are trapped inside ThreadFunction itself, so nothing is trapped here
+ CHarvesterServer::ThreadFunction();
+ exec(); // presumably QThread::exec(), entered once ThreadFunction() has returned - confirm
+ }
--- a/harvester/harvesterserver/traces/CBlacklistDbTraces.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/traces/CBlacklistDbTraces.h Mon Jun 28 10:34:53 2010 +0530
@@ -18,16 +18,16 @@
#define CBLACKLISTDB_UPDATEL_EXIT 0x8a000a
#define CBLACKLISTDB_CREATEDBL_ENTRY 0x8a000b
#define CBLACKLISTDB_CREATEDBL_EXIT 0x8a000c
-#define CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY 0x8a000d
-#define CBLACKLISTDB_CREATECOLUMNSETLC_EXIT 0x8a000e
-#define CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY 0x8a003b
-#define CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT 0x8a003c
-#define CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY 0x8a003d
-#define CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT 0x8a003e
-#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY 0x8a003f
-#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT 0x8a0040
-#define CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY 0x8a0041
-#define CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT 0x8a0042
+#define CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY 0x8a004b
+#define CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT 0x8a004c
+#define CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY 0x8a004d
+#define CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT 0x8a004e
+#define CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY 0x8a004f
+#define CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT 0x8a0050
+#define CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY 0x8a0051
+#define CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT 0x8a0052
+#define CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY 0x8a0053
+#define CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT 0x8a0054
#define CBLACKLISTDB_CONSTRUCTL 0x860001
#define CBLACKLISTDB_ADDL 0x860002
#define CBLACKLISTDB_REMOVE 0x860003
@@ -38,9 +38,8 @@
#define CBLACKLISTDB_UPDATEL 0x860008
#define CBLACKLISTDB_FINDL 0x860009
#define DUP1_CBLACKLISTDB_FINDL 0x86000a
-#define CBLACKLISTDB_REMOVEFROMUNLOADLISTL 0x860027
-#define DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL 0x860028
-#define CBLACKLISTDB_FINDFROMUNLOADLISTL 0x860029
+#define DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL 0x86002d
+#define CBLACKLISTDB_FINDFROMDONTLOADLISTL 0x86002e
inline TBool OstTraceGen2( TUint32 aTraceID, TUint aParam1, TInt aParam2 )
--- a/harvester/harvesterserver/traces/CBlacklistMgrTraces.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/traces/CBlacklistMgrTraces.h Mon Jun 28 10:34:53 2010 +0530
@@ -15,17 +15,17 @@
#define CBLACKLISTMGR_REMOVE_ENTRY 0x8a0015
#define CBLACKLISTMGR_REMOVE_EXIT 0x8a0016
#define CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY 0x8a0043
-#define CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT 0x8a0044
#define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY 0x8a0045
-#define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT 0x8a0046
+#define CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT 0x8a0055
+#define CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT 0x8a0056
#define CBLACKLISTMGR_ADDL 0x86000b
#define CBLACKLISTMGR_REMOVE 0x86000c
#define CBLACKLISTMGR_FINDL 0x86000d
#define DUP1_CBLACKLISTMGR_FINDL 0x86000e
#define DUP2_CBLACKLISTMGR_FINDL 0x86000f
-#define CBLACKLISTMGR_ADDTOUNLOADLISTL 0x86002a
#define CBLACKLISTMGR_REMOVEFROMUNLOADLISTL 0x86002b
-#define CBLACKLISTMGR_FINDFROMUNLOADLISTL 0x86002c
+#define CBLACKLISTMGR_ADDTODONTLOADLISTL 0x86002f
+#define CBLACKLISTMGR_FINDINDONTLOADLISTL 0x860030
inline TBool OstTraceGen2( TUint32 aTraceID, TUint aParam1, TInt aParam2 )
--- a/harvester/harvesterserver/traces/CIndexingManagerTraces.h Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/traces/CIndexingManagerTraces.h Mon Jun 28 10:34:53 2010 +0530
@@ -8,8 +8,13 @@
#define CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY 0x8a0047
#define CINDEXINGMANAGER_UPDATECONTENTINFODBL_EXIT 0x8a0048
-#define CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY 0x8a0049
-#define CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT 0x8a004a
+#define CINDEXINGMANAGER_LOADPLUGINSL_ENTRY 0x8a0057
+#define CINDEXINGMANAGER_LOADPLUGINSL_EXIT 0x8a0058
+#define CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY 0x8a0059
+#define CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT 0x8a005a
+#define DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY 0x8a005b
+#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY 0x8a005c
+#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT 0x8a005d
#define CINDEXINGMANAGER_RUNL 0x860010
#define DUP1_CINDEXINGMANAGER_RUNL 0x860011
#define DUP2_CINDEXINGMANAGER_RUNL 0x860012
@@ -20,9 +25,6 @@
#define DUP7_CINDEXINGMANAGER_RUNL 0x860017
#define DUP8_CINDEXINGMANAGER_RUNL 0x860018
#define DUP9_CINDEXINGMANAGER_RUNL 0x860019
-#define CINDEXINGMANAGER_LOADPLUGINSL 0x86001a
-#define DUP1_CINDEXINGMANAGER_LOADPLUGINSL 0x86001b
-#define DUP2_CINDEXINGMANAGER_LOADPLUGINSL 0x86001c
#define CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001d
#define DUP1_CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001e
#define DUP2_CINDEXINGMANAGER_ADDHARVESTINGQUEUE 0x86001f
@@ -33,6 +35,9 @@
#define CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860024
#define DUP1_CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860025
#define DUP2_CINDEXINGMANAGER_HARVESTINGCOMPLETED 0x860026
+#define DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860031
+#define CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860032
+#define DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL 0x860033
#ifndef __KERNEL_MODE__
--- a/harvester/harvesterserver/traces/fixed_id.definitions Fri Jun 11 14:43:47 2010 +0300
+++ b/harvester/harvesterserver/traces/fixed_id.definitions Mon Jun 28 10:34:53 2010 +0530
@@ -3,36 +3,36 @@
[GROUP]TRACE_NORMAL=0x86
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDL_ENTRY=0x5
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDL_EXIT=0x6
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY=0x3d
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT=0x3e
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTODONTLOADLISTL_ENTRY=0x4f
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTODONTLOADLISTL_EXIT=0x50
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CONSTRUCTL_ENTRY=0x3
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CONSTRUCTL_EXIT=0x4
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY=0xd
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_EXIT=0xe
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_ENTRY=0x4b
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEBLACKLISTCOLUMNSETLC_EXIT=0x4c
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDBL_ENTRY=0xb
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDBL_EXIT=0xc
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY=0x3b
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT=0x3c
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY=0x41
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT=0x42
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_ENTRY=0x4d
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEDONTLOADCOLUMNSETLC_EXIT=0x4e
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDINDONTLOADLISTL_ENTRY=0x53
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDINDONTLOADLISTL_EXIT=0x54
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_NEWL_ENTRY=0x1
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_NEWL_EXIT=0x2
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY=0x3f
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT=0x40
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_ENTRY=0x51
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL_EXIT=0x52
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVE_ENTRY=0x7
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVE_EXIT=0x8
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_UPDATEL_ENTRY=0x9
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_UPDATEL_EXIT=0xa
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDL_ENTRY=0x13
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDL_EXIT=0x14
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTODONTLOADLISTL_EXIT=0x55
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_ENTRY=0x43
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT=0x44
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_CONSTRUCTL_ENTRY=0x11
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_CONSTRUCTL_EXIT=0x12
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_NEWL_ENTRY=0xf
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_NEWL_EXIT=0x10
+[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMDONTLOADLISTL_EXIT=0x56
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_ENTRY=0x45
-[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT=0x46
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVE_ENTRY=0x15
[TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVE_EXIT=0x16
[TRACE]TRACE_FLOW[0x8A]_CCONTENTINFODB_ADDL_ENTRY=0x25
@@ -71,46 +71,50 @@
[TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_NEWL_EXIT=0x18
[TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_SETNAMEL_ENTRY=0x1d
[TRACE]TRACE_FLOW[0x8A]_CCONTENTINFO_SETNAMEL_EXIT=0x1e
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL_ENTRY=0x5c
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL_EXIT=0x5d
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADPLUGINSL_ENTRY=0x57
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_LOADPLUGINSL_EXIT=0x58
[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATECONTENTINFODBL_ENTRY=0x47
[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATECONTENTINFODBL_EXIT=0x48
-[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY=0x49
-[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT=0x4a
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEDONTLOADLISTL_ENTRY=0x59
+[TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEDONTLOADLISTL_EXIT=0x5a
+[TRACE]TRACE_FLOW[0x8A]_DUP1_CINDEXINGMANAGER_GETPLUGINLOADSTATUSL_ENTRY=0x5b
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_ADDL=0x2
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_CONSTRUCTL=0x1
-[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMUNLOADLISTL=0x29
+[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMDONTLOADLISTL=0x2e
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDL=0x9
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDWITHVERSIONL=0x6
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVE=0x3
-[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x27
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_UPDATEL=0x8
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDL=0xb
-[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTOUNLOADLISTL=0x2a
-[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDFROMUNLOADLISTL=0x2c
+[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTODONTLOADLISTL=0x2f
+[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDINDONTLOADLISTL=0x30
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDL=0xd
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_REMOVE=0xc
[TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL=0x2b
[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1d
[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x24
-[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADPLUGINSL=0x1a
+[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x32
[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x21
[TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_RUNL=0x10
[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_FINDL=0xa
[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_FINDWITHVERSIONL=0x7
[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVE=0x4
-[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x28
[TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTMGR_FINDL=0xe
[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1e
[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x25
-[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADPLUGINSL=0x1b
+[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x31
[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x22
[TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_RUNL=0x11
[TRACE]TRACE_NORMAL[0x86]_DUP2_CBLACKLISTDB_REMOVE=0x5
[TRACE]TRACE_NORMAL[0x86]_DUP2_CBLACKLISTMGR_FINDL=0xf
[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x1f
[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_HARVESTINGCOMPLETED=0x26
-[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADPLUGINSL=0x1c
+[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADHARVESTERPLUGINL=0x33
[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_REMOVEHARVESTINGQUEUE=0x23
[TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_RUNL=0x12
+[TRACE]TRACE_NORMAL[0x86]_DUP3_CBLACKLISTDB_REMOVEFROMDONTLOADLISTL=0x2d
[TRACE]TRACE_NORMAL[0x86]_DUP3_CINDEXINGMANAGER_ADDHARVESTINGQUEUE=0x20
[TRACE]TRACE_NORMAL[0x86]_DUP3_CINDEXINGMANAGER_RUNL=0x13
[TRACE]TRACE_NORMAL[0x86]_DUP4_CINDEXINGMANAGER_RUNL=0x14
@@ -119,3 +123,25 @@
[TRACE]TRACE_NORMAL[0x86]_DUP7_CINDEXINGMANAGER_RUNL=0x17
[TRACE]TRACE_NORMAL[0x86]_DUP8_CINDEXINGMANAGER_RUNL=0x18
[TRACE]TRACE_NORMAL[0x86]_DUP9_CINDEXINGMANAGER_RUNL=0x19
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_ENTRY=0x3d
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_ADDTOUNLOADLISTL_EXIT=0x3e
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_ENTRY=0xd
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATECOLUMNSETLC_EXIT=0xe
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_ENTRY=0x3b
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_CREATEUNLOADCOLUMNSETLC_EXIT=0x3c
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_ENTRY=0x41
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_FINDFROMUNLOADLISTL_EXIT=0x42
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_ENTRY=0x3f
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL_EXIT=0x40
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_ADDTOUNLOADLISTL_EXIT=0x44
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CBLACKLISTMGR_REMOVEFROMUNLOADLISTL_EXIT=0x46
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_ENTRY=0x49
+[[OBSOLETE]][TRACE]TRACE_FLOW[0x8A]_CINDEXINGMANAGER_UPDATEUNLOADLISTL_EXIT=0x4a
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_FINDFROMUNLOADLISTL=0x29
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x27
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_ADDTOUNLOADLISTL=0x2a
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CBLACKLISTMGR_FINDFROMUNLOADLISTL=0x2c
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_CINDEXINGMANAGER_LOADPLUGINSL=0x1a
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP1_CBLACKLISTDB_REMOVEFROMUNLOADLISTL=0x28
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP1_CINDEXINGMANAGER_LOADPLUGINSL=0x1b
+[[OBSOLETE]][TRACE]TRACE_NORMAL[0x86]_DUP2_CINDEXINGMANAGER_LOADPLUGINSL=0x1c
--- a/layers.sysdef.xml Fri Jun 11 14:43:47 2010 +0300
+++ b/layers.sysdef.xml Mon Jun 28 10:34:53 2010 +0530
@@ -7,7 +7,8 @@
<layer name="mw_layer">
<module name="searchsrv">
<unit unitID="orgdo.searchsrv.bldinf" mrp="" bldFile="&layer_real_source_path;/group" name="searchsrv_bldinf"/>
- <unit unitID="orgdo.searchsrv.pro" mrp="" bldFile="&layer_real_source_path;/qcpix" name="searchsrv_pro" proFile="qcpix.pro"/>
+ <unit unitID="orgdo.searchsrv_harvestersrv.pro" mrp="" bldFile="&layer_real_source_path;/harvester/harvesterserver" name="harvestersrv_pro" proFile="harvester.pro"/>
+ <unit unitID="orgdo.searchsrv_qcpix.pro" mrp="" bldFile="&layer_real_source_path;/qcpix" name="searchsrv_pro" proFile="qcpix.pro"/>
</module>
</layer>
</systemModel>
--- a/qcpix/qcpixsearchclient.pro Fri Jun 11 14:43:47 2010 +0300
+++ b/qcpix/qcpixsearchclient.pro Mon Jun 28 10:34:53 2010 +0530
@@ -41,6 +41,8 @@
VERSION = 1.0.0
LIBS += -leuser -lcpixsearchclient
+ INCLUDEPATH += $$APP_LAYER_SYSTEMINCLUDE
+
HEADERS += src/platform/s60/inc/qcpixdocumentprivate.h \
src/platform/s60/inc/qcpixdocumentfieldprivate.h \
src/platform/s60/inc/qcpixsearcherprivate.h \
--- a/qcpix/tsrc/qtcpixunittests/qtcpixunittests.pro Fri Jun 11 14:43:47 2010 +0300
+++ b/qcpix/tsrc/qtcpixunittests/qtcpixunittests.pro Mon Jun 28 10:34:53 2010 +0530
@@ -39,7 +39,9 @@
"data/segments \epoc32\winscw\c\private\2001f6f7\indexing\indexdb\root\contact\_0\segments" \
"data/cpixreg.txt \epoc32\winscw\c\private\2001f6f7\cpixreg.txt" \
"data/config.ini \epoc32\winscw\c\system\data\config.ini"
-
+
+ INCLUDEPATH += $$APP_LAYER_SYSTEMINCLUDE
+
TARGET.CAPABILITY = ALL -TCB -DRM
TARGET.UID3 = 0xE76C2AE7
LIBS += -lqcpixsearchclient
--- a/rom/cpix_mw.iby Fri Jun 11 14:43:47 2010 +0300
+++ b/rom/cpix_mw.iby Mon Jun 28 10:34:53 2010 +0530
@@ -54,6 +54,8 @@
data=DATAZ_\PRIVATE\101f875a\import\20029ab8.rsc private\101f875a\import\20029ab8.rsc
+data=DATAZ_\resource\cpix\analyzer.loc \resource\cpix\analyzer.loc
+data=DATAZ_\resource\cpix\thaidict.sm \resource\cpix\thaidict.sm
data=ZSYSTEM\install\cpixsearch_stub.sis System\Install\cpixsearch_stub.sis
data=DATAZ_\private\10202be9\2001f6fb.cre private\10202be9\2001f6fb.cre
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/data/resource/analyzer.loc Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,92 @@
+/**
+ * This file defines the default analyzer used for CPix.
+ * The default analyzer should change its behaviour depending on the
+ * locale in use.
+ *
+ * The codes used in the switch should follow the ISO 639-1 standard,
+ * or the ISO 639-2 standard when 2-letter codes are not available.
+ *
+ * WARNING: It is not guaranteed that on the Symbian platform the language
+ * codes are translated to ISO codes. Symbian platform codes of the form
+ * 's29' for Taiwan Chinese or 's1' for English are also supported.
+ *
+ * Refer to the CPiX documentation for this file's syntax.
+ */
+
+config_switch {
+
+ /**
+ * The prefiltering is done e.g, when searching "$cat", "$cat.cal" etc.
+ */
+ case 'prefix':
+ locale_switch {
+
+ // French
+ case 'fr': stdtokens>stdfilter>lowercase>elision(fr);
+
+ // Default
+ default: stdtokens>stdfilter>lowercase;
+ };
+
+ /**
+ * Analyzers used for queries and indexing
+ */
+ default:
+
+ locale_switch {
+
+ // French
+ case 'fr': stdtokens>stdfilter>lowercase>elision(fr)>stop(fr);
+
+ // Hebrew
+ case 'he':
+ config_switch {
+ case 'query': // do not use prefix filter when searching
+ stdtokens>stdfilter>lowercase>stop(en);
+ default: // use prefix filter only when indexing
+ stdtokens>stdfilter>lowercase>prefix(he)>stop(en);
+ };
+
+ // English
+ case 'en': stdtokens>stdfilter>lowercase>stop(en);
+
+ // Thai
+ case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);
+
+ /*
+ * Far east asian languages
+ *
+ * note: Hong Kong and Taiwanese Chinese are not differentiated.
+ * What are their language codes?
+ *
+ * note: Should we also include ISO 639-2 codes here?
+ *
+ * note: Japanese is no longer supported. Let's use ngram for it anyway.
+ */
+ case 'jp', 'zh', 'ch':
+ ngram(1)>lowercase>stop(en);
+
+ /**
+ * Korean
+ *
+ * note: Because of special optimizations, different analyzers
+ * are used for queries and indexing
+ *
+ *
+ * WARNING: Korean analyzer is not properly tested (!)
+ */
+ case 'ko':
+ config_switch {
+ case 'query': koreanquery>lowercase>stop(en);
+ default: korean>lowercase>stop(en);
+ };
+
+ /**
+ * Default option; used for most languages and should work 'ok'
+ * for most alphabetic writing systems.
+ *
+ * note: Should we include english stop word list?
+ */
+ default: stdtokens>stdfilter>lowercase;
+ };
+}
Binary file searchengine/cpix/cpix/data/resource/thaidict.sm has changed
--- a/searchengine/cpix/cpix/group/bld.inf Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/group/bld.inf Mon Jun 28 10:34:53 2010 +0530
@@ -22,3 +22,10 @@
PRJ_MMPFILES
cpix.mmp
+PRJ_EXPORTS
+
+../data/resource/analyzer.loc /epoc32/data/z/resource/cpix/analyzer.loc
+../data/resource/thaidict.sm /epoc32/data/z/resource/cpix/thaidict.sm
+../data/resource/analyzer.loc /epoc32/release/winscw/udeb/z/resource/cpix/analyzer.loc
+../data/resource/thaidict.sm /epoc32/release/winscw/udeb/z/resource/cpix/thaidict.sm
+
--- a/searchengine/cpix/cpix/group/cpix.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/group/cpix.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -54,6 +54,7 @@
SOURCE common/cloners.cpp
SOURCE ifieldfilter.cpp
SOURCE prefixopt.cpp
+SOURCE localization.cpp
SOURCE filters/quadfilter.cpp
SOURCE fileparser/fileparser.cpp
SOURCE fileparser/textfileparser.cpp
@@ -73,12 +74,23 @@
SOURCE spi/s60/audiometadata.cpp
USERINCLUDE ../../../../searchsrv_plat/cpix_utility_api/inc
+SOURCEPATH ../src
+SOURCE customanalyzer.cpp
+SOURCE prefixqueryparser.cpp
+SOURCE queryparser.cpp
+SOURCEPATH ../src/spi/s60
+SOURCE s60locale.cpp
+SOURCEPATH ../src/spi
+SOURCE locale.cpp
+SOURCEPATH ../src/common
+
USERINCLUDE ../inc/public
USERINCLUDE ../inc/private
USERINCLUDE ../../../oss/cl/clucene/src
USERINCLUDE ../../../oss/sb/snowball/include
USERINCLUDE ../../../util/cpixtools/inc/public
USERINCLUDE ../../../oss/cl/clucene/src/CLucene
+USERINCLUDE ../../../oss/loc/analysis/inc/public
USERINCLUDE ../../../cpix/cpix/src
MW_LAYER_SYSTEMINCLUDE
@@ -94,8 +106,9 @@
//LIBRARY libm.lib
//LIBRARY euser.lib
-//STATICLIBRARY libclucene.lib
-//STATICLIBRARY libstemmer.lib
+STATICLIBRARY libclucene.lib
+STATICLIBRARY libstemmer.lib
+STATICLIBRARY libanalysis.lib
// For SPI
//LIBRARY efsrv.lib
--- a/searchengine/cpix/cpix/inc/private/analyzer.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/analyzer.h Mon Jun 28 10:34:53 2010 +0530
@@ -32,6 +32,7 @@
namespace lucene {
namespace analysis {
class TokenStream;
+ class Analyzer;
}
namespace util {
class Reader;
@@ -40,9 +41,8 @@
namespace Cpix
{
- namespace AnalyzerExp {
- class Piping;
- }
+ class InitParams;
+
struct TokenizerClassEntry;
struct FilterClassEntry;
@@ -50,13 +50,80 @@
class Document;
class Field;
- class DocumentFieldIterator;
+ class DocumentFieldIterator;
+
+ class LocaleSwitchStreamFactory;
+ class CustomAnalyzer;
+
+ namespace AnalyzerExp {
+ class LocaleSwitch;
+ class Piping;
+ }
}
// Class definitions
namespace Cpix
{
+ class Analysis {
+ // Provides static access to the analyzers declared below (default, query and prefix).
+ public:
+
+ /**
+ * Initializes the Analysis. Uses the init parameters' resource dir
+ * to locate & load analysis & localization related resources.
+ *
+ * NOTE: The init is made to work in a fault-tolerant fashion.
+ * If needed resource files are not found, a warning is logged
+ * (if logging is enabled) and some meaningful default is used instead.
+ * If logging is not enabled, init _may fail silently_.
+ */
+ static void init(InitParams& ip);
+
+ /**
+ * Releases all resources that are used by analysis.
+ */
+ static void shutdown();
+
+ /**
+ * Returns the default analyzer. This analyzer is likely localized
+ * and will analyze differently depending on which locale is currently
+ * active.
+ */
+ static lucene::analysis::Analyzer& getDefaultAnalyzer();
+
+ /**
+ * Returns the query analyzer. This analyzer is likely localized
+ * and will analyze differently depending on which locale is currently
+ * active.
+ */
+ static lucene::analysis::Analyzer& getQueryAnalyzer();
+
+ /**
+ * Returns the prefix (query filter) analyzer. This analyzer is likely localized
+ * and will analyze differently depending on which locale is currently
+ * active.
+ */
+ static lucene::analysis::Analyzer& getPrefixAnalyzer();
+
+ private:
+ // Private constructor; presumably only invoked from init() - confirm
+ Analysis(InitParams& ip);
+ // Presumably parses the analyzer definition file at 'path' into a Piping expression - confirm
+ std::auto_ptr<AnalyzerExp::Piping> parse(std::string path);
+ // Instance pointer; presumably set by init() and released by shutdown() - confirm
+ static Analysis* theInstance_;
+ // Owned analyzers; presumably the objects returned by the three getters above - confirm
+ std::auto_ptr<CustomAnalyzer> defaultAnalyzer_;
+
+ std::auto_ptr<CustomAnalyzer> queryAnalyzer_;
+
+ std::auto_ptr<CustomAnalyzer> prefixAnalyzer_;
+
+ };
+
+
+
/**
* This is a special filter that is used to generate prefixes
* of the searched words.
@@ -167,68 +234,7 @@
lucene::analysis::Analyzer* analyzer_;
};
-
-
- /**
- * Forms a series of analyzers, tokenizers and filters based on textual
- * analyzer definition.
- */
- class CustomAnalyzer : public lucene::analysis::Analyzer
- {
- public:
-
- /**
- * Constructs a custom analyzer based on given definition string.
- * See CPix documentation to see, how proper analyzer definition
- * strings ought to be formed.
- *
- * Throws on failure, e.g. if definition parsing fails, if
- * declared identifiers are not found and if parameters are wrong.
- */
- CustomAnalyzer(const wchar_t* definition);
-
- /**
- * For internal usage only. Constructs analyzer from a parsed
- * definition string or from a fragment of a parsed definition
- * string.
- */
- CustomAnalyzer(const Cpix::AnalyzerExp::Piping& definition);
-
- virtual ~CustomAnalyzer();
-
- /**
- * Token stream is based on the analyzer definition string
- */
- lucene::analysis::TokenStream*
- tokenStream(const wchar_t * fieldName,
- lucene::util::Reader * reader);
-
- private:
-
- /**
- * Setups the TokenStream factory based on the analyzer
- * definition stored in the piping
- */
- void setup(const Cpix::AnalyzerExp::Piping& definition);
-
- /**
- * Return TokenizerClassEntry, which matches the given
- * identifier.
- */
- static TokenizerClassEntry&
- CustomAnalyzer::getTokenizerEntry(std::wstring id);
-
- /**
- * Return FilterClassEntry, which matches the given
- * identifier.
- */
- static FilterClassEntry&
- CustomAnalyzer::getFilterEntry(std::wstring id);
-
- private:
-
- std::auto_ptr<TokenStreamFactory> factory_;
- };
+ std::auto_ptr<lucene::analysis::Analyzer> CreateDefaultAnalyzer();
}
--- a/searchengine/cpix/cpix/inc/private/analyzerexp.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/analyzerexp.h Mon Jun 28 10:34:53 2010 +0530
@@ -43,20 +43,21 @@
namespace AnalyzerExp {
/** Identifiers for the tokens. Extends the list present in the cpixparsetools.h */
- enum TokenType {
- TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED, // 8
- TOKEN_RIGHT_BRACKET,
- TOKEN_COMMA, // 10
- TOKEN_PIPE,
- TOKEN_SWITCH,
- TOKEN_CASE,
- TOKEN_DEFAULT,
- TOKEN_LEFT_BRACE, // 15
- TOKEN_RIGHT_BRACE,
- TOKEN_COLON,
- TOKEN_TERMINATOR
- };
-
+
+ extern const wchar_t TOKEN_LEFT_BRACKET[];
+ extern const wchar_t TOKEN_RIGHT_BRACKET[];
+ extern const wchar_t TOKEN_COMMA[];
+ extern const wchar_t TOKEN_PIPE[];
+ extern const wchar_t TOKEN_SWITCH[];
+ extern const wchar_t TOKEN_LOCALE_SWITCH[];
+ extern const wchar_t TOKEN_CONFIG_SWITCH[];
+ extern const wchar_t TOKEN_CASE[];
+ extern const wchar_t TOKEN_DEFAULT[];
+ extern const wchar_t TOKEN_LEFT_BRACE[];
+ extern const wchar_t TOKEN_RIGHT_BRACE[];
+ extern const wchar_t TOKEN_COLON[];
+ extern const wchar_t TOKEN_TERMINATOR[];
+
/**
* Tokenizer used for analyzer definition strings' lexical analysis
*/
@@ -70,6 +71,8 @@
virtual Cpt::Lex::TokenizerState consume(const wchar_t* cursor);
private: // data
Cpt::Lex::WhitespaceTokenizer ws_;
+ Cpt::Lex::LineCommentTokenizer lcomment_;
+ Cpt::Lex::SectionCommentTokenizer scomment_;
Cpt::Lex::IdTokenizer ids_;
Cpt::Lex::StrLitTokenizer strlits_;
Cpt::Lex::IntLitTokenizer intlits_;
@@ -79,6 +82,8 @@
Cpt::Lex::SymbolTokenizer cm_; // comma
Cpt::Lex::SymbolTokenizer pp_; // pipe symbol '>'
Cpt::Lex::SymbolTokenizer sw_; // switch
+ Cpt::Lex::SymbolTokenizer lsw_; // locale switch
+ Cpt::Lex::SymbolTokenizer csw_; // config switch
Cpt::Lex::SymbolTokenizer cs_; // case
Cpt::Lex::SymbolTokenizer df_; // default
Cpt::Lex::SymbolTokenizer lbc_; // left brace
@@ -198,18 +203,18 @@
};
/**
- * A case of switch statement. Of form: "case 'field':
+ * A case of switch statement. Of form: "case 'case':
* tokenizer>filter>filter;"
*/
class Case : public Exp {
public:
- Case(const std::vector<std::wstring> & fields,
+ Case(const std::vector<std::wstring> & cases,
std::auto_ptr<Piping> piping);
virtual ~Case();
- const std::vector<std::wstring>& fields() const;
+ const std::vector<std::wstring>& cases() const;
const Piping& piping() const;
private:
- std::vector<std::wstring> fields_;
+ std::vector<std::wstring> cases_;
std::auto_ptr<Piping> piping_;
};
@@ -228,7 +233,38 @@
Cpt::auto_vector<Case> cases_;
std::auto_ptr<Piping> def_;
};
- std::auto_ptr<Piping> ParsePiping(Cpt::Parser::Lexer& lexer);
+
+ /**
+ * LocaleSwitch expression
+ */
+ class LocaleSwitch : public Exp {
+ public:
+ LocaleSwitch(Cpt::auto_vector<Case> & cases,
+ std::auto_ptr<Piping> def);
+ virtual ~LocaleSwitch();
+ const std::vector<Case*>& cases() const;
+ const Piping& def() const;
+ public:
+ Cpt::auto_vector<Case> cases_;
+ std::auto_ptr<Piping> def_;
+ };
+
+ /**
+ * ConfigSwitch expression
+ */
+ class ConfigSwitch : public Exp {
+ public:
+ ConfigSwitch(Cpt::auto_vector<Case> & cases,
+ std::auto_ptr<Piping> def);
+ virtual ~ConfigSwitch();
+ const std::vector<Case*>& cases() const;
+ const Piping& def() const;
+ public:
+ Cpt::auto_vector<Case> cases_;
+ std::auto_ptr<Piping> def_;
+ };
+
+ std::auto_ptr<Piping> ParsePiping(const wchar_t* definition);
}
}
--- a/searchengine/cpix/cpix/inc/private/cluceneext.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/cluceneext.h Mon Jun 28 10:34:53 2010 +0530
@@ -91,6 +91,68 @@
{
namespace util
{
+ /**
+ * Frees one reference of a CLucene object without destroying it.
+ * Used to pass newly created Terms for queries. Queries are allowed
+ * to take full ownership of them.
+ */
+ template <class T>
+ inline T* freeref(T* t) {
+ t->__cl_decref();
+ return t;
+ }
+ template<class T>
+ class auto_ref {
+
+ public:
+ /**
+ * NOTE: Constructing auto_ref does not increase the referred
+ * item's reference count.
+ */
+ auto_ref(T* ref) : ref_( ref ) {}
+
+ auto_ref(auto_ptr<T> ref) : ref_( ref.release() ) {}
+
+
+ void reset(auto_ptr<T> ref) {
+ _CLDECDELETE( ref_ );
+ ref_ = ref.release();
+ }
+
+ void reset(T* ref) {
+ _CLDECDELETE( ref_ );
+ ref_ = ref;
+ }
+
+ operator auto_ptr<T> () {
+ return auto_ptr<T>(release());
+ }
+
+ T* release() {
+ T* ret = ref_;
+ ref_ = 0;
+ return ret;
+ }
+
+ /**
+ * Decreases referred item's reference count
+ */
+ ~auto_ref() {
+ _CLDECDELETE( ref_ );
+ }
+
+ T* get() {
+ return ref_;
+ }
+
+ T* operator->() {
+ return ref_;
+ }
+
+ private:
+
+ T* ref_;
+ };
/**
* This class is almost like clucene::util::FileReader,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/customanalyzer.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,119 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#ifndef CUSTOMANALYZER_H_
+#define CUSTOMANALYZER_H_
+
+// Forward declarations
+namespace Cpt {
+ namespace Parser {
+ class Lexer;
+ }
+}
+namespace Cpix {
+ namespace AnalyzerExp {
+ class Piping;
+ class LocaleSwitch;
+ class ConfigSwitch;
+ }
+ struct TokenizerClassEntry;
+ struct FilterClassEntry;
+}
+
+
+namespace Cpix {
+
+ /**
+ * Creates token stream for the given reader and fieldName.
+ * This class is in many ways similar to the CLucene analyzer class
+ * definition.
+ */
+ class TokenStreamFactory {
+ public:
+ virtual ~TokenStreamFactory();
+ virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName,
+ lucene::util::Reader * reader) = 0;
+ };
+
+ /**
+ * Forms a series of analyzers, tokenizers and filters based on textual
+ * analyzer definition.
+ */
+ class CustomAnalyzer : public lucene::analysis::Analyzer, public TokenStreamFactory
+ {
+ public:
+
+ /**
+ * Constructs a custom analyzer based on given definition string.
+ * See CPix documentation to see, how proper analyzer definition
+ * strings ought to be formed.
+ *
+ * Throws on failure, e.g. if definition parsing fails, if
+ * declared identifiers are not found and if parameters are wrong.
+ */
+ CustomAnalyzer(const wchar_t* definition, const wchar_t* config = NULL);
+
+ /**
+ * For internal usage only. Constructs analyzer from a parsed
+ * definition string or from a fragment of a parsed definition
+ * string.
+ */
+ CustomAnalyzer(const Cpix::AnalyzerExp::Piping& definition, const wchar_t* config = NULL);
+
+ virtual ~CustomAnalyzer();
+
+ /**
+ * Token stream is based on the analyzer definition string
+ */
+ lucene::analysis::TokenStream*
+ tokenStream(const wchar_t * fieldName,
+ lucene::util::Reader * reader);
+
+ private:
+
+ /**
+ * Sets up the TokenStream factory based on the analyzer
+ * definition stored in the piping
+ */
+ void setup(const Cpix::AnalyzerExp::Piping& definition, const wchar_t* config);
+
+ /**
+ * Resolves the given config switch expression for the given
+ * config and returns the resulting TokenStreamFactory.
+ */
+ static std::auto_ptr<TokenStreamFactory> resolveConfigSwitch(const Cpix::AnalyzerExp::ConfigSwitch& csw, const wchar_t* config);
+
+ /**
+ * Return TokenizerClassEntry, which matches the given
+ * identifier.
+ */
+ static TokenizerClassEntry& getTokenizerEntry(std::wstring id);
+
+ /**
+ * Return FilterClassEntry, which matches the given
+ * identifier.
+ */
+ static FilterClassEntry& getFilterEntry(std::wstring id);
+
+ private:
+
+ std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+}
+
+#endif /* CUSTOMANALYZER_H_ */
--- a/searchengine/cpix/cpix/inc/private/initparams.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/initparams.h Mon Jun 28 10:34:53 2010 +0530
@@ -340,6 +340,8 @@
// for cpixreg.txt and automatic index paths
std::string cpixDir_;
+ // for localization information
+ std::string resourceDir_;
// log related parameters
std::string logFileBase_;
@@ -402,6 +404,10 @@
void setCpixDir(const char * value);
+ const char * getResourceDir() const;
+ void setResourceDir(const char * value);
+
+
const char * getLogFileBase() const;
void setLogFileBase(const char * value);
--- a/searchengine/cpix/cpix/inc/private/iqrytype.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/iqrytype.h Mon Jun 28 10:34:53 2010 +0530
@@ -23,7 +23,10 @@
#include <list>
#include <string>
+#include "cpixtools.h"
+#include "cpixexc.h"
#include "common/refcountedbase.h"
+#include "cpixparsetools.h"
namespace lucene
{
@@ -267,16 +270,6 @@
class QryCall
{
private:
-
- enum TokenType
- {
- DOLLAR = Cpt::Lex::TOKEN_LAST_RESERVED,
- LESSTHAN,
- GREATERTHAN,
- COMMA,
- LEFTPARENTHESIS,
- RIGHTPARENTHESIS
- };
/**
* This static member (tokenizer_) has const-usage
@@ -296,7 +289,7 @@
* parse().
*/
typedef int State;
- typedef int Symbol;
+ typedef Cpt::Lex::token_type_t Symbol;
typedef std::pair<State, Symbol> StateSymbolPair;
typedef std::map<StateSymbolPair, State> TransitionTable;
static TransitionTable * transitions_;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/localization.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,94 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#ifndef LOCALIZATION_H_
+#define LOCALIZATION_H_
+
+#include <string>
+#include <vector>
+#include "cpixsynctools.h"
+
+namespace Cpix {
+
+ /**
+ * Class that is used for storing and retrieving used locale
+ */
+ class Localization {
+
+ public:
+
+ /**
+ * Returns a list of language names.
+ *
+ * MT safe
+ */
+ std::vector<std::wstring> getLanguageNames();
+
+ /**
+ * Sets the used locale, if locale is set to be "auto",
+ * underlying mechanism will consult environment for
+ * maintaining correct locale.
+ *
+ * MT safe
+ */
+ void setLocale(const char* locale);
+
+ /**
+ * Sets the used locale, if locale is set to be "auto",
+ * underlying mechanism will consult environment for
+ * maintaining correct locale.
+ *
+ * MT safe
+ */
+ void setLocale(const wchar_t* locale);
+
+ public: // static API
+
+ /**
+ * Accessor for the localization singleton instance
+ *
+ * NOTE: Should be called during init. Otherwise, if two threads try
+ * to access localization instance at the same time, memory
+ * leak may result. In this case two singleton instances may
+ * be constructed.
+ */
+ static Localization& instance();
+
+ /**
+ * Shuts down the Localization singleton.
+ */
+ static void shutdown();
+
+ private:
+
+ Localization();
+
+ static Localization* theInstance_;
+
+ private:
+
+ Cpt::Mutex mutex_;
+
+ bool auto_;
+
+ std::vector<std::wstring> languageNames_;
+
+ };
+
+}
+#endif /* LOCALIZATION_H_ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/prefixqueryparser.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,81 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#ifndef PREFIXQUERYPARSER_H_
+#define PREFIXQUERYPARSER_H_
+
+#include <memory>
+
+#include "queryparser.h"
+
+#include "cpixmaindefs.h"
+
+namespace lucene {
+ namespace analysis {
+ class Token;
+ }
+ namespace search {
+ class Query;
+ }
+}
+namespace Cpt {
+ namespace Lex {
+ class Token;
+ }
+}
+
+namespace Cpix {
+
+ /**
+ * Cpix special query parser.
+ *
+ * Potential parameters
+ *
+ * * Target Field (makes sense)
+ * * QueryAnalyzer (doesn't make sense)
+ * * PrefixAnalyzer (doesn't make sense)
+ */
+ class PrefixQueryParser : public IQueryParser {
+
+ public:
+
+ PrefixQueryParser(const wchar_t* field = LCPIX_DEFAULT_FIELD);
+
+ virtual ~PrefixQueryParser();
+
+ virtual std::auto_ptr<lucene::search::Query> parse(const wchar_t* query);
+
+ virtual const wchar_t* getField() const;
+
+ virtual void setDefaultOperator(cpix_QP_Operator op);
+
+ private:
+
+ std::auto_ptr<lucene::search::Query> toQuery(Cpt::Lex::Token word);
+
+ bool usePrefixFor(lucene::analysis::Token& token);
+
+ private:
+
+ std::wstring field_;
+
+ };
+
+}
+
+#endif /* PREFIXQUERYPARSER_H_ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/queryparser.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,130 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#ifndef QUERYPARSER_H_
+#define QUERYPARSER_H_
+
+#include <memory>
+
+#include "CLucene.h"
+#include "CLucene/queryParser/MultiFieldQueryParser.h"
+
+#include "prefixopt.h"
+
+#include "cpixsearch.h"
+
+namespace Cpix {
+
+ class IQueryParser {
+
+ public:
+
+ virtual ~IQueryParser();
+
+ virtual std::auto_ptr<lucene::search::Query> parse(const wchar_t* query) = 0;
+
+ // Not really a fundamental property of a query parser:
+ virtual const wchar_t* getField() const = 0;
+
+ virtual void setDefaultOperator(cpix_QP_Operator op) = 0;
+ };
+
+ class CLuceneQueryParser : public IQueryParser {
+
+ public:
+
+ ~CLuceneQueryParser();
+
+ CLuceneQueryParser(const wchar_t* field, lucene::analysis::Analyzer& analyzer);
+
+ virtual std::auto_ptr<lucene::search::Query> parse(const wchar_t* query);
+
+ virtual const wchar_t* getField() const;
+
+ virtual void setDefaultOperator(cpix_QP_Operator op);
+
+ private:
+
+ std::auto_ptr<lucene::queryParser::QueryParser> parser_;
+
+ };
+
+ class CLuceneMultiFieldQueryParser : public IQueryParser {
+
+ public:
+
+ ~CLuceneMultiFieldQueryParser();
+
+ CLuceneMultiFieldQueryParser(const wchar_t** fields,
+ lucene::analysis::Analyzer& analyzer,
+ lucene::queryParser::BoostMap& boostMap);
+
+ virtual std::auto_ptr<lucene::search::Query> parse(const wchar_t* query);
+
+ virtual const wchar_t* getField() const;
+
+ virtual void setDefaultOperator(cpix_QP_Operator op);
+
+ private:
+
+ std::auto_ptr<lucene::queryParser::QueryParser> parser_;
+
+ };
+
+
+ class PrefixOptQueryParser : public IQueryParser {
+
+ public:
+
+ PrefixOptQueryParser(std::auto_ptr<IQueryParser> parser);
+
+ ~PrefixOptQueryParser();
+
+ virtual std::auto_ptr<lucene::search::Query> parse(const wchar_t* query);
+
+ virtual const wchar_t* getField() const;
+
+ virtual void setDefaultOperator(cpix_QP_Operator op);
+
+ private:
+
+ PrefixOptQueryRewriter prefixOpt_;
+
+ std::auto_ptr<IQueryParser> parser_;
+
+ };
+
+ //
+ // Following factory methods apply necessary optimization wraps
+ // over the query parsers.
+ //
+
+
+ IQueryParser* CreateCLuceneQueryParser(const wchar_t* defaultField,
+ lucene::analysis::Analyzer* analyzer);
+
+ IQueryParser* CreateCLuceneMultiFieldQueryParser(
+ const wchar_t* fields[],
+ lucene::analysis::Analyzer* analyzer,
+ lucene::queryParser::BoostMap* boostMap);
+
+ IQueryParser* CreatePrefixQueryParser(const wchar_t* field);
+
+}
+
+#endif /* QUERYPARSER_H_ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/spi/locale.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,41 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#ifndef LOCALE_H_
+#define LOCALE_H_
+
+#include <string>
+#include <vector>
+
+namespace Cpix {
+
+ namespace Spi {
+
+ extern const wchar_t* SymbianLanguageCodePrefix;
+
+ /**
+ * Returns a vector containing a list of language names that is
+ * ordered by priority.
+ */
+ std::vector<std::wstring> GetLanguageNames();
+
+ }
+}
+
+
+#endif /* LOCALE_H_ */
--- a/searchengine/cpix/cpix/inc/private/wrappertraitsdb.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/private/wrappertraitsdb.h Mon Jun 28 10:34:53 2010 +0530
@@ -106,6 +106,7 @@
namespace Cpix {
class CustomAnalyzer;
class SystemAnalyzer;
+ class IQueryParser;
}
@@ -195,24 +196,12 @@
typedef Cpix::SystemAnalyzer NativeClass;
};
-
-struct cpix_MultiFieldQueryParser : public cpix_QueryParser { };
-
-
template<>
struct WrapperTraits<cpix_QueryParser>
{
- typedef lucene::queryParser::QueryParser NativeClass;
+ typedef Cpix::IQueryParser NativeClass;
};
-
-template<>
-struct WrapperTraits<cpix_MultiFieldQueryParser>
-{
- typedef lucene::queryParser::MultiFieldQueryParser NativeClass;
-};
-
-
template<>
struct WrapperTraits<cpix_BoostMap>
{
--- a/searchengine/cpix/cpix/inc/public/appclass-hierarchy.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/public/appclass-hierarchy.txt Mon Jun 28 10:34:53 2010 +0530
@@ -23,124 +23,150 @@
| [ _mimetype (opt) ] {EStoreYes | EIndexNo}
|
+-- msg
- | [ To ] {EStoreYes | EIndexTokenized} {ExcerptYes, if present}
- | [ From ] {EStoreYes | EIndexTokenized} {ExcerptYes, if present}
- | [ Body ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Folder ] {EStoreYes | EIndexNo} {ExcerptNA}
- | [ Subject ] {EStoreYes | EIndexTokenized} {ExcperptNo}
- |
+ | [ To ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes, if present}
+ | [ From ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes, if present}
+ | [ Body ] {EStoreYes | EIndexTokenized } {ExcerptYes}
+ | [ Folder ] {EStoreYes | EIndexNo} {ExcerptNA}
+ | [ Subject ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Attachment ] {EStoreYes | EIndexTokenized} {ExcerptNo}
|
+-- file
| |
- | |
+ | |
| +-- content
- | | [ Contents ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | | [ BaseName ] {EStoreNo | EIndexTokenized} {ExcerptNo}
- | | [ Extension ] {EStoreNo | EIndexTokenized} {ExcerptNo}
- | |
- | +-- folder
- | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ Extension ] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
+ | | [ Contents ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | | [ BaseName ] {EStoreNo | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | | [ Extension ] {EStoreNo | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | |
+ | +-- folder
+ | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | [ Extension ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
|
|
+-- media
| |
| |
| +-- audio
- | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
- | | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
- | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ Album ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ AlbumArtist ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ OriginalArtist ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ Composer ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ Artist ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | |
+ | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
+ | | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
+ | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | | [ Artist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ Album ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ AlbumArtist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ OriginalArtist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ Composer ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ Author ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Legal ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Track ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | | [ CaptureDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | | [ Duration ] {EStoreYes | EIndexTokenized} {ExcerptNo}
| |
| +-- image
- | | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
- | | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
- | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | | [ DateTimeOrignal] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
- | |
+ | | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
+ | | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
+ | | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Legal ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | | [ DateTimeOrignal ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
| |
| +-- video
- | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
- | [ Title ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
- | [ Extension ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Artist ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Author ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- |
+ | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ MediaId ] {EStoreYes | EIndexUnTokenized} {ExcerptNo}
+ | [ Title ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | [ ItemType ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | [ Extension ] {EStoreYes | EIndexTokenized } {ExcerptYes}
+ | [ Artist ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Author ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Comment ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Copyright ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Genre ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Size ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ ResolutionUnit ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Legal ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Track ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ CaptureDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | [ LastModifiedDate ] {EStoreYes | EIndexUnTokenized} {ExcerptYes}
+ | [ Duration ] {EStoreYes | EIndexTokenized} {ExcerptNo}
|
+-- contact
/* The order of fields in excerpt is as below. The order in this case
* is the order of fields shown when you 'Edit' the contact.
*/
- | [ GivenName ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ FamilyName ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ PhoneNumber ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ EMail ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ SIPID ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ CompanyName ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ JobTitle ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Note ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ GivenName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | [ FamilyName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | [ PhoneNumber ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ EMail ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ SIPID ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ CompanyName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ JobTitle ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Note ] {EStoreYes | EIndexTokenized} {ExcerptYes}
/* The following fields are not displayed when 'Edit'-ing the contact.
* The order here is arbitrary.
*/
- | [ Address ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ SecondName ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Suffix ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ URL ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ PostOffice ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ ExtendedAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Locality ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Region ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ PostCode ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Country ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Spouse ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Children ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Class ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Prefix ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ AdditionalName ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Fax ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ GivenNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ FamilyNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ CompanyNamePronunciation ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Address ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ SecondName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Suffix ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ URL ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ PostOffice ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ ExtendedAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Locality ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Region ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ PostCode ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Country ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Spouse ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Children ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Class ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Prefix ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ AdditionalName ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Fax ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Assistant ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ Department ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptYes}
+ | [ IMAddress ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ ServiceProvider ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Birthday ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Anniversary ] {EStoreYes | EIndexTokenized} {ExcerptNo}
|
|
+-- calendar
- | [ Summary ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ Location ] {EStoreYes | EIndexTokenized} {ExcerptYes}
- | [ StartTime ] (YYYYMMDDHHSS) (TimeZone UTC) {EStoreYes | EIndexUnTokenized} {ExcerptNA}
- | [ EndTime ] (YYYYMMDDHHSS) (TimeZone UTC) {EStoreYes | EIndexUnTokenized} {ExcerptNA}
+ | [ Summary ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Description ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Location ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ StartTime ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} (YYYY MM DD HH SS) (TimeZone UTC)
+ | [ EndTime ] {EStoreYes | EIndexUnTokenized} {ExcerptNA} (YYYY MM DD HH SS) (TimeZone UTC)
+ | [ Priority ] {EStoreYes | EIndexTokenized} {ExcerptNo}
|
|
+-- bookmark
- | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ Url ] {EStoreYes | EIndexTokenized} {ExcerptYes}
+ | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Url ] {EStoreYes | EIndexTokenized} {ExcerptYes}
|
|
+-- applications
- | [ Name ] {EStoreYes | EIndexTokenized} {ExcerptNo}
- | [ Uid ] {EStoreYes | EIndexTokenized | ENoAggregate} {ExcerptNo}
- | [ Path ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Name ] {EStoreYes | EIndexTokenized | EIndexFreeText} {ExcerptNo}
+ | [ Uid ] {EStoreYes | EIndexTokenized | ENoAggregate} {ExcerptNo}
+ | [ Path ] {EStoreYes | EIndexTokenized} {ExcerptNo}
|
|
+-- notes
- | [ Date ] (YYYYMMDDHHSS) {EStoreYes | EIndexUnTokenized} {ExcerptNA}
- | [ Memo ] {EStoreYes | EIndexTokenized} {ExcerptNo}
+ | [ Date ] {EStoreYes | EIndexUnTokenized} {ExcerptNA}
+ | [ Memo ] {EStoreYes | EIndexTokenized} {ExcerptNo}
For instance, a document for an email message should have the
--- a/searchengine/cpix/cpix/inc/public/cpixidxdb.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/public/cpixidxdb.h Mon Jun 28 10:34:53 2010 +0530
@@ -132,8 +132,21 @@
* Initializes an excerpt processor internal state instance.
*/
void cpix_init_EPIState(cpix_EPIState * state);
-
-
+
+ extern const char* cpix_LOCALE_AUTO;
+
+ /**
+ * Sets the locale used by CPix. Locale is used in indexing and
+ * searching for text's lexical analysis. Text of different languages
+ * may be treated differently. At this point, the locale
+ * should only hold a language code following ISO 639-1 two letter
+ * format or ISO 639-2 three letter format, if two letter format is
+ * not available. If cpix_LOCALE_AUTO is given, locale is left to
+ * be determined automatically by cpix.
+ *
+ * @param locale the new locale. Should be a language code of ISO 639-1 standard
+ */
+ void cpix_SetLocale(cpix_Result* result, const char* locale);
/**
* A simple utility function getting the first couple of words of
--- a/searchengine/cpix/cpix/inc/public/cpixinit.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/public/cpixinit.h Mon Jun 28 10:34:53 2010 +0530
@@ -102,7 +102,20 @@
void cpix_InitParams_setCpixDir(cpix_InitParams * thisIp,
const char * value);
-
+ /**
+ * Gets / sets property "resourceDir".
+ *
+ * ResourceDir is the path to the directory where cpix should look for:
+ *
+ * (a) Localization data
+ *
+ * Must not be NULL or empty string.
+ *
+ * Default value is cf DEFAULT_CPIX_DIR in cfg/indevicecfg.h.
+ */
+ const char * cpix_InitParams_getResourceDir(cpix_InitParams * thisIp);
+ void cpix_InitParams_setResourceDir(cpix_InitParams * thisIp,
+ const char * value);
/**
* Gets / sets property "logFileBase".
--- a/searchengine/cpix/cpix/inc/public/cpixsearch.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/inc/public/cpixsearch.h Mon Jun 28 10:34:53 2010 +0530
@@ -148,7 +148,10 @@
const wchar_t * fieldName,
cpix_Analyzer * analyzer);
-
+
+ cpix_QueryParser *
+ cpix_CreatePrefixQueryParser(cpix_Result * result,
+ const wchar_t * fieldName);
/**
* Constructs a special type of query parser, a multi-field query
--- a/searchengine/cpix/cpix/src/analyzer.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/analyzer.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -15,30 +15,36 @@
*
*/
-
-#include "CLucene.h"
-#include "CLucene/analysis/AnalysisHeader.h"
-#include "CLucene/analysis/Analyzers.h"
-
-#include "analyzer.h"
-#include "analyzerexp.h"
-#include "cpixanalyzer.h"
-#include "cluceneext.h"
-
-#include "cpixexc.h"
-#include "cpixparsetools.h"
-
+// general utilities
#include "wchar.h"
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
+#include <fstream>
+#include <algorithm>
-#include "document.h"
+// clucene
+#include "CLucene.h"
+
+// support
+#include "cpixparsetools.h"
+#include "cpixfstools.h"
-#include "indevicecfg.h"
+// internal
+#include "analyzer.h"
+#include "cpixanalyzer.h"
+#include "cpixexc.h"
+#include "document.h"
+#include "cluceneext.h"
+#include "indevicecfg.h"
+#include "initparams.h"
+#include "thaianalysis.h"
-#include "initparams.h"
+#include "analyzerexp.h"
+#include "customanalyzer.h"
+#include "common/cpixlog.h"
+
namespace
{
const char AGGR_NONFILEREADERPROXY_ERR[]
@@ -46,11 +52,111 @@
const char AGGR_STREAMREADER_ERR[]
= "Aggregating streamValue-fields not implemented";
+ // File name of the Thai dictionary; Analysis::init() appends it to the resource dir.
+ const char THAI_LANGUAGE_FILE[]
+ = "thaidict.sm";
+ // File name of the analyzer definition; the Analysis constructor appends it to the resource dir.
+ const char ANALYZER_FILE[]
+ = "analyzer.loc";
+ // Config names passed to CustomAnalyzer for the default, query and prefix analyzers.
+ const wchar_t DEFAULT_ANALYZER_CONFIG[]
+ = L"default";
+
+ const wchar_t QUERY_ANALYZER_CONFIG[]
+ = L"query";
+
+ const wchar_t PREFIX_ANALYZER_CONFIG[]
+ = L"prefix";
+
+// const wchar_t CPIX_ANALYZER_FALLBACK[]
+// = CPIX_ANALYZER_STANDARD;
+//
+// const wchar_t CPIX_PREFIX_ANALYZER_FALLBACK[]
+// = CPIX_TOKENIZER_LETTER L">" CPIX_FILTER_LOWERCASE;
+
+
}
namespace Cpix {
+
+Analysis* Analysis::theInstance_ = NULL;
+ // Initializes Thai analysis when the dictionary file has a non-zero size, then creates the Analysis singleton.
+ void Analysis::init(InitParams& ip) {
+ // Init thai analysis with thai dictionary
+ std::string thai( Cpt::appendpath(ip.getResourceDir(),
+ THAI_LANGUAGE_FILE) );
+
+ if ( Cpt::filesize( thai.c_str() ) ) {
+ analysis::InitThaiAnalysis(thai.c_str());
+ } else {
+ logMsg(CPIX_LL_WARNING,
+ "Thai dictionary could not be found. Thai analysis will NOT work.");
+ }
+ // NOTE(review): a previous theInstance_ is overwritten without delete - presumably init() runs once per shutdown(); confirm
+ // Setup the analysis instance
+ theInstance_ = new Analysis(ip);
+ }
+ // Parses the analyzer definition file from the resource dir and builds the three analyzers from it.
+ Analysis::Analysis(InitParams& ip)
+ : defaultAnalyzer_(),
+ queryAnalyzer_(),
+ prefixAnalyzer_() {
+ // parse() throws when the definition cannot be loaded, so p is non-null below
+ auto_ptr<AnalyzerExp::Piping> p = parse( Cpt::appendpath( ip.getResourceDir(), ANALYZER_FILE ) );
+ // the same parsed piping is used three times, once per config name
+ defaultAnalyzer_.reset( new CustomAnalyzer( *p, DEFAULT_ANALYZER_CONFIG ) );
+ queryAnalyzer_.reset( new CustomAnalyzer( *p, QUERY_ANALYZER_CONFIG ) );
+ prefixAnalyzer_.reset( new CustomAnalyzer( *p, PREFIX_ANALYZER_CONFIG ) );
+ }
+    // Reads the analyzer definition file at path and parses it into a Piping; raises via THROW_CPIXEXC when it cannot be opened, read or parsed.
+    auto_ptr<AnalyzerExp::Piping> Analysis::parse(std::string path) {
+        std::wifstream in(path.c_str());
+        auto_ptr<AnalyzerExp::Piping> ret;
+        if ( in ) {
+            // The byte size is only an upper bound for the number of wide
+            // characters (multi-byte encodings, newline translation), so a
+            // short read is legitimate: terminate the text at gcount().
+            int filesize = Cpt::filesize(path.c_str());
+            Cpt::auto_array<wchar_t> buf( new wchar_t[filesize+1] );
+            in.read(buf.get(), filesize);
+            buf.get()[in.gcount()] = L'\0';
+            if ( !in.bad() ) {
+                try {
+                    ret = AnalyzerExp::ParsePiping( buf.get() );
+                } catch (...) {} // a parse failure is reported by the throw below
+            }
+            in.close();
+        }
+
+        if ( !ret.get() ) {
+            THROW_CPIXEXC("Analyzer definition not found. %s could not be opened. ", path.c_str());
+        }
+        return ret;
+    }
+ // Shuts down Thai analysis, destroys the singleton and resets theInstance_ to NULL.
+ void Analysis::shutdown() {
+ analysis::ShutdownThaiAnalysis();
+ delete theInstance_;
+ theInstance_ = NULL;
+ }
+ // Returns the singleton's default analyzer; theInstance_ is dereferenced unchecked.
+ lucene::analysis::Analyzer& Analysis::getDefaultAnalyzer() {
+ // TODO: Assert( theInstance_ );
+ return *theInstance_->defaultAnalyzer_;
+ }
+ // Returns the singleton's query analyzer; theInstance_ is dereferenced unchecked.
+ lucene::analysis::Analyzer& Analysis::getQueryAnalyzer() {
+ // TODO: Assert( theInstance_ );
+ return *theInstance_->queryAnalyzer_;
+ }
+ // Returns the singleton's prefix analyzer; theInstance_ is dereferenced unchecked.
+ lucene::analysis::Analyzer& Analysis::getPrefixAnalyzer() {
+ // TODO: Assert( theInstance_ );
+ return *theInstance_->prefixAnalyzer_;
+ }
+
PrefixGenerator::PrefixGenerator(
lucene::analysis::TokenStream* in,
bool deleteTS,
@@ -221,488 +327,5 @@
return analyzer_->tokenStream( fieldName, reader );
}
}
-
- //
- // Following sections provide the glue code for connecting the
- // analyzer definition syntax with analyzer, tokenizers and filter
- // implementations.
- //
- // The glue code is template heavy with the indent of providing
- // automation for associating specific keywords with specific
- // analyzers, tokenizers and filters implementing corresponding
- // CLucene abstractions. Additional classes are needed only if
- // filters, tokenizers, etc. accept parameters.
- //
- // NOTE: To understand the analyzers, it is sufficient to understand
- // that an analyzer transforms characters stream into specific token streams
- // (e.g. character stream 'foobarmetawords' can be transformed into token
- // stream 'foo', 'bar' 'meta' 'words'). Analysis consist of two main
- // parts which are tokenization and filtering. Tokenization converts
- // the character stream into token stream (e.g. 'FoO bAr' -> 'FoO' 'bAr')
- // and filtering modifies the tokens (e.g. lowercase filtering 'FoO' ->
- // 'foo', 'bAr' -> 'bar'). Analyzer as an object is responsible for
- // constructing a tokenizer and a sequence of filters to perform
- // these required tasks.
- //
- // See the documentation around TokenizerClassEntries and
- // FilterClassEntries to see how implementations not taking parameters
- // can be easily added.
- //
-
- using namespace Cpix::AnalyzerExp;
-
- /**
- * Creates token stream for the given reader and fieldName.
- * This class in in many ways similar to CLucene analyzer class
- * definition.
- */
- class TokenStreamFactory {
- public:
- virtual ~TokenStreamFactory();
- virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName,
- lucene::util::Reader * reader) = 0;
- };
-
- TokenStreamFactory::~TokenStreamFactory() {};
-
- /**
- * Template class used to create CLucene tokenizers. Template
- * parameter T must implement lucene::analysis::Tokenizer abstraction.
- */
- template<class T>
- class TokenizerFactory : public TokenStreamFactory
- {
- public:
- TokenizerFactory(const Invokation& invokation) {
- if (invokation.params().size() > 0) {
- THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
- invokation.id().c_str());
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/,
- lucene::util::Reader * reader) {
- return _CLNEW T(reader);
- }
- };
-
- /**
- * Template class wrapping CLucene analyzers. Template parameter T must
- * implement lucene::analysis::Analyzer abstraction.
- */
- template<class T>
- class AnalyzerWrap : public TokenStreamFactory
- {
- public:
- AnalyzerWrap(const Invokation& invokation) : analyzer_() {
- if (invokation.params().size() > 0) {
- THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
- invokation.id().c_str());
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return analyzer_.tokenStream(fieldName, reader);
- }
- private:
- T analyzer_;
- };
-
- /**
- * Template class associated with CLucene filter and a TokenStreamFactory.
- * Uses TokenStreamFactory to transform given character stream into tokenstream
- * and then applies the given Clucene filter to the token stream.
- * The template parameter T must implement lucene::analysis::Filter abstraction.
- */
- template<class T>
- class FilterFactory : public TokenStreamFactory
- {
- public:
- FilterFactory(const Invokation& invokation, auto_ptr<TokenStreamFactory> factory) : factory_(factory) {
- if (invokation.params().size() > 0) {
- THROW_CPIXEXC(L"Filter %S does not accept parameters",
- invokation.id().c_str());
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return _CLNEW T(factory_->tokenStream(fieldName, reader), true);
- }
- private:
- std::auto_ptr<TokenStreamFactory> factory_;
- };
-
- /**
- * Specialized Analyzer wrap for CLucene's PerFieldAnalyzer. Specialized
- * template is needed because perfield analyzer accepts parameters
- * (specific analyzers for different field plus default analyzer)
- */
- template<>
- class AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper> : public TokenStreamFactory {
- public:
- AnalyzerWrap(const Switch& sw) : analyzer_(0) {
- using namespace Cpt::Parser;
- using namespace lucene::analysis;
-
- analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def()));
-
- for (int i = 0; i < sw.cases().size(); i++) {
- const Case& cs = *sw.cases()[i];
- for (int j = 0; j < cs.fields().size(); j++) {
- analyzer_->addAnalyzer( cs.fields()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping() ) );
- }
- }
- }
- virtual ~AnalyzerWrap() {
- _CLDELETE(analyzer_);
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return analyzer_->tokenStream(fieldName, reader);
- }
- private:
- lucene::analysis::PerFieldAnalyzerWrapper* analyzer_;
- };
-
-
-
- /**
- * Specialized StopFilter factory. Specialized filter is needed
- * because StopFilter needs parameters (stop word list or a language)
- */
- template<>
- class FilterFactory<lucene::analysis::StopFilter> : public TokenStreamFactory
- {
- public:
- FilterFactory(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory)
- :words_(0), ownWords_(0), factory_(factory) {
- using namespace Cpt::Parser;
- if (invokation.params().size() == 1 && dynamic_cast<Identifier*>(invokation.params()[0])) {
- Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
- //cpix_LangCode lang;
- if (id->id() == CPIX_WLANG_EN) {
- words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS;
- } else {
- THROW_CPIXEXC(L"No prepared stopword list for language code '%S'",
- id->id().c_str());
- }
- } else {
- ownWords_ = new wchar_t*[invokation.params().size()+1];
- memset(ownWords_, 0, sizeof(wchar_t*)*(invokation.params().size()+1));
- // FIXE: args may leak
- for (int i = 0; i < invokation.params().size(); i++) {
- StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
- if (lit) {
- const wstring& str = lit->text();
- ownWords_[i] = new wchar_t[str.length()+1];
- wcscpy(ownWords_[i], str.c_str());
- } else {
- THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters.");
- }
- }
- }
-
- }
- virtual ~FilterFactory() {
- if (ownWords_) {
- for (int i = 0; ownWords_[i]; i++) {
- delete[] ownWords_[i];
- }
- delete[] ownWords_;
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, ownWords_ ? const_cast<const wchar_t**>(ownWords_) : words_);
- }
- private:
- const wchar_t **words_;
- wchar_t **ownWords_; // owned
- std::auto_ptr<TokenStreamFactory> factory_;
- };
-
- /**
- * Specialized SnowballFilter factory is needed, because SnowballFilter
- * accepts parameters (the language).
- */
- template<>
- class FilterFactory<lucene::analysis::SnowballFilter> : public TokenStreamFactory
- {
- public:
- FilterFactory(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory)
- : factory_(factory) {
- using namespace Cpt::Parser;
- if (invokation.params().size() != 1 || !dynamic_cast<Identifier*>(invokation.params()[0])) {
- THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." );
- }
- Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
- if (id->id() == CPIX_WLANG_EN) {
- lang_ = cpix_LANG_EN;
- } else {
- THROW_CPIXEXC(L"Language identifier %S is not supported for stemming",
- id->id().c_str());
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_);
- }
- private:
- cpix_LangCode lang_;
- std::auto_ptr<TokenStreamFactory> factory_;
- };
-
- /**
- * Specialized LengthFilter factory is needed, because length filter
- * accepts parameters (minimum length and maximum length)
- */
- template<>
- class FilterFactory<lucene::analysis::LengthFilter> : public TokenStreamFactory
- {
- public:
- FilterFactory(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory)
- : factory_(factory) {
- using namespace Cpt::Parser;
- if (!(invokation.params().empty())) {
- if (invokation.params().size() != 2 ||
- !dynamic_cast<IntegerLit*>(invokation.params()[0]) ||
- !dynamic_cast<IntegerLit*>(invokation.params()[1])) {
- THROW_CPIXEXC("Length filter takes exactly two integer parameters");
- }
- min_ = dynamic_cast<IntegerLit*>(invokation.params()[0])->value();
- max_ = dynamic_cast<IntegerLit*>(invokation.params()[1])->value();
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ );
- }
- private:
- int min_, max_;
- std::auto_ptr<TokenStreamFactory> factory_;
- };
-
- /**
- * Specialized PrefixGenerator factory is needed, because PrefixGenerator
- * requires the max prefix size.
- */
- template<>
- class FilterFactory<PrefixGenerator> : public TokenStreamFactory
- {
- public:
- FilterFactory(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory)
- : factory_(factory) {
- using namespace Cpt::Parser;
- if (invokation.params().empty()) {
- if (invokation.params().size() != 1 ||
- !dynamic_cast<IntegerLit*>(invokation.params()[0])) {
- THROW_CPIXEXC("Prefix generator takes exactly one integer parameter");
- }
- maxPrefixLength_ = dynamic_cast<IntegerLit*>(invokation.params()[0])->value();
- }
- }
- virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
- lucene::util::Reader * reader) {
- return _CLNEW PrefixGenerator(factory_->tokenStream(fieldName, reader), true, maxPrefixLength_ );
- }
- private:
- int maxPrefixLength_;
- std::auto_ptr<TokenStreamFactory> factory_;
- };
-
-
- typedef auto_ptr<TokenStreamFactory> (*TokenizerFactoryCreator)(const Invokation& invokation);
- typedef auto_ptr<TokenStreamFactory> (*FilterFactoryCreator)(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory);
- /**
- * Sets up a tokenizer factory with given invokation parameters
- */
- template<class T>
- struct TokenizerFactoryCtor
- {
- static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
- return auto_ptr<TokenStreamFactory>(new TokenizerFactory<T>(invokation));
- }
- };
-
- /**
- * Sets up an analyzer wrap with given invokation parameters
- */
- template<class T>
- struct AnalyzerWrapCtor
- {
- static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
- return auto_ptr<TokenStreamFactory>(new AnalyzerWrap<T>(invokation));
- }
- };
-
- /**
- * Sets up a filter factory with given invokation parameters
- */
- template<class T>
- struct FilterFactoryCtor
- {
- static auto_ptr<TokenStreamFactory> create(const Invokation& invokation,
- auto_ptr<TokenStreamFactory> factory) {
- return auto_ptr<TokenStreamFactory>(new FilterFactory<T>(invokation, factory));
- }
- };
-
- struct TokenizerClassEntry {
- const wchar_t *id_;
- TokenizerFactoryCreator createFactory_;
- };
-
- //
- // Following TokenizerClassEntries and FilterClassEntries contain
- // the mapping from tokenizer/analyzer/filter names into glue code
- // templates providing the implementations.
- //
-
- TokenizerClassEntry TokenizerClassEntries[] = {
- {CPIX_TOKENIZER_STANDARD, TokenizerFactoryCtor<lucene::analysis::standard::StandardTokenizer>::create},
- {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor<lucene::analysis::WhitespaceTokenizer>::create},
- {CPIX_TOKENIZER_LETTER, TokenizerFactoryCtor<lucene::analysis::LetterTokenizer>::create},
- {CPIX_TOKENIZER_KEYWORD, TokenizerFactoryCtor<lucene::analysis::KeywordTokenizer>::create},
- {CPIX_ANALYZER_STANDARD, AnalyzerWrapCtor<lucene::analysis::standard::StandardAnalyzer>::create},
-
-// TODO: Add more Tokenizers/Analyzers
-
-// Example tokenizer (works as such if tokenizers don't take parameters)
-// {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor<MyTokenizer>::create},
-
-// Example analyzer (works as such if analyzer don't take parameters)
-// {CPIX_ANALYZER_MYANALYZER, AnalyzerWrapCtor<MyAnalyzer>::create},
-
- {0, 0}
- };
-
- struct FilterClassEntry {
- const wchar_t *id_;
- FilterFactoryCreator createFactory_;
- };
-
- FilterClassEntry FilterClassEntries[] = {
- {CPIX_FILTER_STANDARD, FilterFactoryCtor<lucene::analysis::standard::StandardFilter>::create},
- {CPIX_FILTER_LOWERCASE, FilterFactoryCtor<lucene::analysis::LowerCaseFilter>::create},
- {CPIX_FILTER_ACCENT, FilterFactoryCtor<lucene::analysis::ISOLatin1AccentFilter>::create},
- {CPIX_FILTER_STOP, FilterFactoryCtor<lucene::analysis::StopFilter>::create},
- {CPIX_FILTER_STEM, FilterFactoryCtor<lucene::analysis::SnowballFilter>::create},
- {CPIX_FILTER_LENGTH, FilterFactoryCtor<lucene::analysis::LengthFilter>::create},
- {CPIX_FILTER_PREFIXES, FilterFactoryCtor<PrefixGenerator>::create},
-
-// TODO: Add more Filters
-
-// Example filter (works as such if analyzer don't take parameters)
-// {CPIX_FILTER_MYFILTER, FilterFactoryCtor<MyFilter>::create},
-
- {0, 0}
- };
-
- CustomAnalyzer::CustomAnalyzer(const wchar_t* definition)
- {
- using namespace Cpt::Lex;
- using namespace Cpt::Parser;
-
-
- try
- {
- // 1. Setup an tokenizer
- Cpix::AnalyzerExp::Tokenizer
- tokenizer;
- StdLexer
- lexer(tokenizer, definition);
-
- // 2. Parse
- std::auto_ptr<Piping>
- def = ParsePiping(lexer);
- lexer.eatEof();
-
- // 3. Setup this item based on parsed definition
- setup(*def);
- }
- catch (Cpt::ITxtCtxtExc & exc)
- {
- // provide addition info for thrown exception
- exc.setContext(definition);
-
- // throw it fwd
- throw;
- }
- }
-
- CustomAnalyzer::CustomAnalyzer(const Piping& definition)
- {
- setup(definition);
- }
- using namespace Cpt::Parser;
-
- void CustomAnalyzer::setup(const Piping& piping) {
-
- // If the first item is invokation, create corresponding analyzer/tokenizer
- if (dynamic_cast<const Invokation*>(&piping.tokenizer()))
- {
- const Invokation& tokenizer = dynamic_cast<const Invokation&>(piping.tokenizer());
- TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() );
- factory_ = tokenizerEntry.createFactory_( tokenizer );
- } else {
- // If the first item is switch statement, create per-field analyzer
- const Switch& tokenizer = dynamic_cast<const Switch&>(piping.tokenizer());
- factory_ = new AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper>( tokenizer );
- }
-
- // Add filters
- const std::vector<Invokation*>& filters = piping.filters();
- for (int i = 0; i < filters.size(); i++) {
- FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() );
- factory_ = filterEntry.createFactory_( *filters[i], factory_ );
- }
- }
-
- TokenizerClassEntry& CustomAnalyzer::getTokenizerEntry(std::wstring id) {
-
- // Looks for a match in the TokenizerClassEntries. After finding
- // a match it returns a proper tokenizer/analyzer implementation provider
- //
- for (int i = 0; TokenizerClassEntries[i].id_; i++) {
- if (id == std::wstring(TokenizerClassEntries[i].id_)) {
- return TokenizerClassEntries[i];
- }
- }
-
- THROW_CPIXEXC(L"Unknown tokenizer '%S'.",
- id.c_str());
- }
-
- FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) {
-
- // Looks for a match in the FilterClassEntries. After finding
- // a match it returns a proper tokenizer/analyzer implementation
- // provider
- //
- for (int i = 0; FilterClassEntries[i].id_; i++) {
- if (id == std::wstring(FilterClassEntries[i].id_)) {
- return FilterClassEntries[i];
- }
- }
-
- THROW_CPIXEXC(L"Unknown filter '%S'.",
- id.c_str());
- }
-
- CustomAnalyzer::~CustomAnalyzer() {}
-
- lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t * fieldName,
- lucene::util::Reader * reader) {
- // Utilizes the the token stream factory to form token stream.
- // token stream factory is prepared during custom analyzer construction
- // and based on the analyzer definition string.
-
- return factory_->tokenStream(fieldName, reader);
- }
-
}
--- a/searchengine/cpix/cpix/src/analyzerexp.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/analyzerexp.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -16,7 +16,6 @@
*/
-
#include "analyzerexp.h"
#include "indevicecfg.h"
@@ -25,8 +24,28 @@
namespace AnalyzerExp {
+    const wchar_t TOKEN_LEFT_BRACKET[]  = L"(";  // token type names handed to the
+    const wchar_t TOKEN_RIGHT_BRACKET[] = L")";  // symbol tokenizers set up in Tokenizer::Tokenizer()
+    const wchar_t TOKEN_COMMA[]         = L"comma";
+    const wchar_t TOKEN_PIPE[]          = L">";
+    const wchar_t TOKEN_SWITCH[]        = L"switch";
+    const wchar_t TOKEN_LOCALE_SWITCH[] = L"locale_switch";
+    const wchar_t TOKEN_CONFIG_SWITCH[] = L"config_switch";
+    const wchar_t TOKEN_CASE[]          = L"case";
+    const wchar_t TOKEN_DEFAULT[]       = L"default";
+    const wchar_t TOKEN_LEFT_BRACE[]    = L"{";
+    const wchar_t TOKEN_RIGHT_BRACE[]   = L"}";
+    const wchar_t TOKEN_COLON[]         = L":";  // same text as the ':' symbol registered for cl_
+    const wchar_t TOKEN_TERMINATOR[]    = L";";
+
+
+ std::auto_ptr<Piping> ParsePiping(Cpt::Parser::Lexer& lexer);
+
+
Tokenizer::Tokenizer()
: ws_(),
+ lcomment_(),
+ scomment_(),
ids_(),
strlits_('\''),
intlits_(),
@@ -36,6 +55,8 @@
cm_(TOKEN_COMMA, L","),
pp_(TOKEN_PIPE, CPIX_PIPE),
sw_(TOKEN_SWITCH, CPIX_SWITCH),
+ lsw_(TOKEN_LOCALE_SWITCH, CPIX_LOCALE_SWITCH),
+ csw_(TOKEN_CONFIG_SWITCH, CPIX_CONFIG_SWITCH),
cs_(TOKEN_CASE, CPIX_CASE),
df_(TOKEN_DEFAULT, CPIX_DEFAULT),
lbc_(TOKEN_LEFT_BRACE, L"{"),
@@ -43,24 +64,29 @@
cl_(TOKEN_COLON, L":"),
tr_(TOKEN_TERMINATOR, L";")
{
- tokenizers_ = new Cpt::Lex::Tokenizer*[17];
- tokenizers_[0] = &ws_;
- tokenizers_[1] = &lb_;
- tokenizers_[2] = &rb_;
- tokenizers_[3] = &cm_;
- tokenizers_[4] = &pp_;
- tokenizers_[5] = &sw_;
- tokenizers_[6] = &cs_;
- tokenizers_[7] = &df_;
- tokenizers_[8] = &lbc_;
- tokenizers_[9] = &rbc_;
- tokenizers_[10] = &cl_;
- tokenizers_[11] = &tr_;
- tokenizers_[12] = &ids_;
- tokenizers_[13] = &strlits_;
- tokenizers_[14] = &intlits_;
- tokenizers_[15] = &reallits_;
- tokenizers_[16] = 0;
+ int i = 0;
+ tokenizers_ = new Cpt::Lex::Tokenizer*[21];
+ tokenizers_[i++] = &ws_;
+ tokenizers_[i++] = &lcomment_;
+ tokenizers_[i++] = &scomment_;
+ tokenizers_[i++] = &lb_;
+ tokenizers_[i++] = &rb_;
+ tokenizers_[i++] = &cm_;
+ tokenizers_[i++] = &pp_;
+ tokenizers_[i++] = &sw_;
+ tokenizers_[i++] = &lsw_;
+ tokenizers_[i++] = &csw_;
+ tokenizers_[i++] = &cs_;
+ tokenizers_[i++] = &df_;
+ tokenizers_[i++] = &lbc_;
+ tokenizers_[i++] = &rbc_;
+ tokenizers_[i++] = &cl_;
+ tokenizers_[i++] = &tr_;
+ tokenizers_[i++] = &ids_;
+ tokenizers_[i++] = &strlits_;
+ tokenizers_[i++] = &intlits_;
+ tokenizers_[i++] = &reallits_;
+ tokenizers_[i++] = 0;
tokenizer_.reset( new Cpt::Lex::MultiTokenizer(tokenizers_) );
}
@@ -137,12 +163,11 @@
return filters_;
}
- Case::Case(const std::vector<std::wstring>& fields, std::auto_ptr<Piping> piping)
- : fields_(fields), piping_(piping) {
- }
+ Case::Case(const std::vector<std::wstring>& cases, std::auto_ptr<Piping> piping)
+ : cases_(cases), piping_(piping) {}
Case::~Case() {};
- const std::vector<std::wstring>& Case::fields() const { return fields_; }
- const Piping& Case::piping() const { return *piping_; }
+ const std::vector<std::wstring>& Case::cases() const { return cases_; }
+ const Piping& Case::piping() const { return *piping_; }
Switch::Switch(Cpt::auto_vector<Case>& cases, std::auto_ptr<Piping> def)
: cases_(cases), def_(def) {
@@ -153,7 +178,67 @@
const std::vector<Case*>& Switch::cases() const { return cases_; }
const Piping& Switch::def() const { return *def_; }
-
+ // LocaleSwitch: holds a list of cases plus a default piping.
+ LocaleSwitch::LocaleSwitch(Cpt::auto_vector<Case>& cases, std::auto_ptr<Piping> def)
+ : cases_(cases), def_(def) {
+ }
+ // NOTE(review): cases_ is built from a non-const auto_vector& - presumably it takes over the Case objects; confirm
+ LocaleSwitch::~LocaleSwitch() {
+ }
+ // Read-only accessors for the stored cases and the default piping
+ const std::vector<Case*>& LocaleSwitch::cases() const { return cases_; }
+ const Piping& LocaleSwitch::def() const { return *def_; }
+ // ConfigSwitch: holds a list of cases plus a default piping.
+ ConfigSwitch::ConfigSwitch(Cpt::auto_vector<Case>& cases, std::auto_ptr<Piping> def)
+ : cases_(cases), def_(def) {
+ }
+ // NOTE(review): cases_ is built from a non-const auto_vector& - presumably it takes over the Case objects; confirm
+ ConfigSwitch::~ConfigSwitch() {
+ }
+ // Read-only accessors for the stored cases and the default piping
+ const std::vector<Case*>& ConfigSwitch::cases() const { return cases_; }
+ const Piping& ConfigSwitch::def() const { return *def_; }
+
+ //
+ // Parsing methods
+ // ---------------
+ //
+
+ //
+ // How is the parsing implemented?
+ // --
+ //
+ // Parsing uses the Lexer - object from Cpt::Parser package.
+ // The basic way how lexer operates is that the lexer
+ // converts a source stream of characters lazily into
+ // stream of tokens. If the lexer object fails at tokenizing
+ // the character stream because of a syntax error, LexException
+ // is thrown.
+ //
+ // The produced stream of tokens can be iterated
+ // with 'eat' methods. Typically one moves forward in the
+ // token stream by 'eating' specific tokens, e.g. by
+ // command lexer.eat(TOKEN_LEFT_BRACKET). If the 'eaten'
+ // token is not of the specified type, parse exception is
+ // raised. In cases, where token can be of a number of types,
+ // use of lexer.peek() is advised.
+ //
+
+ //
+ // Example code of using lexer for parsing syntax '(ID[, STRING])':
+ //
+ // lexer.eat(TOKEN_LEFT_BRACKET);
+ // std::string id = lexer.parseId();
+ // if (lexer.peek().type() == TOKEN_COMMA) {
+ // lexer.eat(TOKEN_COMMA);
+ // std::string str = lexer.parseString();
+ // }
+ // lexer.eat(TOKEN_RIGHT_BRACKET);
+ //
+
+ // Atomic expressions, e.g. "'foo'", "4", "4.5", "id"
+ //
+
std::auto_ptr<StringLit> ParseString(Cpt::Parser::Lexer& lexer)
{
return std::auto_ptr<StringLit>(new StringLit(lexer.eatString()));
@@ -176,13 +261,12 @@
std::auto_ptr<Exp> ParseParameter(Cpt::Parser::Lexer& lexer)
{
- switch (lexer.peek().type()) {
- case Cpt::Lex::TOKEN_ID: return std::auto_ptr<Exp>( ParseIdentifier(lexer).release() );
- case Cpt::Lex::TOKEN_STRLIT: return std::auto_ptr<Exp>( ParseString(lexer).release() );
- case Cpt::Lex::TOKEN_INTLIT: return std::auto_ptr<Exp>( ParseInteger(lexer).release() );
- case Cpt::Lex::TOKEN_REALLIT: return std::auto_ptr<Exp>( ParseReal(lexer).release() );
- default: throw Cpt::Parser::ParseException(L"Expected literal . ", lexer.peek());
- }
+ Cpt::Lex::token_type_t type = lexer.peek().type();
+ if (type == Cpt::Lex::TOKEN_ID) return std::auto_ptr<Exp>( ParseIdentifier(lexer).release() );
+ if (type == Cpt::Lex::TOKEN_STRLIT) return std::auto_ptr<Exp>( ParseString(lexer).release() );
+ if (type == Cpt::Lex::TOKEN_INTLIT) return std::auto_ptr<Exp>( ParseInteger(lexer).release() );
+ if (type == Cpt::Lex::TOKEN_REALLIT)return std::auto_ptr<Exp>( ParseReal(lexer).release() );
+ throw Cpt::Parser::ParseException(L"Expected literal. ", lexer.peek());
}
std::auto_ptr<Parameters> ParseParameters(Cpt::Parser::Lexer& lexer)
@@ -255,10 +339,47 @@
return std::auto_ptr<Switch>(new Switch(cases, def));
}
+ // Parses: locale_switch '{' case* default '}' into a LocaleSwitch.
+ std::auto_ptr<LocaleSwitch> ParseLocaleSwitch(Cpt::Parser::Lexer& lexer)
+ {
+ lexer.eat(TOKEN_LOCALE_SWITCH);
+ lexer.eat(TOKEN_LEFT_BRACE);
+ Cpt::auto_vector<Case> cases;
+ while (lexer && lexer.peek().type() == TOKEN_CASE) { // collect every leading case
+ cases.donate_back(ParseCase(lexer));
+ }
+ std::auto_ptr<Piping> def = ParseDefault(lexer); // ParseDefault is always called after the cases
+ lexer.eat(TOKEN_RIGHT_BRACE);
+ return std::auto_ptr<LocaleSwitch>(new LocaleSwitch(cases, def));
+ }
+ // Parses: config_switch '{' case* default '}' into a ConfigSwitch.
+ std::auto_ptr<ConfigSwitch> ParseConfigSwitch(Cpt::Parser::Lexer& lexer)
+ {
+ lexer.eat(TOKEN_CONFIG_SWITCH);
+ lexer.eat(TOKEN_LEFT_BRACE);
+ Cpt::auto_vector<Case> cases;
+ while (lexer && lexer.peek().type() == TOKEN_CASE) { // collect every leading case
+ cases.donate_back(ParseCase(lexer));
+ }
+ std::auto_ptr<Piping> def = ParseDefault(lexer); // ParseDefault is always called after the cases
+ lexer.eat(TOKEN_RIGHT_BRACE);
+
+ return std::auto_ptr<ConfigSwitch>(new ConfigSwitch(cases, def));
+ }
+
+
+ // Tokenizer can be either in Invocation form or switch-case
+ // structure
+ //
+
std::auto_ptr<Exp> ParseTokenizer(Cpt::Parser::Lexer& lexer) {
if (lexer.peek().type() == TOKEN_SWITCH) {
return std::auto_ptr<Exp>(ParseSwitch(lexer).release());
+ } else if (lexer.peek().type() == TOKEN_LOCALE_SWITCH) {
+ return std::auto_ptr<Exp>(ParseLocaleSwitch(lexer).release());
+ } else if (lexer.peek().type() == TOKEN_CONFIG_SWITCH) {
+ return std::auto_ptr<Exp>(ParseConfigSwitch(lexer).release());
} else {
return std::auto_ptr<Exp>(ParseRelaxedInvokation(lexer).release());
}
@@ -275,6 +396,32 @@
}
return std::auto_ptr<Piping>(new Piping(tokenizer, filters));
}
+ // Parses a complete analyzer definition string into a Piping; the whole input must be consumed.
+ std::auto_ptr<Piping> ParsePiping(const wchar_t* definition) {
+ using namespace Cpt::Lex;
+ using namespace Cpt::Parser;
+
+ try {
+ // 1. Set up a tokenizer and a lexer over the definition text
+ Cpix::AnalyzerExp::Tokenizer
+ tokenizer;
+ StdLexer
+ lexer(tokenizer, definition);
+
+ // 2. Parse, then require end of input
+ std::auto_ptr<Piping>
+ def = ParsePiping(lexer);
+ lexer.eatEof();
+
+ return def;
+ } catch (Cpt::ITxtCtxtExc & exc) {
+ // attach the definition text to the exception as its context
+ exc.setContext(definition);
+
+ // rethrow the same exception object
+ throw;
+ }
+ }
}
--- a/searchengine/cpix/cpix/src/cpixanalyzer.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/cpixanalyzer.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -35,6 +35,7 @@
// CPix impl details
#include "cluceneext.h"
#include "analyzer.h"
+#include "customanalyzer.h"
#include "cpixsearch.h"
#include "cpixidxdb.h"
#include "idxdb.h"
@@ -141,9 +142,9 @@
// the wrapper custom must be released in any case, as
// it was just a first step in the construction
- // sequence
+ // sequence
+ cpix_Analyzer_destroy(custom);
}
- cpix_Analyzer_destroy(custom);
return system;
}
--- a/searchengine/cpix/cpix/src/cpixinit.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/cpixinit.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -83,6 +83,27 @@
value));
}
+const char * cpix_InitParams_getResourceDir(cpix_InitParams * thisIp)
+{
+ using namespace Cpix;
+ // C API getter: forwards to InitParams::getResourceDir through XlateExc/Caller
+ return XlateExc(thisIp,
+ Caller(thisIp,
+ &InitParams::getResourceDir));
+}
+
+// C API setter: forwards value to InitParams::setResourceDir through XlateExc/Caller.
+void cpix_InitParams_setResourceDir(cpix_InitParams * thisIp,
+ const char * value)
+{
+ using namespace Cpix;
+
+ XlateExc(thisIp,
+ Caller(thisIp,
+ &InitParams::setResourceDir,
+ value));
+}
+
const char * cpix_InitParams_getLogFileBase(cpix_InitParams * thisIp)
{
--- a/searchengine/cpix/cpix/src/cpixsearch.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/cpixsearch.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -39,6 +39,7 @@
#include "iqrytype.h"
#include "document.h"
#include "analyzer.h"
+#include "queryparser.h"
/***********************************************************
@@ -56,22 +57,76 @@
const wchar_t * fieldName,
cpix_Analyzer * analyzer)
{
- using namespace lucene::analysis;
-
- Cpix::SystemAnalyzer
+ lucene::analysis::Analyzer
* a = Cast2Native<cpix_Analyzer>(analyzer);
cpix_QueryParser
* rv = NULL;
- rv = Create(result,
- CallCtor(rv,
- fieldName,
- static_cast<Analyzer*>(a)));
-
+ Cpix::IQueryParser* parser =
+ XlateExc(
+ result,
+ CallFreeFunc(
+ &Cpix::CreateCLuceneQueryParser,
+ fieldName,
+ a));
+
+ if ( cpix_Succeeded( result ) ) {
+ CreateWrapper(parser, result, rv);
+ }
return rv;
}
+cpix_QueryParser *
+ cpix_CreatePrefixQueryParser(cpix_Result * result,
+ const wchar_t * fieldName)
+{
+ using namespace lucene::analysis;
+ // rv stays NULL unless the parser was created and wrapped
+ cpix_QueryParser
+ * rv = NULL;
+ // create the native parser via Cpix::CreatePrefixQueryParser, routed through XlateExc with result
+ Cpix::IQueryParser* parser =
+ XlateExc(
+ result,
+ CallFreeFunc(
+ &Cpix::CreatePrefixQueryParser,
+ fieldName));
+ // wrap the native parser only when result reports success
+ if ( cpix_Succeeded( result ) ) {
+ CreateWrapper(parser, result, rv);
+ }
+ return rv;
+}
+
+
+// Creates a multi-field query parser via Cpix::CreateCLuceneMultiFieldQueryParser; rv stays NULL unless result reports success.
+cpix_QueryParser *
+ cpix_CreateMultiFieldQueryParser(cpix_Result * result,
+ const wchar_t * fieldNames[],
+ cpix_Analyzer * analyzer,
+ cpix_BoostMap * boosts)
+{
+ cpix_QueryParser
+ * rv = NULL;
+ // unwrap the C handle to the native analyzer
+ lucene::analysis::Analyzer
+ * a = Cast2Native<cpix_Analyzer>(analyzer);
+ // create the native parser, routed through XlateExc with result
+ Cpix::IQueryParser* parser =
+ XlateExc(
+ result,
+ CallFreeFunc(
+ &Cpix::CreateCLuceneMultiFieldQueryParser,
+ fieldNames,
+ a,
+ Cast2Native<cpix_BoostMap>(boosts)));
+ // wrap the native parser only when result reports success
+ if ( cpix_Succeeded( result ) ) {
+ CreateWrapper(parser, result, rv);
+ }
+ return rv;
+}
cpix_BoostMap *
@@ -121,39 +176,16 @@
DestroyWrapper(thisMap);
}
-
-cpix_QueryParser *
-cpix_CreateMultiFieldQueryParser(cpix_Result * result,
- const wchar_t * fieldNames[],
- cpix_Analyzer * analyzer,
- cpix_BoostMap * boosts)
-{
- cpix_MultiFieldQueryParser
- * rv = NULL;
-
- rv = Create(result,
- CallCtor(rv,
- fieldNames,
- Cast2Native<cpix_Analyzer>(analyzer),
- Cast2Native<cpix_BoostMap>(boosts)));
-
- return rv;
-}
-
-
-void
+void
cpix_QueryParser_setDefaultOperator(cpix_QueryParser * thisQueryParser,
cpix_QP_Operator op)
{
- using namespace lucene::queryParser;
-
XlateExc(thisQueryParser,
Caller(thisQueryParser,
- &QueryParser::setDefaultOperator,
- static_cast<int>(op)));
+ &Cpix::IQueryParser::setDefaultOperator,
+ op));
}
-
cpix_Query *
cpix_QueryParser_parse(cpix_QueryParser * thisQueryParser,
const wchar_t * queryStr)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/customanalyzer.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,797 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+// system library
+#include "wchar.h"
+#include <string>
+#include <vector>
+#include <sstream>
+#include <iostream>
+#include <glib.h>
+
+// clucene
+#include "CLucene.h"
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/analysis/Analyzers.h"
+
+// local library
+#include "thaianalysis.h"
+#include "ngram.h"
+#include "koreananalyzer.h"
+#include "cjkanalyzer.h"
+#include "cpixparsetools.h"
+#include "prefixfilter.h"
+
+// cpix internal
+#include "customanalyzer.h"
+#include "cpixanalyzer.h"
+#include "analyzer.h"
+#include "cluceneext.h"
+#include "analyzerexp.h"
+#include "indevicecfg.h"
+#include "cpixexc.h"
+#include "localization.h"
+
+namespace Cpix {
+
+ //
+ // Following sections provide the glue code for connecting the
+ // analyzer definition syntax with analyzer, tokenizers and filter
+ // implementations.
+ //
+ // The glue code is template heavy with the intent of providing
+ // automation for associating specific keywords with specific
+ // analyzers, tokenizers and filters implementing corresponding
+ // CLucene abstractions. Additional classes are needed only if
+ // filters, tokenizers, etc. accept parameters.
+ //
+ // NOTE: To understand the analyzers, it is sufficient to understand
+ // that an analyzer transforms characters stream into specific token streams
+ // (e.g. character stream 'foobarmetawords' can be transformed into token
+ // stream 'foo', 'bar' 'meta' 'words'). Analysis consist of two main
+ // parts which are tokenization and filtering. Tokenization converts
+ // the character stream into token stream (e.g. 'FoO bAr' -> 'FoO' 'bAr')
+ // and filtering modifies the tokens (e.g. lowercase filtering 'FoO' ->
+ // 'foo', 'bAr' -> 'bar'). Analyzer as an object is responsible for
+ // constructing a tokenizer and a sequence of filters to perform
+ // these required tasks.
+ //
+ // See the documentation around TokenizerClassEntries and
+ // FilterClassEntries to see how implementations not taking parameters
+ // can be easily added.
+ //
+
+ using namespace Cpix::AnalyzerExp;
+
+// Safe assumption
+#define MAX_LANGCODE_LENGTH 256
+ // Token stream factory that picks one of several CustomAnalyzers by language name.
+ class LocaleSwitchStreamFactory : public TokenStreamFactory {
+ public:
+ // Builds the analyzers from the locale switch sw; config is handed to the per-case analyzers.
+ LocaleSwitchStreamFactory(const AnalyzerExp::LocaleSwitch& sw, const wchar_t* config);
+ // Deletes the analyzers stored in analyzers_.
+ ~LocaleSwitchStreamFactory();
+ // Looks the languages up through Localization::instance().getLanguageNames().
+ virtual lucene::analysis::TokenStream* tokenStream(const wchar_t * fieldName,
+ lucene::util::Reader * reader);
+ // Uses the first language that has an analyzer in analyzers_, else default_.
+ lucene::analysis::TokenStream* tokenStream(std::vector<std::wstring>& languages,
+ const wchar_t * fieldName,
+ lucene::util::Reader * reader);
+ // analyzers_ values are owned raw pointers; default_ serves unmatched languages.
+ private:
+ std::map<std::wstring, CustomAnalyzer*> analyzers_;
+ std::auto_ptr<CustomAnalyzer> default_;
+ };
+
+
+ TokenStreamFactory::~TokenStreamFactory() {};
+ // Builds one analyzer per case label of sw, plus the default analyzer for unmatched locales.
+ LocaleSwitchStreamFactory::LocaleSwitchStreamFactory(const LocaleSwitch& sw, const wchar_t* config) {
+     for (int i = 0; i < sw.cases().size(); i++) {
+         const Case& cs = *sw.cases()[i];
+         for (int j = 0; j < cs.cases().size(); j++) {
+             std::wstring c = cs.cases()[j];
+             if (analyzers_.count(c)) delete analyzers_[c]; // label repeated: the later case wins
+             analyzers_[c] = new CustomAnalyzer(cs.piping(), config);
+         }
+     }
+     default_.reset(new CustomAnalyzer(sw.def(), config)); // default branch needs config too
+ }
+ // Releases every analyzer owned through analyzers_.
+ LocaleSwitchStreamFactory::~LocaleSwitchStreamFactory() {
+     typedef std::map<std::wstring, CustomAnalyzer*> AnalyzerMap;
+     for (AnalyzerMap::iterator entry = analyzers_.begin(); entry != analyzers_.end(); ++entry) {
+         delete entry->second;
+     }
+ }
+ // Resolves the language list from Localization and delegates to the overload taking languages.
+ lucene::analysis::TokenStream*
+ LocaleSwitchStreamFactory::tokenStream(const wchar_t * fieldName,
+                                        lucene::util::Reader * reader) {
+     std::vector<std::wstring> activeLanguages(
+         Localization::instance().getLanguageNames());
+
+     return tokenStream(activeLanguages, fieldName, reader);
+ }
+ // Picks the analyzer of the first language in languages that has one; otherwise the default.
+ lucene::analysis::TokenStream*
+ LocaleSwitchStreamFactory::tokenStream(std::vector<std::wstring>& languages,
+                                        const wchar_t * fieldName,
+                                        lucene::util::Reader * reader) {
+     typedef std::map<std::wstring, CustomAnalyzer*>::iterator AnalyzerIter;
+     for (int idx = 0; idx < languages.size(); idx++) {
+         AnalyzerIter hit = analyzers_.find(languages[idx]);
+         if ( hit != analyzers_.end() ) return hit->second->tokenStream( fieldName, reader );
+     }
+     return default_->tokenStream( fieldName, reader );
+ }
+ // Token stream factory behind the default analyzer keyword; an optional identifier selects the target.
+ class DefaultTokenStreamFactory : public TokenStreamFactory {
+ public:
+ // Which of the Analysis analyzers tokenStream() delegates to.
+ enum Target {
+ NORMAL,
+ INDEXING,
+ QUERY,
+ PREFIX
+ };
+ // Accepts no parameter (NORMAL) or exactly one identifier naming the target.
+ DefaultTokenStreamFactory(const Invokation& invokation) {
+ if (invokation.params().size() == 1) {
+ const Identifier* id = dynamic_cast<const Identifier*>( invokation.params()[0] );
+ if ( id ) {
+ if ( id->id() == CPIX_ID_INDEXING ) {
+ target_ = INDEXING;
+ } else if ( id->id() == CPIX_ID_QUERY ) {
+ target_ = QUERY;
+ } else if ( id->id() == CPIX_ID_PREFIX ) {
+ target_ = PREFIX;
+ } else {
+ THROW_CPIXEXC(L"Default analyzer does not accept %S for parameter", id->id().c_str());
+ }
+ } else {
+ THROW_CPIXEXC(L"Default accepts only identifier as a parameter.");
+ }
+ } else if (invokation.params().size() > 1) {
+ THROW_CPIXEXC(L"Default analyzer does not accept more than one parameter");
+ } else {
+ target_ = NORMAL;
+ }
+ }
+ // QUERY and PREFIX have dedicated analyzers; NORMAL and INDEXING both use the default analyzer.
+ virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+ lucene::util::Reader * reader) {
+ switch (target_) {
+ case QUERY:
+ return Analysis::getQueryAnalyzer().tokenStream( fieldName, reader );
+ case PREFIX:
+ return Analysis::getPrefixAnalyzer().tokenStream( fieldName, reader );
+ }
+ return Analysis::getDefaultAnalyzer().tokenStream( fieldName, reader );
+ }
+
+ private:
+ // Assigned by the constructor on every path that does not throw.
+ Target target_;
+
+ };
+
+ /**
+ * Template class used to create CLucene tokenizers. Template
+ * parameter T must implement lucene::analysis::Tokenizer abstraction.
+ */
+ template<class T>
+ class TokenizerFactory : public TokenStreamFactory
+ {
+ public:
+ TokenizerFactory(const Invokation& invokation) {
+ if (invokation.params().size() > 0) {
+ THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
+ invokation.id().c_str());
+ }
+ }
+ virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/,
+ lucene::util::Reader * reader) {
+ return _CLNEW T(reader);
+ }
+ };
+ // Tokenizer factory for CjkNGramTokenizer; takes an optional integer n-gram size parameter.
+ template<>
+ class TokenizerFactory<analysis::CjkNGramTokenizer> : public TokenStreamFactory
+ {
+ public:
+     static const int DefaultNgramSize = 1; // n-gram size used when no parameter is given
+     TokenizerFactory(const Invokation& invokation) {
+         using namespace Cpix::AnalyzerExp;
+         if (invokation.params().size() > 1) {
+             THROW_CPIXEXC(L"Cjk Ngram tokenizer does not accept more than one parameter");
+         }
+         // Exactly one parameter: it must be the n-gram size. (The parameter count is
+         // unrelated to DefaultNgramSize, so it is compared against the literal 1.)
+         if (invokation.params().size() == 1) {
+             IntegerLit* ngramSize = dynamic_cast<IntegerLit*>(invokation.params()[0]);
+             if ( ngramSize ) {
+                 ngramSize_ = ngramSize->value();
+             } else {
+                 THROW_CPIXEXC(L"Cjk Ngram tokenizer parameter must be an integer");
+             }
+         } else {
+             ngramSize_ = DefaultNgramSize;
+         }
+     }
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * /*fieldName*/,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW analysis::CjkNGramTokenizer(reader, ngramSize_);
+     }
+
+ private:
+     int ngramSize_;
+ };
+
+
+ /**
+ * Template class wrapping CLucene analyzers. Template parameter T must
+ * implement lucene::analysis::Analyzer abstraction.
+ */
+ template<class T>
+ class AnalyzerWrap : public TokenStreamFactory
+ {
+ public:
+ AnalyzerWrap(const Invokation& invokation) : analyzer_() {
+ if (invokation.params().size() > 0) {
+ THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
+ invokation.id().c_str());
+ }
+ }
+ virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+ lucene::util::Reader * reader) {
+ return analyzer_.tokenStream(fieldName, reader);
+ }
+ private:
+ T analyzer_;
+ };
+
+ /**
+ * Template class associated with CLucene filter and a TokenStreamFactory.
+ * Uses TokenStreamFactory to transform given character stream into tokenstream
+ * and then applies the given Clucene filter to the token stream.
+ * The template parameter T must implement lucene::analysis::Filter abstraction.
+ */
+ template<class T>
+ class FilterFactory : public TokenStreamFactory
+ {
+ public:
+ FilterFactory(const Invokation& invokation, auto_ptr<TokenStreamFactory> factory) : factory_(factory) {
+ if (invokation.params().size() > 0) {
+ THROW_CPIXEXC(L"Filter %S does not accept parameters",
+ invokation.id().c_str());
+ }
+ }
+ virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+ lucene::util::Reader * reader) {
+ return _CLNEW T(factory_->tokenStream(fieldName, reader), true);
+ }
+ private:
+ std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Specialized Analyzer wrap for CLucene's PerFieldAnalyzer. Specialized
+ * template is needed because perfield analyzer accepts parameters
+ * (specific analyzers for different field plus default analyzer)
+ */
+ template<>
+ class AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper> : public TokenStreamFactory {
+ public:
+     AnalyzerWrap(const Switch& sw, const wchar_t* config) : analyzer_(0) {
+         using namespace Cpt::Parser;
+         using namespace lucene::analysis;
+         analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def(), config)); // default gets config too
+         try {
+             for (int i = 0; i < sw.cases().size(); i++) {
+                 const Case& cs = *sw.cases()[i];
+                 for (int j = 0; j < cs.cases().size(); j++) {
+                     analyzer_->addAnalyzer( cs.cases()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping(), config ) );
+                 }
+             }
+         } catch (...) { _CLDELETE(analyzer_); throw; } // a throwing ctor never runs ~AnalyzerWrap
+     }
+     virtual ~AnalyzerWrap() {
+         _CLDELETE(analyzer_);
+     }
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return analyzer_->tokenStream(fieldName, reader);
+     }
+ private:
+     lucene::analysis::PerFieldAnalyzerWrapper* analyzer_;
+ };
+
+
+
+ /**
+ * Specialized StopFilter factory. Specialized filter is needed
+ * because StopFilter needs parameters (stop word list or a language).
+ *
+ * A single language identifier selects a prepared stop word list; any
+ * other parameter list must consist of string literals, which are copied
+ * into this factory. The copies live in standard containers, so they are
+ * released automatically, also when the constructor throws.
+ */
+ template<>
+ class FilterFactory<lucene::analysis::StopFilter> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : words_(0), factory_(factory) {
+         using namespace Cpt::Parser;
+         Identifier* id = invokation.params().size() == 1
+             ? dynamic_cast<Identifier*>(invokation.params()[0]) : 0;
+         if (id) {
+             if (id->id() == CPIX_WLANG_EN) {
+                 words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS;
+             } else if (id->id() == CPIX_WLANG_FR) {
+                 words_ = analysis::NonEnglishStopWords::FRENCH_STOP_WORDS;
+             } else {
+                 THROW_CPIXEXC(L"No prepared stopword list for language code '%S'",
+                               id->id().c_str());
+             }
+         } else {
+             // Copy every literal before taking any c_str() pointer: growing
+             // ownStrings_ later could invalidate pointers taken earlier.
+             for (size_t i = 0; i < invokation.params().size(); i++) {
+                 StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
+                 if (!lit) {
+                     THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters.");
+                 }
+                 ownStrings_.push_back(lit->text());
+             }
+             for (size_t i = 0; i < ownStrings_.size(); i++) {
+                 ownWords_.push_back(ownStrings_[i].c_str());
+             }
+             ownWords_.push_back(0); // the word list is NULL terminated
+             words_ = &ownWords_[0];
+         }
+     }
+     // Nothing to release by hand: the containers own the copied words.
+     virtual ~FilterFactory() {}
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, words_);
+     }
+ private:
+     const wchar_t **words_;                // list given to StopFilter: prepared list or &ownWords_[0]
+     std::vector<std::wstring> ownStrings_; // owned copies of the literal stop words
+     std::vector<const wchar_t*> ownWords_; // NULL terminated view of ownStrings_
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Factory for SnowballFilter. A specialization is required since the
+ * filter is configured with one parameter: the stemming language.
+ */
+ template<>
+ class FilterFactory<lucene::analysis::SnowballFilter> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : factory_(factory) {
+         using namespace Cpt::Parser;
+         Identifier* language = invokation.params().size() == 1
+             ? dynamic_cast<Identifier*>(invokation.params()[0]) : 0;
+         if (!language) {
+             THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." );
+         }
+         if (!(language->id() == CPIX_WLANG_EN)) {
+             THROW_CPIXEXC(L"Language identifier %S is not supported for stemming",
+                           language->id().c_str());
+         }
+         lang_ = cpix_LANG_EN;
+     }
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_);
+     }
+ private:
+     cpix_LangCode lang_;
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Factory for LengthFilter. A specialization is required since the
+ * filter is configured with two integers: minimum and maximum length.
+ */
+ template<>
+ class FilterFactory<lucene::analysis::LengthFilter> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : factory_(factory) {
+         using namespace Cpt::Parser;
+         IntegerLit* lower = invokation.params().size() == 2 ? dynamic_cast<IntegerLit*>(invokation.params()[0]) : 0;
+         IntegerLit* upper = lower ? dynamic_cast<IntegerLit*>(invokation.params()[1]) : 0; // only checked after the first
+         if (!upper) {
+             THROW_CPIXEXC("Length filter takes exactly two integer parameters");
+         }
+         min_ = lower->value();
+         max_ = upper->value();
+     }
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ );
+     }
+ private:
+     int min_, max_;
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Factory for PrefixGenerator. A specialization is required since the
+ * generator is configured with one integer: the maximum prefix length.
+ */
+ template<>
+ class FilterFactory<PrefixGenerator> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : factory_(factory) {
+         using namespace Cpt::Parser;
+         IntegerLit* limit = invokation.params().size() == 1 ? dynamic_cast<IntegerLit*>(invokation.params()[0]) : 0;
+         if (!limit) {
+             THROW_CPIXEXC("Prefix generator takes exactly one integer parameter");
+         }
+         maxPrefixLength_ = limit->value();
+     }
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW PrefixGenerator(factory_->tokenStream(fieldName, reader), true, maxPrefixLength_ );
+     }
+ private:
+     int maxPrefixLength_;
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Specialized PrefixFilter factory is needed, because prefix filter
+ * accepts parameters (language set or prefixes).
+ *
+ * A single language identifier selects a prepared prefix list; any other
+ * parameter list must consist of string literals, which are copied into
+ * this factory. The copies live in standard containers, so they are
+ * released automatically, also when the constructor throws.
+ */
+ template<>
+ class FilterFactory<analysis::PrefixFilter> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : prefixes_(0), factory_(factory) {
+         using namespace Cpt::Parser;
+         Identifier* id = invokation.params().size() == 1
+             ? dynamic_cast<Identifier*>(invokation.params()[0]) : 0;
+         if (id) {
+             if (id->id() == CPIX_WLANG_HE) {
+                 prefixes_ = analysis::HebrewPrefixes;
+             } else {
+                 THROW_CPIXEXC(L"No prepared prefix list for language code '%S'",
+                               id->id().c_str());
+             }
+         } else {
+             // Copy every literal before taking any c_str() pointer: growing
+             // ownStrings_ later could invalidate pointers taken earlier.
+             for (size_t i = 0; i < invokation.params().size(); i++) {
+                 StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
+                 if (!lit) {
+                     THROW_CPIXEXC(L"PrefixFilter accepts only language identifer or list of strings as a parameters.");
+                 }
+                 ownStrings_.push_back(lit->text());
+             }
+             for (size_t i = 0; i < ownStrings_.size(); i++) {
+                 ownPrefixes_.push_back(ownStrings_[i].c_str());
+             }
+             ownPrefixes_.push_back(0); // the prefix list is NULL terminated
+             prefixes_ = &ownPrefixes_[0];
+         }
+     }
+     // Nothing to release by hand: the containers own the copied prefixes.
+     virtual ~FilterFactory() {}
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW analysis::PrefixFilter(factory_->tokenStream(fieldName, reader), true, prefixes_);
+     }
+ private:
+     const wchar_t **prefixes_;                // list given to PrefixFilter: prepared list or &ownPrefixes_[0]
+     std::vector<std::wstring> ownStrings_;    // owned copies of the literal prefixes
+     std::vector<const wchar_t*> ownPrefixes_; // NULL terminated view of ownStrings_
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ /**
+ * Specialized ElisionFilter factory is needed, because elision filter
+ * accepts parameters (language set or articles).
+ *
+ * A single language identifier selects a prepared article list; any other
+ * parameter list must consist of string literals, which are copied into
+ * this factory. The copies live in standard containers, so they are
+ * released automatically, also when the constructor throws.
+ */
+ template<>
+ class FilterFactory<analysis::ElisionFilter> : public TokenStreamFactory
+ {
+ public:
+     FilterFactory(const Invokation& invokation,
+                   auto_ptr<TokenStreamFactory> factory)
+         : articles_(0), factory_(factory) {
+         using namespace Cpt::Parser;
+         Identifier* id = invokation.params().size() == 1
+             ? dynamic_cast<Identifier*>(invokation.params()[0]) : 0;
+         if (id) {
+             if (id->id() == CPIX_WLANG_FR) {
+                 articles_ = analysis::FrenchArticles;
+             } else {
+                 THROW_CPIXEXC(L"No prepared article list for language code '%S'",
+                               id->id().c_str());
+             }
+         } else {
+             // Copy every literal before taking any c_str() pointer: growing
+             // ownStrings_ later could invalidate pointers taken earlier.
+             for (size_t i = 0; i < invokation.params().size(); i++) {
+                 StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
+                 if (!lit) {
+                     THROW_CPIXEXC(L"ElisionFilter accepts only language identifer or list of strings as a parameters.");
+                 }
+                 ownStrings_.push_back(lit->text());
+             }
+             for (size_t i = 0; i < ownStrings_.size(); i++) {
+                 ownArticles_.push_back(ownStrings_[i].c_str());
+             }
+             ownArticles_.push_back(0); // the article list is NULL terminated
+             articles_ = &ownArticles_[0];
+         }
+     }
+     // Nothing to release by hand: the containers own the copied articles.
+     virtual ~FilterFactory() {}
+     virtual lucene::analysis::TokenStream* tokenStream(const TCHAR * fieldName,
+                                                        lucene::util::Reader * reader) {
+         return _CLNEW analysis::ElisionFilter(factory_->tokenStream(fieldName, reader), true, articles_);
+     }
+ private:
+     const wchar_t **articles_;                // list given to ElisionFilter: prepared list or &ownArticles_[0]
+     std::vector<std::wstring> ownStrings_;    // owned copies of the literal articles
+     std::vector<const wchar_t*> ownArticles_; // NULL terminated view of ownStrings_
+     std::auto_ptr<TokenStreamFactory> factory_;
+ };
+
+ typedef auto_ptr<TokenStreamFactory> (*TokenizerFactoryCreator)(const Invokation& invokation);
+ typedef auto_ptr<TokenStreamFactory> (*FilterFactoryCreator)(const Invokation& invokation,
+ auto_ptr<TokenStreamFactory> factory);
+
+ template<class T>
+ struct TokenStreamFactoryCtor
+ {
+ static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
+ return auto_ptr<TokenStreamFactory>(new T(invokation));
+ }
+ };
+
+ /**
+ * Sets up a tokenizer factory with given invokation parameters
+ */
+ template<class T>
+ struct TokenizerFactoryCtor
+ {
+ static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
+ return auto_ptr<TokenStreamFactory>(new TokenizerFactory<T>(invokation));
+ }
+ };
+
+ /**
+ * Sets up an analyzer wrap with given invokation parameters
+ */
+ template<class T>
+ struct AnalyzerWrapCtor
+ {
+ static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
+ return auto_ptr<TokenStreamFactory>(new AnalyzerWrap<T>(invokation));
+ }
+ };
+
+ /**
+ * Sets up a filter factory with given invokation parameters
+ */
+ template<class T>
+ struct FilterFactoryCtor
+ {
+ static auto_ptr<TokenStreamFactory> create(const Invokation& invokation,
+ auto_ptr<TokenStreamFactory> factory) {
+ return auto_ptr<TokenStreamFactory>(new FilterFactory<T>(invokation, factory));
+ }
+ };
+
+ struct TokenizerClassEntry {
+ const wchar_t *id_;
+ TokenizerFactoryCreator createFactory_;
+ };
+
+ //
+ // Following TokenizerClassEntries and FilterClassEntries contain
+ // the mapping from tokenizer/analyzer/filter names into glue code
+ // templates providing the implementations.
+ //
+
+ TokenizerClassEntry TokenizerClassEntries[] = {
+ {CPIX_TOKENIZER_STANDARD, TokenizerFactoryCtor<lucene::analysis::standard::StandardTokenizer>::create},
+ {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor<lucene::analysis::WhitespaceTokenizer>::create},
+ {CPIX_TOKENIZER_LETTER, TokenizerFactoryCtor<lucene::analysis::LetterTokenizer>::create},
+ {CPIX_TOKENIZER_KEYWORD, TokenizerFactoryCtor<lucene::analysis::KeywordTokenizer>::create},
+ {CPIX_TOKENIZER_CJK, TokenizerFactoryCtor<lucene::analysis::cjk::CJKTokenizer>::create},
+ {CPIX_TOKENIZER_NGRAM, TokenizerFactoryCtor<analysis::CjkNGramTokenizer>::create},
+ {CPIX_TOKENIZER_KOREAN, TokenizerFactoryCtor<analysis::KoreanTokenizer>::create},
+ {CPIX_TOKENIZER_KOREAN_QUERY,TokenizerFactoryCtor<analysis::KoreanQueryTokenizer>::create},
+
+ {CPIX_ANALYZER_STANDARD, AnalyzerWrapCtor<lucene::analysis::standard::StandardAnalyzer>::create},
+ {CPIX_ANALYZER_DEFAULT, TokenStreamFactoryCtor<DefaultTokenStreamFactory>::create},
+
+ // TODO: Add more Tokenizers/Analyzers
+
+ // Example tokenizer (works as such if tokenizers don't take parameters)
+ // {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor<MyTokenizer>::create},
+
+ // Example analyzer (works as such if analyzer don't take parameters)
+ // {CPIX_ANALYZER_MYANALYZER, AnalyzerWrapCtor<MyAnalyzer>::create},
+
+ {0, 0}
+ };
+
+ struct FilterClassEntry {
+ const wchar_t *id_;
+ FilterFactoryCreator createFactory_;
+ };
+
+ FilterClassEntry FilterClassEntries[] = {
+ {CPIX_FILTER_STANDARD, FilterFactoryCtor<lucene::analysis::standard::StandardFilter>::create},
+ {CPIX_FILTER_LOWERCASE, FilterFactoryCtor<lucene::analysis::LowerCaseFilter>::create},
+ {CPIX_FILTER_ACCENT, FilterFactoryCtor<lucene::analysis::ISOLatin1AccentFilter>::create},
+ {CPIX_FILTER_STOP, FilterFactoryCtor<lucene::analysis::StopFilter>::create},
+ {CPIX_FILTER_STEM, FilterFactoryCtor<lucene::analysis::SnowballFilter>::create},
+ {CPIX_FILTER_LENGTH, FilterFactoryCtor<lucene::analysis::LengthFilter>::create},
+ {CPIX_FILTER_PREFIXES, FilterFactoryCtor<PrefixGenerator>::create},
+ {CPIX_FILTER_THAI, FilterFactoryCtor<analysis::ThaiWordFilter>::create},
+ {CPIX_FILTER_PREFIX, FilterFactoryCtor<analysis::PrefixFilter>::create},
+ {CPIX_FILTER_ELISION, FilterFactoryCtor<analysis::ElisionFilter>::create},
+
+ // TODO: Add more Filters
+
+ // Example filter (works as such if filter don't take parameters)
+ // {CPIX_FILTER_MYFILTER, FilterFactoryCtor<MyFilter>::create},
+
+ {0, 0}
+ };
+ // Parses the textual analyzer definition and runs setup() on the parsed piping.
+ CustomAnalyzer::CustomAnalyzer(const wchar_t* definition, const wchar_t* config) {
+ std::auto_ptr<Piping> piping = AnalyzerExp::ParsePiping( definition );
+ setup( *piping, config );
+ }
+ // Runs setup() on an already parsed definition.
+ CustomAnalyzer::CustomAnalyzer(const Piping& definition, const wchar_t* config) {
+ setup(definition, config);
+ }
+
+ using namespace Cpt::Parser;
+ // Builds factory_ from piping: first the tokenizer/analyzer stage, then each filter wrapped around it.
+ void CustomAnalyzer::setup(const Piping& piping, const wchar_t* config) {
+     // reset() is used for raw pointers: std::auto_ptr has no standard assignment from a plain pointer
+     // If the first item is invokation, create corresponding analyzer/tokenizer
+     if (dynamic_cast<const Invokation*>(&piping.tokenizer())) {
+         const Invokation& tokenizer = dynamic_cast<const Invokation&>(piping.tokenizer());
+         TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() );
+         factory_ = tokenizerEntry.createFactory_( tokenizer );
+     } else if (dynamic_cast<const Switch*>(&piping.tokenizer())) {
+         // If the first item is switch statement, create per-field analyzer
+         const Switch& tokenizer = dynamic_cast<const Switch&>(piping.tokenizer());
+         factory_.reset( new AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper>( tokenizer, config ) );
+     } else if (dynamic_cast<const LocaleSwitch*>(&piping.tokenizer())) {
+         const LocaleSwitch& tokenizer = dynamic_cast<const LocaleSwitch&>(piping.tokenizer());
+         factory_.reset( new LocaleSwitchStreamFactory( tokenizer, config ) );
+     } else if (dynamic_cast<const ConfigSwitch*>(&piping.tokenizer())) {
+         const ConfigSwitch& tokenizer = dynamic_cast<const ConfigSwitch&>(piping.tokenizer());
+         factory_ = resolveConfigSwitch( tokenizer, config );
+     } else {
+         THROW_CPIXEXC(L"Analyzer definition syntax did not begin with valid tokenizer");
+     }
+
+     // Add filters: each one takes over the factory built so far and becomes the new outermost stage
+     const std::vector<Invokation*>& filters = piping.filters();
+     for (size_t i = 0; i < filters.size(); i++) {
+         FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() );
+         factory_ = filterEntry.createFactory_( *filters[i], factory_ );
+     }
+ }
+ // Selects the piping of the case whose label equals config; without config or a match, the default.
+ std::auto_ptr<TokenStreamFactory> CustomAnalyzer::resolveConfigSwitch(const ConfigSwitch& csw, const wchar_t* config) {
+     // a NULL config can match no label, so the search is skipped altogether
+     for (int caseIdx = 0; config && caseIdx < csw.cases().size(); caseIdx++) {
+         const Case& candidate = *csw.cases()[caseIdx];
+         for (int labelIdx = 0; labelIdx < candidate.cases().size(); labelIdx++) {
+             if (wcscmp(config, candidate.cases()[labelIdx].c_str()) != 0) {
+                 continue;
+             }
+             return std::auto_ptr<TokenStreamFactory>(
+                 new CustomAnalyzer(candidate.piping(), config));
+         }
+     }
+     return std::auto_ptr<TokenStreamFactory>(new CustomAnalyzer(csw.def(), config));
+ }
+ // Finds the TokenizerClassEntries row registered under id; unknown ids end in THROW_CPIXEXC.
+ TokenizerClassEntry& CustomAnalyzer::getTokenizerEntry(std::wstring id) {
+
+     // TokenizerClassEntries is closed by a row whose id_ is 0. Walk the rows up
+     // to that sentinel and hand back the first one whose keyword equals id.
+     //
+     for (TokenizerClassEntry* entry = TokenizerClassEntries; entry->id_; ++entry) {
+         if (id.compare(entry->id_) == 0) {
+             return *entry;
+         }
+     }
+
+     THROW_CPIXEXC(L"Unknown tokenizer '%S'.",
+                   id.c_str());
+ }
+ // Finds the FilterClassEntries row registered under id; unknown ids end in THROW_CPIXEXC.
+ FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) {
+
+     // FilterClassEntries is closed by a row whose id_ is 0. Walk the rows up
+     // to that sentinel and hand back the first one whose keyword equals id,
+     // i.e. the provider of the matching filter factory.
+     //
+     for (FilterClassEntry* entry = FilterClassEntries; entry->id_; ++entry) {
+         if (id.compare(entry->id_) == 0) {
+             return *entry;
+         }
+     }
+
+     THROW_CPIXEXC(L"Unknown filter '%S'.",
+                   id.c_str());
+ }
+
+ CustomAnalyzer::~CustomAnalyzer() {}
+ // Creates the token stream for fieldName/reader by delegating to factory_.
+ lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t * fieldName,
+ lucene::util::Reader * reader) {
+ // Utilizes the token stream factory to form the token stream.
+ // The token stream factory is prepared during custom analyzer construction
+ // and is based on the analyzer definition string.
+
+ return factory_->tokenStream(fieldName, reader);
+ }
+ // Returns a SystemAnalyzer constructed around a newly created CLucene StandardAnalyzer.
+ std::auto_ptr<lucene::analysis::Analyzer> CreateDefaultAnalyzer()
+ {
+ return
+ std::auto_ptr<lucene::analysis::Analyzer>(
+ new SystemAnalyzer(_CLNEW lucene::analysis::standard::StandardAnalyzer()));
+ }
+
+}
--- a/searchengine/cpix/cpix/src/fileparser/fileparser.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/fileparser/fileparser.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -162,7 +162,8 @@
wFullName.c_str(),
cpix_STORE_YES
| cpix_INDEX_TOKENIZED
- | cpix_AGGREGATE_YES));
+ | cpix_AGGREGATE_YES
+ | cpix_FREE_TEXT));
doc->add(newField.get());
newField.release();
@@ -172,7 +173,8 @@
wBaseName.c_str(),
cpix_STORE_NO
| cpix_INDEX_TOKENIZED
- | cpix_AGGREGATE_YES));
+ | cpix_AGGREGATE_YES
+ | cpix_FREE_TEXT));
doc->add(newField.get());
newField.release();
@@ -180,7 +182,8 @@
wExtension.c_str(),
cpix_STORE_NO
| cpix_INDEX_TOKENIZED
- | cpix_AGGREGATE_YES));
+ | cpix_AGGREGATE_YES
+ | cpix_FREE_TEXT));
doc->add(newField.get());
newField.release();
}
--- a/searchengine/cpix/cpix/src/iidxdb.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/iidxdb.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -25,6 +25,9 @@
#include "iqrytype.h" // 'unnecessary' dependency for static instance releasing
#include "ifieldfilter.h" // for static instance releasing
+#include "analyzer.h"
+#include "localization.h"
+
namespace Cpix {
@@ -184,6 +187,9 @@
cleanupClLockDir();
// TODO init lucene (??? operation not provided)
+
+ Localization::instance();
+ Analysis::init(*ip);
IdxDbMgr::init(*ip);
ShutdownSentry
@@ -277,6 +283,26 @@
}
try
+ {
+ Analysis::shutdown();
+ }
+ catch (...)
+ {
+ logMsg(CPIX_LL_ERROR,
+ "Cpix shutdownAll: FAILED. Analysis::shutdown ########\n");
+ }
+
+ try
+ {
+ Localization::shutdown();
+ }
+ catch (...)
+ {
+ logMsg(CPIX_LL_ERROR,
+ "Cpix shutdownAll: FAILED. Localization::shutdown ########\n");
+ }
+
+ try
{
_lucene_shutdown();
}
--- a/searchengine/cpix/cpix/src/initparams.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/initparams.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -36,7 +36,8 @@
InitParams::InitParams()
- : cpixDir_(DEFAULT_CPIX_DIR)
+ : cpixDir_(DEFAULT_CPIX_DIR),
+ resourceDir_(DEFAULT_RESOURCE_DIR)
{
setenv("LUCENE_LOCK_DIR_ENV_1",DEFAULT_CLUCENE_LOCK_DIR,1);
}
@@ -58,6 +59,22 @@
cpixDir_ = value;
}
+ const char * InitParams::getResourceDir() const
+ {
+ return resourceDir_.c_str(); // points into resourceDir_; not a copy
+ }
+
+ void InitParams::setResourceDir(const char * value)
+ {
+     // reject both a missing pointer and an empty string
+     const bool unusable = (value == NULL) || (value[0] == '\0');
+     if (unusable)
+     {
+         THROW_CPIXEXC("Value for property resourceDir cannot be NULL or empty string");
+     }
+     resourceDir_ = value;
+ }
+
const char * InitParams::getCluceneLockDir() const
{
return getenv("LUCENE_LOCK_DIR_ENV_1");
--- a/searchengine/cpix/cpix/src/iqrytype.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/iqrytype.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -309,6 +309,17 @@
}
+namespace {
+
+ const wchar_t DOLLAR[] = L"$";
+ const wchar_t LESSTHAN[] = L"<";
+ const wchar_t GREATERTHAN[] = L">";
+ const wchar_t COMMA[] = L",";
+ const wchar_t LEFTPARENTHESIS[] = L"(";
+ const wchar_t RIGHTPARENTHESIS[] = L")";
+}
+
+
namespace Cpix
{
@@ -569,7 +580,7 @@
Tokens
source(tokenizer(),
qryStr);
- WhiteSpaceFilter
+ StdFilter
tokens(source);
State
@@ -761,8 +772,7 @@
IQryType * IQryType::parseQry(cpix_QueryParser * queryParser,
const wchar_t * qryStr)
{
- IQryType
- * rv = NULL;
+ auto_ptr<IQryType> rv( NULL );
QryCall
qryCall(qryStr);
@@ -783,13 +793,13 @@
qryCall.qryTypeId_.c_str());
}
- rv = qti->factory_();
+ rv.reset( qti->factory_() );
rv->setUp(queryParser,
qryCall.args_,
qryCall.innerQryStr_.c_str());
- return rv;
+ return rv.release();
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/localization.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,112 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#include "CLucene.h"
+#include "CLucene\queryParser\Multifieldqueryparser.h"
+
+#include "cpixidxdb.h"
+
+#include "cpixstrtools.h"
+#include "cpixhits.h"
+#include "cpixsearch.h"
+#include "iidxdb.h"
+#include "cpixutil.h"
+#include "localization.h"
+
+#include "spi/locale.h"
+
+#include "glib.h"
+
+const char* cpix_LOCALE_AUTO = "auto";
+const wchar_t* cpix_WIDE_LOCALE_AUTO = L"auto";
+
+namespace Cpix {
+
+ Localization* Localization::theInstance_ = NULL;
+ // Returns the single Localization instance, creating it on the first call.
+ Localization& Localization::instance() {
+ if ( !theInstance_ ) {
+ theInstance_ = new Localization(); // NOTE(review): no lock is taken here - confirm the first call is single-threaded
+ }
+ return *theInstance_;
+ }
+ // Destroys the instance; a later instance() call creates a fresh one.
+ void Localization::shutdown() {
+     delete theInstance_;
+     theInstance_ = NULL; // was 'false': a bool is not a null pointer constant in C++11 and later
+ }
+
+ Localization::Localization()
+ : mutex_(),
+ auto_( true ),
+ languageNames_(){}
+
+ // Fixes the language list to locale, or switches back to automatic lookup for cpix_WIDE_LOCALE_AUTO.
+ void Localization::setLocale(const wchar_t* locale) {
+     Cpt::SyncRegion lock( mutex_ );
+
+     languageNames_.clear();
+     auto_ = ( wcscmp( locale, cpix_WIDE_LOCALE_AUTO ) == 0 );
+
+     // an explicit locale becomes the only entry of the language list
+     if ( !auto_ ) {
+         languageNames_.push_back(locale);
+     }
+ }
+ // Narrow-string overload; forwards to the wide-string overload.
+ void Localization::setLocale(const char* locale) {
+ Cpt::auto_array<wchar_t> wlocale(locale, strlen(locale)); // presumably a widened copy of locale - TODO confirm
+ setLocale(wlocale.get());
+ }
+
+ // Returns the language names: asked from Spi in automatic mode, else the explicitly set list.
+ std::vector<std::wstring> Localization::getLanguageNames() {
+     Cpt::SyncRegion lock( mutex_ );
+
+     if ( !auto_ ) {
+         return languageNames_;
+     }
+
+     // might be slow
+     return Spi::GetLanguageNames();
+ }
+
+}
+// Nullary functor that applies a narrow-string locale to the Localization instance.
+class SetLocaleFunctor
+{
+ private:
+ // the pointer itself is stored, not a copy of the string
+ const char* locale_;
+
+ public:
+
+ typedef void result_type; // returns nothing
+
+ SetLocaleFunctor(const char * locale)
+ : locale_(locale) {}
+ // Calls Localization::instance().setLocale() with the stored pointer.
+ void operator()() {
+ Cpix::Localization::instance().setLocale( locale_ );
+ }
+};
+// C API entry point for setting the locale; XlateExc presumably records failures in result - TODO confirm.
+void cpix_SetLocale(cpix_Result* result, const char* locale) {
+ XlateExc(result,
+ SetLocaleFunctor(locale));
+}
+
--- a/searchengine/cpix/cpix/src/prefixopt.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/prefixopt.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -23,6 +23,8 @@
#include "cpixstrtools.h"
+#include "cluceneext.h"
+
namespace Cpix {
using namespace lucene::search;
@@ -60,7 +62,7 @@
// rewrite term
std::wstring text( term->text() );
text = text.substr(0, length);
- term = new Term(prefixField_.c_str(), text.c_str(), true);
+ term = lucene::util::freeref( _CLNEW Term(prefixField_.c_str(), text.c_str()) );
// rewrite query
std::auto_ptr<Query> ret( new TermQuery( term ) );
@@ -75,7 +77,9 @@
if ( boolq )
{
// Just modify the query
- Cpt::auto_array<BooleanClause*> clauses( boolq->getClauses() );
+ Cpt::auto_array<BooleanClause*> clauses( new BooleanClause*[boolq->getClauseCount() + 1]);
+
+ boolq->getClauses( clauses.get() );
for ( int i = 0; i < boolq->getClauseCount(); i++ )
{
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/prefixqueryparser.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,201 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#include "CLucene.h"
+
+#include "cpixmaindefs.h"
+
+// internal libs
+#include "cpixparsetools.h"
+
+// internal
+#include "analyzer.h"
+
+#include "prefixqueryparser.h"
+
+#include "cpixanalyzer.h"
+#include "cluceneext.h"
+
+#include "tinyunicode.h"
+
+#include "cpixexc.h"
+
+namespace Cpix {
+
+ using namespace lucene::analysis;
+ using namespace lucene::search;
+ using namespace lucene::document;
+ using namespace lucene::util;
+ using lucene::index::Term;
+ using namespace std;
+
+ namespace {
+
+ /**
+  * Small optimization to avoid creating extra boolean queries: with no
+  * query added the result is NULL, a single query is returned as is, and
+  * only two or more are wrapped in a BooleanQuery (as owned clauses).
+  */
+ class QueryConstructor {
+ public:
+     QueryConstructor() : q_(), bq_(0) {}
+     /** Hands the built query (possibly NULL) to the caller and resets. */
+     auto_ptr<Query> operator()() {
+         bq_ = 0; // q_ gives up ownership below; do not keep a pointer into it
+         return q_;
+     }
+     /** Takes ownership of q and appends it to the result; NULL is ignored. */
+     void add(auto_ptr<Query> q) {
+         if ( !q.get() ) {
+             return;
+         }
+         if ( bq_ ) {
+             bq_->add( q.release(), true, true, false );
+         } else if ( q_.get() ) {
+             // second query: promote the single query to a boolean query
+             auto_ptr<BooleanQuery> bq( new BooleanQuery() );
+             bq_ = bq.get();
+             bq_->add( q_.release(), true, true, false );
+             bq_->add( q.release(), true, true, false );
+             q_.reset( bq.release() );
+         } else {
+             q_ = q;
+         }
+     }
+     inline void add(Query* q) {
+         add( auto_ptr<Query>( q ) );
+     }
+ private:
+     auto_ptr<Query> q_;  // result built so far (owned)
+     BooleanQuery* bq_;   // non-owning alias of q_ once it is a BooleanQuery, else 0
+ };
+
+ /**
+ * Wraps a TokenStream (taking ownership of it) and reads one token ahead,
+ * so that hasNext() can tell whether the token just returned was the last one.
+ */
+ class HasNextTokenStream {
+
+ public:
+
+ HasNextTokenStream(TokenStream* tokens)
+ : i_(true),
+ next_(),
+ buf_(),
+ tokens_( tokens ){
+ next_ = tokens_->next(&buf_[0]); // pre-fetch the first token into slot 0
+ }
+
+ inline Token& next() { // only meaningful while hasNext() is true
+ next_ = tokens_->next(&buf_[i_]); // look-ahead is read into the slot not holding the pending token
+ i_ = !i_; // i_ now indexes the token fetched by the previous call (or the constructor)
+ return buf_[i_]; // this slot is overwritten by the following next() call
+ }
+
+ inline bool hasNext() {
+ return next_;
+ }
+
+ private:
+ bool i_, next_; // i_: slot the next look-ahead is written to; next_: result of the last underlying next()
+ Token buf_[2]; // two slots: the token handed out and the look-ahead
+ auto_ptr<TokenStream> tokens_; // owned underlying stream
+ };
+
+
+ }
+
+ PrefixQueryParser::PrefixQueryParser(const wchar_t* field) // field: name used for every Term built by toQuery(); copied into field_
+ : field_(field) {}
+
+ PrefixQueryParser::~PrefixQueryParser() {}
+
+ auto_ptr<Query> PrefixQueryParser::parse(const wchar_t* query) {
+     // Converts each word yielded by the WhitespaceSplitter with toQuery() and
+     // combines the per-word queries; the result is NULL if none was produced.
+     QueryConstructor combined;
+     for ( Cpt::Lex::WhitespaceSplitter words(query); words; )
+         combined.add( toQuery( words++ ) );
+     return combined();
+ }
+
+ const wchar_t* PrefixQueryParser::getField() const { // returned pointer refers to this parser's field_ string
+ return field_.c_str();
+ }
+
+ void PrefixQueryParser::setDefaultOperator(cpix_QP_Operator op) { // unsupported here: op is unused and an error is raised via THROW_CPIXEXC
+ THROW_CPIXEXC("Prefix query parser does not support setting the default operator.");
+ }
+
+ bool PrefixQueryParser::usePrefixFor(lucene::analysis::Token& token) { // decided from the token's first character only
+ return !analysis::unicode::IsCjk(token.termText()[0]); // true unless IsCjk() accepts that character
+ }
+
+ auto_ptr<Query>
+ PrefixQueryParser::toQuery(Cpt::Lex::Token word) {
+     // Turns one word of the query into a query of its own. The word is run
+     // through the prefix analyzer; each resulting token contributes a clause.
+     Analyzer& preAnalyzer( Analysis::getPrefixAnalyzer() );
+     StringReader reader( word.begin(), word.length() );
+     HasNextTokenStream tokens(
+         preAnalyzer.tokenStream( field_.c_str(), &reader ) );
+     QueryConstructor ret;
+     while ( tokens.hasNext() ) {
+         lucene::analysis::Token& token = tokens.next();
+         if ( usePrefixFor(token) ) {
+             if (!tokens.hasNext()) {
+                 // Turn only last token of this word into prefix query
+                 ret.add(
+                     _CLNEW PrefixQuery( freeref( _CLNEW Term( field_.c_str(),
+                                                                token.termText() ) ) ) );
+             } else {
+                 // Other tokens can be normal term queries
+                 ret.add(
+                     _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(),
+                                                              token.termText() ) ) ) );
+             }
+             continue;
+         }
+         // usePrefixFor() rejected the token: re-analyze its text with the
+         // query analyzer and search for the outcome as a term or a phrase.
+         Analyzer& termAnalyzer = Analysis::getQueryAnalyzer();
+         StringReader termReader( token.termText(), token.termTextLength() );
+         HasNextTokenStream termTokens(
+             termAnalyzer.tokenStream( field_.c_str(), &termReader ) );
+         if ( !termTokens.hasNext() ) {
+             continue; // analyzer produced nothing: never build a term from an unfilled token
+         }
+         Token& first = termTokens.next();
+         if (termTokens.hasNext()) { // more than one
+             auto_ptr<PhraseQuery> phrase( _CLNEW PhraseQuery() );
+             phrase->add( freeref( _CLNEW Term( field_.c_str(), first.termText() ) ) );
+             while (termTokens.hasNext()) {
+                 phrase->add( freeref( _CLNEW Term( field_.c_str(),
+                                                    termTokens.next().termText() ) ) );
+             }
+             ret.add( std::auto_ptr<Query>( phrase.release() ) );
+         } else {
+             ret.add(
+                 _CLNEW TermQuery( freeref( _CLNEW Term( field_.c_str(),
+                                                          first.termText() ) ) ) );
+         }
+     }
+     return ret();
+ }
+
+}
--- a/searchengine/cpix/cpix/src/qrytypes/cluceneqrytype.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/qrytypes/cluceneqrytype.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -34,7 +34,8 @@
//Introduced for prefix optimization.
#include "prefixopt.h"
#include "cpixmaindefs.h"
-#include "iqrytype.h"
+
+#include "queryparser.h"
namespace Cpix
{
@@ -48,7 +49,6 @@
//
// private members
//
- lucene::queryParser::QueryParser * clQueryParser_;
lucene::search::Query * clQuery_;
public:
@@ -60,8 +60,7 @@
// lifetime management
//
LuceneQryType()
- : clQueryParser_(NULL),
- clQuery_(NULL)
+ : clQuery_(NULL)
{
;
}
@@ -81,40 +80,34 @@
const std::list<std::wstring> & args,
const wchar_t * qryStr)
{
- clQueryParser_ = Cast2Native<cpix_QueryParser>(queryParser);
-
- if (args.size() > 0)
+ if (args.size() > 0)
{
THROW_CPIXEXC(PL_ERROR "No arguments needed here");
}
+
+ IQueryParser* qp = Cast2Native<cpix_QueryParser>(queryParser);
+ clQuery_ = qp->parse(qryStr).release();
- //Can we do get rid of this parse here?
- clQuery_ = clQueryParser_->parse(qryStr);
- PrefixOptQueryRewriter prefixOpt_(OPTIMIZED_PREFIX_MAX_LENGTH,
- LCPIX_DEFAULT_FIELD,
- LCPIX_DEFAULT_PREFIX_FIELD );
- //Switch query ownership to stack and back
- std::auto_ptr<lucene::search::Query> q( clQuery_ ); clQuery_ = NULL;
- clQuery_ = prefixOpt_.rewrite( q ).release();
-
- if (clQuery_ == NULL)
- {
+ if (clQuery_ == NULL)
+ {
THROW_CPIXEXC("Query reduced to empty query.");
- }
+ }
}
virtual cpix_Hits * search(cpix_IdxSearcher * idxSearcher)
{
return CLuceneSearchIdx(idxSearcher,
- clQuery_);
+ clQuery_
+ );
}
virtual cpix_Hits * search(cpix_IdxDb * idxDb)
{
return CLuceneSearchIdx(idxDb,
- clQuery_);
+ clQuery_
+ );
}
private:
--- a/searchengine/cpix/cpix/src/qrytypes/dumpqrytype.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/qrytypes/dumpqrytype.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -50,6 +50,10 @@
namespace
{
+ const wchar_t STAR[] = L"*";
+ const wchar_t AND1[] = L"and1";
+ const wchar_t AND2[] = L"and2";
+
/**
* Parses the syntax "* ( ( AND | && ) QRY )?", setting the member
* clQryStr_ (clucene query) to QRY, if any.
@@ -58,19 +62,12 @@
{
private:
- enum TokenType
- {
- STAR = Cpt::Lex::TOKEN_LAST_RESERVED,
- AND1,
- AND2,
- };
-
Cpt::Lex::MultiTokenizer * tokenizer_;
// for the transition table definition, see comments for parse()
typedef int State;
- typedef int Symbol;
+ typedef Cpt::Lex::token_type_t Symbol;
typedef std::pair<State, Symbol> StateSymbolPair;
typedef std::map<StateSymbolPair, State> TransitionTable;
TransitionTable transitions_;
@@ -220,7 +217,7 @@
Tokens
source(*tokenizer_,
qryStr);
- WhiteSpaceFilter
+ StdFilter
tokens(source);
State
--- a/searchengine/cpix/cpix/src/qrytypes/prefixqrytype.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/qrytypes/prefixqrytype.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -32,8 +32,10 @@
#include "cpixutil.h"
#include "iqrytype.h"
#include "analyzer.h"
+#include "customanalyzer.h"
#include "cpixmaindefs.h"
+#include "queryparser.h"
namespace Cpix
@@ -48,7 +50,7 @@
//
// private members
//
- lucene::queryParser::QueryParser * clQueryParser_;
+ IQueryParser * clQueryParser_;
lucene::search::Query * clQuery_;
public:
@@ -82,7 +84,8 @@
getAnalyzedString(qryStr, mQryStr );
clQueryParser_ = Cast2Native<cpix_QueryParser>(queryParser);
- clQuery_ = clQueryParser_->parse((const wchar_t *)mQryStr);
+ clQuery_ = clQueryParser_->parse((const wchar_t *)mQryStr).release();
+
free(mQryStr);
--- a/searchengine/cpix/cpix/src/qrytypes/termsqrytype.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/qrytypes/termsqrytype.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -32,6 +32,8 @@
#include "cpixutil.h"
#include "iqrytype.h"
+#include "queryparser.h"
+
namespace Cpix
{
@@ -73,7 +75,7 @@
THROW_CPIXEXC("Too many arguments for terms search");
}
- lucene::queryParser::QueryParser
+ IQueryParser
* qp = Cast2Native<cpix_QueryParser>(queryParser);
fieldName_ = qp->getField();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/queryparser.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,130 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#include "queryparser.h"
+
+#include "cpixmaindefs.h"
+
+#include "initparams.h"
+
+#include "prefixqueryparser.h"
+
+#include "cpixexc.h"
+
+namespace Cpix {
+
+ IQueryParser::~IQueryParser() {} // out-of-line definition of the interface's destructor
+
+ CLuceneQueryParser::CLuceneQueryParser(const wchar_t* defaultField,
+ lucene::analysis::Analyzer& analyzer)
+ : parser_() {
+ parser_.reset(_CLNEW lucene::queryParser::QueryParser(defaultField, &analyzer)); // analyzer is passed by address - presumably it must outlive this parser (confirm)
+ }
+
+ CLuceneQueryParser::~CLuceneQueryParser() {}
+
+ std::auto_ptr<lucene::search::Query> CLuceneQueryParser::parse(const wchar_t* query) { // wraps the raw pointer from CLucene so the caller receives ownership
+ return std::auto_ptr<lucene::search::Query>( parser_->parse(query) );
+ }
+
+ const wchar_t* CLuceneQueryParser::getField() const { // forwards to the wrapped CLucene parser
+ return parser_->getField();
+ }
+
+ void CLuceneQueryParser::setDefaultOperator(cpix_QP_Operator op) {
+ parser_->setDefaultOperator(static_cast<int>(op)); // assumes cpix_QP_Operator values match CLucene's operator constants - TODO confirm
+ }
+
+ PrefixOptQueryParser::PrefixOptQueryParser(std::auto_ptr<IQueryParser> parser) // takes ownership of the wrapped parser
+ :
+ prefixOpt_(OPTIMIZED_PREFIX_MAX_LENGTH,
+ LCPIX_DEFAULT_FIELD,
+ LCPIX_DEFAULT_PREFIX_FIELD ),
+ parser_( parser )
+ {}
+
+ PrefixOptQueryParser::~PrefixOptQueryParser() {}
+
+ std::auto_ptr<lucene::search::Query> PrefixOptQueryParser::parse(const wchar_t* query) { // parses with the wrapped parser, then lets prefixOpt_ rewrite the result
+ return prefixOpt_.rewrite( parser_->parse(query) ); // NOTE(review): the wrapped parser may return NULL - confirm rewrite() accepts that
+ }
+
+ const wchar_t* PrefixOptQueryParser::getField() const { // forwards to the wrapped parser
+ return parser_->getField();
+ }
+
+ void PrefixOptQueryParser::setDefaultOperator(cpix_QP_Operator op) { // forwards to the wrapped parser
+ parser_->setDefaultOperator(op);
+ }
+
+ CLuceneMultiFieldQueryParser::CLuceneMultiFieldQueryParser(
+ const wchar_t** fields,
+ lucene::analysis::Analyzer& analyzer,
+ lucene::queryParser::BoostMap& boostMap)
+ : parser_() {
+ parser_.reset( // analyzer and boostMap are passed by address - presumably both must outlive this parser (confirm)
+ _CLNEW lucene::queryParser::MultiFieldQueryParser( fields, &analyzer, &boostMap ));
+
+ }
+
+ CLuceneMultiFieldQueryParser::~CLuceneMultiFieldQueryParser() {}
+
+ std::auto_ptr<lucene::search::Query>
+ CLuceneMultiFieldQueryParser::parse(const wchar_t* query) { // wraps the raw pointer from CLucene so the caller receives ownership
+ return std::auto_ptr<lucene::search::Query>( parser_->parse( query ) );
+
+ }
+
+ const wchar_t* CLuceneMultiFieldQueryParser::getField() const { // unsupported for the multi-field parser
+ THROW_CPIXEXC("Multi field query parser does not support getField operation"); // no return statement: relies on THROW_CPIXEXC not returning
+ }
+
+ void CLuceneMultiFieldQueryParser::setDefaultOperator(cpix_QP_Operator op) {
+ parser_->setDefaultOperator(static_cast<int>(op)); // assumes cpix_QP_Operator values match CLucene's operator constants - TODO confirm
+ }
+
+ IQueryParser* CreateCLuceneQueryParser(const wchar_t* defaultField,
+                                        lucene::analysis::Analyzer* analyzer) {
+     // Builds a single-field CLuceneQueryParser and wraps it in a PrefixOptQueryParser.
+     // analyzer is dereferenced, so it must not be NULL; the result is heap-allocated.
+     std::auto_ptr<IQueryParser> inner(new CLuceneQueryParser(defaultField, *analyzer));
+     return new PrefixOptQueryParser(inner);
+ }
+
+ IQueryParser* CreateCLuceneMultiFieldQueryParser(
+     const wchar_t* fields[],
+     lucene::analysis::Analyzer* analyzer,
+     lucene::queryParser::BoostMap* boostMap) {
+     // Multi-field variant: builds a CLuceneMultiFieldQueryParser and wraps it
+     // in a PrefixOptQueryParser.
+     // NOTE(review): analyzer and boostMap are dereferenced unchecked - confirm
+     // that callers never pass NULL for either.
+     std::auto_ptr<IQueryParser> inner(
+         new CLuceneMultiFieldQueryParser(fields, *analyzer, *boostMap));
+     return new PrefixOptQueryParser(inner);
+ }
+
+ IQueryParser* CreatePrefixQueryParser(const wchar_t* field) {
+     // Builds a PrefixQueryParser for field, wrapped in a PrefixOptQueryParser
+     // like the other factories in this file.
+     std::auto_ptr<IQueryParser> inner(new PrefixQueryParser(field));
+     return new PrefixOptQueryParser(inner);
+ }
+
+
+}
--- a/searchengine/cpix/cpix/src/rotlogger.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/rotlogger.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -93,8 +93,12 @@
THROW_CPIXEXC("Cannot dup() STDERR_FILENO");
}
- Cpt_EINTR_RETRY_SP( close(STDOUT_FILENO) );
- Cpt_EINTR_RETRY_SP( close(STDERR_FILENO) );
+ int
+ result;
+ Cpt_EINTR_RETRY(result,
+ close(STDOUT_FILENO));
+ Cpt_EINTR_RETRY(result,
+ close(STDERR_FILENO));
redirectStdOutErr();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/spi/locale.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,27 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#include "spi/locale.h"
+
+namespace Cpix {
+
+ namespace Spi {
+
+ const wchar_t* SymbianLanguageCodePrefix = L"s"; // prepended to the numeric language code by the S60 GetLanguageNames()
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/spi/s60/s60locale.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,74 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#include <e32std.h>
+#include <sstream>
+
+#include "spi/locale.h"
+
+namespace Cpix {
+
+ namespace Spi {
+
+ struct LangCodeTranslationEntry { // one row of the Symbian-language to short-name table
+ int symbianCode_; // compared against the value of User::Language()
+ const wchar_t* isoCode_; // short language name reported for that language
+ };
+
+ LangCodeTranslationEntry LangCodeTranslations[] = { // scanned linearly by GetLanguageNames(); the ELangNone row terminates the table
+ {ELangEnglish, L"en"},
+ {ELangCanadianEnglish, L"en"},
+ {ELangInternationalEnglish, L"en"},
+ {ELangSouthAfricanEnglish, L"en"},
+
+ {ELangFrench, L"fr"},
+ {ELangSwissFrench, L"fr"},
+ {ELangBelgianFrench, L"fr"},
+ {ELangInternationalFrench, L"fr"},
+ {ELangCanadianFrench, L"fr"},
+
+ {ELangHebrew, L"he"},
+
+ {ELangTaiwanChinese, L"ch"}, // NOTE(review): ISO 639-1 for Chinese is "zh"; "ch" looks like a project-internal name - confirm before changing
+ {ELangHongKongChinese, L"ch"},
+ {ELangPrcChinese, L"ch"},
+ {ELangThai, L"th"},
+ {ELangJapanese, L"jp"}, // NOTE(review): ISO 639-1 for Japanese is "ja"; "jp" looks like a project-internal name - confirm before changing
+ {ELangKorean, L"ko"},
+
+ {ELangNone, 0} // sentinel: end of table
+ };
+
+ std::vector<std::wstring> GetLanguageNames() {
+     // Reports the current device language: first "<prefix><numeric Symbian code>",
+     // then every short name the translation table lists for that code.
+     const TLanguage current = User::Language();
+     std::wostringstream symbianName;
+     symbianName << SymbianLanguageCodePrefix << current;
+     std::vector<std::wstring> names;
+     names.push_back(symbianName.str());
+     for (const LangCodeTranslationEntry* e = LangCodeTranslations;
+          e->symbianCode_ != ELangNone; ++e) {
+         if ( e->symbianCode_ == current ) {
+             names.push_back( e->isoCode_ );
+         }
+     }
+     return names;
+ }
+
+ }
+}
--- a/searchengine/cpix/cpixrotlog/group/cpixrotlog.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpixrotlog/group/cpixrotlog.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -54,8 +54,10 @@
STATICLIBRARY libcrt0.lib
STATICLIBRARY libcpix.lib
STATICLIBRARY libclucene.lib
+STATICLIBRARY libstemmer.lib
+//STATICLIBRARY libitk.lib
STATICLIBRARY libcpixtools.lib
-STATICLIBRARY libstemmer.lib
+STATICLIBRARY libanalysis.lib
// For SPI
LIBRARY efsrv.lib
--- a/searchengine/cpix/tsrc/cpixsample/src/cpixsample.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixsample/src/cpixsample.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -28,12 +28,12 @@
#define FIELD_ALPHA1 L"Alpha1"
-#define DOC1CONTENT L"mary had little lamb issue its anirban fleece was black as coal"
-#define DOC2CONTENT L"sri rama jeyam and it 3gpp_70.jpg 170(kb).jpg is shankar.rajendran@yahoo.co.in then www.google.com U.S.A. file.txt"
+#define DOC1CONTENT L"mary had little lamb issue its 9740069217 9999ss ssss7 sad76asd 12222ds asdfa23sdf234sdf anirban fleece was black as coal"
+#define DOC2CONTENT L"sri rama jeyam and it 3gpp_70.jpg 170(kb).jpg is ss asd shankar.rajendran@yahoo.co.in then www.google.com U.S.A. file.txt"
// The term that will be present in multiple documents.
-#define SEARCH_TERM L"$prefix(\"had\")"
+#define SEARCH_TERM L"$prefix(\"9999ss\")"
int testInit(cpix_Analyzer **analyzer_, cpix_IdxDb **idxDb_)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/ch_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,55 @@
+using analyzer "natural(indexing)" for indexing
+
+using analyzer "natural(query)" for searching
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_hk\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_hk\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_hk\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_hk\4.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_prc\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_prc\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_prc\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_prc\4.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_simple\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_simple\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_simple\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_simple\4.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_tw\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_tw\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_tw\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ch_tw\4.txt
+
+Indexed empty item.
+
+searching:
+
+search "å°æ¹¾":
+Number of hits: 2
+DOC (!:\data\cpixunittestcorpus\loc\ch_prc\3.txt): 内容涉åŠå°æ¹¾ã€ä¸ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç‰ã€‚
+DOC (!:\data\cpixunittestcorpus\loc\ch_simple\2.txt): 美国总统奥巴马星期一(11月16日)在上海与ä¸å›½é’年对è¯ï¼Œä»–回ç”了现场å¬ä¼—和网民的æ问,内容涉åŠå°æ¹¾ã€ä¸ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç‰ã€‚
+
+search "ä¸å›½":
+Number of hits: 5
+DOC (!:\data\cpixunittestcorpus\loc\ch_simple\1.txt): 奥巴马在上海与ä¸å›½é’年人对è¯
+DOC (!:\data\cpixunittestcorpus\loc\ch_prc\1.txt): 美国总统奥巴马星期一(11月16日)在上海与ä¸å›½é’年对è¯ï¼Œ
+DOC (!:\data\cpixunittestcorpus\loc\ch_simple\4.txt): 这次对è¯ç”±å¤æ—¦å¤§å¦æ ¡é•¿æ¨çŽ‰è‰¯ä¸»æŒã€‚美国驻ä¸å›½å¤§ä½¿æ´ªåšåŸ¹è‡´è¾žã€‚
+DOC (!:\data\cpixunittestcorpus\loc\ch_simple\2.txt): 美国总统奥巴马星期一(11月16日)在上海与ä¸å›½é’年对è¯ï¼Œä»–回ç”了现场å¬ä¼—和网民的æ问,内容涉åŠå°æ¹¾ã€ä¸ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç‰ã€‚
+DOC (!:\data\cpixunittestcorpus\loc\ch_simple\3.txt): 奥巴马在上海科技馆é¢å¯¹500ä½™å上海é’年以åŠæ•°ä»¥ä¸‡è®¡çš„ä¸å›½äº’è”网使用者进行了一场问ç”会。
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/currentlocale_C_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,38 @@
+using analyzer "natural"
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
+Indexed empty item.
+
+searching:
+
+search "happy":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happiness":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+search "happening":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 0
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/en_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,30 @@
+using analyzer "natural(indexing)" for indexing
+
+using analyzer "natural(query)" for searching
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+
+Indexed empty item.
+
+searching:
+
+search "happy":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happiness":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+search "happening":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/jp_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,40 @@
+using analyzer "natural(indexing)" for indexing
+
+using analyzer "natural(query)" for searching
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp\4.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp_old\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp_old\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp_old\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\jp_old\4.txt
+
+Indexed empty item.
+
+searching:
+
+search "国際":
+Number of hits: 4
+DOC (!:\data\cpixunittestcorpus\loc\jp\3.txt): ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ãƒã‚°ãƒ©ãƒ ã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„
+DOC (!:\data\cpixunittestcorpus\loc\jp_old\3.txt): ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ãƒã‚°ãƒ©ãƒ ã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„
+DOC (!:\data\cpixunittestcorpus\loc\jp\1.txt): 国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
+DOC (!:\data\cpixunittestcorpus\loc\jp_old\1.txt): 国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
+
+search "日本":
+Number of hits: 4
+DOC (!:\data\cpixunittestcorpus\loc\jp\2.txt): æ—¥æœ¬ç ”ç©¶ãƒ»çŸ¥çš„äº¤æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥ã‚’実施ã—ã¦ãŠã‚Šã€
+DOC (!:\data\cpixunittestcorpus\loc\jp_old\2.txt): æ—¥æœ¬ç ”ç©¶ãƒ»çŸ¥çš„äº¤æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥ã‚’実施ã—ã¦ãŠã‚Šã€
+DOC (!:\data\cpixunittestcorpus\loc\jp\1.txt): 国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
+DOC (!:\data\cpixunittestcorpus\loc\jp_old\1.txt): 国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/ko_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,26 @@
+using analyzer "natural(indexing)" for indexing
+
+using analyzer "natural(query)" for searching
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ko\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ko\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ko\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\ko\4.txt
+
+Indexed empty item.
+
+searching:
+
+search "ìŠ¤í† ë¦¬":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\loc\ko\1.txt): ì œê°€ ë…¼ìŠ¤í†±ì„ ë³´ëŠ”ë°ìš”, 김지우가 ìŠ¤í† ë¦¬ìƒìœ¼ë¡œ ì¼ë³¸ìœ¼ë¡œ 간다구 하네요
+
+search "ì¸ë¬¼":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\loc\ko\4.txt): 그때 여승í˜ì²˜ëŸ¼ 논스톱 극중 ì¸ë¬¼ì—ì„œ ë¹ íŠ¸ë¦¬ëŠ”ê±´ê°€ìš”?
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/loc/th_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,28 @@
+using analyzer "natural(indexing)" for indexing
+
+using analyzer "natural(query)" for searching
+
+indexing:
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\3.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\4.txt
+
+Indexed empty item.
+
+searching:
+
+search "ดาวตà¸":
+Number of hits: 2
+DOC (!:\data\cpixunittestcorpus\loc\th\3.txt): จะเà¸à¸´à¸”ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸à¸ˆà¸²à¸à¸à¸¥à¸¸à¹ˆà¸¡à¸”าวสิงโตหรืà¸à¸à¸™à¸”าวตà¸à¹€à¸¥à¹‚à¸à¸™à¸´à¸„ส์ที่นัà¸à¸”าราศาสตร์ทั้งหลายคาดว่าจะมีประมาณ 100-150 ดวงต่à¸à¸Šà¸±à¹ˆà¸§à¹‚มง
+DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+
+search "มี":
+Number of hits: 2
+DOC (!:\data\cpixunittestcorpus\loc\th\2.txt): จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ 2541-2544 คืà¸à¹ƒà¸™à¸„ืนวันที่ 17 ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+DOC (!:\data\cpixunittestcorpus\loc\th\3.txt): จะเà¸à¸´à¸”ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸à¸ˆà¸²à¸à¸à¸¥à¸¸à¹ˆà¸¡à¸”าวสิงโตหรืà¸à¸à¸™à¸”าวตà¸à¹€à¸¥à¹‚à¸à¸™à¸´à¸„ส์ที่นัà¸à¸”าราศาสตร์ทั้งหลายคาดว่าจะมีประมาณ 100-150 ดวงต่à¸à¸Šà¸±à¹ˆà¸§à¹‚มง
+
--- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/parsing_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/parsing_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -7,12 +7,17 @@
Creating analyzer letter>lowercase>stop(en)
Creating analyzer letter>lowercase>stop('a', 'an', 'the')
Creating analyzer letter><lowercase
-Analyzer creation failed with Unrecognized syntax: '<' at: "letter>*here*<lowercase"
+Analyzer creation failed with Unrecognized syntax: '<' at:
+"letter>*here*<lowercase"
Creating analyzer 38j_d fad23 4?q ca'wRA
-Analyzer creation failed with Expected identifier instead of token '38' of type 5 at: "*here*38*here*j_d fad23 4?q ca'wRA"
+Analyzer creation failed with Expected identifier instead of token '38' of type integer at:
+"*here*38*here*j_d fad23 4?q ca'wRA"
Creating analyzer letter>>lowercase
-Analyzer creation failed with Expected identifier instead of token '>' of type 11 at: "letter>*here*>*here*lowercase"
+Analyzer creation failed with Expected identifier instead of token '>' of type > at:
+"letter>*here*>*here*lowercase"
Creating analyzer >letter>>lowercase lowercase
-Analyzer creation failed with Expected identifier instead of token '>' of type 11 at: "*here*>*here*letter>>lowercase lowercase"
+Analyzer creation failed with Expected identifier instead of token '>' of type > at:
+"*here*>*here*letter>>lowercase lowercase"
Creating analyzer letter lowercase
-Analyzer creation failed with Expected EOF instead of 'lowercase' of type 3 at: "letter *here*lowercase*here*"
+Analyzer creation failed with Expected EOF instead of 'lowercase' of type identifier at:
+"letter *here*lowercase*here*"
--- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/usage_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/usage_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,4 +1,6 @@
-Indexing and searching with whitespace
+using analyzer "whitespace"
+
+indexing:
Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
@@ -8,12 +10,33 @@
Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
Indexed empty item.
+
+searching:
+
+search "happy":
Number of hits: 0
+
+search "happiness":
Number of hits: 0
+
+search "happening":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
-Indexing and searching with letter>lowercase
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 0
+
+using analyzer "letter>lowercase"
+
+indexing:
Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
@@ -23,14 +46,36 @@
Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
Indexed empty item.
+
+searching:
+
+search "happy":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happiness":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+search "happening":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
-Indexing and searching with stdtokens>lowercase>stem(en)
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+
+using analyzer "stdtokens>lowercase>accent"
+
+indexing:
Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
@@ -40,16 +85,35 @@
Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
Indexed empty item.
-Number of hits: 2
-DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+searching:
+
+search "happy":
+Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
-Number of hits: 2
+
+search "happiness":
+Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
-DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happening":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
-Indexing and searching with letter>lowercase>stop(en)
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 0
+
+using analyzer "letter>lowercase>stop(en)"
+
+indexing:
Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
@@ -59,14 +123,36 @@
Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
Indexed empty item.
+
+searching:
+
+search "happy":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happiness":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+search "happening":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
-Indexing and searching with letter>lowercase>stop('a', 'an', 'the')
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+
+using analyzer "letter>lowercase>stop('a', 'an', 'the')"
+
+indexing:
Indexed file: !:\data\cpixunittestcorpus\stem\en\1.txt
@@ -76,10 +162,30 @@
Indexed file: !:\data\cpixunittestcorpus\stem\en\4.txt
+Indexed file: !:\data\cpixunittestcorpus\loc\th\1.txt
+
+Indexed file: !:\data\cpixunittestcorpus\loc\th\2.txt
+
Indexed empty item.
+
+searching:
+
+search "happy":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\1.txt): I am happy.
+
+search "happiness":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\2.txt): Oh happiness!
+
+search "happening":
Number of hits: 1
DOC (!:\data\cpixunittestcorpus\stem\en\4.txt): What is happening here?
+
+search "ดาวตà¸":
+Number of hits: 0
+
+search "มี":
+Number of hits: 1
+DOC (!:\data\cpixunittestcorpus\loc\th\1.txt): ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+
--- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -5,10 +5,17 @@
'Oh' 'happiness'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'Nothing' 'important' 'in' 'here' 'So' 'don't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What' 'is' 'happening' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon' 'nyt' 'teetä'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541' '-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
Analyzer "whitespace":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'I' 'am' 'happy.'
@@ -16,10 +23,17 @@
'Oh' 'happiness!'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What' 'is' 'happening' 'here?'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon' 'nyt' 'teetä.'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee' 'näin!'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
Analyzer "whitespace>lowercase":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'i' 'am' 'happy.'
@@ -27,10 +41,17 @@
'oh' 'happiness!'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'nothing' 'important' 'in' 'here.' 'so' 'don't' 'even' 'look.' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'is' 'happening' 'here?'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'juon' 'nyt' 'teetä.'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'tee' 'näin!'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
Analyzer "whitespace>accent":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'I' 'am' 'happy.'
@@ -38,10 +59,17 @@
'Oh' 'happiness!'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What' 'is' 'happening' 'here?'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon' 'nyt' 'teeta.'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee' 'nain!'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
Analyzer "letter":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'I' 'am' 'happy'
@@ -49,10 +77,17 @@
'Oh' 'happiness'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'Nothing' 'important' 'in' 'here' 'So' 'don' 't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What' 'is' 'happening' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon' 'nyt' 'teetä'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+
Analyzer "letter>lowercase":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'i' 'am' 'happy'
@@ -60,10 +95,17 @@
'oh' 'happiness'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'is' 'happening' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'juon' 'nyt' 'teetä'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+
Analyzer "keyword":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'I am happy.
@@ -83,10 +125,21 @@
nothing
whatsoever.
'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What is happening here?
+
+'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon nyt teetä.'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee näin! '
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ 2541-2544 คืà¸à¹ƒà¸™à¸„ืนวันที่ 17 ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+'
+
Analyzer "keyword>lowercase":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'i am happy.
@@ -106,43 +159,57 @@
nothing
whatsoever.
'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what is happening here?
+
+'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'juon nyt teetä.'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'tee näin! '
-Analyzer "stdtokens>lowercase>accent>stem(en)":
-File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
- 'i' 'am' 'happi'
-File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
- 'oh' 'happi'
-File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
- 'noth' 'import' 'in' 'here' 'so' 'don't' 'even' 'look' 'becaus' 'you' 'shall' 'find' 'noth' 'whatsoev'
-File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
- 'juon' 'nyt' 'teeta'
-File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
- 'tee' 'nain'
-Analyzer "letter>lowercase>accent>stop(en)":
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ 2541-2544 คืà¸à¹ƒà¸™à¸„ืนวันที่ 17 ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552
+'
+
+Analyzer "letter>lowercase>stop(en)":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'i' 'am' 'happy'
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
'oh' 'happiness'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
- 'juon' 'nyt' 'teeta'
+ 'juon' 'nyt' 'teetä'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
- 'tee' 'nain'
-Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'näin')":
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+
+Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'n�in')":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'am' 'happy'
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
'happiness'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'is' 'happening' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'juon' 'teetä'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
- 'tee'
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+
Analyzer "letter>length(2, 4)":
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
'am'
@@ -150,7 +217,140 @@
'Oh'
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
'in' 'here' 'So' 'don' 'even' 'look' 'you' 'find'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'What' 'is' 'here'
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
'Juon' 'nyt'
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
'Tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ยน' 'ทย' 'นว' 'ณฑ' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'าวว' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'คร' 'งสำค' 'à¸à¸—' 'วเม' 'à¸à¸›' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' 'พฤศจ' 'à¸à¸²à¸¢à¸™'
+
+Analyzer "standard>prefixes(1)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'a' 'h'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'o' 'h'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'n' 'i' 'h' 's' 'd' 'e' 'l' 'b' 'y' 's' 'f' 'n' 'w'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'w' 'h' 'h'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'j' 'n' 't'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 't' 'n'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ป' '7' '-' 'พ' 'ย' 'น' 'ต' 'ป' '1' '-' 'พ' '2'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จ' '2' '-' 'ค' '1' 'ต' '1' 'พ' '2'
+
+Analyzer "standard>prefixes(2)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am'|'a' 'ha'|'h'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh'|'o' 'ha'|'h'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'no'|'n' 'im'|'i' 'he'|'h' 'so'|'s' 'do'|'d' 'ev'|'e' 'lo'|'l' 'be'|'b' 'yo'|'y' 'sh'|'s' 'fi'|'f' 'no'|'n' 'wh'|'w'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'wh'|'w' 'ha'|'h' 'he'|'h'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'ju'|'j' 'ny'|'n' 'te'|'t'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'te'|'t' 'nä'|'n'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปร'|'ป' '7' '-1'|'-' 'พ' 'ยน'|'ย' 'นา'|'น' 'ตั'|'ต' 'ปร'|'ป' '17'|'1' '-1'|'-' 'พฤ'|'พ' '25'|'2'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะ'|'จ' '25'|'2' '-2'|'-' 'คื'|'ค' '17'|'1' 'ต่'|'ต' '18'|'1' 'พฤ'|'พ' '25'|'2'
+
+Analyzer "standard>prefixes(3)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am'|'a' 'hap'|'ha'|'h'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh'|'o' 'hap'|'ha'|'h'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'not'|'no'|'n' 'imp'|'im'|'i' 'her'|'he'|'h' 'so'|'s' 'don'|'do'|'d' 'eve'|'ev'|'e' 'loo'|'lo'|'l' 'bec'|'be'|'b' 'you'|'yo'|'y' 'sha'|'sh'|'s' 'fin'|'fi'|'f' 'not'|'no'|'n' 'wha'|'wh'|'w'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'wha'|'wh'|'w' 'hap'|'ha'|'h' 'her'|'he'|'h'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juo'|'ju'|'j' 'nyt'|'ny'|'n' 'tee'|'te'|'t'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee'|'te'|'t' 'näi'|'nä'|'n'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปรา'|'ปร'|'ป' '7' '-18'|'-1'|'-' 'พ' 'ยนี'|'ยน'|'ย' 'นาย'|'นา'|'น' 'ตัน'|'ตั'|'ต' 'ปรา'|'ปร'|'ป' '17'|'1' '-18'|'-1'|'-' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม'|'จะ'|'จ' '254'|'25'|'2' '-25'|'-2'|'-' 'คืà¸'|'คื'|'ค' '17'|'1' 'ต่à¸'|'ต่'|'ต' '18'|'1' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2'
+
+Analyzer "stdtokens>stdfilter>lowercase>thai>stop(en)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้à¸à¸‡' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 'เมื่à¸' 'ปี' '2541' '-2544' 'คืà¸' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่à¸' 'เนื่à¸à¸‡' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
+Analyzer "cjk>stop(en)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปร' 'รา' 'าà¸' 'à¸à¸' 'à¸à¸' 'à¸à¸²' 'าร' 'รณ' 'à¸à¸™' 'นด' 'ดา' 'าว' 'วต' 'ตà¸' '17' '18' 'พ' 'ยน' 'นา' 'าย' 'ยว' 'วร' 'รว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปร' 'รา' 'าช' 'ชà¸' 'ภ' 'ม' 'ป' 'à¸à¸' 'à¸à¸²' 'าท' 'à¸à¸‡' 'งถ' 'นด' 'าน' 'นด' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'ไท' 'ทย' 'ยà¸' 'à¸à¸¥' 'าว' 'วว' 'า' '17' '18' 'พฤ' 'ฤศ' 'ศจ' 'à¸à¸²' 'าย' 'ยน' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะ' 'ะม' 'ปร' 'รา' 'าà¸' 'à¸à¸' 'à¸à¸' 'à¸à¸²' 'าร' 'รณ' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'คร' 'งส' 'สำ' 'ำค' 'à¸à¸—' 'ชา' 'าว' 'วไ' 'ไท' 'ทย' 'ยเ' 'เค' 'คย' 'ยป' 'ปร' 'ระ' 'ะท' 'บใ' 'ใจ' 'จม' 'มา' 'าà¹' 'à¹à¸¥' 'วเ' 'เม' 'à¸à¸›' '2541' '2544' 'ค' 'à¸à¹ƒ' 'ใน' 'นค' 'นว' 'นท' '17' 'ต' 'à¸à¹€' 'เน' 'à¸à¸‡' 'งว' 'นท' '18' 'พฤ' 'ฤศ' 'ศจ' 'à¸à¸²' 'าย' 'ยน' '2552'
+
+Analyzer "ngram(1)>lowercase>stop(en)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' '17' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' '2541' '2544' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' '17' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552'
+
+Analyzer "ngram(2)>lowercase>stop(en)":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“' 'à¸à¸™à¸”าวตà¸17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชà¸' 'ภ' 'ม' 'ป' 'à¸à¸à¸²à¸—' 'à¸à¸‡à¸–' 'นด' 'านดาราศาสตร' 'ไทยà¸à¸¥' 'าวว' 'า' '17' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะม' 'ปราà¸à¸à¸à¸²à¸£à¸“' 'ดาราศาสตร' 'คร' 'งสำค' 'à¸à¸—' 'ชาวไทยเคยประท' 'บใจมาà¹à¸¥' 'วเม' 'à¸à¸›' '2541' '2544' 'ค' 'à¸à¹ƒà¸™à¸„' 'นว' 'นท' '17' 'ต' 'à¸à¹€à¸™' 'à¸à¸‡à¸§' 'นท' '18' 'พฤศจ' 'à¸à¸²à¸¢à¸™' '2552'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,44 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\loc\ch_hk\1.txt tokenized:
+ '陶' '傑' '(' '1958' '年' '8' '月' '17' '日' ')' ','
+File !:\data\cpixunittestcorpus\loc\ch_hk\2.txt tokenized:
+ '原' 'å' 'ä¿‚' '曹' 'æ·' ',' '香' '港' 'å°ˆ' '欄' '作' '家' 'åŠ' '傳' '媒' 'å·¥' '作' '者' ','
+File !:\data\cpixunittestcorpus\loc\ch_hk\3.txt tokenized:
+ '有' '香' '江' '第' '一' 'æ‰' 'å' '嘅' '稱' '號' ','
+File !:\data\cpixunittestcorpus\loc\ch_hk\4.txt tokenized:
+ '以' 'æ–‡' 'ç†' 'è¾›' 'è¾£' 'ç«‹' 'å ´' '親' '西' 'æ–¹' '(' 'å°¤' 'å…¶' '是' '英' '國' ')' '見' '稱'
+File !:\data\cpixunittestcorpus\loc\ch_prc\1.txt tokenized:
+ '美' '国' '总' '统' '奥' 'å·´' '马' '星' '期' '一' '(' '11' '月' '16' 'æ—¥' ')' '在' '上' 'æµ·' '与' 'ä¸' '国' 'é’' 'å¹´' '对' 'è¯' ','
+File !:\data\cpixunittestcorpus\loc\ch_prc\2.txt tokenized:
+ 'ä»–' '回' 'ç”' '了' '现' '场' 'å¬' 'ä¼—' 'å’Œ' '网' 'æ°‘' 'çš„' 'æ' 'é—®' ','
+File !:\data\cpixunittestcorpus\loc\ch_prc\3.txt tokenized:
+ '内' '容' '涉' 'åŠ' 'å°' 'æ¹¾' 'ä¸' '美' 'è´¸' '易' '以' 'åŠ' '环' '境' 'é—®' '题' 'ç‰'
+File !:\data\cpixunittestcorpus\loc\ch_prc\4.txt tokenized:
+ '奥' 'å·´' '马' '在' '上' 'æµ·' '科' '技' '馆' 'é¢' '对' '500' 'ä½™' 'å' '上' 'æµ·' 'é’' 'å¹´'
+File !:\data\cpixunittestcorpus\loc\ch_simple\1.txt tokenized:
+ '奥' 'å·´' '马' '在' '上' 'æµ·' '与' 'ä¸' '国' 'é’' 'å¹´' '人' '对' 'è¯'
+File !:\data\cpixunittestcorpus\loc\ch_simple\2.txt tokenized:
+ '美' '国' '总' '统' '奥' 'å·´' '马' '星' '期' '一' '(' '11' '月' '16' 'æ—¥' ')' '在' '上' 'æµ·' '与' 'ä¸' '国' 'é’' 'å¹´' '对' 'è¯' ',' 'ä»–' '回' 'ç”' '了' '现' '场' 'å¬' 'ä¼—' 'å’Œ' '网' 'æ°‘' 'çš„' 'æ' 'é—®' ',' '内' '容' '涉' 'åŠ' 'å°' 'æ¹¾' 'ä¸' '美' 'è´¸' '易' '以' 'åŠ' '环' '境' 'é—®' '题' 'ç‰'
+File !:\data\cpixunittestcorpus\loc\ch_simple\3.txt tokenized:
+ '奥' 'å·´' '马' '在' '上' 'æµ·' '科' '技' '馆' 'é¢' '对' '500' 'ä½™' 'å' '上' 'æµ·' 'é’' 'å¹´' '以' 'åŠ' 'æ•°' '以' '万' '计' 'çš„' 'ä¸' '国' '互' 'è”' '网' '使' '用' '者' 'è¿›' 'è¡Œ' '了' '一' '场' 'é—®' 'ç”' '会'
+File !:\data\cpixunittestcorpus\loc\ch_simple\4.txt tokenized:
+ 'è¿™' '次' '对' 'è¯' 'ç”±' 'å¤' 'æ—¦' '大' 'å¦' 'æ ¡' 'é•¿' 'æ¨' '玉' '良' '主' 'æŒ' '美' '国' 'é©»' 'ä¸' '国' '大' '使' 'æ´ª' 'åš' '培' '致' '辞'
+File !:\data\cpixunittestcorpus\loc\ch_tw\1.txt tokenized:
+ '拈' '花' '惹' 'è‰' '趣' 'å—' '投' '花' 'å‰' '嘉' 'å¹´' 'è¯' 'ç’€' 'ç’¨' 'è¿Ž' '賓'
+File !:\data\cpixunittestcorpus\loc\ch_tw\2.txt tokenized:
+ '2009' 'å—' '投' '花' 'å‰' '嘉' 'å¹´' 'è¯' '花' 'ç¾' '幸' 'ç¦' '暢' 'éŠ' 'å—' '投'
+File !:\data\cpixunittestcorpus\loc\ch_tw\3.txt tokenized:
+ 'æ–¼' 'æ—¥' 'å‰' '11' '15' 'å‡' 'å—' '投' '縣' '埔' '里' '鎮' '埔' '里' '花' 'å‰' '物' 'æµ' 'ä¸' '心' '隆' 'é‡' 'ç™»' 'å ´' ','
+File !:\data\cpixunittestcorpus\loc\ch_tw\4.txt tokenized:
+ '在' '為' '期' '五' 'å' '天' 'çš„' 'æ´»' 'å‹•' 'ä¸' ','
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/currentlocale_C_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,18 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541' '-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,20 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,28 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\loc\jp\1.txt tokenized:
+ '国' 'éš›' '交' 'æµ' '基' '金' '(' 'ジャパンファウンデ' 'ション' ')' 'ã¯' '主' 'ã«' 'æ–‡' '化' '芸' 'è¡“' '交' 'æµ' 'æµ·' '外' 'ã«' 'ãŠ' 'ã‘' 'ã‚‹' 'æ—¥' '本' '語' 'æ•™' '育'
+File !:\data\cpixunittestcorpus\loc\jp\2.txt tokenized:
+ 'æ—¥' '本' 'ç ”' '究' '知' 'çš„' '交' 'æµ' 'ã®' '3' 'ã¤' 'ã®' '分' '野' 'ã«' 'ãŠ' 'ã„' 'ã¦' '事' 'æ¥' 'ã‚’' '実' 'æ–½' 'ã—' 'ã¦' 'ãŠ' 'ã‚Š'
+File !:\data\cpixunittestcorpus\loc\jp\3.txt tokenized:
+ 'ã' 'ã‚Œ' 'ãž' 'ã‚Œ' 'ã®' '分' '野' 'ã§' 'å…¬' 'å‹Ÿ' 'プãƒã‚°ãƒ©ãƒ ' 'ã«' 'よ' 'ã‚Š' '国' 'éš›' '交' 'æµ' '事' 'æ¥' 'ã‚’' 'ä¼' 'ç”»' 'ã™' 'ã‚‹' '個' '人' 'ã‚„'
+File !:\data\cpixunittestcorpus\loc\jp\4.txt tokenized:
+ '団' '体' 'ã«' '対' 'ã—' 'ã¦' '助' 'æˆ' '金' 'ç ”' '究' '奨' 'å¦' '金' 'ç‰' 'ã‚’' 'æ' 'ä¾›' 'ã—' 'ã¦' 'ã„' 'ã¾' 'ã™'
+File !:\data\cpixunittestcorpus\loc\jp_old\1.txt tokenized:
+ '国' 'éš›' '交' 'æµ' '基' '金' '(' 'ジャパンファウンデ' 'ション' ')' 'ã¯' '主' 'ã«' 'æ–‡' '化' '芸' 'è¡“' '交' 'æµ' 'æµ·' '外' 'ã«' 'ãŠ' 'ã‘' 'ã‚‹' 'æ—¥' '本' '語' 'æ•™' '育'
+File !:\data\cpixunittestcorpus\loc\jp_old\2.txt tokenized:
+ 'æ—¥' '本' 'ç ”' '究' '知' 'çš„' '交' 'æµ' 'ã®' '3' 'ã¤' 'ã®' '分' '野' 'ã«' 'ãŠ' 'ã„' 'ã¦' '事' 'æ¥' 'ã‚’' '実' 'æ–½' 'ã—' 'ã¦' 'ãŠ' 'ã‚Š'
+File !:\data\cpixunittestcorpus\loc\jp_old\3.txt tokenized:
+ 'ã' 'ã‚Œ' 'ãž' 'ã‚Œ' 'ã®' '分' '野' 'ã§' 'å…¬' 'å‹Ÿ' 'プãƒã‚°ãƒ©ãƒ ' 'ã«' 'よ' 'ã‚Š' '国' 'éš›' '交' 'æµ' '事' 'æ¥' 'ã‚’' 'ä¼' 'ç”»' 'ã™' 'ã‚‹' '個' '人' 'ã‚„'
+File !:\data\cpixunittestcorpus\loc\jp_old\4.txt tokenized:
+ '団' '体' 'ã«' '対' 'ã—' 'ã¦' '助' 'æˆ' '金' 'ç ”' '究' '奨' 'å¦' '金' 'ç‰' 'ã‚’' 'æ' 'ä¾›' 'ã—' 'ã¦' 'ã„' 'ã¾' 'ã™'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,20 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\loc\ko\1.txt tokenized:
+ 'ì œ'|'제'|'á„Œ' 'ê°€'|'가'|'á„€' 'ë…¼' '스'|'스'|'ᄉ' '톱'|'á„ᅩᆸ'|'á„á…©'|'á„' 'ì„'|'을'|'á„‹á…³'|'á„‹' 'ë³´' '는'|'는'|'á„‚á…³'|'á„‚' 'ë°'|'데'|'ᄃ' 'ìš”'|'á„‹á…'|'á„‹' 'ê¹€' '지'|'지'|'á„Œ' 'ìš°'|'á„‹á…®'|'á„‹' 'ê°€'|'가'|'á„€' '스' 'í† '|'á„á…©'|'á„' '리'|'á„…á…µ'|'á„…' 'ìƒ'|'상'|'사'|'ᄉ' '으'|'á„‹á…³'|'á„‹' 'ë¡œ'|'á„…á…©'|'á„…' 'ì¼' '본'|'본'|'보'|'ᄇ' '으'|'á„‹á…³'|'á„‹' 'ë¡œ'|'á„…á…©'|'á„…' 'ê°„' '다'|'다'|'ᄃ' '구'|'구'|'á„€' '하' '네'|'á„‚á…¦'|'á„‚' 'ìš”'|'á„‹á…'|'á„‹'
+File !:\data\cpixunittestcorpus\loc\ko\2.txt tokenized:
+ 'ê·¸'|'그'|'á„€' '러'|'á„…á…¥'|'á„…' 'ê³ '|'고'|'á„€' '나'|'á„‚á…¡'|'á„‚' 'ì„œ'|'서'|'ᄉ' '다' 'ìŒ'|'음'|'á„‹á…³'|'á„‹' 'ì´' '야'|'á„‹á…£'|'á„‹' '기'|'기'|'á„€' '예' 'ê³ '|'고'|'á„€' '는'|'는'|'á„‚á…³'|'á„‚' '안' '나'|'á„‚á…¡'|'á„‚' '오'|'á„‹á…©'|'á„‹' 'ê³ '|'고'|'á„€' 'ê¹€' '지'|'지'|'á„Œ' 'ìš°'|'á„‹á…®'|'á„‹' 'ì˜'|'á„‹á…´'|'á„‹' '첨' '부'|'부'|'ᄇ' 'í„°'|'á„á…¥'|'á„' 'ì—¬' '태'|'á„á…¢'|'á„' '까'|'á„á…¡'|'á„' '지'|'지'|'á„Œ' 'ì˜'|'á„‹á…´'|'á„‹' 'ì´' '미'|'미'|'ᄆ' '지'|'지'|'á„Œ' '만'|'만'|'마'|'ᄆ' 'ë³´' 'ì—¬'|'á„‹á…§'|'á„‹' '주'|'주'|'á„Œ' 'ê³ '|'고'|'á„€'
+File !:\data\cpixunittestcorpus\loc\ko\3.txt tokenized:
+ 'ê·¸'|'그'|'á„€' '냥'|'냥'|'á„‚á…£'|'á„‚' 'ë' '냈'|'냈'|'á„‚á…¢'|'á„‚' '는'|'는'|'á„‚á…³'|'á„‚' 'ë°'|'데'|'ᄃ' 'ìš”'|'á„‹á…'|'á„‹' 'ì´' 'ê±°'|'거'|'á„€' '진' '짜'|'á„á…¡'|'á„' 'ì´' '별'|'별'|'벼'|'ᄇ' '하'|'á„’á…¡'|'á„’' '는'|'는'|'á„‚á…³'|'á„‚' 'ê±°'|'거'|'á„€' 'ê°™' '다'|'다'|'ᄃ' '는'|'는'|'á„‚á…³'|'á„‚' 'ìƒ' 'ê°'|'각'|'가'|'á„€' 'ì´'|'á„‹á…µ'|'á„‹'
+File !:\data\cpixunittestcorpus\loc\ko\4.txt tokenized:
+ 'ê·¸'|'그'|'á„€' 'ë•Œ'|'á„„á…¢'|'á„„' 'ì—¬' '승'|'승'|'스'|'ᄉ' 'í˜'|'혁'|'á„’á…§'|'á„’' '처'|'á„Žá…¥'|'á„Ž' '럼'|'럼'|'á„…á…¥'|'á„…' 'ë…¼' '스'|'스'|'ᄉ' '톱'|'á„ᅩᆸ'|'á„á…©'|'á„' 'ê·¹' '중'|'중'|'주'|'á„Œ' 'ì¸' '물'|'물'|'무'|'ᄆ' 'ì—'|'á„‹á…¦'|'á„‹' 'ì„œ'|'서'|'ᄉ' 'ë¹ ' '트'|'á„á…³'|'á„' '리'|'á„…á…µ'|'á„…' '는'|'는'|'á„‚á…³'|'á„‚' 'ê±´'|'건'|'거'|'á„€' 'ê°€'|'가'|'á„€' 'ìš”'|'á„‹á…'|'á„‹'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,20 @@
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+
+Analyzer "natural":
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้à¸à¸‡' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 'เมื่à¸' 'ปี' '2541' '-2544' 'คืà¸' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่à¸' 'เนื่à¸à¸‡' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\3.txt tokenized:
+ 'จะ' 'เà¸à¸´à¸”' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' 'จาà¸' 'à¸à¸¥à¸¸à¹ˆà¸¡' 'ดาว' 'สิงโต' 'หรืà¸' 'à¸à¸™' 'ดาวตà¸' 'เลโà¸' 'นิคส์' 'ที่' 'นัà¸' 'ดาราศาสตร์' 'ทั้ง' 'หลาย' 'คาด' 'ว่า' 'จะ' 'มี' 'ประมาณ' '100' '-150' 'ดวง' 'ต่à¸' 'ชั่วโมง'
+File !:\data\cpixunittestcorpus\loc\th\4.txt tokenized:
+ 'เมื่à¸' 'วัน' 'ที่' '8' 'มีนา' 'ที่' 'ผ่าน' 'มา' 'ผม' 'ได้' 'ไป' 'งาน' 'ที่' 'โรงเรียน' 'เหมืà¸à¸™' 'เช่น' 'ทุà¸' 'ปี' 'ตà¸à¸™' 'à¸à¸¥à¸±à¸š' 'เดิน' 'มา' 'ตาม' 'ตึà¸' 'ยาว' 'เพื่à¸' 'จะ' 'à¸à¸¥à¸±à¸š' 'มาท' 'า' 'งป' 'ระตู' 'ด้าน' 'เพาะ' 'ช่าง'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,103 @@
+locale=en:
+Analyzer "
+locale_switch {
+ case 'en': stdtokens>stdfilter>lowercase>stop(en);
+ case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);
+ case 'ca': stdtokens>stdfilter>lowercase>accent;
+ default: stdtokens>stdfilter>lowercase;
+}":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541' '-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
+
+locale=th:
+Analyzer "
+locale_switch {
+ case 'en': stdtokens>stdfilter>lowercase>stop(en);
+ case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);
+ case 'ca': stdtokens>stdfilter>lowercase>accent;
+ default: stdtokens>stdfilter>lowercase;
+}":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'à¸à¸™' 'ดาวตà¸' '7' '-18' 'พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชà¸à¹Œ' 'ภูมิปัà¸à¸à¸²' 'ท้à¸à¸‡' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'à¸à¸¥à¹ˆà¸²à¸§' 'ว่า' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะ' 'มี' 'ปราà¸à¸à¸à¸²à¸£à¸“์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัà¸' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'à¹à¸¥à¹‰à¸§' 'เมื่à¸' 'ปี' '2541' '-2544' 'คืà¸' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่à¸' 'เนื่à¸à¸‡' 'วัน' 'ที่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
+
+locale=ca:
+Analyzer "
+locale_switch {
+ case 'en': stdtokens>stdfilter>lowercase>stop(en);
+ case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);
+ case 'ca': stdtokens>stdfilter>lowercase>accent;
+ default: stdtokens>stdfilter>lowercase;
+}":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'is' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teeta'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'nain'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541' '-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
+
+default locale:
+Analyzer "
+locale_switch {
+ case 'en': stdtokens>stdfilter>lowercase>stop(en);
+ case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);
+ case 'ca': stdtokens>stdfilter>lowercase>accent;
+ default: stdtokens>stdfilter>lowercase;
+}":
+File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
+ 'i' 'am' 'happy'
+File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
+ 'oh' 'happiness'
+File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
+ 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
+File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
+ 'what' 'is' 'happening' 'here'
+File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
+ 'juon' 'nyt' 'teetä'
+File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
+ 'tee' 'näin'
+File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
+ 'ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸' '7' '-18' 'พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸²' '17' '-18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
+ 'จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ' '2541' '-2544' 'คืà¸à¹ƒà¸™à¸„ืนวันที่' '17' 'ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่' '18' 'พฤศจิà¸à¸²à¸¢à¸™' '2552'
+
--- a/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,4 +1,8 @@
-ParseException: Unexpected EOF at: "foobar(zap, foo, 'bar', 'raf', do, *here*"
-LexException: Unrecognized syntax: ''a, raboof)' at: "foobar(zap, foo, 'bar', *here*'a, raboof)"
-LexException: Unrecognized syntax: '!' at: "foobar(*here*!zap, foo, 'bar', 'a', raboof)"
-ParseException: Expected token of type 10 instead of token 'raboof' of type 3 at: "foobar(zap, foo, 'bar', 'a' *here*raboof*here*)"
+ParseException: Unexpected EOF at:
+"foobar(zap, foo, 'bar', 'raf', do, *here*"
+LexException: Unrecognized syntax: ''a, raboof)' at:
+"foobar(zap, foo, 'bar', *here*'a, raboof)"
+LexException: Unrecognized syntax: '!' at:
+"foobar(*here*!zap, foo, 'bar', 'a', raboof)"
+ParseException: Expected comma instead of token 'raboof' of type identifier at:
+"foobar(zap, foo, 'bar', 'a' *here*raboof*here*)"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/optimized_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1010 @@
+Started indexing.
+Indexed 200 documents.
+Search with b* found 59 hits
+Search with r* found 48 hits
+Search with n* found 62 hits
+Search with u* found 100 hits
+Search with e* found 47 hits
+Search with u* found 100 hits
+Search with m* found 73 hits
+Search with h* found 92 hits
+Search with g* found 75 hits
+Search with u* found 100 hits
+Search with m* found 73 hits
+Search with d* found 77 hits
+Search with p* found 43 hits
+Search with v* found 8 hits
+Search with q* found 3 hits
+Search with v* found 8 hits
+Search with w* found 107 hits
+Search with m* found 73 hits
+Search with l* found 93 hits
+Search with k* found 22 hits
+Search with y* found 33 hits
+Search with b* found 59 hits
+Search with n* found 62 hits
+Search with w* found 107 hits
+Search with y* found 33 hits
+Search with b* found 59 hits
+Search with q* found 3 hits
+Search with c* found 97 hits
+Search with w* found 107 hits
+Search with c* found 97 hits
+Search with k* found 22 hits
+Search with x* found 5 hits
+Search with v* found 8 hits
+Search with y* found 33 hits
+Search with t* found 81 hits
+Search with b* found 59 hits
+Search with v* found 8 hits
+Search with g* found 75 hits
+Search with i* found 98 hits
+Search with e* found 47 hits
+Search with e* found 47 hits
+Search with w* found 107 hits
+Search with h* found 92 hits
+Search with t* found 81 hits
+Search with v* found 8 hits
+Search with a* found 67 hits
+Search with p* found 43 hits
+Search with u* found 100 hits
+Search with o* found 63 hits
+Search with d* found 77 hits
+Search with f* found 42 hits
+Search with o* found 63 hits
+Search with g* found 75 hits
+Search with u* found 100 hits
+Search with n* found 62 hits
+Search with g* found 75 hits
+Search with v* found 8 hits
+Search with e* found 47 hits
+Search with k* found 22 hits
+Search with v* found 8 hits
+Search with j* found 23 hits
+Search with v* found 8 hits
+Search with t* found 81 hits
+Search with f* found 42 hits
+Search with w* found 107 hits
+Search with p* found 43 hits
+Search with i* found 98 hits
+Search with s* found 85 hits
+Search with y* found 33 hits
+Search with s* found 85 hits
+Search with w* found 107 hits
+Search with d* found 77 hits
+Search with s* found 85 hits
+Search with g* found 75 hits
+Search with w* found 107 hits
+Search with o* found 63 hits
+Search with i* found 98 hits
+Search with o* found 63 hits
+Search with l* found 93 hits
+Search with x* found 5 hits
+Search with t* found 81 hits
+Search with q* found 3 hits
+Search with o* found 63 hits
+Search with a* found 67 hits
+Search with m* found 73 hits
+Search with c* found 97 hits
+Search with i* found 98 hits
+Search with k* found 22 hits
+Search with g* found 75 hits
+Search with t* found 81 hits
+Search with g* found 75 hits
+Search with r* found 48 hits
+Search with r* found 48 hits
+Search with d* found 77 hits
+Search with a* found 67 hits
+Search with o* found 63 hits
+Search with s* found 85 hits
+Search with i* found 98 hits
+Search with j* found 23 hits
+Search with t* found 81 hits
+Search with ci* found 2 hits
+Search with wu* found 0 hits
+Search with qv* found 0 hits
+Search with ly* found 0 hits
+Search with mw* found 0 hits
+Search with wg* found 0 hits
+Search with pn* found 0 hits
+Search with je* found 1 hits
+Search with pt* found 0 hits
+Search with ry* found 0 hits
+Search with pa* found 9 hits
+Search with qh* found 0 hits
+Search with dq* found 0 hits
+Search with xy* found 4 hits
+Search with cj* found 0 hits
+Search with se* found 7 hits
+Search with rs* found 0 hits
+Search with bk* found 2 hits
+Search with qo* found 0 hits
+Search with ke* found 2 hits
+Search with nj* found 0 hits
+Search with mf* found 0 hits
+Search with xv* found 0 hits
+Search with kn* found 1 hits
+Search with sc* found 4 hits
+Search with oi* found 0 hits
+Search with eg* found 0 hits
+Search with si* found 7 hits
+Search with ys* found 0 hits
+Search with jb* found 0 hits
+Search with de* found 30 hits
+Search with hw* found 1 hits
+Search with yk* found 0 hits
+Search with iq* found 0 hits
+Search with yu* found 2 hits
+Search with uo* found 0 hits
+Search with eh* found 3 hits
+Search with ue* found 0 hits
+Search with gf* found 0 hits
+Search with uy* found 0 hits
+Search with jj* found 0 hits
+Search with jn* found 0 hits
+Search with pc* found 0 hits
+Search with xp* found 0 hits
+Search with vh* found 0 hits
+Search with sb* found 0 hits
+Search with ob* found 0 hits
+Search with xn* found 0 hits
+Search with li* found 22 hits
+Search with en* found 11 hits
+Search with db* found 0 hits
+Search with ck* found 0 hits
+Search with lw* found 0 hits
+Search with ot* found 3 hits
+Search with cj* found 0 hits
+Search with sn* found 0 hits
+Search with tc* found 0 hits
+Search with cl* found 6 hits
+Search with hc* found 0 hits
+Search with df* found 0 hits
+Search with mw* found 0 hits
+Search with gb* found 0 hits
+Search with xe* found 0 hits
+Search with ql* found 0 hits
+Search with px* found 0 hits
+Search with ys* found 0 hits
+Search with yc* found 0 hits
+Search with dm* found 0 hits
+Search with ys* found 0 hits
+Search with ge* found 8 hits
+Search with cb* found 0 hits
+Search with tx* found 1 hits
+Search with gx* found 0 hits
+Search with kn* found 1 hits
+Search with bn* found 0 hits
+Search with un* found 4 hits
+Search with mb* found 0 hits
+Search with qk* found 0 hits
+Search with hh* found 0 hits
+Search with yw* found 0 hits
+Search with hx* found 0 hits
+Search with sj* found 0 hits
+Search with cv* found 0 hits
+Search with vc* found 0 hits
+Search with qf* found 0 hits
+Search with iv* found 0 hits
+Search with gc* found 0 hits
+Search with tm* found 6 hits
+Search with bg* found 0 hits
+Search with ce* found 1 hits
+Search with uw* found 0 hits
+Search with rh* found 0 hits
+Search with xk* found 0 hits
+Search with ui* found 0 hits
+Search with tt* found 1 hits
+Search with hc* found 0 hits
+Search with ta* found 13 hits
+Search with lw* found 0 hits
+Search with wj* found 0 hits
+Search with bp* found 0 hits
+Started indexing.
+Indexed 200 documents.
+Search with o* found 129 hits
+Search with j* found 43 hits
+Search with l* found 193 hits
+Search with v* found 19 hits
+Search with n* found 128 hits
+Search with i* found 192 hits
+Search with k* found 40 hits
+Search with q* found 6 hits
+Search with o* found 129 hits
+Search with n* found 128 hits
+Search with u* found 203 hits
+Search with l* found 193 hits
+Search with k* found 40 hits
+Search with o* found 129 hits
+Search with v* found 19 hits
+Search with l* found 193 hits
+Search with y* found 69 hits
+Search with q* found 6 hits
+Search with v* found 19 hits
+Search with v* found 19 hits
+Search with m* found 153 hits
+Search with d* found 163 hits
+Search with x* found 13 hits
+Search with g* found 143 hits
+Search with e* found 105 hits
+Search with l* found 193 hits
+Search with f* found 78 hits
+Search with d* found 163 hits
+Search with u* found 203 hits
+Search with g* found 143 hits
+Search with s* found 180 hits
+Search with k* found 40 hits
+Search with r* found 86 hits
+Search with h* found 167 hits
+Search with i* found 192 hits
+Search with i* found 192 hits
+Search with p* found 76 hits
+Search with s* found 180 hits
+Search with y* found 69 hits
+Search with g* found 143 hits
+Search with g* found 143 hits
+Search with u* found 203 hits
+Search with s* found 180 hits
+Search with t* found 176 hits
+Search with l* found 193 hits
+Search with q* found 6 hits
+Search with f* found 78 hits
+Search with l* found 193 hits
+Search with h* found 167 hits
+Search with b* found 113 hits
+Search with j* found 43 hits
+Search with v* found 19 hits
+Search with g* found 143 hits
+Search with h* found 167 hits
+Search with e* found 105 hits
+Search with k* found 40 hits
+Search with u* found 203 hits
+Search with k* found 40 hits
+Search with n* found 128 hits
+Search with p* found 76 hits
+Search with s* found 180 hits
+Search with j* found 43 hits
+Search with c* found 173 hits
+Search with n* found 128 hits
+Search with q* found 6 hits
+Search with m* found 153 hits
+Search with v* found 19 hits
+Search with i* found 192 hits
+Search with g* found 143 hits
+Search with u* found 203 hits
+Search with q* found 6 hits
+Search with o* found 129 hits
+Search with r* found 86 hits
+Search with l* found 193 hits
+Search with i* found 192 hits
+Search with e* found 105 hits
+Search with c* found 173 hits
+Search with p* found 76 hits
+Search with r* found 86 hits
+Search with l* found 193 hits
+Search with s* found 180 hits
+Search with b* found 113 hits
+Search with h* found 167 hits
+Search with a* found 141 hits
+Search with k* found 40 hits
+Search with o* found 129 hits
+Search with k* found 40 hits
+Search with f* found 78 hits
+Search with b* found 113 hits
+Search with b* found 113 hits
+Search with w* found 187 hits
+Search with t* found 176 hits
+Search with m* found 153 hits
+Search with a* found 141 hits
+Search with h* found 167 hits
+Search with f* found 78 hits
+Search with m* found 153 hits
+Search with d* found 163 hits
+Search with p* found 76 hits
+Search with u* found 203 hits
+Search with bg* found 0 hits
+Search with ks* found 0 hits
+Search with su* found 12 hits
+Search with wu* found 1 hits
+Search with lq* found 0 hits
+Search with jg* found 0 hits
+Search with rq* found 0 hits
+Search with gc* found 0 hits
+Search with ht* found 0 hits
+Search with hi* found 4 hits
+Search with wh* found 47 hits
+Search with fj* found 0 hits
+Search with hm* found 8 hits
+Search with ov* found 4 hits
+Search with se* found 20 hits
+Search with rt* found 0 hits
+Search with me* found 65 hits
+Search with oh* found 12 hits
+Search with yo* found 22 hits
+Search with dm* found 0 hits
+Search with fo* found 13 hits
+Search with tx* found 1 hits
+Search with hc* found 0 hits
+Search with cp* found 0 hits
+Search with vm* found 1 hits
+Search with xs* found 0 hits
+Search with td* found 9 hits
+Search with ed* found 0 hits
+Search with su* found 12 hits
+Search with yn* found 0 hits
+Search with bs* found 0 hits
+Search with jo* found 5 hits
+Search with wx* found 0 hits
+Search with vy* found 0 hits
+Search with mb* found 0 hits
+Search with ls* found 0 hits
+Search with pf* found 0 hits
+Search with sx* found 0 hits
+Search with iw* found 0 hits
+Search with pg* found 0 hits
+Search with jp* found 0 hits
+Search with cf* found 0 hits
+Search with vi* found 1 hits
+Search with io* found 0 hits
+Search with ek* found 0 hits
+Search with ef* found 0 hits
+Search with dn* found 0 hits
+Search with vd* found 0 hits
+Search with ls* found 0 hits
+Search with cb* found 0 hits
+Search with vp* found 0 hits
+Search with vo* found 1 hits
+Search with vq* found 0 hits
+Search with mg* found 0 hits
+Search with ne* found 36 hits
+Search with oa* found 0 hits
+Search with tq* found 0 hits
+Search with fp* found 0 hits
+Search with co* found 72 hits
+Search with eg* found 0 hits
+Search with bk* found 3 hits
+Search with le* found 71 hits
+Search with xk* found 0 hits
+Search with hm* found 8 hits
+Search with dl* found 0 hits
+Search with pb* found 0 hits
+Search with cl* found 14 hits
+Search with pa* found 14 hits
+Search with ce* found 1 hits
+Search with ir* found 0 hits
+Search with iw* found 0 hits
+Search with rd* found 0 hits
+Search with qa* found 0 hits
+Search with ss* found 0 hits
+Search with qa* found 0 hits
+Search with yr* found 3 hits
+Search with km* found 0 hits
+Search with vl* found 0 hits
+Search with wg* found 0 hits
+Search with xc* found 0 hits
+Search with rn* found 0 hits
+Search with ev* found 3 hits
+Search with bv* found 0 hits
+Search with vf* found 0 hits
+Search with be* found 17 hits
+Search with yj* found 0 hits
+Search with cr* found 9 hits
+Search with mu* found 16 hits
+Search with ti* found 50 hits
+Search with nk* found 0 hits
+Search with io* found 0 hits
+Search with cs* found 0 hits
+Search with da* found 23 hits
+Search with gd* found 4 hits
+Search with ge* found 13 hits
+Search with hb* found 0 hits
+Search with tn* found 0 hits
+Search with ww* found 1 hits
+Search with kt* found 0 hits
+Search with cl* found 14 hits
+Started indexing.
+Indexed 200 documents.
+Search with a* found 238 hits
+Search with c* found 265 hits
+Search with u* found 316 hits
+Search with c* found 265 hits
+Search with v* found 32 hits
+Search with k* found 53 hits
+Search with y* found 93 hits
+Search with p* found 133 hits
+Search with s* found 303 hits
+Search with m* found 268 hits
+Search with a* found 238 hits
+Search with d* found 248 hits
+Search with d* found 248 hits
+Search with e* found 151 hits
+Search with x* found 17 hits
+Search with j* found 63 hits
+Search with e* found 151 hits
+Search with f* found 134 hits
+Search with m* found 268 hits
+Search with n* found 197 hits
+Search with j* found 63 hits
+Search with t* found 267 hits
+Search with o* found 193 hits
+Search with g* found 236 hits
+Search with i* found 282 hits
+Search with n* found 197 hits
+Search with d* found 248 hits
+Search with s* found 303 hits
+Search with j* found 63 hits
+Search with g* found 236 hits
+Search with g* found 236 hits
+Search with k* found 53 hits
+Search with k* found 53 hits
+Search with d* found 248 hits
+Search with o* found 193 hits
+Search with g* found 236 hits
+Search with n* found 197 hits
+Search with o* found 193 hits
+Search with v* found 32 hits
+Search with i* found 282 hits
+Search with d* found 248 hits
+Search with x* found 17 hits
+Search with l* found 292 hits
+Search with j* found 63 hits
+Search with c* found 265 hits
+Search with k* found 53 hits
+Search with s* found 303 hits
+Search with i* found 282 hits
+Search with p* found 133 hits
+Search with h* found 259 hits
+Search with v* found 32 hits
+Search with y* found 93 hits
+Search with b* found 173 hits
+Search with m* found 268 hits
+Search with h* found 259 hits
+Search with l* found 292 hits
+Search with b* found 173 hits
+Search with l* found 292 hits
+Search with g* found 236 hits
+Search with m* found 268 hits
+Search with r* found 123 hits
+Search with m* found 268 hits
+Search with w* found 297 hits
+Search with e* found 151 hits
+Search with s* found 303 hits
+Search with o* found 193 hits
+Search with k* found 53 hits
+Search with g* found 236 hits
+Search with d* found 248 hits
+Search with i* found 282 hits
+Search with p* found 133 hits
+Search with i* found 282 hits
+Search with i* found 282 hits
+Search with d* found 248 hits
+Search with r* found 123 hits
+Search with m* found 268 hits
+Search with n* found 197 hits
+Search with m* found 268 hits
+Search with u* found 316 hits
+Search with d* found 248 hits
+Search with t* found 267 hits
+Search with t* found 267 hits
+Search with f* found 134 hits
+Search with x* found 17 hits
+Search with g* found 236 hits
+Search with m* found 268 hits
+Search with j* found 63 hits
+Search with j* found 63 hits
+Search with x* found 17 hits
+Search with s* found 303 hits
+Search with w* found 297 hits
+Search with r* found 123 hits
+Search with h* found 259 hits
+Search with t* found 267 hits
+Search with x* found 17 hits
+Search with a* found 238 hits
+Search with i* found 282 hits
+Search with k* found 53 hits
+Search with h* found 259 hits
+Search with n* found 197 hits
+Search with sy* found 1 hits
+Search with wd* found 0 hits
+Search with cq* found 0 hits
+Search with ps* found 0 hits
+Search with gn* found 0 hits
+Search with va* found 0 hits
+Search with hd* found 0 hits
+Search with xn* found 0 hits
+Search with qk* found 0 hits
+Search with aq* found 0 hits
+Search with dw* found 0 hits
+Search with lk* found 0 hits
+Search with sj* found 0 hits
+Search with nc* found 0 hits
+Search with uu* found 0 hits
+Search with rp* found 0 hits
+Search with vq* found 0 hits
+Search with va* found 0 hits
+Search with kn* found 11 hits
+Search with sq* found 2 hits
+Search with br* found 23 hits
+Search with qk* found 0 hits
+Search with ur* found 52 hits
+Search with bn* found 0 hits
+Search with cb* found 0 hits
+Search with hf* found 0 hits
+Search with xs* found 0 hits
+Search with rq* found 0 hits
+Search with ef* found 0 hits
+Search with ub* found 0 hits
+Search with cn* found 0 hits
+Search with ta* found 27 hits
+Search with gp* found 0 hits
+Search with bq* found 0 hits
+Search with dv* found 1 hits
+Search with hh* found 0 hits
+Search with ny* found 0 hits
+Search with rj* found 0 hits
+Search with qs* found 0 hits
+Search with yu* found 25 hits
+Search with tg* found 0 hits
+Search with ct* found 1 hits
+Search with yt* found 0 hits
+Search with lg* found 0 hits
+Search with cg* found 0 hits
+Search with hg* found 0 hits
+Search with vd* found 0 hits
+Search with hd* found 0 hits
+Search with uk* found 0 hits
+Search with tb* found 0 hits
+Search with ge* found 19 hits
+Search with iu* found 0 hits
+Search with fc* found 0 hits
+Search with gx* found 0 hits
+Search with vf* found 0 hits
+Search with sr* found 1 hits
+Search with mu* found 26 hits
+Search with mn* found 0 hits
+Search with qx* found 0 hits
+Search with vs* found 0 hits
+Search with gg* found 0 hits
+Search with af* found 16 hits
+Search with jj* found 0 hits
+Search with if* found 0 hits
+Search with te* found 25 hits
+Search with ga* found 7 hits
+Search with io* found 0 hits
+Search with wn* found 0 hits
+Search with sd* found 0 hits
+Search with lo* found 113 hits
+Search with le* found 89 hits
+Search with jx* found 0 hits
+Search with bv* found 0 hits
+Search with ns* found 0 hits
+Search with vk* found 0 hits
+Search with nc* found 0 hits
+Search with qp* found 0 hits
+Search with ha* found 109 hits
+Search with yr* found 3 hits
+Search with hu* found 14 hits
+Search with wn* found 0 hits
+Search with xh* found 0 hits
+Search with eu* found 0 hits
+Search with vx* found 0 hits
+Search with bj* found 0 hits
+Search with om* found 0 hits
+Search with ox* found 0 hits
+Search with mr* found 6 hits
+Search with ta* found 27 hits
+Search with kr* found 0 hits
+Search with kb* found 1 hits
+Search with wd* found 0 hits
+Search with qe* found 0 hits
+Search with gs* found 0 hits
+Search with yn* found 0 hits
+Search with nx* found 0 hits
+Search with cl* found 17 hits
+Search with fi* found 41 hits
+Search with hb* found 0 hits
+Search with ik* found 2 hits
+Started indexing.
+Indexed 200 documents.
+Search with l* found 402 hits
+Search with x* found 26 hits
+Search with w* found 401 hits
+Search with c* found 365 hits
+Search with x* found 26 hits
+Search with j* found 81 hits
+Search with t* found 365 hits
+Search with u* found 410 hits
+Search with l* found 402 hits
+Search with h* found 376 hits
+Search with m* found 371 hits
+Search with y* found 136 hits
+Search with i* found 357 hits
+Search with j* found 81 hits
+Search with c* found 365 hits
+Search with b* found 241 hits
+Search with q* found 26 hits
+Search with i* found 357 hits
+Search with t* found 365 hits
+Search with p* found 179 hits
+Search with w* found 401 hits
+Search with i* found 357 hits
+Search with n* found 258 hits
+Search with b* found 241 hits
+Search with v* found 44 hits
+Search with s* found 416 hits
+Search with l* found 402 hits
+Search with f* found 187 hits
+Search with w* found 401 hits
+Search with u* found 410 hits
+Search with p* found 179 hits
+Search with k* found 73 hits
+Search with u* found 410 hits
+Search with m* found 371 hits
+Search with m* found 371 hits
+Search with s* found 416 hits
+Search with x* found 26 hits
+Search with i* found 357 hits
+Search with n* found 258 hits
+Search with k* found 73 hits
+Search with r* found 170 hits
+Search with b* found 241 hits
+Search with j* found 81 hits
+Search with c* found 365 hits
+Search with m* found 371 hits
+Search with l* found 402 hits
+Search with e* found 210 hits
+Search with d* found 338 hits
+Search with w* found 401 hits
+Search with x* found 26 hits
+Search with s* found 416 hits
+Search with v* found 44 hits
+Search with i* found 357 hits
+Search with i* found 357 hits
+Search with w* found 401 hits
+Search with h* found 376 hits
+Search with e* found 210 hits
+Search with i* found 357 hits
+Search with m* found 371 hits
+Search with b* found 241 hits
+Search with f* found 187 hits
+Search with d* found 338 hits
+Search with n* found 258 hits
+Search with a* found 314 hits
+Search with r* found 170 hits
+Search with c* found 365 hits
+Search with v* found 44 hits
+Search with q* found 26 hits
+Search with k* found 73 hits
+Search with l* found 402 hits
+Search with d* found 338 hits
+Search with f* found 187 hits
+Search with o* found 283 hits
+Search with m* found 371 hits
+Search with h* found 376 hits
+Search with c* found 365 hits
+Search with a* found 314 hits
+Search with l* found 402 hits
+Search with f* found 187 hits
+Search with y* found 136 hits
+Search with m* found 371 hits
+Search with b* found 241 hits
+Search with u* found 410 hits
+Search with w* found 401 hits
+Search with l* found 402 hits
+Search with r* found 170 hits
+Search with e* found 210 hits
+Search with p* found 179 hits
+Search with d* found 338 hits
+Search with r* found 170 hits
+Search with s* found 416 hits
+Search with k* found 73 hits
+Search with w* found 401 hits
+Search with g* found 325 hits
+Search with n* found 258 hits
+Search with o* found 283 hits
+Search with i* found 357 hits
+Search with j* found 81 hits
+Search with h* found 376 hits
+Search with v* found 44 hits
+Search with uk* found 0 hits
+Search with bm* found 0 hits
+Search with yk* found 0 hits
+Search with oa* found 0 hits
+Search with yv* found 0 hits
+Search with bl* found 5 hits
+Search with yw* found 0 hits
+Search with kl* found 0 hits
+Search with qp* found 0 hits
+Search with bt* found 0 hits
+Search with jw* found 0 hits
+Search with hg* found 0 hits
+Search with du* found 89 hits
+Search with wm* found 0 hits
+Search with ef* found 4 hits
+Search with kb* found 1 hits
+Search with rn* found 0 hits
+Search with nr* found 0 hits
+Search with xe* found 0 hits
+Search with tw* found 5 hits
+Search with bu* found 40 hits
+Search with ka* found 12 hits
+Search with rv* found 0 hits
+Search with nj* found 0 hits
+Search with lp* found 0 hits
+Search with fu* found 10 hits
+Search with om* found 0 hits
+Search with br* found 28 hits
+Search with ha* found 154 hits
+Search with gn* found 0 hits
+Search with fq* found 0 hits
+Search with rx* found 0 hits
+Search with ef* found 4 hits
+Search with rf* found 0 hits
+Search with ml* found 0 hits
+Search with cn* found 0 hits
+Search with gn* found 0 hits
+Search with pb* found 0 hits
+Search with je* found 1 hits
+Search with ku* found 0 hits
+Search with vr* found 0 hits
+Search with rk* found 0 hits
+Search with hu* found 21 hits
+Search with co* found 159 hits
+Search with ul* found 0 hits
+Search with fd* found 0 hits
+Search with ew* found 0 hits
+Search with dl* found 0 hits
+Search with eu* found 0 hits
+Search with qq* found 0 hits
+Search with gs* found 0 hits
+Search with go* found 268 hits
+Search with iw* found 0 hits
+Search with pr* found 46 hits
+Search with bc* found 0 hits
+Search with ow* found 0 hits
+Search with ui* found 0 hits
+Search with je* found 1 hits
+Search with dl* found 0 hits
+Search with ub* found 0 hits
+Search with ya* found 29 hits
+Search with ee* found 0 hits
+Search with yh* found 0 hits
+Search with pg* found 0 hits
+Search with cg* found 0 hits
+Search with wk* found 8 hits
+Search with bg* found 0 hits
+Search with yk* found 0 hits
+Search with dp* found 0 hits
+Search with cg* found 0 hits
+Search with tt* found 13 hits
+Search with fq* found 0 hits
+Search with co* found 159 hits
+Search with ug* found 0 hits
+Search with cq* found 0 hits
+Search with hc* found 0 hits
+Search with sn* found 1 hits
+Search with is* found 1 hits
+Search with ux* found 0 hits
+Search with yy* found 0 hits
+Search with gx* found 0 hits
+Search with jh* found 0 hits
+Search with gi* found 17 hits
+Search with rj* found 0 hits
+Search with aw* found 4 hits
+Search with rw* found 0 hits
+Search with qy* found 0 hits
+Search with nu* found 6 hits
+Search with pl* found 22 hits
+Search with bs* found 0 hits
+Search with ck* found 0 hits
+Search with ww* found 1 hits
+Search with xf* found 1 hits
+Search with pu* found 9 hits
+Search with fq* found 0 hits
+Search with tl* found 0 hits
+Search with rf* found 0 hits
+Search with sx* found 0 hits
+Search with ql* found 0 hits
+Search with ks* found 0 hits
+Started indexing.
+Indexed 200 documents.
+Search with k* found 89 hits
+Search with e* found 279 hits
+Search with p* found 215 hits
+Search with b* found 306 hits
+Search with e* found 279 hits
+Search with g* found 408 hits
+Search with v* found 50 hits
+Search with t* found 462 hits
+Search with r* found 213 hits
+Search with a* found 394 hits
+Search with o* found 373 hits
+Search with v* found 50 hits
+Search with m* found 478 hits
+Search with n* found 315 hits
+Search with s* found 530 hits
+Search with n* found 315 hits
+Search with u* found 513 hits
+Search with l* found 527 hits
+Search with i* found 453 hits
+Search with a* found 394 hits
+Search with e* found 279 hits
+Search with f* found 244 hits
+Search with n* found 315 hits
+Search with v* found 50 hits
+Search with m* found 478 hits
+Search with h* found 497 hits
+Search with u* found 513 hits
+Search with d* found 427 hits
+Search with u* found 513 hits
+Search with h* found 497 hits
+Search with y* found 169 hits
+Search with f* found 244 hits
+Search with l* found 527 hits
+Search with o* found 373 hits
+Search with i* found 453 hits
+Search with p* found 215 hits
+Search with w* found 503 hits
+Search with g* found 408 hits
+Search with m* found 478 hits
+Search with q* found 34 hits
+Search with i* found 453 hits
+Search with d* found 427 hits
+Search with m* found 478 hits
+Search with v* found 50 hits
+Search with r* found 213 hits
+Search with i* found 453 hits
+Search with j* found 106 hits
+Search with o* found 373 hits
+Search with t* found 462 hits
+Search with t* found 462 hits
+Search with p* found 215 hits
+Search with a* found 394 hits
+Search with b* found 306 hits
+Search with f* found 244 hits
+Search with w* found 503 hits
+Search with o* found 373 hits
+Search with m* found 478 hits
+Search with t* found 462 hits
+Search with t* found 462 hits
+Search with j* found 106 hits
+Search with b* found 306 hits
+Search with s* found 530 hits
+Search with q* found 34 hits
+Search with m* found 478 hits
+Search with k* found 89 hits
+Search with b* found 306 hits
+Search with f* found 244 hits
+Search with h* found 497 hits
+Search with i* found 453 hits
+Search with t* found 462 hits
+Search with y* found 169 hits
+Search with q* found 34 hits
+Search with w* found 503 hits
+Search with n* found 315 hits
+Search with o* found 373 hits
+Search with q* found 34 hits
+Search with v* found 50 hits
+Search with a* found 394 hits
+Search with g* found 408 hits
+Search with r* found 213 hits
+Search with w* found 503 hits
+Search with x* found 39 hits
+Search with s* found 530 hits
+Search with x* found 39 hits
+Search with d* found 427 hits
+Search with r* found 213 hits
+Search with o* found 373 hits
+Search with s* found 530 hits
+Search with l* found 527 hits
+Search with j* found 106 hits
+Search with c* found 473 hits
+Search with o* found 373 hits
+Search with e* found 279 hits
+Search with v* found 50 hits
+Search with d* found 427 hits
+Search with o* found 373 hits
+Search with w* found 503 hits
+Search with i* found 453 hits
+Search with y* found 169 hits
+Search with h* found 497 hits
+Search with ea* found 66 hits
+Search with bc* found 0 hits
+Search with nr* found 0 hits
+Search with um* found 0 hits
+Search with sb* found 0 hits
+Search with ep* found 0 hits
+Search with yy* found 0 hits
+Search with nf* found 0 hits
+Search with qf* found 0 hits
+Search with xe* found 0 hits
+Search with qc* found 0 hits
+Search with su* found 33 hits
+Search with xw* found 0 hits
+Search with mx* found 0 hits
+Search with fn* found 0 hits
+Search with fm* found 0 hits
+Search with ni* found 23 hits
+Search with ob* found 1 hits
+Search with bl* found 7 hits
+Search with pt* found 0 hits
+Search with nu* found 9 hits
+Search with lo* found 222 hits
+Search with vb* found 0 hits
+Search with tn* found 0 hits
+Search with gr* found 15 hits
+Search with rw* found 0 hits
+Search with ul* found 0 hits
+Search with uu* found 0 hits
+Search with kj* found 0 hits
+Search with sp* found 12 hits
+Search with wb* found 0 hits
+Search with ek* found 0 hits
+Search with ls* found 1 hits
+Search with nm* found 0 hits
+Search with he* found 171 hits
+Search with iu* found 0 hits
+Search with bt* found 2 hits
+Search with lw* found 0 hits
+Search with vg* found 0 hits
+Search with mc* found 2 hits
+Search with ah* found 66 hits
+Search with cw* found 1 hits
+Search with sy* found 1 hits
+Search with sd* found 0 hits
+Search with in* found 28 hits
+Search with uf* found 0 hits
+Search with qa* found 0 hits
+Search with rd* found 1 hits
+Search with uf* found 0 hits
+Search with pc* found 0 hits
+Search with lb* found 0 hits
+Search with ym* found 0 hits
+Search with ul* found 0 hits
+Search with ms* found 19 hits
+Search with ty* found 4 hits
+Search with xt* found 0 hits
+Search with ga* found 15 hits
+Search with tb* found 0 hits
+Search with yo* found 26 hits
+Search with gh* found 1 hits
+Search with ce* found 6 hits
+Search with ov* found 10 hits
+Search with gg* found 0 hits
+Search with yd* found 0 hits
+Search with no* found 157 hits
+Search with ia* found 0 hits
+Search with rh* found 0 hits
+Search with on* found 81 hits
+Search with ub* found 0 hits
+Search with gq* found 0 hits
+Search with de* found 135 hits
+Search with nl* found 0 hits
+Search with gh* found 1 hits
+Search with pf* found 0 hits
+Search with vx* found 0 hits
+Search with oa* found 0 hits
+Search with ed* found 2 hits
+Search with vk* found 0 hits
+Search with ju* found 64 hits
+Search with qa* found 0 hits
+Search with my* found 125 hits
+Search with ae* found 0 hits
+Search with io* found 0 hits
+Search with tg* found 0 hits
+Search with sb* found 0 hits
+Search with wx* found 0 hits
+Search with hk* found 0 hits
+Search with lo* found 222 hits
+Search with tb* found 0 hits
+Search with vr* found 0 hits
+Search with cn* found 0 hits
+Search with sg* found 1 hits
+Search with qq* found 0 hits
+Search with td* found 9 hits
+Search with ok* found 125 hits
+Search with dd* found 0 hits
+Search with lf* found 0 hits
+Search with jt* found 0 hits
+Search with ve* found 17 hits
+Search with aq* found 0 hits
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/prefixopt/unoptimized_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1010 @@
+Started indexing.
+Indexed 200 documents.
+Search with b* found 59 hits
+Search with r* found 48 hits
+Search with n* found 62 hits
+Search with u* found 100 hits
+Search with e* found 47 hits
+Search with u* found 100 hits
+Search with m* found 73 hits
+Search with h* found 92 hits
+Search with g* found 75 hits
+Search with u* found 100 hits
+Search with m* found 73 hits
+Search with d* found 77 hits
+Search with p* found 43 hits
+Search with v* found 8 hits
+Search with q* found 3 hits
+Search with v* found 8 hits
+Search with w* found 107 hits
+Search with m* found 73 hits
+Search with l* found 93 hits
+Search with k* found 22 hits
+Search with y* found 33 hits
+Search with b* found 59 hits
+Search with n* found 62 hits
+Search with w* found 107 hits
+Search with y* found 33 hits
+Search with b* found 59 hits
+Search with q* found 3 hits
+Search with c* found 97 hits
+Search with w* found 107 hits
+Search with c* found 97 hits
+Search with k* found 22 hits
+Search with x* found 5 hits
+Search with v* found 8 hits
+Search with y* found 33 hits
+Search with t* found 81 hits
+Search with b* found 59 hits
+Search with v* found 8 hits
+Search with g* found 75 hits
+Search with i* found 98 hits
+Search with e* found 47 hits
+Search with e* found 47 hits
+Search with w* found 107 hits
+Search with h* found 92 hits
+Search with t* found 81 hits
+Search with v* found 8 hits
+Search with a* found 67 hits
+Search with p* found 43 hits
+Search with u* found 100 hits
+Search with o* found 63 hits
+Search with d* found 77 hits
+Search with f* found 42 hits
+Search with o* found 63 hits
+Search with g* found 75 hits
+Search with u* found 100 hits
+Search with n* found 62 hits
+Search with g* found 75 hits
+Search with v* found 8 hits
+Search with e* found 47 hits
+Search with k* found 22 hits
+Search with v* found 8 hits
+Search with j* found 23 hits
+Search with v* found 8 hits
+Search with t* found 81 hits
+Search with f* found 42 hits
+Search with w* found 107 hits
+Search with p* found 43 hits
+Search with i* found 98 hits
+Search with s* found 85 hits
+Search with y* found 33 hits
+Search with s* found 85 hits
+Search with w* found 107 hits
+Search with d* found 77 hits
+Search with s* found 85 hits
+Search with g* found 75 hits
+Search with w* found 107 hits
+Search with o* found 63 hits
+Search with i* found 98 hits
+Search with o* found 63 hits
+Search with l* found 93 hits
+Search with x* found 5 hits
+Search with t* found 81 hits
+Search with q* found 3 hits
+Search with o* found 63 hits
+Search with a* found 67 hits
+Search with m* found 73 hits
+Search with c* found 97 hits
+Search with i* found 98 hits
+Search with k* found 22 hits
+Search with g* found 75 hits
+Search with t* found 81 hits
+Search with g* found 75 hits
+Search with r* found 48 hits
+Search with r* found 48 hits
+Search with d* found 77 hits
+Search with a* found 67 hits
+Search with o* found 63 hits
+Search with s* found 85 hits
+Search with i* found 98 hits
+Search with j* found 23 hits
+Search with t* found 81 hits
+Search with ci* found 2 hits
+Search with wu* found 0 hits
+Search with qv* found 0 hits
+Search with ly* found 0 hits
+Search with mw* found 0 hits
+Search with wg* found 0 hits
+Search with pn* found 0 hits
+Search with je* found 1 hits
+Search with pt* found 0 hits
+Search with ry* found 0 hits
+Search with pa* found 9 hits
+Search with qh* found 0 hits
+Search with dq* found 0 hits
+Search with xy* found 4 hits
+Search with cj* found 0 hits
+Search with se* found 7 hits
+Search with rs* found 0 hits
+Search with bk* found 2 hits
+Search with qo* found 0 hits
+Search with ke* found 2 hits
+Search with nj* found 0 hits
+Search with mf* found 0 hits
+Search with xv* found 0 hits
+Search with kn* found 1 hits
+Search with sc* found 4 hits
+Search with oi* found 0 hits
+Search with eg* found 0 hits
+Search with si* found 7 hits
+Search with ys* found 0 hits
+Search with jb* found 0 hits
+Search with de* found 30 hits
+Search with hw* found 1 hits
+Search with yk* found 0 hits
+Search with iq* found 0 hits
+Search with yu* found 2 hits
+Search with uo* found 0 hits
+Search with eh* found 3 hits
+Search with ue* found 0 hits
+Search with gf* found 0 hits
+Search with uy* found 0 hits
+Search with jj* found 0 hits
+Search with jn* found 0 hits
+Search with pc* found 0 hits
+Search with xp* found 0 hits
+Search with vh* found 0 hits
+Search with sb* found 0 hits
+Search with ob* found 0 hits
+Search with xn* found 0 hits
+Search with li* found 22 hits
+Search with en* found 11 hits
+Search with db* found 0 hits
+Search with ck* found 0 hits
+Search with lw* found 0 hits
+Search with ot* found 3 hits
+Search with cj* found 0 hits
+Search with sn* found 0 hits
+Search with tc* found 0 hits
+Search with cl* found 6 hits
+Search with hc* found 0 hits
+Search with df* found 0 hits
+Search with mw* found 0 hits
+Search with gb* found 0 hits
+Search with xe* found 0 hits
+Search with ql* found 0 hits
+Search with px* found 0 hits
+Search with ys* found 0 hits
+Search with yc* found 0 hits
+Search with dm* found 0 hits
+Search with ys* found 0 hits
+Search with ge* found 8 hits
+Search with cb* found 0 hits
+Search with tx* found 1 hits
+Search with gx* found 0 hits
+Search with kn* found 1 hits
+Search with bn* found 0 hits
+Search with un* found 4 hits
+Search with mb* found 0 hits
+Search with qk* found 0 hits
+Search with hh* found 0 hits
+Search with yw* found 0 hits
+Search with hx* found 0 hits
+Search with sj* found 0 hits
+Search with cv* found 0 hits
+Search with vc* found 0 hits
+Search with qf* found 0 hits
+Search with iv* found 0 hits
+Search with gc* found 0 hits
+Search with tm* found 6 hits
+Search with bg* found 0 hits
+Search with ce* found 1 hits
+Search with uw* found 0 hits
+Search with rh* found 0 hits
+Search with xk* found 0 hits
+Search with ui* found 0 hits
+Search with tt* found 1 hits
+Search with hc* found 0 hits
+Search with ta* found 13 hits
+Search with lw* found 0 hits
+Search with wj* found 0 hits
+Search with bp* found 0 hits
+Started indexing.
+Indexed 200 documents.
+Search with o* found 129 hits
+Search with j* found 43 hits
+Search with l* found 193 hits
+Search with v* found 19 hits
+Search with n* found 128 hits
+Search with i* found 192 hits
+Search with k* found 40 hits
+Search with q* found 6 hits
+Search with o* found 129 hits
+Search with n* found 128 hits
+Search with u* found 203 hits
+Search with l* found 193 hits
+Search with k* found 40 hits
+Search with o* found 129 hits
+Search with v* found 19 hits
+Search with l* found 193 hits
+Search with y* found 69 hits
+Search with q* found 6 hits
+Search with v* found 19 hits
+Search with v* found 19 hits
+Search with m* found 153 hits
+Search with d* found 163 hits
+Search with x* found 13 hits
+Search with g* found 143 hits
+Search with e* found 105 hits
+Search with l* found 193 hits
+Search with f* found 78 hits
+Search with d* found 163 hits
+Search with u* found 203 hits
+Search with g* found 143 hits
+Search with s* found 180 hits
+Search with k* found 40 hits
+Search with r* found 86 hits
+Search with h* found 167 hits
+Search with i* found 192 hits
+Search with i* found 192 hits
+Search with p* found 76 hits
+Search with s* found 180 hits
+Search with y* found 69 hits
+Search with g* found 143 hits
+Search with g* found 143 hits
+Search with u* found 203 hits
+Search with s* found 180 hits
+Search with t* found 176 hits
+Search with l* found 193 hits
+Search with q* found 6 hits
+Search with f* found 78 hits
+Search with l* found 193 hits
+Search with h* found 167 hits
+Search with b* found 113 hits
+Search with j* found 43 hits
+Search with v* found 19 hits
+Search with g* found 143 hits
+Search with h* found 167 hits
+Search with e* found 105 hits
+Search with k* found 40 hits
+Search with u* found 203 hits
+Search with k* found 40 hits
+Search with n* found 128 hits
+Search with p* found 76 hits
+Search with s* found 180 hits
+Search with j* found 43 hits
+Search with c* found 173 hits
+Search with n* found 128 hits
+Search with q* found 6 hits
+Search with m* found 153 hits
+Search with v* found 19 hits
+Search with i* found 192 hits
+Search with g* found 143 hits
+Search with u* found 203 hits
+Search with q* found 6 hits
+Search with o* found 129 hits
+Search with r* found 86 hits
+Search with l* found 193 hits
+Search with i* found 192 hits
+Search with e* found 105 hits
+Search with c* found 173 hits
+Search with p* found 76 hits
+Search with r* found 86 hits
+Search with l* found 193 hits
+Search with s* found 180 hits
+Search with b* found 113 hits
+Search with h* found 167 hits
+Search with a* found 141 hits
+Search with k* found 40 hits
+Search with o* found 129 hits
+Search with k* found 40 hits
+Search with f* found 78 hits
+Search with b* found 113 hits
+Search with b* found 113 hits
+Search with w* found 187 hits
+Search with t* found 176 hits
+Search with m* found 153 hits
+Search with a* found 141 hits
+Search with h* found 167 hits
+Search with f* found 78 hits
+Search with m* found 153 hits
+Search with d* found 163 hits
+Search with p* found 76 hits
+Search with u* found 203 hits
+Search with bg* found 0 hits
+Search with ks* found 0 hits
+Search with su* found 12 hits
+Search with wu* found 1 hits
+Search with lq* found 0 hits
+Search with jg* found 0 hits
+Search with rq* found 0 hits
+Search with gc* found 0 hits
+Search with ht* found 0 hits
+Search with hi* found 4 hits
+Search with wh* found 47 hits
+Search with fj* found 0 hits
+Search with hm* found 8 hits
+Search with ov* found 4 hits
+Search with se* found 20 hits
+Search with rt* found 0 hits
+Search with me* found 65 hits
+Search with oh* found 12 hits
+Search with yo* found 22 hits
+Search with dm* found 0 hits
+Search with fo* found 13 hits
+Search with tx* found 1 hits
+Search with hc* found 0 hits
+Search with cp* found 0 hits
+Search with vm* found 1 hits
+Search with xs* found 0 hits
+Search with td* found 9 hits
+Search with ed* found 0 hits
+Search with su* found 12 hits
+Search with yn* found 0 hits
+Search with bs* found 0 hits
+Search with jo* found 5 hits
+Search with wx* found 0 hits
+Search with vy* found 0 hits
+Search with mb* found 0 hits
+Search with ls* found 0 hits
+Search with pf* found 0 hits
+Search with sx* found 0 hits
+Search with iw* found 0 hits
+Search with pg* found 0 hits
+Search with jp* found 0 hits
+Search with cf* found 0 hits
+Search with vi* found 1 hits
+Search with io* found 0 hits
+Search with ek* found 0 hits
+Search with ef* found 0 hits
+Search with dn* found 0 hits
+Search with vd* found 0 hits
+Search with ls* found 0 hits
+Search with cb* found 0 hits
+Search with vp* found 0 hits
+Search with vo* found 1 hits
+Search with vq* found 0 hits
+Search with mg* found 0 hits
+Search with ne* found 36 hits
+Search with oa* found 0 hits
+Search with tq* found 0 hits
+Search with fp* found 0 hits
+Search with co* found 72 hits
+Search with eg* found 0 hits
+Search with bk* found 3 hits
+Search with le* found 71 hits
+Search with xk* found 0 hits
+Search with hm* found 8 hits
+Search with dl* found 0 hits
+Search with pb* found 0 hits
+Search with cl* found 14 hits
+Search with pa* found 14 hits
+Search with ce* found 1 hits
+Search with ir* found 0 hits
+Search with iw* found 0 hits
+Search with rd* found 0 hits
+Search with qa* found 0 hits
+Search with ss* found 0 hits
+Search with qa* found 0 hits
+Search with yr* found 3 hits
+Search with km* found 0 hits
+Search with vl* found 0 hits
+Search with wg* found 0 hits
+Search with xc* found 0 hits
+Search with rn* found 0 hits
+Search with ev* found 3 hits
+Search with bv* found 0 hits
+Search with vf* found 0 hits
+Search with be* found 17 hits
+Search with yj* found 0 hits
+Search with cr* found 9 hits
+Search with mu* found 16 hits
+Search with ti* found 50 hits
+Search with nk* found 0 hits
+Search with io* found 0 hits
+Search with cs* found 0 hits
+Search with da* found 23 hits
+Search with gd* found 4 hits
+Search with ge* found 13 hits
+Search with hb* found 0 hits
+Search with tn* found 0 hits
+Search with ww* found 1 hits
+Search with kt* found 0 hits
+Search with cl* found 14 hits
+Started indexing.
+Indexed 200 documents.
+Search with a* found 238 hits
+Search with c* found 265 hits
+Search with u* found 316 hits
+Search with c* found 265 hits
+Search with v* found 32 hits
+Search with k* found 53 hits
+Search with y* found 93 hits
+Search with p* found 133 hits
+Search with s* found 303 hits
+Search with m* found 268 hits
+Search with a* found 238 hits
+Search with d* found 248 hits
+Search with d* found 248 hits
+Search with e* found 151 hits
+Search with x* found 17 hits
+Search with j* found 63 hits
+Search with e* found 151 hits
+Search with f* found 134 hits
+Search with m* found 268 hits
+Search with n* found 197 hits
+Search with j* found 63 hits
+Search with t* found 267 hits
+Search with o* found 193 hits
+Search with g* found 236 hits
+Search with i* found 282 hits
+Search with n* found 197 hits
+Search with d* found 248 hits
+Search with s* found 303 hits
+Search with j* found 63 hits
+Search with g* found 236 hits
+Search with g* found 236 hits
+Search with k* found 53 hits
+Search with k* found 53 hits
+Search with d* found 248 hits
+Search with o* found 193 hits
+Search with g* found 236 hits
+Search with n* found 197 hits
+Search with o* found 193 hits
+Search with v* found 32 hits
+Search with i* found 282 hits
+Search with d* found 248 hits
+Search with x* found 17 hits
+Search with l* found 292 hits
+Search with j* found 63 hits
+Search with c* found 265 hits
+Search with k* found 53 hits
+Search with s* found 303 hits
+Search with i* found 282 hits
+Search with p* found 133 hits
+Search with h* found 259 hits
+Search with v* found 32 hits
+Search with y* found 93 hits
+Search with b* found 173 hits
+Search with m* found 268 hits
+Search with h* found 259 hits
+Search with l* found 292 hits
+Search with b* found 173 hits
+Search with l* found 292 hits
+Search with g* found 236 hits
+Search with m* found 268 hits
+Search with r* found 123 hits
+Search with m* found 268 hits
+Search with w* found 297 hits
+Search with e* found 151 hits
+Search with s* found 303 hits
+Search with o* found 193 hits
+Search with k* found 53 hits
+Search with g* found 236 hits
+Search with d* found 248 hits
+Search with i* found 282 hits
+Search with p* found 133 hits
+Search with i* found 282 hits
+Search with i* found 282 hits
+Search with d* found 248 hits
+Search with r* found 123 hits
+Search with m* found 268 hits
+Search with n* found 197 hits
+Search with m* found 268 hits
+Search with u* found 316 hits
+Search with d* found 248 hits
+Search with t* found 267 hits
+Search with t* found 267 hits
+Search with f* found 134 hits
+Search with x* found 17 hits
+Search with g* found 236 hits
+Search with m* found 268 hits
+Search with j* found 63 hits
+Search with j* found 63 hits
+Search with x* found 17 hits
+Search with s* found 303 hits
+Search with w* found 297 hits
+Search with r* found 123 hits
+Search with h* found 259 hits
+Search with t* found 267 hits
+Search with x* found 17 hits
+Search with a* found 238 hits
+Search with i* found 282 hits
+Search with k* found 53 hits
+Search with h* found 259 hits
+Search with n* found 197 hits
+Search with sy* found 1 hits
+Search with wd* found 0 hits
+Search with cq* found 0 hits
+Search with ps* found 0 hits
+Search with gn* found 0 hits
+Search with va* found 0 hits
+Search with hd* found 0 hits
+Search with xn* found 0 hits
+Search with qk* found 0 hits
+Search with aq* found 0 hits
+Search with dw* found 0 hits
+Search with lk* found 0 hits
+Search with sj* found 0 hits
+Search with nc* found 0 hits
+Search with uu* found 0 hits
+Search with rp* found 0 hits
+Search with vq* found 0 hits
+Search with va* found 0 hits
+Search with kn* found 11 hits
+Search with sq* found 2 hits
+Search with br* found 23 hits
+Search with qk* found 0 hits
+Search with ur* found 52 hits
+Search with bn* found 0 hits
+Search with cb* found 0 hits
+Search with hf* found 0 hits
+Search with xs* found 0 hits
+Search with rq* found 0 hits
+Search with ef* found 0 hits
+Search with ub* found 0 hits
+Search with cn* found 0 hits
+Search with ta* found 27 hits
+Search with gp* found 0 hits
+Search with bq* found 0 hits
+Search with dv* found 1 hits
+Search with hh* found 0 hits
+Search with ny* found 0 hits
+Search with rj* found 0 hits
+Search with qs* found 0 hits
+Search with yu* found 25 hits
+Search with tg* found 0 hits
+Search with ct* found 1 hits
+Search with yt* found 0 hits
+Search with lg* found 0 hits
+Search with cg* found 0 hits
+Search with hg* found 0 hits
+Search with vd* found 0 hits
+Search with hd* found 0 hits
+Search with uk* found 0 hits
+Search with tb* found 0 hits
+Search with ge* found 19 hits
+Search with iu* found 0 hits
+Search with fc* found 0 hits
+Search with gx* found 0 hits
+Search with vf* found 0 hits
+Search with sr* found 1 hits
+Search with mu* found 26 hits
+Search with mn* found 0 hits
+Search with qx* found 0 hits
+Search with vs* found 0 hits
+Search with gg* found 0 hits
+Search with af* found 16 hits
+Search with jj* found 0 hits
+Search with if* found 0 hits
+Search with te* found 25 hits
+Search with ga* found 7 hits
+Search with io* found 0 hits
+Search with wn* found 0 hits
+Search with sd* found 0 hits
+Search with lo* found 113 hits
+Search with le* found 89 hits
+Search with jx* found 0 hits
+Search with bv* found 0 hits
+Search with ns* found 0 hits
+Search with vk* found 0 hits
+Search with nc* found 0 hits
+Search with qp* found 0 hits
+Search with ha* found 109 hits
+Search with yr* found 3 hits
+Search with hu* found 14 hits
+Search with wn* found 0 hits
+Search with xh* found 0 hits
+Search with eu* found 0 hits
+Search with vx* found 0 hits
+Search with bj* found 0 hits
+Search with om* found 0 hits
+Search with ox* found 0 hits
+Search with mr* found 6 hits
+Search with ta* found 27 hits
+Search with kr* found 0 hits
+Search with kb* found 1 hits
+Search with wd* found 0 hits
+Search with qe* found 0 hits
+Search with gs* found 0 hits
+Search with yn* found 0 hits
+Search with nx* found 0 hits
+Search with cl* found 17 hits
+Search with fi* found 41 hits
+Search with hb* found 0 hits
+Search with ik* found 2 hits
+Started indexing.
+Indexed 200 documents.
+Search with l* found 402 hits
+Search with x* found 26 hits
+Search with w* found 401 hits
+Search with c* found 365 hits
+Search with x* found 26 hits
+Search with j* found 81 hits
+Search with t* found 365 hits
+Search with u* found 410 hits
+Search with l* found 402 hits
+Search with h* found 376 hits
+Search with m* found 371 hits
+Search with y* found 136 hits
+Search with i* found 357 hits
+Search with j* found 81 hits
+Search with c* found 365 hits
+Search with b* found 241 hits
+Search with q* found 26 hits
+Search with i* found 357 hits
+Search with t* found 365 hits
+Search with p* found 179 hits
+Search with w* found 401 hits
+Search with i* found 357 hits
+Search with n* found 258 hits
+Search with b* found 241 hits
+Search with v* found 44 hits
+Search with s* found 416 hits
+Search with l* found 402 hits
+Search with f* found 187 hits
+Search with w* found 401 hits
+Search with u* found 410 hits
+Search with p* found 179 hits
+Search with k* found 73 hits
+Search with u* found 410 hits
+Search with m* found 371 hits
+Search with m* found 371 hits
+Search with s* found 416 hits
+Search with x* found 26 hits
+Search with i* found 357 hits
+Search with n* found 258 hits
+Search with k* found 73 hits
+Search with r* found 170 hits
+Search with b* found 241 hits
+Search with j* found 81 hits
+Search with c* found 365 hits
+Search with m* found 371 hits
+Search with l* found 402 hits
+Search with e* found 210 hits
+Search with d* found 338 hits
+Search with w* found 401 hits
+Search with x* found 26 hits
+Search with s* found 416 hits
+Search with v* found 44 hits
+Search with i* found 357 hits
+Search with i* found 357 hits
+Search with w* found 401 hits
+Search with h* found 376 hits
+Search with e* found 210 hits
+Search with i* found 357 hits
+Search with m* found 371 hits
+Search with b* found 241 hits
+Search with f* found 187 hits
+Search with d* found 338 hits
+Search with n* found 258 hits
+Search with a* found 314 hits
+Search with r* found 170 hits
+Search with c* found 365 hits
+Search with v* found 44 hits
+Search with q* found 26 hits
+Search with k* found 73 hits
+Search with l* found 402 hits
+Search with d* found 338 hits
+Search with f* found 187 hits
+Search with o* found 283 hits
+Search with m* found 371 hits
+Search with h* found 376 hits
+Search with c* found 365 hits
+Search with a* found 314 hits
+Search with l* found 402 hits
+Search with f* found 187 hits
+Search with y* found 136 hits
+Search with m* found 371 hits
+Search with b* found 241 hits
+Search with u* found 410 hits
+Search with w* found 401 hits
+Search with l* found 402 hits
+Search with r* found 170 hits
+Search with e* found 210 hits
+Search with p* found 179 hits
+Search with d* found 338 hits
+Search with r* found 170 hits
+Search with s* found 416 hits
+Search with k* found 73 hits
+Search with w* found 401 hits
+Search with g* found 325 hits
+Search with n* found 258 hits
+Search with o* found 283 hits
+Search with i* found 357 hits
+Search with j* found 81 hits
+Search with h* found 376 hits
+Search with v* found 44 hits
+Search with uk* found 0 hits
+Search with bm* found 0 hits
+Search with yk* found 0 hits
+Search with oa* found 0 hits
+Search with yv* found 0 hits
+Search with bl* found 5 hits
+Search with yw* found 0 hits
+Search with kl* found 0 hits
+Search with qp* found 0 hits
+Search with bt* found 0 hits
+Search with jw* found 0 hits
+Search with hg* found 0 hits
+Search with du* found 89 hits
+Search with wm* found 0 hits
+Search with ef* found 4 hits
+Search with kb* found 1 hits
+Search with rn* found 0 hits
+Search with nr* found 0 hits
+Search with xe* found 0 hits
+Search with tw* found 5 hits
+Search with bu* found 40 hits
+Search with ka* found 12 hits
+Search with rv* found 0 hits
+Search with nj* found 0 hits
+Search with lp* found 0 hits
+Search with fu* found 10 hits
+Search with om* found 0 hits
+Search with br* found 28 hits
+Search with ha* found 154 hits
+Search with gn* found 0 hits
+Search with fq* found 0 hits
+Search with rx* found 0 hits
+Search with ef* found 4 hits
+Search with rf* found 0 hits
+Search with ml* found 0 hits
+Search with cn* found 0 hits
+Search with gn* found 0 hits
+Search with pb* found 0 hits
+Search with je* found 1 hits
+Search with ku* found 0 hits
+Search with vr* found 0 hits
+Search with rk* found 0 hits
+Search with hu* found 21 hits
+Search with co* found 159 hits
+Search with ul* found 0 hits
+Search with fd* found 0 hits
+Search with ew* found 0 hits
+Search with dl* found 0 hits
+Search with eu* found 0 hits
+Search with qq* found 0 hits
+Search with gs* found 0 hits
+Search with go* found 268 hits
+Search with iw* found 0 hits
+Search with pr* found 46 hits
+Search with bc* found 0 hits
+Search with ow* found 0 hits
+Search with ui* found 0 hits
+Search with je* found 1 hits
+Search with dl* found 0 hits
+Search with ub* found 0 hits
+Search with ya* found 29 hits
+Search with ee* found 0 hits
+Search with yh* found 0 hits
+Search with pg* found 0 hits
+Search with cg* found 0 hits
+Search with wk* found 8 hits
+Search with bg* found 0 hits
+Search with yk* found 0 hits
+Search with dp* found 0 hits
+Search with cg* found 0 hits
+Search with tt* found 13 hits
+Search with fq* found 0 hits
+Search with co* found 159 hits
+Search with ug* found 0 hits
+Search with cq* found 0 hits
+Search with hc* found 0 hits
+Search with sn* found 1 hits
+Search with is* found 1 hits
+Search with ux* found 0 hits
+Search with yy* found 0 hits
+Search with gx* found 0 hits
+Search with jh* found 0 hits
+Search with gi* found 17 hits
+Search with rj* found 0 hits
+Search with aw* found 4 hits
+Search with rw* found 0 hits
+Search with qy* found 0 hits
+Search with nu* found 6 hits
+Search with pl* found 22 hits
+Search with bs* found 0 hits
+Search with ck* found 0 hits
+Search with ww* found 1 hits
+Search with xf* found 1 hits
+Search with pu* found 9 hits
+Search with fq* found 0 hits
+Search with tl* found 0 hits
+Search with rf* found 0 hits
+Search with sx* found 0 hits
+Search with ql* found 0 hits
+Search with ks* found 0 hits
+Started indexing.
+Indexed 200 documents.
+Search with k* found 89 hits
+Search with e* found 279 hits
+Search with p* found 215 hits
+Search with b* found 306 hits
+Search with e* found 279 hits
+Search with g* found 408 hits
+Search with v* found 50 hits
+Search with t* found 462 hits
+Search with r* found 213 hits
+Search with a* found 394 hits
+Search with o* found 373 hits
+Search with v* found 50 hits
+Search with m* found 478 hits
+Search with n* found 315 hits
+Search with s* found 530 hits
+Search with n* found 315 hits
+Search with u* found 513 hits
+Search with l* found 527 hits
+Search with i* found 453 hits
+Search with a* found 394 hits
+Search with e* found 279 hits
+Search with f* found 244 hits
+Search with n* found 315 hits
+Search with v* found 50 hits
+Search with m* found 478 hits
+Search with h* found 497 hits
+Search with u* found 513 hits
+Search with d* found 427 hits
+Search with u* found 513 hits
+Search with h* found 497 hits
+Search with y* found 169 hits
+Search with f* found 244 hits
+Search with l* found 527 hits
+Search with o* found 373 hits
+Search with i* found 453 hits
+Search with p* found 215 hits
+Search with w* found 503 hits
+Search with g* found 408 hits
+Search with m* found 478 hits
+Search with q* found 34 hits
+Search with i* found 453 hits
+Search with d* found 427 hits
+Search with m* found 478 hits
+Search with v* found 50 hits
+Search with r* found 213 hits
+Search with i* found 453 hits
+Search with j* found 106 hits
+Search with o* found 373 hits
+Search with t* found 462 hits
+Search with t* found 462 hits
+Search with p* found 215 hits
+Search with a* found 394 hits
+Search with b* found 306 hits
+Search with f* found 244 hits
+Search with w* found 503 hits
+Search with o* found 373 hits
+Search with m* found 478 hits
+Search with t* found 462 hits
+Search with t* found 462 hits
+Search with j* found 106 hits
+Search with b* found 306 hits
+Search with s* found 530 hits
+Search with q* found 34 hits
+Search with m* found 478 hits
+Search with k* found 89 hits
+Search with b* found 306 hits
+Search with f* found 244 hits
+Search with h* found 497 hits
+Search with i* found 453 hits
+Search with t* found 462 hits
+Search with y* found 169 hits
+Search with q* found 34 hits
+Search with w* found 503 hits
+Search with n* found 315 hits
+Search with o* found 373 hits
+Search with q* found 34 hits
+Search with v* found 50 hits
+Search with a* found 394 hits
+Search with g* found 408 hits
+Search with r* found 213 hits
+Search with w* found 503 hits
+Search with x* found 39 hits
+Search with s* found 530 hits
+Search with x* found 39 hits
+Search with d* found 427 hits
+Search with r* found 213 hits
+Search with o* found 373 hits
+Search with s* found 530 hits
+Search with l* found 527 hits
+Search with j* found 106 hits
+Search with c* found 473 hits
+Search with o* found 373 hits
+Search with e* found 279 hits
+Search with v* found 50 hits
+Search with d* found 427 hits
+Search with o* found 373 hits
+Search with w* found 503 hits
+Search with i* found 453 hits
+Search with y* found 169 hits
+Search with h* found 497 hits
+Search with ea* found 66 hits
+Search with bc* found 0 hits
+Search with nr* found 0 hits
+Search with um* found 0 hits
+Search with sb* found 0 hits
+Search with ep* found 0 hits
+Search with yy* found 0 hits
+Search with nf* found 0 hits
+Search with qf* found 0 hits
+Search with xe* found 0 hits
+Search with qc* found 0 hits
+Search with su* found 33 hits
+Search with xw* found 0 hits
+Search with mx* found 0 hits
+Search with fn* found 0 hits
+Search with fm* found 0 hits
+Search with ni* found 23 hits
+Search with ob* found 1 hits
+Search with bl* found 7 hits
+Search with pt* found 0 hits
+Search with nu* found 9 hits
+Search with lo* found 222 hits
+Search with vb* found 0 hits
+Search with tn* found 0 hits
+Search with gr* found 15 hits
+Search with rw* found 0 hits
+Search with ul* found 0 hits
+Search with uu* found 0 hits
+Search with kj* found 0 hits
+Search with sp* found 12 hits
+Search with wb* found 0 hits
+Search with ek* found 0 hits
+Search with ls* found 1 hits
+Search with nm* found 0 hits
+Search with he* found 171 hits
+Search with iu* found 0 hits
+Search with bt* found 2 hits
+Search with lw* found 0 hits
+Search with vg* found 0 hits
+Search with mc* found 2 hits
+Search with ah* found 66 hits
+Search with cw* found 1 hits
+Search with sy* found 1 hits
+Search with sd* found 0 hits
+Search with in* found 28 hits
+Search with uf* found 0 hits
+Search with qa* found 0 hits
+Search with rd* found 1 hits
+Search with uf* found 0 hits
+Search with pc* found 0 hits
+Search with lb* found 0 hits
+Search with ym* found 0 hits
+Search with ul* found 0 hits
+Search with ms* found 19 hits
+Search with ty* found 4 hits
+Search with xt* found 0 hits
+Search with ga* found 15 hits
+Search with tb* found 0 hits
+Search with yo* found 26 hits
+Search with gh* found 1 hits
+Search with ce* found 6 hits
+Search with ov* found 10 hits
+Search with gg* found 0 hits
+Search with yd* found 0 hits
+Search with no* found 157 hits
+Search with ia* found 0 hits
+Search with rh* found 0 hits
+Search with on* found 81 hits
+Search with ub* found 0 hits
+Search with gq* found 0 hits
+Search with de* found 135 hits
+Search with nl* found 0 hits
+Search with gh* found 1 hits
+Search with pf* found 0 hits
+Search with vx* found 0 hits
+Search with oa* found 0 hits
+Search with ed* found 2 hits
+Search with vk* found 0 hits
+Search with ju* found 64 hits
+Search with qa* found 0 hits
+Search with my* found 125 hits
+Search with ae* found 0 hits
+Search with io* found 0 hits
+Search with tg* found 0 hits
+Search with sb* found 0 hits
+Search with wx* found 0 hits
+Search with hk* found 0 hits
+Search with lo* found 222 hits
+Search with tb* found 0 hits
+Search with vr* found 0 hits
+Search with cn* found 0 hits
+Search with sg* found 1 hits
+Search with qq* found 0 hits
+Search with td* found 9 hits
+Search with ok* found 125 hits
+Search with dd* found 0 hits
+Search with lf* found 0 hits
+Search with jt* found 0 hits
+Search with ve* found 17 hits
+Search with aq* found 0 hits
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+陶傑(1958年8月17日),
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+原å係曹æ·ï¼Œé¦™æ¸¯å°ˆæ¬„作家åŠå‚³åª’工作者,
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+有「香江第一æ‰åã€å˜…稱號,
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_hk/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+以文ç†è¾›è¾£ã€ç«‹å ´è¦ªè¥¿æ–¹ï¼ˆå°¤å…¶æ˜¯è‹±åœ‹ï¼‰è¦‹ç¨±ã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+美国总统奥巴马星期一(11月16日)在上海与ä¸å›½é’年对è¯ï¼Œ
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+他回ç”了现场å¬ä¼—和网民的æ问,
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+内容涉åŠå°æ¹¾ã€ä¸ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç‰ã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_prc/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+奥巴马在上海科技馆é¢å¯¹500ä½™å上海é’å¹´
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+奥巴马在上海与ä¸å›½é’年人对è¯
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+美国总统奥巴马星期一(11月16日)在上海与ä¸å›½é’年对è¯ï¼Œä»–回ç”了现场å¬ä¼—和网民的æ问,内容涉åŠå°æ¹¾ã€ä¸ç¾Žè´¸æ˜“以åŠçŽ¯å¢ƒé—®é¢˜ç‰ã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+奥巴马在上海科技馆é¢å¯¹500ä½™å上海é’年以åŠæ•°ä»¥ä¸‡è®¡çš„ä¸å›½äº’è”网使用者进行了一场问ç”会。
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_simple/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+这次对è¯ç”±å¤æ—¦å¤§å¦æ ¡é•¿æ¨çŽ‰è‰¯ä¸»æŒã€‚美国驻ä¸å›½å¤§ä½¿æ´ªåšåŸ¹è‡´è¾žã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+拈花惹è‰è¶£ å—投花å‰å˜‰å¹´è¯ç’€ç’¨è¿Žè³“
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+2009å—投花å‰å˜‰å¹´è¯â”€ã€ŒèŠ±ç¾å¹¸ç¦â€§æš¢éŠå—投ã€
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+於日å‰(11/15)å‡å—投縣埔里鎮埔里花å‰ç‰©æµä¸å¿ƒéš†é‡ç™»å ´ï¼Œ
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ch_tw/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+在為期五å天的活動ä¸ï¼Œ
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+Un approvisionnement sûr et durable
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+L'énergie est au cœur de nos vies. Nous dépendons d'elle pour
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+nous déplacer, pour chauffer nos maisons ou les rafraîchir,
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/fr/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+pour faire fonctionner nos usines, nos exploitations agricoles et nos
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+שפעת החזירי×: תושבת × ×¦×¨×ª עילית בת 51 ×œ×œ× ×ž×—×œ×•×ª רקע מתה
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+בבית ×”×—×•×œ×™× "העמק".החולה טופלה ×תמול בתרופה ×”× ×™×¡×™×•× ×™×ª
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+"פרהמיביר",××•×œ× ×ž×¦×‘×” הוסיף להידרדר
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/he/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+,והבוקר × ×§×‘×¢ מותה.החולה, תושבת × ×¦×¨×ª עילית
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+æ—¥æœ¬ç ”ç©¶ãƒ»çŸ¥çš„äº¤æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥ã‚’実施ã—ã¦ãŠã‚Šã€
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ãƒã‚°ãƒ©ãƒ ã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+団体ã«å¯¾ã—ã¦åŠ©æˆé‡‘ã€ç ”究奨å¦é‡‘ç‰ã‚’æä¾›ã—ã¦ã„ã¾ã™ã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+国際交æµåŸºé‡‘(ジャパンファウンデーション)ã¯ã€ä¸»ã«æ–‡åŒ–芸術交æµã€æµ·å¤–ã«ãŠã‘る日本語教育ã€
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+æ—¥æœ¬ç ”ç©¶ãƒ»çŸ¥çš„äº¤æµã®3ã¤ã®åˆ†é‡Žã«ãŠã„ã¦äº‹æ¥ã‚’実施ã—ã¦ãŠã‚Šã€
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+ãã‚Œãžã‚Œã®åˆ†é‡Žã§å…¬å‹Ÿãƒ—ãƒã‚°ãƒ©ãƒ ã«ã‚ˆã‚Šå›½éš›äº¤æµäº‹æ¥ã‚’ä¼ç”»ã™ã‚‹å€‹äººã‚„
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/jp_old/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+団体ã«å¯¾ã—ã¦åŠ©æˆé‡‘ã€ç ”究奨å¦é‡‘ç‰ã‚’æä¾›ã—ã¦ã„ã¾ã™ã€‚
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+ì œê°€ ë…¼ìŠ¤í†±ì„ ë³´ëŠ”ë°ìš”, 김지우가 ìŠ¤í† ë¦¬ìƒìœ¼ë¡œ ì¼ë³¸ìœ¼ë¡œ 간다구 하네요
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+ê·¸ëŸ¬ê³ ë‚˜ì„œ ë‹¤ìŒ ì´ì•¼ê¸° ì˜ˆê³ ëŠ” ì•ˆë‚˜ì˜¤ê³ ê¹€ì§€ìš°ì˜ ì²¨ë¶€í„° ì—¬íƒœê¹Œì§€ì˜ ì´ë¯¸ì§€ë§Œ 보여주ê³
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+그냥 ë냈는ë°ìš”, ì´ê±° 진짜 ì´ë³„하는거 같다는 ìƒê°ì´.
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/ko/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+그때 여승í˜ì²˜ëŸ¼ 논스톱 극중 ì¸ë¬¼ì—ì„œ ë¹ íŠ¸ë¦¬ëŠ”ê±´ê°€ìš”?
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/1.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชà¸à¹Œà¸ ูมิปัà¸à¸à¸²à¸—้à¸à¸‡à¸–ิ่นด้านดาราศาสตร์ไทยà¸à¸¥à¹ˆà¸²à¸§à¸§à¹ˆà¸² 17-18 พฤศจิà¸à¸²à¸¢à¸™ 2552
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/2.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+จะมีปราà¸à¸à¸à¸²à¸£à¸“์ดาราศาสตร์ครั้งสำคัà¸à¸—ี่ชาวไทยเคยประทับใจมาà¹à¸¥à¹‰à¸§à¹€à¸¡à¸·à¹ˆà¸à¸›à¸µ 2541-2544 คืà¸à¹ƒà¸™à¸„ืนวันที่ 17 ต่à¸à¹€à¸™à¸·à¹ˆà¸à¸‡à¸§à¸±à¸™à¸—ี่ 18 พฤศจิà¸à¸²à¸¢à¸™ 2552
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/3.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+จะเà¸à¸´à¸”ปราà¸à¸à¸à¸²à¸£à¸“์à¸à¸™à¸”าวตà¸à¸ˆà¸²à¸à¸à¸¥à¸¸à¹ˆà¸¡à¸”าวสิงโตหรืà¸à¸à¸™à¸”าวตà¸à¹€à¸¥à¹‚à¸à¸™à¸´à¸„ส์ที่นัà¸à¸”าราศาสตร์ทั้งหลายคาดว่าจะมีประมาณ 100-150 ดวงต่à¸à¸Šà¸±à¹ˆà¸§à¹‚มง
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/loc/th/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+เมื่à¸à¸§à¸±à¸™à¸—ี่ 8 มีนา ที่ผ่านมาผมได้ไปงานที่โรงเรียน เหมืà¸à¸™à¹€à¸Šà¹ˆà¸™à¸—ุà¸à¸›à¸µà¸•à¸à¸™à¸à¸¥à¸±à¸šà¹€à¸”ินมา ตามตึà¸à¸¢à¸²à¸§à¹€à¸žà¸·à¹ˆà¸à¸ˆà¸°à¸à¸¥à¸±à¸šà¸¡à¸²à¸—างประตูด้านเพาะช่าง
\ No newline at end of file
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/Albert Einstein, E mc 2.mp3 has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/One Step For Man.mp3 has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/media/The Eagle Has Landed.mp3 has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/DCTDecode.pdf has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/geology.pdf has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/samplepdf.pdf has changed
Binary file searchengine/cpix/tsrc/cpixunittest/data/cpixunittestcorpus/pdf/windjack.pdf has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/group/4.txt Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,1 @@
+,והבוקר × ×§×‘×¢ מותה.החולה, תושבת × ×¦×¨×ª עילית
\ No newline at end of file
--- a/searchengine/cpix/tsrc/cpixunittest/group/bld.inf Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/group/bld.inf Mon Jun 28 10:34:53 2010 +0530
@@ -17,6 +17,13 @@
PRJ_TESTEXPORTS
//
+// CPix Localization Resources
+//
+
+../../../cpix/data/resource/analyzer.loc /epoc32/winscw/c/Data/cpixunittestcorpus/resource/analyzer.loc
+../../../cpix/data/resource/thaidict.sm /epoc32/winscw/c/Data/cpixunittestcorpus/resource/thaidict.sm
+
+//
// Test Definition Files (For ITK IOCapture testing)
//
@@ -224,18 +231,32 @@
../data/cpixunittest/whitebox/delta/wrn-complete/startStage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/whitebox/delta/wrn-complete/startStage_exp_out.txt
../data/cpixunittest/whitebox/delta/wrn-complete/recoveredStage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/whitebox/delta/wrn-complete/recoveredStage_exp_out.txt
-
-
../data/cpixunittest/analysis/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/parsing_exp_out.txt
../data/cpixunittest/analysis/switchParsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/switchParsing_exp_out.txt
../data/cpixunittest/analysis/usage_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/usage_exp_out.txt
+
+../data/cpixunittest/analysis/loc/currentLocale_C_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/currentLocale_C_exp_out.txt
+../data/cpixunittest/analysis/loc/en_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/en_exp_out.txt
+../data/cpixunittest/analysis/loc/th_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/th_exp_out.txt
+../data/cpixunittest/analysis/loc/ch_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/ch_exp_out.txt
+../data/cpixunittest/analysis/loc/ko_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/ko_exp_out.txt
+../data/cpixunittest/analysis/loc/jp_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/loc/jp_exp_out.txt
+
../data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt
-../data/cpixunittest/analysis/whitebox/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing_exp_out.txt
-../data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt
+../data/cpixunittest/analysis/whitebox/parsing_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing_exp_out.txt
+../data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsing2_exp_out.txt
../data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/parsingerrors_exp_out.txt
../data/cpixunittest/analysis/whitebox/switchAnalyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/switchAnalyzer_exp_out.txt
+../data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt
../data/cpixunittest/analysis/whitebox/tokenization_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/tokenization_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/currentLocale_C_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/currentLocale_C_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/en_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/th_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/ch_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/ko_exp_out.txt
+../data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/analysis/whitebox/loc/jp_exp_out.txt
+
../data/cpixunittest/utf8/utf8_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8/utf8_exp_out.txt
../data/cpixunittest/maps/searchMaps_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/maps/searchMaps_exp_out.txt
@@ -285,6 +306,12 @@
../data/cpixunittest/utf8path/harvest_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8path/harvest_exp_out.txt
../data/cpixunittest/utf8path/search_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/utf8path/search_exp_out.txt
+../data/cpixunittest/prefixopt/unoptimized_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/prefixopt/unoptimized_exp_out.txt
+../data/cpixunittest/prefixopt/optimized_exp_out.txt /epoc32/winscw/c/Data/cpixunittest/prefixopt/optimized_exp_out.txt
+
+
+
+
//
// Corpus files
@@ -360,11 +387,45 @@
"../data/cpixunittestcorpus/query/query8.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/query/query8.txt"
"../data/cpixunittestcorpus/query/query9.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/query/query9.txt"
-"../data/cpixunittestcorpus/pdf/ctutor.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/ctutor.pdf"
-"../data/cpixunittestcorpus/pdf/Empty.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/Empty.pdf"
-"../data/cpixunittestcorpus/pdf/geology.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/geology.pdf"
-"../data/cpixunittestcorpus/pdf/samplepdf.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/samplepdf.pdf"
-"../data/cpixunittestcorpus/pdf/windjack.pdf" "/epoc32/winscw/c/Data/cpixunittestcorpus/pdf/windjack.PDF"
+"../data/cpixunittestcorpus/loc/fr/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/1.txt"
+"../data/cpixunittestcorpus/loc/fr/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/2.txt"
+"../data/cpixunittestcorpus/loc/fr/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/3.txt"
+"../data/cpixunittestcorpus/loc/fr/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/fr/4.txt"
+
+"../data/cpixunittestcorpus/loc/he/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/1.txt"
+"../data/cpixunittestcorpus/loc/he/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/2.txt"
+"../data/cpixunittestcorpus/loc/he/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/3.txt"
+"../data/cpixunittestcorpus/loc/he/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/he/4.txt"
+
+"../data/cpixunittestcorpus/loc/th/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/1.txt"
+"../data/cpixunittestcorpus/loc/th/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/2.txt"
+"../data/cpixunittestcorpus/loc/th/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/3.txt"
+"../data/cpixunittestcorpus/loc/th/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/th/4.txt"
+
+"../data/cpixunittestcorpus/loc/ch_prc/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/1.txt"
+"../data/cpixunittestcorpus/loc/ch_prc/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/2.txt"
+"../data/cpixunittestcorpus/loc/ch_prc/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/3.txt"
+"../data/cpixunittestcorpus/loc/ch_prc/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_prc/4.txt"
+
+"../data/cpixunittestcorpus/loc/ch_simple/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/1.txt"
+"../data/cpixunittestcorpus/loc/ch_simple/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/2.txt"
+"../data/cpixunittestcorpus/loc/ch_simple/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/3.txt"
+"../data/cpixunittestcorpus/loc/ch_simple/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_simple/4.txt"
+
+"../data/cpixunittestcorpus/loc/ch_hk/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/1.txt"
+"../data/cpixunittestcorpus/loc/ch_hk/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/2.txt"
+"../data/cpixunittestcorpus/loc/ch_hk/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/3.txt"
+"../data/cpixunittestcorpus/loc/ch_hk/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_hk/4.txt"
+
+"../data/cpixunittestcorpus/loc/ch_tw/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/1.txt"
+"../data/cpixunittestcorpus/loc/ch_tw/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/2.txt"
+"../data/cpixunittestcorpus/loc/ch_tw/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/3.txt"
+"../data/cpixunittestcorpus/loc/ch_tw/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/ch_tw/4.txt"
+
+"../data/cpixunittestcorpus/loc/jp/1.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/1.txt"
+"../data/cpixunittestcorpus/loc/jp/2.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/2.txt"
+"../data/cpixunittestcorpus/loc/jp/3.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/3.txt"
+"../data/cpixunittestcorpus/loc/jp/4.txt" "/epoc32/winscw/c/Data/cpixunittestcorpus/loc/jp/4.txt"
PRJ_TESTMMPFILES
cpixunittest.mmp
--- a/searchengine/cpix/tsrc/cpixunittest/group/cpixunittest.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/group/cpixunittest.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -71,7 +71,8 @@
SOURCE config.cpp
SOURCE clq/uxqry.cpp
SOURCE clq/clqry.cpp
-SOURCE pdftests.cpp querytest.cpp std_log_result.cpp misc.cpp
+SOURCE pdftests.cpp querytest.cpp
+SOURCE localetestinfos.cpp misc.cpp std_log_result.cpp
USERINCLUDE ../../../../../searchsrv_plat/cpix_utility_api/inc
USERINCLUDE ../inc
@@ -94,6 +95,7 @@
STATICLIBRARY libstemmer.lib
STATICLIBRARY libitk.lib
STATICLIBRARY libcpixtools.lib
+STATICLIBRARY libanalysis.lib
// For SPI
LIBRARY efsrv.lib
--- a/searchengine/cpix/tsrc/cpixunittest/inc/config.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/inc/config.h Mon Jun 28 10:34:53 2010 +0530
@@ -25,9 +25,13 @@
//
#ifdef __WINS__
+#define RESOURCE_PATH "c:\\data\\cpixunittestcorpus\\resource\\"
#define CORPUS_PATH "c:\\data\\cpixunittestcorpus"
+#define LCORPUS_PATH L"c:\\data\\cpixunittestcorpus"
#else // __ARMv5__, assume memory card
+#define RESOURCE_PATH "e:\\data\\cpixunittestcorpus\\resource\\"
#define CORPUS_PATH "e:\\data\\cpixunittestcorpus"
+#define LCORPUS_PATH L"e:\\data\\cpixunittestcorpus"
#endif
#define SMS_TEST_CORPUS_PATH CORPUS_PATH "\\sms\\SMS_corpus.txt"
@@ -35,6 +39,9 @@
#define JPG_TEST_CORPUS_PATH CORPUS_PATH "\\jpgs"
#define MAPS_TEST_CORPUS_PATH CORPUS_PATH "\\maps"
#define MEDIA_TEST_CORPUS_PATH CORPUS_PATH "\\media"
+#define LOC_TEST_CORPUS_PATH CORPUS_PATH "\\loc"
+
+#define LJPG_TEST_CORPUS_PATH LCORPUS_PATH L"\\jpgs"
// TODO: Obsolete these
#define FILE_TEST_CORPUS_PATH STEM_TEST_CORPUS_PATH
@@ -46,6 +53,8 @@
#define CPIX_FILE_IDXDB "\\root\\file"
+#define CPIXUNITTEST_DIRECTORY "c:\\data\\cpixunittest"
+
/**
* Zero terminated array of file names
*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/inc/localetestinfos.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,51 @@
+#ifndef LOCALETESTINFOS_H_
+#define LOCALETESTINFOS_H_
+
+/**
+ * Information around testing various locales.
+ */
+
+struct EnglishLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct FrenchLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct HebrewLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct ThaiLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct ChineseLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct KoreanLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+struct JapaneseLocale {
+ static const char* LOCALE;
+ static const char* FILES[];
+ static const wchar_t* QUERIES[];
+};
+
+
+#endif /* LOCALETESTINFOS_H_ */
--- a/searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -1,24 +1,6 @@
-/*
-* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
-* All rights reserved.
-* This component and the accompanying materials are made available
-* under the terms of "Eclipse Public License v1.0"
-* which accompanies this distribution, and is available
-* at the URL "http://www.eclipse.org/legal/epl-v10.html".
-*
-* Initial Contributors:
-* Nokia Corporation - initial contribution.
-*
-* Contributors:
-*
-* Description:
-*
-*/
-
#include <wchar.h>
#include <stddef.h>
-
#include <iostream>
#include "cpixidxdb.h"
@@ -28,215 +10,202 @@
#include "config.h"
#include "testutils.h"
-#include "std_log_result.h"
-
// For testing custom analyzer
#include "CLucene.h"
#include "CLucene\analysis\AnalysisHeader.h"
#include "CLucene\util\stringreader.h"
-#include "analyzer.h"
#include "analyzerexp.h"
+#include "customanalyzer.h"
+
+#include "localetestinfos.h"
+
+#include "spi/locale.h"
+#include "cpixstrtools.h"
using namespace Cpt::Lex;
using namespace Cpt::Parser;
using namespace Cpix::AnalyzerExp;
void PrintToken(Cpt::Lex::Token token) {
- switch (token.type()) {
- case TOKEN_WS: printf("space"); break;
- case TOKEN_ID: printf("id"); break;
- case TOKEN_LIT: printf("lit"); break;
- case TOKEN_STRLIT: printf("str-lit"); break;
- case TOKEN_REALLIT: printf("real-lit"); break;
- case TOKEN_INTLIT: printf("int-lit"); break;
- case TOKEN_LEFT_BRACKET: printf("lbr"); break;
- case TOKEN_RIGHT_BRACKET: printf("rbr"); break;
- case TOKEN_COMMA: printf("comma"); break;
- case TOKEN_PIPE: printf("pipe"); break;
- case TOKEN_SWITCH : printf("sw"); break;
- case TOKEN_CASE : printf("case"); break;
- case TOKEN_DEFAULT : printf("default"); break;
- case TOKEN_LEFT_BRACE : printf("lbc"); break;
- case TOKEN_RIGHT_BRACE : printf("rbc"); break;
- case TOKEN_COLON : printf("cl"); break;
- case TOKEN_TERMINATOR : printf("tr"); break;
-
- default: printf("unknown"); break;
- }
- printf("('%S')", (token.text()).c_str());
+ printf("%S('%S')", token.type(), token.text());
}
-void TestTokenization6(Itk::TestMgr * )
+void TestTokenization6(Itk::TestMgr * testMgr)
{
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
- Cpix::AnalyzerExp::Tokenizer tokenizer;
+ Cpix::AnalyzerExp::Tokenizer tokenizer;
Tokens source(tokenizer,
L"switch { "
L"case '_docuid', '_mimetype': keywords;"
L"case '_baseappclass': whitespace>lowercase;"
L"default: natural(en); "
L"}");
- WhiteSpaceFilter
+ StdFilter
tokens(source);
- while (tokens) PrintToken(tokens++);
- testResultXml(xml_file);
+ while (tokens) PrintToken(tokens++);
}
-void TestParsing(Itk::TestMgr* )
+void TestParsing(Itk::TestMgr* mgr)
{
Cpix::AnalyzerExp::Tokenizer tokenizer;
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
+
Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)");
- WhiteSpaceFilter tokens(source);
+ StdFilter tokens(source);
Lexer lexer(tokens);
-
- Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) ");
- WhiteSpaceFilter tokens2(source2);
- Lexer lexer2(tokens2);
+
+ const wchar_t* text = L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) ";
Tokens source3(tokenizer, L"foobar(zap, 0, 0.0045, 4, 'a', 9223.031)");
- WhiteSpaceFilter tokens3(source3);
+ StdFilter tokens3(source3);
Lexer lexer3(tokens3);
try {
auto_ptr<Invokation> invoke = ParseInvokation(lexer);
lexer.eatEof();
- printf("Invoke identifier: %S\n", (invoke->id()).c_str());
+ printf("Invoke identifier: %S\n", invoke->id());
printf("%d parameters\n", invoke->params().size());
- auto_ptr<Piping> piping = ParsePiping(lexer2);
- lexer2.eatEof();
+ auto_ptr<Piping> piping = ParsePiping(text);
printf("piping done.\n");
if (dynamic_cast<const Invokation*>(&piping->tokenizer())) {
- printf("Tokenizer: %S\n", dynamic_cast<const Invokation&>(piping->tokenizer()).id().c_str());
+ printf("Tokenizer: %S\n", dynamic_cast<const Invokation&>(piping->tokenizer()).id());
}
printf("%d filters\n", piping->filters().size());
invoke = ParseInvokation(lexer3);
lexer3.eatEof();
- printf("Invoke identifier: %S\n", (invoke->id()).c_str());
+ printf("Invoke identifier: %S\n", invoke->id());
printf("%d parameters\n", invoke->params().size());
} catch (ParseException& e) {
- assert_failed = 1;
printf("ParseException: %S\n", e.wWhat());
} catch (LexException& e) {
- assert_failed = 1;
printf("LexException: %S\n", e.wWhat());
}
- testResultXml(xml_file);
}
-void TestSwitch(Itk::TestMgr* )
+void TestSwitch(Itk::TestMgr* mgr)
{
Cpix::AnalyzerExp::Tokenizer tokenizer;
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
- const wchar_t* text;
- Tokens source(tokenizer, text =
+
+ const wchar_t* text =
L"switch { "
L"case '_docuid', '_mimetype': keywords;"
L"case '_baseappclass': whitespace>lowercase;"
L"default: natural(en); "
- L"}");
- WhiteSpaceFilter tokens(source);
- Lexer lexer(tokens);
+ L"}";
try {
- auto_ptr<Piping> sw = ParsePiping(lexer);
- lexer.eatEof();
+ auto_ptr<Piping> sw = ParsePiping(text);
if (dynamic_cast<const Switch*>(&sw->tokenizer())) {
const Switch* s = dynamic_cast<const Switch*>(&sw->tokenizer());
for (int i = 0; i < s->cases().size(); i++) {
const Case* c = s->cases()[i];
printf("case ");
- for (int j = 0; j < c->fields().size(); j++) {
- printf("%S", (c->fields()[j]).c_str());
+ for (int j = 0; j < c->cases().size(); j++) {
+ printf("%S", c->cases()[j]);
}
printf(": ...\n");
-// wcout<<L":"<<s->def().tokenizer().id();
+ // wcout<<L":"<<s->def().tokenizer().id();
+ }
+ printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;";
+ }
+ } catch (ParseException& e) {
+ // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl;
+ e.setContext(text);
+ printf("ParseException: %S\n", e.wWhat());
+ } catch (LexException& e) {
+ // OBS wcout<<L"LexException: "<<e.describe(text)<<endl;
+ e.setContext(text);
+ printf("LexException: %S\n", e.wWhat());
+ }
+}
+
+void TestConfigSwitch(Itk::TestMgr* mgr)
+{
+ Cpix::AnalyzerExp::Tokenizer tokenizer;
+
+ const wchar_t* text =
+ L"config_switch { "
+ L"case 'indexing': korean;"
+ L"case 'query': koreanquery;"
+ L"case 'prefix': letter;"
+ L"default: korean;"
+ L"}";
+
+ try {
+ auto_ptr<Piping> sw = ParsePiping(text);
+ if (dynamic_cast<const ConfigSwitch*>(&sw->tokenizer())) {
+ const ConfigSwitch* s = dynamic_cast<const ConfigSwitch*>(&sw->tokenizer());
+ for (int i = 0; i < s->cases().size(); i++) {
+ const Case* c = s->cases()[i];
+ printf("case ");
+ for (int j = 0; j < c->cases().size(); j++) {
+ printf("%S", c->cases()[j]);
+ }
+ printf(": ...\n");
+ // wcout<<L":"<<s->def().tokenizer().id();
}
printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;";
}
} catch (ParseException& e) {
// OBS wcout<<L"ParseException: "<<e.describe(text)<<endl;
- assert_failed = 1;
e.setContext(text);
printf("ParseException: %S\n", e.wWhat());
} catch (LexException& e) {
// OBS wcout<<L"LexException: "<<e.describe(text)<<endl;
- assert_failed = 1;
e.setContext(text);
printf("LexException: %S\n", e.wWhat());
}
- testResultXml(xml_file);
}
-void TestParsingErrors(Itk::TestMgr* )
+
+void TestParsingErrors(Itk::TestMgr* mgr)
{
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
Cpix::AnalyzerExp::Tokenizer tokenizer;
// eof
- const wchar_t* text;
- StdLexer eof(tokenizer, text = L"foobar(zap, foo, 'bar', 'raf', do, ");
+ const wchar_t* text;
try {
- ParsePiping(eof);
- eof.eatEof();
+ ParsePiping( text = L"foobar(zap, foo, 'bar', 'raf', do, " );
} catch (ParseException& e) {
- // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl;
- e.setContext(text);
printf("ParseException: %S\n", e.wWhat());
}
-
// Unfinished literal
- StdLexer lit(tokenizer, text = L"foobar(zap, foo, 'bar', 'a, raboof)");
try {
- ParsePiping(lit);
- lit.eatEof();
+ ParsePiping(text = L"foobar(zap, foo, 'bar', 'a, raboof)");
} catch (LexException& e) { // syntax error
- // OBS wcout<<L"LexException: "<<e.describe(text)<<endl;
- e.setContext(text);
printf("LexException: %S\n", e.wWhat());
} catch (ParseException& e) { // syntax error
- // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl;
- e.setContext(text);
printf("ParseException: %S\n", e.wWhat());
}
// Unknown token
- StdLexer unknown(tokenizer, text = L"foobar(!zap, foo, 'bar', 'a', raboof)");
try {
- ParsePiping(unknown);
- unknown.eatEof();
+ ParsePiping(text = L"foobar(!zap, foo, 'bar', 'a', raboof)");
} catch (LexException& e) { // syntax error
- // OBS wcout<<L"LexException: "<<e.describe(text)<<endl;
- e.setContext(text);
printf("LexException: %S\n", e.wWhat());
}
// Missing comma
- StdLexer comma(tokenizer, text = L"foobar(zap, foo, 'bar', 'a' raboof)");
try {
- ParsePiping(comma);
- comma.eatEof();
+ ParsePiping(text = L"foobar(zap, foo, 'bar', 'a' raboof)");
} catch (ParseException& e) {
- // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl;
- e.setContext(text);
printf("ParseException: %S\n", e.wWhat());
}
- testResultXml(xml_file);
+
}
const char * CustomAnalyzerTestDocs[] = {
- FILE_TEST_CORPUS_PATH "\\en\\1.txt",
- FILE_TEST_CORPUS_PATH "\\en\\2.txt",
- FILE_TEST_CORPUS_PATH "\\en\\3.txt",
- FILE_TEST_CORPUS_PATH "\\fi\\1.txt",
- FILE_TEST_CORPUS_PATH "\\fi\\2.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\1.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\2.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\3.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\4.txt",
+
+ STEM_TEST_CORPUS_PATH "\\fi\\1.txt",
+ STEM_TEST_CORPUS_PATH "\\fi\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\th\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\th\\2.txt",
+
NULL
};
@@ -258,7 +227,9 @@
printf("\n");
}
-void TestCustomAnalyzer(Itk::TestMgr * , const wchar_t* definition)
+void TestCustomAnalyzer(Itk::TestMgr * testMgr,
+ const char** files,
+ const wchar_t* definition)
{
using namespace lucene::analysis;
using namespace lucene::util;
@@ -267,22 +238,25 @@
CustomAnalyzer analyzer(definition);
printf("Analyzer \"%S\":\n", definition);
- for (int i = 0; CustomAnalyzerTestDocs[i]; i++)
+ for (int i = 0; files[i]; i++)
{
- printf("File !%s tokenized:\n", (CustomAnalyzerTestDocs[i]+1));
- FileReader file( CustomAnalyzerTestDocs[i], DEFAULT_ENCODING );
+ printf("File !%s tokenized:\n", (files[i]+1));
+ FileReader file( files[i], DEFAULT_ENCODING );
TokenStream* stream = analyzer.tokenStream( L"field", &file );
PrintTokenStream( stream );
stream->close();
_CLDELETE( stream );
}
+ printf("\n");
+}
+
+void TestCustomAnalyzer(Itk::TestMgr * testMgr, const wchar_t* definition) {
+ TestCustomAnalyzer(testMgr, CustomAnalyzerTestDocs, definition);
}
void TestCustomAnalyzers(Itk::TestMgr * testMgr)
{
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
TestCustomAnalyzer(testMgr, L"stdtokens");
TestCustomAnalyzer(testMgr, L"whitespace");
TestCustomAnalyzer(testMgr, L"whitespace>lowercase");
@@ -291,14 +265,68 @@
TestCustomAnalyzer(testMgr, L"letter>lowercase");
TestCustomAnalyzer(testMgr, L"keyword");
TestCustomAnalyzer(testMgr, L"keyword>lowercase");
- TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)");
- TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)");
- TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')");
+// TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>stem(en)"); // Does not work with NON-ASCII
+ TestCustomAnalyzer(testMgr, L"letter>lowercase>stop(en)");
+ TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'n\xe4in')"); // \xe4 is a-umlaut; escaped so the literal does not depend on the source file encoding
TestCustomAnalyzer(testMgr, L"letter>length(2, 4)");
- testResultXml(xml_file);
+ TestCustomAnalyzer(testMgr, L"standard>prefixes(1)");
+ TestCustomAnalyzer(testMgr, L"standard>prefixes(2)");
+ TestCustomAnalyzer(testMgr, L"standard>prefixes(3)");
+ TestCustomAnalyzer(testMgr, L"stdtokens>stdfilter>lowercase>thai>stop(en)");
+ TestCustomAnalyzer(testMgr, L"cjk>stop(en)");
+ TestCustomAnalyzer(testMgr, L"ngram(1)>lowercase>stop(en)");
+ TestCustomAnalyzer(testMgr, L"ngram(2)>lowercase>stop(en)");
}
-void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field)
+void TestTokenizationWithLocales(Itk::TestMgr * testMgr) {
+ printf("locale=en\n");
+ cpix_Result result;
+ cpix_SetLocale( &result, "en" );
+ TestCustomAnalyzer(testMgr, L"natural");
+
+ printf("locale=th\n");
+ cpix_SetLocale( &result, "th" );
+ TestCustomAnalyzer(testMgr, L"natural");
+
+ printf("locale=ko\n");
+ cpix_SetLocale( &result, "ko" );
+ TestCustomAnalyzer(testMgr, L"natural");
+
+ printf("locale=zh\n");
+ cpix_SetLocale( &result, "zh" );
+ TestCustomAnalyzer(testMgr, L"natural");
+
+ printf("locale=jp\n");
+ cpix_SetLocale( &result, "jp" );
+ TestCustomAnalyzer(testMgr, L"natural");
+
+ cpix_SetLocale( &result, cpix_LOCALE_AUTO );
+}
+
+template<typename T>
+void TestTokenizationWithLocale(Itk::TestMgr * testMgr) {
+ cpix_Result result;
+ cpix_SetLocale( &result, T::LOCALE );
+ TestCustomAnalyzer(testMgr, EnglishLocale::FILES, L"natural");
+ TestCustomAnalyzer(testMgr, T::FILES, L"natural");
+ cpix_SetLocale( &result, cpix_LOCALE_AUTO );
+}
+
+
+template<typename T>
+void AddTokenizationWithLocaleTest(Itk::SuiteTester* suite) {
+ suite->add(T::LOCALE,
+ &TestTokenizationWithLocale<T>,
+ T::LOCALE);
+}
+
+void TestTokenizationWithCurrentLocale(Itk::TestMgr * testMgr) {
+ cpix_Result result;
+ cpix_SetLocale( &result, cpix_LOCALE_AUTO );
+ TestCustomAnalyzer(testMgr, L"natural");
+}
+
+void TestAnalyzerWithField(Itk::TestMgr * testMgr, const wchar_t* definition, const wchar_t* field)
{
using namespace lucene::analysis;
using namespace lucene::util;
@@ -317,9 +345,7 @@
void TestSwitchAnalyzers(Itk::TestMgr * testMgr)
{
- char *xml_file = (char*)__FUNCTION__;
- assert_failed = 0;
- const wchar_t* sw = L"\n"
+ const wchar_t* sw = L"\n"
L"switch {\n"
L" case '_docuid': keyword;\n"
L" case '_appclass': whitespace>lowercase;\n"
@@ -331,23 +357,80 @@
TestAnalyzerWithField(testMgr, sw, L"Title");
TestAnalyzerWithField(testMgr, sw, L"message");
TestAnalyzerWithField(testMgr, sw, L"field");
- testResultXml(xml_file);
}
+void TestLocaleSwitchAnalyzers(Itk::TestMgr * testMgr)
+{
+ const wchar_t* sw = L"\n"
+ L"locale_switch {\n"
+ L" case 'en': stdtokens>stdfilter>lowercase>stop(en);\n"
+ L" case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);\n"
+ L" case 'ca': stdtokens>stdfilter>lowercase>accent;\n"
+ L" default: stdtokens>stdfilter>lowercase;\n"
+ L"}";
+ cpix_Result result;
+ printf("locale=en:\n");
+ cpix_SetLocale( &result, "en" );
+ TestCustomAnalyzer(testMgr, sw);
+ printf("\n");
+ printf("locale=th:\n");
+ cpix_SetLocale( &result, "th" );
+ TestCustomAnalyzer(testMgr, sw);
+ printf("\n");
+ printf("locale=ca:\n");
+ cpix_SetLocale( &result, "ca" );
+ TestCustomAnalyzer(testMgr, sw);
+ printf("\n");
+ printf("default locale:\n");
+ cpix_SetLocale( &result, "fail" );
+ TestCustomAnalyzer(testMgr, sw);
+ cpix_SetLocale( &result, cpix_LOCALE_AUTO );
+}
+
+
+Itk::TesterBase * CreateAnalysisWhiteBoxLocalizationTests() {
+ using namespace Itk;
+
+ SuiteTester
+ * tests = new SuiteTester("loc");
+
+ std::string locale;
+ locale = "currentlocale_";
+
+ Cpt::auto_array<char> name( Cpix::Spi::GetLanguageNames()[0].c_str() );
+ locale += name.get();
+
+ tests->add(locale.c_str(),
+ &TestTokenizationWithCurrentLocale,
+ locale.c_str());
+
+ AddTokenizationWithLocaleTest<EnglishLocale>(tests);
+ AddTokenizationWithLocaleTest<FrenchLocale>(tests);
+ AddTokenizationWithLocaleTest<HebrewLocale>(tests);
+ AddTokenizationWithLocaleTest<ThaiLocale>(tests);
+ AddTokenizationWithLocaleTest<KoreanLocale>(tests);
+ AddTokenizationWithLocaleTest<ChineseLocale>(tests);
+ AddTokenizationWithLocaleTest<JapaneseLocale>(tests);
+
+ return tests;
+}
Itk::TesterBase * CreateAnalysisWhiteBoxTests()
{
using namespace Itk;
SuiteTester
- * analysisTests = new SuiteTester("analysiswhitebox");
+ * analysisTests = new SuiteTester("whitebox");
analysisTests->add("analyzer",
&TestCustomAnalyzers,
"analyzer");
- analysisTests->add("switchanalyzer",
+ analysisTests->add("switchAnalyzer",
&TestSwitchAnalyzers,
- "switchanalyzer");
+ "switchAnalyzer");
+ analysisTests->add("localeSwitchAnalyzer",
+ &TestLocaleSwitchAnalyzers,
+ "localeSwitchAnalyzer");
analysisTests->add("tokenization",
TestTokenization6,
"tokenization");
@@ -357,10 +440,14 @@
analysisTests->add("parsing2",
TestSwitch,
"parsing2");
+ analysisTests->add("parsing3",
+ TestConfigSwitch,
+ "parsing3");
analysisTests->add("parsingerrors",
TestParsingErrors,
"parsingerrors");
-
+
+ analysisTests->add(CreateAnalysisWhiteBoxLocalizationTests());
return analysisTests;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/tsrc/cpixunittest/src/localetestinfos.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,140 @@
+/*
+ * localetestinfos.cpp
+ *
+ * Created on: Mar 25, 2010
+ * Author: admin
+ */
+
+#include "localetestinfos.h"
+#include "config.h"
+
+const char* EnglishLocale::LOCALE = "en";
+const char* EnglishLocale::FILES[] = {
+ STEM_TEST_CORPUS_PATH "\\en\\1.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\2.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\3.txt",
+ STEM_TEST_CORPUS_PATH "\\en\\4.txt",
+ 0
+};
+const wchar_t* EnglishLocale::QUERIES[] = {
+ L"happy",
+ L"happiness",
+ L"happening",
+ 0
+};
+
+
+const char* FrenchLocale::LOCALE = "fr";
+
+const char* FrenchLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\fr\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\fr\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\fr\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\fr\\4.txt",
+ 0
+};
+
+const wchar_t* FrenchLocale::QUERIES[] = {
+ L"d'\xe9nergie",
+ L"\xe9nergie",
+ L"elle",
+ 0
+};
+
+const char* HebrewLocale::LOCALE = "he";
+
+const char* HebrewLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\he\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\he\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\he\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\he\\4.txt",
+ 0
+};
+
+const wchar_t* HebrewLocale::QUERIES[] = {
+ L"\x05e9\x05e4\x05e2\x05ea", // L"שפעת",
+ L"\x05e4\x05e2\x05ea", // L"פעת",
+ L"\x05e9\x05e4\x05e2*", // L"שפע*",
+ L"\x05e4\x05e8*", //L"פר*",
+ 0
+};
+
+const char* ThaiLocale::LOCALE = "th";
+const char* ThaiLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\th\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\th\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\th\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\th\\4.txt",
+ 0
+};
+const wchar_t* ThaiLocale::QUERIES[] = {
+ L"\x0E14\x0E32\x0E27\x0E15\x0E01", // a thai word
+ L"\x0E21\x0E35", // another thai word
+ 0
+};
+
+
+const char* ChineseLocale::LOCALE = "ch";
+const char* ChineseLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\ch_hk\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_hk\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_hk\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_hk\\4.txt",
+
+ LOC_TEST_CORPUS_PATH "\\ch_prc\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_prc\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_prc\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_prc\\4.txt",
+
+ LOC_TEST_CORPUS_PATH "\\ch_simple\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_simple\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_simple\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_simple\\4.txt",
+
+ LOC_TEST_CORPUS_PATH "\\ch_tw\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_tw\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_tw\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\ch_tw\\4.txt",
+
+ 0
+};
+const wchar_t* ChineseLocale::QUERIES[] = {
+ L"\x53f0\x6e7e", // a chinese word
+ L"\x4e2d\x56fd", // another chinese word
+ 0
+};
+
+const char* KoreanLocale::LOCALE = "ko";
+const char* KoreanLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\ko\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\ko\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\ko\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\ko\\4.txt",
+
+ 0
+};
+const wchar_t* KoreanLocale::QUERIES[] = {
+ L"\xc2a4\xd1a0\xb9ac", // a korean word
+ L"\xc778\xbb3c", // another korean word
+ 0
+};
+
+const char* JapaneseLocale::LOCALE = "jp";
+const char* JapaneseLocale::FILES[] = {
+ LOC_TEST_CORPUS_PATH "\\jp\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\jp\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\jp\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\jp\\4.txt",
+
+ LOC_TEST_CORPUS_PATH "\\jp_old\\1.txt",
+ LOC_TEST_CORPUS_PATH "\\jp_old\\2.txt",
+ LOC_TEST_CORPUS_PATH "\\jp_old\\3.txt",
+ LOC_TEST_CORPUS_PATH "\\jp_old\\4.txt",
+
+ 0
+};
+const wchar_t* JapaneseLocale::QUERIES[] = {
+ L"\x56fd\x969b", // a japanese word
+ L"\x65e5\x672c", // another japanese word
+ 0
+};
--- a/searchengine/cpix/tsrc/perfmetrics/group/perfmetrics.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/tsrc/perfmetrics/group/perfmetrics.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -60,6 +60,7 @@
STATICLIBRARY libstemmer.lib
STATICLIBRARY libitk.lib
STATICLIBRARY libcpixtools.lib
+STATICLIBRARY libanalysis.lib
// For SPI
LIBRARY efsrv.lib
--- a/searchengine/util/cpixtools/inc/public/cpixfstools.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/cpixtools/inc/public/cpixfstools.h Mon Jun 28 10:34:53 2010 +0530
@@ -28,14 +28,7 @@
// with EINTR which means that they just have to be retried. Now, even
// if OpenC does not support this, the code here is supposed to be
// platform independent and must work on a true *NIX (like linux).
-//_SP is for single parameter
-#define Cpt_EINTR_RETRY_SP(op) while ((op == -1) && (errno == EINTR)) { ; /* NOP */ }
-
-// A lot of POSIX system calls (open, close, read, write) can fail
-// with EINTR which means that they just have to be retried. Now, even
-// if OpenC does not support this, the code here is supposed to be
-// platform independent and must work on a true *NIX (like linux).
-#define Cpt_EINTR_RETRY(res,op) while (((res=op) == -1) && (errno == EINTR)) { ; /* NOP */ }
+#define Cpt_EINTR_RETRY(res,op) while (((res=op) == -1) && (errno == EINTR)) { res ++; /* To avoid compiler warning: FIXME */ }
// Same as Cpt_EINTR_RETRY, but for cases when the return value is not
@@ -167,11 +160,22 @@
off_t filesize(int fileDesc);
+ /**
+ * Obtain the size of a directory
+ *
+ * @param path the path of the directory
+ */
+ off_t dirsize(const char * path);
/**
* Returns when the file was last modified or 0 if some error occurred.
*/
time_t filemodified(const char * path);
+
+ /**
+ * Adds delimiter if needed
+ */
+ std::string appendpath(const char* path, const char* item);
/**
* Reads line to buffer
--- a/searchengine/util/cpixtools/inc/public/cpixparsetools.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/cpixtools/inc/public/cpixparsetools.h Mon Jun 28 10:34:53 2010 +0530
@@ -68,23 +68,19 @@
* (e.g. "file*.tx?") itself is not supported)
*/
namespace Lex {
+
+ typedef const wchar_t* token_type_t;
- /**
- * Basic token types
- */
- enum TokenType {
- TOKEN_UNKNOWN = 0,
- TOKEN_EOF = 1,
- TOKEN_WS,
- TOKEN_ID,
- TOKEN_STRLIT,
- TOKEN_INTLIT,
- TOKEN_REALLIT,
- TOKEN_LIT,
-
- TOKEN_LAST_RESERVED // 8
- };
+ extern token_type_t TOKEN_UNKNOWN;
+ extern token_type_t TOKEN_EOF;
+ extern token_type_t TOKEN_WS;
+ extern token_type_t TOKEN_COMMENT;
+ extern token_type_t TOKEN_ID;
+ extern token_type_t TOKEN_STRLIT;
+ extern token_type_t TOKEN_INTLIT;
+ extern token_type_t TOKEN_REALLIT;
+ extern token_type_t TOKEN_LIT;
class LexException : public ITxtCtxtExc {
public:
@@ -106,15 +102,15 @@
*/
class Token {
public:
- Token(int type, const wchar_t* begin, const wchar_t* end);
+ Token(token_type_t type, const wchar_t* begin, const wchar_t* end);
Token();
- int type() const;
+ const wchar_t* type() const;
const wchar_t* begin() const;
const wchar_t* end() const;
int length() const;
std::wstring text() const;
private:
- int type_;
+ token_type_t type_;
const wchar_t* begin_;
const wchar_t* end_;
};
@@ -221,17 +217,61 @@
class SymbolTokenizer : public Tokenizer {
public:
- SymbolTokenizer(int tokenType, const wchar_t* symbol);
+ SymbolTokenizer(const wchar_t* tokenType, const wchar_t* symbol);
virtual void reset();
virtual Token get();
virtual TokenizerState consume(const wchar_t* cursor);
private:
const wchar_t* begin_;
const wchar_t* end_;
- int tokenType_;
+ token_type_t tokenType_;
const wchar_t* symbol_;
};
-
+
+ /**
+ * C++ style line comment, e.g. // comment
+ */
+ class LineCommentTokenizer : public Tokenizer {
+ public:
+ LineCommentTokenizer();
+ virtual void reset();
+ virtual Token get();
+ virtual TokenizerState consume(const wchar_t* cursor);
+ private:
+ enum State {
+ READY,
+ SLASH_CONSUMED,
+ COMMENT,
+ FINISHED
+ };
+ State state_;
+ const wchar_t* begin_;
+ const wchar_t* end_;
+ };
+
+ /**
+ * C style section comments, like the ones surrounding this comment
+ */
+ class SectionCommentTokenizer : public Tokenizer {
+ public:
+ SectionCommentTokenizer();
+ virtual void reset();
+ virtual Token get();
+ virtual TokenizerState consume(const wchar_t* cursor);
+ private:
+ enum State {
+ READY,
+ SLASH_CONSUMED,
+ COMMENT,
+ STAR_CONSUMED,
+ FINISH
+ };
+ State state_;
+ const wchar_t* begin_;
+ const wchar_t* end_;
+
+ };
+
/**
* Tokenizes text by using given tokenizers. Text is consumed
* until no tokenizer is in hungry state e.g., all tokenizers
@@ -303,6 +343,16 @@
virtual ~TokenIterator();
};
+
+ class WhitespaceSplitter : public TokenIterator {
+ public:
+ WhitespaceSplitter(const wchar_t* text);
+ virtual operator bool();
+ virtual Token operator++(int);
+ public:
+ const wchar_t* begin_;
+ const wchar_t* end_;
+ };
/**
* Uses tokenizer for converting given text into token stream
@@ -328,9 +378,9 @@
/**
* Filters out all tokens of type TOKEN_WS
*/
- class WhiteSpaceFilter : public TokenIterator {
+ class StdFilter : public TokenIterator {
public:
- WhiteSpaceFilter(TokenIterator& tokens);
+ StdFilter(TokenIterator& tokens);
virtual operator bool();
virtual Token operator++(int);
private:
@@ -425,7 +475,7 @@
Lexer(Lex::TokenIterator& tokens);
// throws ParseException instead of LexException on EOF.
virtual Lex::Token operator++(int);
- Lex::Token eat(int tokenType);
+ Lex::Token eat(Lex::token_type_t tokenType);
void eatEof();
std::wstring eatId();
std::wstring eatString();
@@ -441,7 +491,7 @@
StdLexer(Lex::Tokenizer& tokens, const wchar_t* text);
private:
Lex::Tokens tokens_;
- Lex::WhiteSpaceFilter ws_;
+ Lex::StdFilter filter_;
};
} // Parser
--- a/searchengine/util/cpixtools/src/cpixfstools.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/cpixtools/src/cpixfstools.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -52,7 +52,7 @@
size_t
i = len-1;
- while (1)/*(i >= 0)*/ {
+ while (i > 0) {
char c = child[i];
if (c == '\\' ||
c == '/') {
@@ -63,9 +63,8 @@
}
if (i+1 >= FILENAME_MAX
- || i+1 >= bufSize)
- //|| i < 0)
- {
+ || i+1 >= bufSize
+ || i == 0) {
return -1;
}
@@ -87,7 +86,10 @@
Cpt_EINTR_RETRY_PTR(d, opendir(path));
if (d) {
- Cpt_EINTR_RETRY_SP( closedir(d) );
+ int
+ result;
+ Cpt_EINTR_RETRY(result, closedir(d));
+
rv = true;
}
@@ -205,7 +207,7 @@
if (getparent(parent, sizeof(parent), path) >= 0) {
// make the parent
- (void)mkdirs(parent, mod);
+ mkdirs(parent, mod);
}
return mkdir(path, mod);
@@ -225,8 +227,10 @@
mod));
if (fd != -1)
{
-
- Cpt_EINTR_RETRY_SP( close(fd) );
+ int
+ result;
+ Cpt_EINTR_RETRY(result,
+ close(fd));
}
return fd == -1 ? -1 : 0;
@@ -246,7 +250,10 @@
if (rv)
{
- Cpt_EINTR_RETRY_SP( close(fd) );
+ int
+ result;
+ Cpt_EINTR_RETRY(result,
+ close(fd));
}
return rv;
@@ -318,8 +325,56 @@
return rv;
}
+
+ namespace
+ {
+ class DirectorySizeCalculator : public IFileVisitor // file visitor that sums the sizes of the files it is shown
+ {
+ public:
+
+ DirectorySizeCalculator()
+ : totalSize_(0)
+ {}
+ // Adds the size reported by filesize() for each visited file to the running total.
+ virtual bool visitFile(const char * path)
+ {
+ totalSize_ += filesize(path); // NOTE(review): filesize() result is added unchecked - confirm its failure value
+ return true;
+ }
+
+ virtual DirVisitResult visitDirPre(const char * path)
+ {
+ // Directories contribute no size of their own; path is intentionally unused.
+ (void)path;
+
+ return IFV_CONTINUE;
+ }
+
+ virtual bool visitDirPost(const char * path)
+ {
+ // Nothing to do after a directory; path is intentionally unused.
+ (void)path;
+ return true;
+ }
+ // Returned as off_t, the type dirsize() hands back, so the total is not narrowed through long.
+ off_t totalSize()
+ {
+ return totalSize_;
+ }
+
+ private:
+
+ off_t totalSize_; // running sum of the sizes of all files visited so far
+
+ };
+ }
-
+ off_t dirsize(const char* path)
+ {
+ DirectorySizeCalculator sizeCalculator;
+ traverse(path, &sizeCalculator);
+ return sizeCalculator.totalSize();
+ }
time_t filemodified(const char * path)
{
@@ -341,6 +396,17 @@
return rv;
}
+ std::string appendpath(const char* path, const char* item) // returns path + item, inserting DIR_SEPARATOR unless path already ends in '\\' or '/'
+ {
+ std::string ret;
+ ret += path;
+ if (!ret.empty() && ret[ret.length()-1] != '\\' && ret[ret.length()-1] != '/') { // an empty path has no last character to inspect, and gets no separator
+ ret += DIR_SEPARATOR;
+ }
+ ret += item;
+ return ret;
+ }
+
bool fgetline(FILE* file, std::string& line)
{
std::ostringstream buf;
@@ -655,7 +721,10 @@
DIRSentry::~DIRSentry()
{
- Cpt_EINTR_RETRY_SP( closedir(d_) );
+ int
+ result;
+
+ Cpt_EINTR_RETRY(result, closedir(d_));
}
@@ -769,7 +838,11 @@
if (fileDesc_ != NULL
&& *fileDesc_ != -1)
{
- Cpt_EINTR_RETRY_SP( close(*fileDesc_) );
+ int
+ result;
+
+ Cpt_EINTR_RETRY(result,
+ close(*fileDesc_));
}
}
@@ -790,7 +863,10 @@
{
if ( file_ != NULL )
{
- Cpt_EINTR_RETRY_SP( fclose(file_) );
+ int result;
+
+ Cpt_EINTR_RETRY(result,
+ fclose(file_));
}
}
--- a/searchengine/util/cpixtools/src/cpixparsetools.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/cpixtools/src/cpixparsetools.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -27,11 +27,67 @@
#include <iostream>
#include <sstream>
#include <stdlib.h>
+#include "wctype.h"
+
+namespace {
+
+ std::wstring describeException(std::wstring what, const wchar_t* context, const wchar_t* where, const wchar_t* where2) {
+ std::wstring line;
+ int l = 0;
+ bool found = false;
+
+ for (; ; context++) {
+ if (context == where) {
+ line += L"*here*";
+ found = true;
+ if (!where2) break;
+ }
+ if (context == where2) {
+ line += L"*here*";
+ break;
+ }
+ if (!*context) {
+ line += L"*here*";
+ break;
+ } else if (*context == '\n' && !found) {
+ l++;
+ line = L"";
+ } else {
+ line += *context;
+ }
+ }
+ for (; *context && *context != '\n' && *context != '\r'; context++) {
+ line += *context;
+ }
+
+ std::wostringstream tmp;
+ tmp<<what;
+ tmp<<L" at";
+ if ( l ) {
+ tmp<<L" line "<<(l+1);
+ }
+ tmp<<L": \n\"";
+ tmp<<line;
+ tmp<<L"\"";
+ return tmp.str();
+ }
+
+}
namespace Cpt {
namespace Lex {
+
+ token_type_t TOKEN_UNKNOWN = L"unknown";
+ token_type_t TOKEN_EOF = L"eof";
+ token_type_t TOKEN_WS = L"whitespace";
+ token_type_t TOKEN_COMMENT = L"comment";
+ token_type_t TOKEN_ID = L"identifier";
+ token_type_t TOKEN_STRLIT = L"string";
+ token_type_t TOKEN_INTLIT = L"integer";
+ token_type_t TOKEN_REALLIT = L"real number";
+ token_type_t TOKEN_LIT = L"literal";
const wchar_t ESCAPE_SYMBOL = '\\';
@@ -56,31 +112,12 @@
const wchar_t* LexException::wWhat() const throw() {
return wWhat_.c_str();
}
-
- void LexException::setContext(const wchar_t * context)
- {
- // TODO legacy of implementation of obsoleted describe() -
- // it can be optimized by doind direct substring - concat
- // operations instead of looping through context
- std::wstring tmp;
- tmp += wWhat_;
- tmp += L" at: \"";
- for (; ; context++) {
- if (context == where_) {
- tmp += L"*here*";
- }
- if (!*context) {
- break;
- }
- tmp += *context;
- }
- tmp += L"\"";
-
- wWhat_ = tmp;
+
+ void LexException::setContext(const wchar_t * context) {
+ wWhat_ = describeException(wWhat_, context, where_, NULL);
}
-
- Token::Token(int type, const wchar_t* begin, const wchar_t* end)
+ Token::Token(const wchar_t* type, const wchar_t* begin, const wchar_t* end)
: type_(type), begin_(begin), end_(end) {
}
@@ -88,7 +125,7 @@
: type_(0), begin_(0), end_(0) {
}
- int Token::type() const { return type_; };
+ token_type_t Token::type() const { return type_; };
const wchar_t* Token::begin() const { return begin_; };
const wchar_t* Token::end() const { return end_; };
int Token::length() const { return end_ - begin_; };
@@ -289,7 +326,7 @@
return TOKENIZER_HUNGRY;
}
- SymbolTokenizer::SymbolTokenizer(int tokenType, const wchar_t* symbol)
+ SymbolTokenizer::SymbolTokenizer(token_type_t tokenType, const wchar_t* symbol)
: tokenType_( tokenType ),
symbol_( symbol )
{
@@ -316,6 +353,84 @@
return TOKENIZER_FAILED;
}
}
+
+ LineCommentTokenizer::LineCommentTokenizer() : state_( READY ), begin_( 0 ), end_( 0 ) {} // null range until consume() records one, so get() never reads indeterminate pointers
+
+ void LineCommentTokenizer::reset() {
+ state_ = READY;
+ }
+ Token LineCommentTokenizer::get() {
+ return Token( TOKEN_COMMENT, begin_, end_ );
+ }
+
+ TokenizerState LineCommentTokenizer::consume(const wchar_t* cursor) {
+ switch (state_) {
+ case READY:
+ if (*cursor == '/') {
+ begin_ = cursor;
+ state_ = SLASH_CONSUMED;
+ return TOKENIZER_HUNGRY;
+ }
+ break;
+ case SLASH_CONSUMED:
+ if (*cursor == '/') {
+ state_ = COMMENT;
+ return TOKENIZER_HUNGRY;
+ }
+ break;
+ case COMMENT:
+ if (*cursor == '\n' || *cursor == '\r' || *cursor == '\0') {
+ state_ = FINISHED;
+ end_ = cursor;
+ return TOKENIZER_FINISHED;
+ }
+ return TOKENIZER_HUNGRY;
+ }
+ return TOKENIZER_FAILED;
+ }
+
+ SectionCommentTokenizer::SectionCommentTokenizer() : state_( READY ), begin_( 0 ), end_( 0 ) {} // null range until consume() records one, so get() never reads indeterminate pointers
+
+ void SectionCommentTokenizer::reset() {
+ state_ = READY;
+ }
+ Token SectionCommentTokenizer::get() {
+ return Token( TOKEN_COMMENT, begin_, end_ );
+ }
+ TokenizerState SectionCommentTokenizer::consume(const wchar_t* cursor) {
+ if (*cursor == '\0') return TOKENIZER_FAILED;
+ switch (state_) {
+ case READY:
+ if (*cursor == '/') {
+ begin_ = cursor;
+ state_ = SLASH_CONSUMED;
+ return TOKENIZER_HUNGRY;
+ }
+ break;
+ case SLASH_CONSUMED:
+ if (*cursor == '*') {
+ state_ = COMMENT;
+ return TOKENIZER_HUNGRY;
+ }
+ break;
+ case COMMENT:
+ if (*cursor == '*') {
+ state_ = STAR_CONSUMED;
+ }
+ return TOKENIZER_HUNGRY;
+ case STAR_CONSUMED:
+ if (*cursor == '/') {
+ end_ = cursor+1;
+ return TOKENIZER_FINISHED;
+ } else {
+ if (*cursor != '*') {
+ state_ = COMMENT;
+ }
+ return TOKENIZER_HUNGRY;
+ }
+ }
+ return TOKENIZER_FAILED;
+ }
MultiTokenizer::MultiTokenizer(Tokenizer** tokenizers, bool ownTokenizers)
: ownTokenizers_(ownTokenizers)
@@ -458,6 +573,28 @@
}
TokenIterator::~TokenIterator() {}
+
+ WhitespaceSplitter::WhitespaceSplitter(const wchar_t* text)
+ : begin_( text ), end_( 0 ) {}
+
+ WhitespaceSplitter::operator bool() {
+ if ( !end_ && *begin_ ) {
+ // skip whitespace
+ while (iswspace(*begin_)) begin_++;
+ end_ = begin_;
+ // consume letters
+ while (*end_ && !iswspace(*end_)) end_++;
+ }
+ return *begin_;
+ }
+
+ Token WhitespaceSplitter::operator++(int) {
+ if (!*this) throw LexException(L"Out of tokens.", begin_);
+ Token ret(TOKEN_UNKNOWN, begin_, end_);
+ begin_ = end_;
+ end_ = 0;
+ return ret;
+ }
Tokens::Tokens(Tokenizer& tokenizer, const wchar_t* text)
: cursor_(text),
@@ -504,16 +641,16 @@
}
}
- WhiteSpaceFilter::WhiteSpaceFilter(TokenIterator& tokens)
+ StdFilter::StdFilter(TokenIterator& tokens)
: tokens_(tokens), next_(), hasNext_(false) {}
- WhiteSpaceFilter::operator bool()
+ StdFilter::operator bool()
{
prepareNext();
return hasNext_;
}
- Token WhiteSpaceFilter::operator++(int)
+ Token StdFilter::operator++(int)
{
prepareNext();
if (!hasNext_) {
@@ -522,15 +659,17 @@
hasNext_ = false;
return next_;
}
- void WhiteSpaceFilter::prepareNext()
+ void StdFilter::prepareNext()
{
while (!hasNext_ && tokens_) {
next_ = tokens_++;
- if (next_.type() != TOKEN_WS) {
+ if (next_.type() != TOKEN_WS
+ && next_.type() != TOKEN_COMMENT) {
hasNext_ = true;
}
}
}
+
TokenReader::TokenReader(TokenIterator& tokens)
: tokens_(tokens),
@@ -613,30 +752,7 @@
void ParseException::setContext(const wchar_t * context)
{
- // TODO legacy of implementation of obsoleted describe() -
- // it can be optimized by doind direct substring - concat
- // operations instead of looping through context
- std::wstring tmp;
- tmp += wWhat_;
- tmp += L" at: \"";
- if (where_.type() == Lex::TOKEN_EOF) {
- tmp += context;
- tmp += L"*here*";
- } else {
- for (; ; context++) {
- if (context == where_.begin()) {
- tmp += L"*here*";
- }
- if (context == where_.end()) {
- tmp += L"*here*";
- }
- if (!*context) break;
- tmp += *context;
- }
- }
- tmp += L"\"";
-
- wWhat_ = tmp;
+ wWhat_ = describeException(wWhat_, context, where_.begin(), where_.end());
}
namespace Lit {
@@ -706,11 +822,11 @@
throw ParseException(L"Unexpected EOF", Lex::Token(Lex::TOKEN_EOF, 0, 0));
}
- Lex::Token Lexer::eat(int tokenType) {
+ Lex::Token Lexer::eat(Lex::token_type_t tokenType) {
Lex::Token token = ((*this)++);
if (token.type() != tokenType) {
std::wostringstream msg;
- msg<<"Expected token of type "<<tokenType<<" instead of token '"<<token.text()<<"' of type "<<token.type();
+ msg<<"Expected "<<tokenType<<" instead of token '"<<token.text()<<"' of type "<<token.type();
throw ParseException(msg.str().c_str(), token);
}
return token;
@@ -747,9 +863,9 @@
}
StdLexer::StdLexer(Lex::Tokenizer& tokenizer, const wchar_t* text)
- : Lexer(ws_),
+ : Lexer(filter_),
tokens_(tokenizer, text),
- ws_(tokens_)
+ filter_(tokens_)
{}
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/syntaxerrors_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/syntaxerrors_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,4 +1,6 @@
-id('stdtokens')pipe('>')id('lowercase')pipe('>')id('stopwords')lbr('(')lit(''a'')comma(',')space(' ')lit(''an'')comma(',')lit(''the)>stem('')id('en')
-LexException: Unrecognized syntax: '')' at: "stdtokens>lowercase>stopwords('a', 'an','the)>stem('en*here*')"
-id('fas')lit('-324')id('we')
-LexException: Unrecognized syntax: '?' at: "fas-324we*here*?`213ff3*21(+"
+identifier('stdtokens')pipe('>')identifier('lowercase')pipe('>')identifier('stopwords')left bracket('(')literal(''a'')comma(',')whitespace(' ')literal(''an'')comma(',')literal(''the)>stem('')identifier('en')
+LexException: Unrecognized syntax: '')' at:
+"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en*here*')"
+identifier('fas')literal('-324')identifier('we')
+LexException: Unrecognized syntax: '?' at:
+"fas-324we*here*?`213ff3*21(+"
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization1_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization1_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,1 +1,1 @@
-id('stdtokens')pipe('>')id('lowercase')pipe('>')id('stopwords')lbr('(')lit(''a'')comma(',')lit(''an'')comma(',')lit(''the'')rbr(')')pipe('>')id('stem')lbr('(')lit(''en'')rbr(')')
+identifier('stdtokens')pipe('>')identifier('lowercase')pipe('>')identifier('stopwords')left bracket('(')literal(''a'')comma(',')literal(''an'')comma(',')literal(''the'')right bracket(')')pipe('>')identifier('stem')left bracket('(')literal(''en'')right bracket(')')
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization2_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization2_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,1 +1,1 @@
-lit(''foo'')int-lit('0')int-lit('1')int-lit('-2')lit(''bar'')int-lit('+234')int-lit('-34')
+literal(''foo'')integer('0')integer('1')integer('-2')literal(''bar'')integer('+234')integer('-34')
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization3_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization3_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,1 +1,1 @@
-lit(''hallo'')real-lit('0.0')real-lit('.0')real-lit('.5')real-lit('-1.0')real-lit('-.05')int-lit('45')lit(''bar'')real-lit('+.123')real-lit('+3.1415')
+literal(''hallo'')real number('0.0')real number('.0')real number('.5')real number('-1.0')real number('-.05')integer('45')literal(''bar'')real number('+.123')real number('+3.1415')
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization4_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization4_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,2 +1,2 @@
-lit(''\' '')lit(''\\'')lit(''\a'')lit(''\
+literal(''\' '')literal(''\\'')literal(''\a'')literal(''\
'')
--- a/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization5_exp_out.txt Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/data/parsing/tokenization5_exp_out.txt Mon Jun 28 10:34:53 2010 +0530
@@ -1,1 +1,1 @@
-id('fo')unknown('for')id('fore')id('forth')id('ofor')id('oforo')id('i')unknown('if')id('ifdom')id('ifer')id('fif')id('fifi')id('forfi')id('fifor')
+identifier('fo')for('for')identifier('fore')identifier('forth')identifier('ofor')identifier('oforo')identifier('i')if('if')identifier('ifdom')identifier('ifer')identifier('fif')identifier('fifi')identifier('forfi')identifier('fifor')
--- a/searchengine/util/tsrc/cpixtoolsunittest/group/cpixtoolsunittest.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/group/cpixtoolsunittest.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -69,6 +69,7 @@
LIBRARY libpthread.lib
LIBRARY libm.lib
LIBRARY euser.lib
+LIBRARY libz.lib
// No capabilities needed by this application
CAPABILITY ReadUserData WriteUserData
--- a/searchengine/util/tsrc/cpixtoolsunittest/src/geotests.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/geotests.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -283,7 +283,7 @@
}
-void printQNrArea(Itk::TestMgr * ,
+void printQNrArea(Itk::TestMgr * testMgr,
const Cpt::QNr & qnr)
{
using namespace std;
--- a/searchengine/util/tsrc/cpixtoolsunittest/src/memorytest.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/memorytest.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -50,7 +50,7 @@
return zoo_iterator( new poly_zoo_iterator_() );
}
-void TestAutoIterator(Itk::TestMgr * ) {
+void TestAutoIterator(Itk::TestMgr * testMgr) {
zoo_iterator i = ZooIterator();
while (i) {
std::cout<<"Animal: "<<i++<<std::endl;
--- a/searchengine/util/tsrc/cpixtoolsunittest/src/mutextest.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/mutextest.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -24,7 +24,7 @@
#include "cpixsynctools.h"
-void testMutex(Itk::TestMgr * )
+void testMutex(Itk::TestMgr * mgr)
{
using namespace Cpt;
@@ -36,7 +36,7 @@
}
-void testRecursiveMutex(Itk::TestMgr * )
+void testRecursiveMutex(Itk::TestMgr * mgr)
{
using namespace Cpt;
@@ -51,7 +51,7 @@
}
-void testMultiSyncRegion(Itk::TestMgr * )
+void testMultiSyncRegion(Itk::TestMgr * mgr)
{
using namespace Cpt;
--- a/searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -24,49 +24,28 @@
using namespace Cpt::Parser;
using namespace std;
-enum TokenType {
- TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED, // 8
- TOKEN_RIGHT_BRACKET,
- TOKEN_COMMA, // 10
- TOKEN_PIPE,
- TOKEN_SWITCH,
- TOKEN_CASE,
- TOKEN_DEFAULT,
- TOKEN_LEFT_BRACE, // 15
- TOKEN_RIGHT_BRACE,
- TOKEN_COLON,
- TOKEN_TERMINATOR
-};
+const wchar_t* TOKEN_LEFT_BRACKET = L"left bracket";
+const wchar_t* TOKEN_RIGHT_BRACKET = L"right bracket";
+const wchar_t* TOKEN_COMMA = L"comma";
+const wchar_t* TOKEN_PIPE = L"pipe";
+const wchar_t* TOKEN_SWITCH = L"switch";
+const wchar_t* TOKEN_CASE = L"case";
+const wchar_t* TOKEN_DEFAULT = L"default";
+const wchar_t* TOKEN_LEFT_BRACE = L"left brace";
+const wchar_t* TOKEN_RIGHT_BRACE = L"right brace";
+const wchar_t* TOKEN_COLON = L"colon";
+const wchar_t* TOKEN_TERMINATOR = L"terminator";
void PrintToken(Cpt::Lex::Token token) {
- switch (token.type()) {
- case TOKEN_WS: wcout<<L"space"; break;
- case TOKEN_ID: wcout<<"id"; break;
- case TOKEN_LIT: wcout<<"lit"; break;
- case TOKEN_STRLIT: wcout<<"str-lit"; break;
- case TOKEN_REALLIT: wcout<<"real-lit"; break;
- case TOKEN_INTLIT: wcout<<"int-lit"; break;
- case TOKEN_LEFT_BRACKET: wcout<<"lbr"; break;
- case TOKEN_RIGHT_BRACKET: wcout<<"rbr"; break;
- case TOKEN_COMMA: wcout<<"comma"; break;
- case TOKEN_PIPE: wcout<<"pipe"; break;
- case TOKEN_SWITCH : wcout<<"sw"; break;
- case TOKEN_CASE : wcout<<"case"; break;
- case TOKEN_DEFAULT : wcout<<"default"; break;
- case TOKEN_LEFT_BRACE : wcout<<"lbc"; break;
- case TOKEN_RIGHT_BRACE : wcout<<"rbc"; break;
- case TOKEN_COLON : wcout<<"cl"; break;
- case TOKEN_TERMINATOR : wcout<<"tr"; break;
-
- default: wcout<<"unknown"; break;
- }
- wcout<<L"('"<<token.text()<<L"')";
+ wcout<<token.type()<<L"('"<<token.text()<<L"')";
}
-void TestTokenization(Itk::TestMgr * ,
+void TestTokenization(Itk::TestMgr * testMgr,
const wchar_t * inputStr)
{
WhitespaceTokenizer ws;
+ LineCommentTokenizer line;
+ SectionCommentTokenizer section;
IdTokenizer ids;
IntLitTokenizer ints;
RealLitTokenizer reals;
@@ -84,14 +63,14 @@
// and int-lit, real-lit will mean integer and real literals,
// respectively.
Tokenizer* tokenizers[] = {
- &ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
+ &ws, &line, &section, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
};
MultiTokenizer tokenizer(tokenizers);
Tokens
source(tokenizer,
inputStr);
- WhiteSpaceFilter tokens(source);
+ StdFilter tokens(source);
while (tokens) PrintToken(tokens++);
cout<<endl;
@@ -107,7 +86,7 @@
void TestTokenization2(Itk::TestMgr * testMgr)
{
TestTokenization(testMgr,
- L"'foo' 0 1 -2 'bar' +234 -34");
+ L"'foo' 0 1 -2 'bar' +234 -34 // side note");
}
@@ -121,20 +100,20 @@
void TestTokenization4(Itk::TestMgr * testMgr)
{
TestTokenization(testMgr,
- L"'\\' ''\\\\' '\\a' '\\\n'");
+ L"'\\' ''\\\\' '\\a' '\\\n' // comment\n /*foobar*/");
}
-void TestTokenization5(Itk::TestMgr * )
+void TestTokenization5(Itk::TestMgr * testMgr)
{
WhitespaceTokenizer
ws;
IdTokenizer
ids;
SymbolTokenizer
- for_(0xf00, L"for");
+ for_(L"for", L"for");
SymbolTokenizer
- if_(0xbeef, L"if");
+ if_(L"if", L"if");
Tokenizer* tokenizers[] = {
&ws, &for_, &if_, &ids, 0
};
@@ -145,14 +124,76 @@
Tokens
source(tokenizer,
L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals
- WhiteSpaceFilter
+ StdFilter
tokens(source);
while (tokens) PrintToken(tokens++);
cout<<endl;
}
-void TestTokenizationErrors(Itk::TestMgr* )
+// Tokenizes a short arithmetic text that contains line comments and section
+// comments, and prints the token stream twice: first the raw Tokens stream,
+// then the same input passed through StdFilter.
+void TestTokenization6(Itk::TestMgr * testMgr)
+{
+ WhitespaceTokenizer
+ ws;
+ LineCommentTokenizer
+ line;
+ SectionCommentTokenizer
+ section;
+ IdTokenizer
+ ids;
+ IntLitTokenizer
+ intLit;
+ RealLitTokenizer
+ realLit;
+ SymbolTokenizer
+ div(L"slash", L"/");
+ SymbolTokenizer
+ mul(L"star", L"*");
+ SymbolTokenizer
+ plus(L"plus", L"+");
+ SymbolTokenizer
+ minus(L"minus", L"-");
+ SymbolTokenizer
+ equal(L"equals", L"=");
+
+ // Zero-terminated list handed to MultiTokenizer. The comment tokenizers
+ // are listed before the "/" and "*" symbol tokenizers.
+ Tokenizer* tokenizers[] = {
+ &ws, &line, &section, &ids, &intLit, &realLit, &div, &mul, &plus, &minus, &equal, 0
+ };
+
+ MultiTokenizer
+ tokenizer(tokenizers);
+
+ // NOTE: the "// zap" literal has no trailing "\n", so it is concatenated
+ // directly with the following "//\n" literal (giving "... // zap//\n").
+ const wchar_t* text =
+ L"4 + 6 = 2 * 5\n"
+ L"6 / 2 = 1*3 // true\n"
+ L"3 / x /*important thingie*/ = 2 * y\n"
+ L"6 / x * / * / /* non sense / * / */ // zap"
+ L"//\n"
+ L"//";
+
+ {
+ cout<<"With whitespaces & comments visible"<<endl;
+ Tokens
+ tokens(tokenizer, text);
+
+ while (tokens) PrintToken(tokens++);
+ cout<<endl;
+ }
+
+ {
+ cout<<"With whitespaces & comments filtered"<<endl;
+ Tokens
+ source(tokenizer, text);
+
+ StdFilter tokens(source);
+
+ while (tokens) PrintToken(tokens++);
+ cout<<endl;
+ }
+
+}
+
+void TestTokenizationErrors(Itk::TestMgr* mgr)
{
WhitespaceTokenizer ws;
IdTokenizer ids;
@@ -171,11 +212,8 @@
try {
while (tokens) PrintToken(tokens++);
} catch (LexException& exc) {
- /* OBS
- wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl;
- */
- exc.setContext(text);
- wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
+ exc.setContext(text);
+ wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
} catch (exception& exc) {
cout<<endl<<"Exception: "<<exc.what()<<endl;
}
@@ -185,17 +223,48 @@
try {
while (tokens) PrintToken(tokens++);
} catch (LexException& exc) {
- /* OBS
- wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl;
- */
- exc.setContext(text);
- wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
+ exc.setContext(text);
+ wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
} catch (exception& exc) {
cout<<endl<<"Exception: "<<exc.what()<<endl;
}
}
}
+// Runs WhitespaceSplitter over five inputs and prints every token it yields,
+// each wrapped in double quotes, one output line per input. printf's %S
+// conversion is used because the token text is a wide-character string.
+void TestWhitespaceSplitter(Itk::TestMgr* mgr)
+{
+ {
+ // Mixed separators: space, tab, newline, carriage return.
+ // NOTE(review): the literal also embeds "\0"; if WhitespaceSplitter
+ // takes a NUL-terminated string, input ends before the final "bar" -
+ // confirm against the splitter's constructor.
+ WhitespaceSplitter tokens(L"foobar foo bar foo\tbar _*4 4bar foo*bar foo\nbar foo\rbar foo\0bar");
+ while (tokens) printf(" \"%S\"", tokens++.text().c_str());
+ printf("\n");
+ }
+
+ {
+ // Single token, no whitespace at all.
+ WhitespaceSplitter tokens(L"foobar");
+ while (tokens) printf(" \"%S\"", tokens++.text().c_str());
+ printf("\n");
+ }
+
+ {
+ // Leading and trailing whitespace around one token.
+ WhitespaceSplitter tokens(L" foobar \r\n");
+ while (tokens) printf(" \"%S\"", tokens++.text().c_str());
+ printf("\n");
+ }
+
+ {
+ // Whitespace only.
+ WhitespaceSplitter tokens(L" ");
+ while (tokens) printf(" \"%S\"", tokens++.text().c_str());
+ printf("\n");
+ }
+
+ {
+ // Empty input.
+ WhitespaceSplitter tokens(L"");
+ while (tokens) printf(" \"%S\"", tokens++.text().c_str());
+ printf("\n");
+ }
+
+}
+
Itk::TesterBase * CreateParsingTests()
{
using namespace Itk;
@@ -203,7 +272,6 @@
SuiteTester
* parsingTests = new SuiteTester("parsing");
-
parsingTests->add("tokenization1",
TestTokenization1,
"tokenization1");
@@ -223,11 +291,19 @@
parsingTests->add("tokenization5",
TestTokenization5,
"tokenization5");
+
+ parsingTests->add("tokenization6",
+ TestTokenization6,
+ "tokenization6");
parsingTests->add("syntaxerrors",
TestTokenizationErrors,
"syntaxerrors");
-
+
+ parsingTests->add("whitespace",
+ TestWhitespaceSplitter,
+ "whitespace");
+
return parsingTests;
}
--- a/searchengine/util/tsrc/cpixtoolsunittest/src/pooltests.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/pooltests.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -163,9 +163,9 @@
+// Frees the array of TestPoolItem pointers that is handed over as an untyped
+// pointer. Only the array itself is released, not the items it points to.
void ItemsArrayDeleter(void * p)
{
- /*TestPoolItem
+ // Recover the array's type before deleting: applying delete[] to a void*
+ // is undefined behaviour.
+ TestPoolItem
** items = reinterpret_cast<TestPoolItem**>(p);
- */
+
- delete[] p;
+ delete[] items;
}
@@ -207,7 +207,7 @@
-void testSingleThreadedUse(Itk::TestMgr * )
+void testSingleThreadedUse(Itk::TestMgr * testMgr)
{
using namespace Cpt;
--- a/searchengine/util/tsrc/itk/group/itk.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/itk/group/itk.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -40,8 +40,8 @@
SYSTEMINCLUDE /epoc32/include
OS_LAYER_LIBC_SYSTEMINCLUDE
OS_LAYER_STDCPP_SYSTEMINCLUDE
+OS_LAYER_GLIB_SYSTEMINCLUDE
SYSTEMINCLUDE /epoc32/include/stdapis/stlportv5
-OS_LAYER_GLIB_SYSTEMINCLUDE
// libc and euser are always needed when using main() entry point
LIBRARY libc.lib
@@ -49,6 +49,7 @@
LIBRARY libstdcpp.lib
LIBRARY libm.lib
LIBRARY euser.lib
+LIBRARY libz.lib
// No capabilities needed by this application
CAPABILITY None
--- a/searchengine/util/tsrc/itk/src/itkimpl.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/itk/src/itkimpl.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -68,6 +68,8 @@
if (isreadable(inFilePath.c_str()))
{
+ int
+ res;
duplicatedStdInFD_ = dup(STDIN_FILENO);
if (duplicatedStdInFD_ == -1)
@@ -79,7 +81,7 @@
open(inFilePath.c_str(),O_RDONLY));
if (inFileFD_ == -1)
{
- Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_));
+ Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_));
throw IOCaptureExc(inFilePath.c_str());
}
int
@@ -87,8 +89,8 @@
STDIN_FILENO);
if (newStdIn == -1)
{
- Cpt_EINTR_RETRY_SP(close(inFileFD_));
- Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_));
+ Cpt_EINTR_RETRY(res,close(inFileFD_));
+ Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_));
throw IOCaptureExc("Can't dup2(infile,stdin)");
}
}
@@ -106,8 +108,10 @@
// failures here, but they must not go unnoticed
assert(fd != -1);
- Cpt_EINTR_RETRY_SP(close(inFileFD_));
- Cpt_EINTR_RETRY_SP(close(duplicatedStdInFD_));
+ int
+ res;
+ Cpt_EINTR_RETRY(res,close(inFileFD_));
+ Cpt_EINTR_RETRY(res,close(duplicatedStdInFD_));
}
}
@@ -281,7 +285,7 @@
}
Cpt::FileSentry resFsSentry( resFs );
- ssize_t
+ size_t
firstDifferingLine = -1,
currentLine = 1;
string
--- a/searchengine/util/tsrc/itk/src/itkobservers.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/itk/src/itkobservers.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -44,7 +44,7 @@
/*****************************************************************
* TestRunConsole
*/
- void TestRunConsole::beginRun(TestMgr * ,
+ void TestRunConsole::beginRun(TestMgr * testMgr,
size_t /* testCount */,
const char * /* baseDirPath */)
{
@@ -53,14 +53,14 @@
}
- void TestRunConsole::endRun(TestMgr * ) throw ()
+ void TestRunConsole::endRun(TestMgr * testMgr) throw ()
{
using namespace std;
os_ << endl << "TEST RUN COMPLETED." << endl;
}
- void TestRunConsole::beginTestCase(TestMgr * ,
+ void TestRunConsole::beginTestCase(TestMgr * testMgr,
TesterBase * testerBase)
{
++indent_;
@@ -754,7 +754,9 @@
void ProgressFsDisplayer::deleteCurFile()
{
- Cpt_EINTR_RETRY_SP( remove(curFilePath_.c_str()) );
+ int
+ success;
+ Cpt_EINTR_RETRY(success,remove(curFilePath_.c_str()));
}
--- a/searchengine/util/tsrc/itkdemo/group/itkdemo.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/itkdemo/group/itkdemo.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -61,6 +61,7 @@
LIBRARY libm.lib
LIBRARY euser.lib
LIBRARY libpthread.lib
+LIBRARY libz.lib
// No capabilities needed by this application
CAPABILITY ReadUserData WriteUserData
--- a/searcher/searchclient/bwins/cpixsearchclientu.def Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchclient/bwins/cpixsearchclientu.def Mon Jun 28 10:34:53 2010 +0530
@@ -90,4 +90,8 @@
?ContinueHouseKeeping@RSearchServerSession@@QAEHXZ @ 89 NONAME ; int RSearchServerSession::ContinueHouseKeeping(void)
?ForceHouseKeeping@RSearchServerSession@@QAEHXZ @ 90 NONAME ; int RSearchServerSession::ForceHouseKeeping(void)
?StopHouseKeeping@RSearchServerSession@@QAEHXZ @ 91 NONAME ; int RSearchServerSession::StopHouseKeeping(void)
+ ?SetQueryParserL@RSearchServerSubSession@@QAEXH@Z @ 92 NONAME ; void RSearchServerSubSession::SetQueryParserL(int)
+ ?SetQueryParserL@CCPixSearcher@@QAEXW4TQueryParser@1@@Z @ 93 NONAME ; void CCPixSearcher::SetQueryParserL(enum CCPixSearcher::TQueryParser)
+ ?SetQueryParserL@CCPixSearcher@@QAEXAAVMCPixSetQueryParserRequestObserver@@W4TQueryParser@1@@Z @ 94 NONAME ; void CCPixSearcher::SetQueryParserL(class MCPixSetQueryParserRequestObserver &, enum CCPixSearcher::TQueryParser)
+ ?SetQueryParser@RSearchServerSubSession@@QAEXHAAVTRequestStatus@@@Z @ 95 NONAME ; void RSearchServerSubSession::SetQueryParser(int, class TRequestStatus &)
--- a/searcher/searchclient/eabi/cpixsearchclientu.def Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchclient/eabi/cpixsearchclientu.def Mon Jun 28 10:34:53 2010 +0530
@@ -107,4 +107,8 @@
_ZN20RSearchServerSession16StopHouseKeepingEv @ 106 NONAME
_ZN20RSearchServerSession17ForceHouseKeepingEv @ 107 NONAME
_ZN20RSearchServerSession20ContinueHouseKeepingEv @ 108 NONAME
+ _ZN13CCPixSearcher15SetQueryParserLENS_12TQueryParserE @ 109 NONAME
+ _ZN13CCPixSearcher15SetQueryParserLER34MCPixSetQueryParserRequestObserverNS_12TQueryParserE @ 110 NONAME
+ _ZN23RSearchServerSubSession14SetQueryParserEiR14TRequestStatus @ 111 NONAME
+ _ZN23RSearchServerSubSession15SetQueryParserLEi @ 112 NONAME
--- a/searcher/searchclient/inc/searchservercommon.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchclient/inc/searchservercommon.h Mon Jun 28 10:34:53 2010 +0530
@@ -80,7 +80,8 @@
// Set analyzer. This must be latest message. Following messages
// are not supported by server
- ESearchServerSetAnalyzer
+ ESearchServerSetAnalyzer,
+ ESearchServerSetQueryParser
};
/**
--- a/searcher/searchclient/src/ccpixsearcher.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchclient/src/ccpixsearcher.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -157,6 +157,26 @@
iSubSession.SetAnalyzer( aAnalyzer, iStatus );
SetActive();
}
+
+// CCPixSearcher::SetQueryParserL() - synchronous variant
+// Forwards the requested query parser type to the sub-session.
+// Leaves with KErrNotReady if no database is open, with KErrInUse if an
+// asynchronous request is still outstanding, or with whatever the
+// sub-session call leaves with.
+ EXPORT_C void CCPixSearcher::SetQueryParserL( TQueryParser aQueryParser )
+ {
+ if ( !iIsDatabaseOpen ) User::Leave(KErrNotReady);
+ if ( IsActive() ) User::Leave(KErrInUse);
+
+ iSubSession.SetQueryParserL( aQueryParser );
+ }
+
+// CCPixSearcher::SetQueryParserL() - asynchronous variant
+// Issues the request on the sub-session and activates this object.
+// Leaves with KErrNotReady if no database is open and with KErrInUse if
+// another request is still outstanding.
+EXPORT_C void CCPixSearcher::SetQueryParserL( MCPixSetQueryParserRequestObserver& aObserver, TQueryParser aQueryParser )
+ {
+ if ( !iIsDatabaseOpen ) User::Leave(KErrNotReady);
+ if ( IsActive() ) User::Leave(KErrInUse);
+
+ // Remember who to notify and which request is in flight before issuing it.
+ iObserver.iSetQueryParser = &aObserver;
+ iState = EStateSetQueryParser;
+ iSubSession.SetQueryParser( aQueryParser, iStatus );
+ SetActive();
+ }
+
// CCPixSearcher::FormQueryString()
// Suport method for SearchL-methods
@@ -288,6 +308,12 @@
}
break;
+ case EStateSetQueryParser:
+ if ( observer.iSetQueryParser ) {
+ observer.iSetQueryParser->HandleSetQueryParserResultL( iStatus.Int() );
+ }
+ break;
+
case EStateSearch:
delete iQueryString; iQueryString = NULL; // cleanup
--- a/searcher/searchclient/src/rsearchserversession.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchclient/src/rsearchserversession.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -192,6 +192,24 @@
SendReceive(ESearchServerSetAnalyzer, args, aStatus);
}
+// RSearchServerSubSession::SetQueryParserL()
+// Synchronous variant: sends ESearchServerSetQueryParser and leaves with the
+// returned error code if it is not KErrNone.
+EXPORT_C void RSearchServerSubSession::SetQueryParserL(TInt aQueryParser)
+ {
+ TIpcArgs args(aQueryParser);
+
+ // This is the SendReceive overload without a TRequestStatus, so the
+ // call does not return until the server has completed the request.
+ // Only a plain integer is packaged in the arguments; no descriptor
+ // has to outlive this function.
+ User::LeaveIfError( SendReceive(ESearchServerSetQueryParser, args) );
+ }
+
+// RSearchServerSubSession::SetQueryParser()
+// Asynchronous variant: sends ESearchServerSetQueryParser and reports the
+// outcome through aStatus.
+EXPORT_C void RSearchServerSubSession::SetQueryParser(TInt aQueryParser, TRequestStatus& aStatus)
+ {
+ // Only an integer is passed, so nothing local needs to stay alive after
+ // this function returns.
+ TIpcArgs args(aQueryParser);
+
+ SendReceive(ESearchServerSetQueryParser, args, aStatus);
+ }
+
// RSearchServerSubSession::Search()
EXPORT_C void RSearchServerSubSession::SearchL(const TDesC& aSearchTerms)
--- a/searcher/searchserver/group/searchserver.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/group/searchserver.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -52,7 +52,7 @@
SOURCE CHeartBeatTimer.cpp
SOURCE CCPixAsyncronizer.cpp
SOURCE CHouseKeepingHandler.cpp
-SOURCE CLogPlayerRecorder.cpp
+SOURCE CLogPlayerRecorder.cpp CCPixAbstractSearcher.cpp
LIBRARY euser.lib
LIBRARY efsrv.lib
@@ -72,11 +72,13 @@
STATICLIBRARY libstemmer.lib
STATICLIBRARY libclucene.lib
LIBRARY libpthread.lib
+STATICLIBRARY libanalysis.lib
// For SPI
LIBRARY exiflib.lib
LIBRARY MetaDataUtility.lib
LIBRARY charconv.lib
+LIBRARY lbs.lib
// Logging
LIBRARY flogger.lib
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searcher/searchserver/inc/CCPixAbstractSearcher.h Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,103 @@
+/*
+ * CCPixAbstractSearcher.h
+ *
+ * Created on: Oct 8, 2009
+ * Author: admin
+ */
+
+#ifndef CCPIXABSTRACTSEARCHER_H_
+#define CCPIXABSTRACTSEARCHER_H_
+
+#include <e32base.h>
+
+#include "cpixmaindefs.h"
+#include "cpixdoc.h"
+
+// FORWARD DECLARATIONS
+class CSearchDocument;
+class CCPixAsyncronizer;
+class MCPixAsyncronizerObserver;
+
+_LIT(KCpixDefaultSearchField, CPIX_DEFAULT_FIELD);
+
+/**
+ * Abstract searcher interface. Every operation is pure virtual;
+ * CCPixSearch derives from this class and CSearchServerSubSession holds
+ * its searcher through a pointer of this type.
+ */
+class CCPixAbstractSearcher : public CBase
+ {
+public:
+
+ /**
+ * Query parser selection passed to SetQueryParserL().
+ * In CCPixSearch, EDatabaseQueryParser builds the parser with
+ * cpix_QueryParser_create() and the current analyzer, while
+ * EIncrementalQueryParser builds it with cpix_CreatePrefixQueryParser().
+ */
+ enum TQueryParser {
+ EDatabaseQueryParser = 0,
+ EIncrementalQueryParser = 1
+ };
+
+
+public:
+
+ virtual ~CCPixAbstractSearcher();
+
+ /**
+ * Cancel any incomplete asynchronous operation
+ * @param aMessage RMessage2 of CancelAll request
+ */
+ virtual void CancelAll(const RMessage2& aMessage) = 0;
+
+ /**
+ * Search given terms from the default field.
+ * @param aSearchTerms Terms to look for.
+ * @param aObserver Observing object for this asynchronous call
+ * @param aMessage The requesting message
+ * @return ETrue if search was committed, EFalse if it was stop word for example and search was not committed
+ */
+ virtual TBool SearchL(const TDesC& aSearchTerms, MCPixAsyncronizerObserver* aObserver, const RMessage2& aMessage) = 0;
+
+ /**
+ * Complete previous call to SearchL
+ * @return Count of result documents.
+ */
+ virtual TInt SearchCompleteL() = 0;
+
+ /**
+ * Gets document from the current search results.
+ * @param aIndex Index of the requested document
+ * @param aObserver Observing object for this asynchronous call
+ * @param aMessage The requesting message
+ */
+ virtual void GetDocumentL(TInt aIndex, MCPixAsyncronizerObserver* aObserver, const RMessage2& aMessage) = 0;
+
+ /**
+ * Complete the previous GetDocumentL
+ * @return Document. Ownership is transferred to the caller of this function.
+ */
+ virtual CSearchDocument* GetDocumentCompleteL() = 0;
+
+ /**
+ * Opens the database identified by aSearchableId.
+ * NOTE(review): earlier wording said a database is created "if path is
+ * given", but this signature has no path parameter - confirm.
+ * @param aSearchableId Identifies the database this handle connects to
+ * (CCPixSearch names this parameter aBaseAppClass).
+ * @param aDefaultSearchField Default field to which query results are looked from.
+ */
+ virtual void OpenDatabaseL(const TDesC& aSearchableId, const TDesC& aDefaultSearchField = KCpixDefaultSearchField) = 0;
+
+ /**
+ * Selects the query parser used by this searcher.
+ * @param aQueryParser Parser type as an integer; CCPixSearch converts
+ * it to a TQueryParser value.
+ */
+ virtual void SetQueryParserL( TInt aQueryParser ) = 0;
+
+ /**
+ * IsOpen
+ * @returns ETrue if the database is currently open
+ */
+ virtual TBool IsOpen() = 0;
+
+ /**
+ * Sets the analyzer for this searcher.
+ *
+ * @param aAnalyzer analyzer definition string. See analyzer definition
+ * syntax in the documentation
+ */
+ virtual void SetAnalyzerL(const TDesC& aAnalyzer) = 0;
+
+public:
+
+ /**
+ * Builds a CSearchDocument from the fields of a cpix_Document.
+ * @param aDocument Source CPix document.
+ * @return New document; it is popped from the cleanup stack before
+ * being returned.
+ */
+ static CSearchDocument* ConvertDocumentL( cpix_Document* aDocument );
+
+ };
+
+
+#endif /* CCPIXABSTRACTSEARCHER_H_ */
--- a/searcher/searchserver/inc/ccpixsearch.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/inc/ccpixsearch.h Mon Jun 28 10:34:53 2010 +0530
@@ -19,6 +19,8 @@
#define CCPIXSEARCH_H_
#include <e32base.h>
+#include "CCPixAbstractSearcher.h"
+
#include <stdio.h>
#include <wchar.h>
#include <glib.h>
@@ -32,12 +34,11 @@
class CCPixAsyncronizer;
class MCPixAsyncronizerObserver;
-_LIT(KCpixDefaultSearchField, CPIX_DEFAULT_FIELD);
/**
* Symbian C++ wrapper for OpenC CPixSearch interface.
*/
-class CCPixSearch : public CBase
+class CCPixSearch : public CCPixAbstractSearcher
{
public:
@@ -115,7 +116,11 @@
*/
TBool IsOpen();
- void SetAnalyzerL(const TDesC& aAnalyzer);
+ void SetAnalyzerL(const TDesC& aAnalyzer);
+
+ void SetQueryParserL(TInt aQueryParser);
+
+ void RefreshQueryParserL();
static void InitializeL();
@@ -178,6 +183,7 @@
/**
* Query parser.
*/
+ TQueryParser iQueryParserType;
cpix_QueryParser* iQueryParser;
/**
--- a/searcher/searchserver/inc/csearchserversubsession.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/inc/csearchserversubsession.h Mon Jun 28 10:34:53 2010 +0530
@@ -23,7 +23,7 @@
// FORWARD DECLARATIONS
class CSearchServerSession;
class CCPixIdxDb;
-class CCPixSearch;
+class CCPixAbstractSearcher;
class CSearchDocument;
class CSearchServerSubSession : public CObject, public MCPixAsyncronizerObserver
@@ -35,6 +35,20 @@
public:
/**
+ * OpenSearcherL
+ * Opens CPixSearcher, if aSearchableId identifies search domain,
+ * or CPixDiscoverer, if aSearchableId identifies discovery
+ * service.
+ *
+ * @param aSearchableId either search domain or discovery service
+ * @param aDefaultfield in case aSearchableId defines search domain,
+ * this field is used as searchers default id.
+ * In case discoverer is opened, this parameter is
+ * ignored
+ */
+ void OpenSearcherL(const TDesC& aSearchableId, const TDesC& aDefaultField);
+
+ /**
* OpenDatabaseL.
* Opens database
* @param aMessage Message from client.
@@ -47,6 +61,12 @@
* @param aMessage Message from client.
*/
void SetAnalyzerL(const RMessage2& aMessage);
+
+ /**
+ * SetQueryParserL.
+ * TODO
+ */
+ void SetQueryParserL(const RMessage2& aMessage);
/**
* SearchL.
@@ -157,7 +177,7 @@
private:
CCPixIdxDb* iIndexDb;
- CCPixSearch* iSearchDb;
+ CCPixAbstractSearcher* iSearchDb;
CSearchDocument* iNextDocument;
CSearchServerSession* iSession;
};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searcher/searchserver/src/CCPixAbstractSearcher.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -0,0 +1,101 @@
+/*
+ * CCPixAbstractSearcher.cpp
+ *
+ * Created on: Oct 8, 2009
+ * Author: admin
+ */
+// The spelling must match the header's on-disk name exactly
+// (CCPixAbstractSearcher.h); a differently-cased name only resolves on
+// case-insensitive file systems.
+#include "CCPixAbstractSearcher.h"
+
+#include "Common.h"
+#include "SearchServerHelper.h"
+#include "CSearchDocument.h"
+
+// Out-of-line definition of the virtual destructor; the body is empty.
+CCPixAbstractSearcher::~CCPixAbstractSearcher() {}
+
+
+namespace {
+
+/**
+ * cpix_DocFieldEnum destroyer for TCleanupItem
+ * @param aCpixDocFieldEnum the cpix_DocFieldEnum to destroy, passed as an
+ * untyped pointer and cast back before calling
+ * cpix_DocFieldEnum_destroy()
+ */
+void CpixDocFieldEnumDestroyer(TAny* aCpixDocFieldEnum)
+ {
+ cpix_DocFieldEnum_destroy( static_cast<cpix_DocFieldEnum*>( aCpixDocFieldEnum ) );
+ }
+
+} // namespace
+
+// Converts a cpix_Document into a newly allocated CSearchDocument.
+// The document id, application class and excerpt are read first and passed
+// to the CSearchDocument constructor; every other field is then added via
+// AddFieldL(). Leaves if a CPix call reports an error. The result is popped
+// from the cleanup stack before it is returned.
+CSearchDocument* CCPixAbstractSearcher::ConvertDocumentL( cpix_Document* aDocument )
+ {
+ // Read first the system fields that are passed as constructor parameters
+ //
+ const wchar_t* documentId = cpix_Document_getFieldValue( aDocument, LCPIX_DOCUID_FIELD);
+ SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed);
+
+ // A missing field value leaves the descriptor at KNullDesC.
+ TPtrC documentIdPtr(KNullDesC);
+ if (documentId)
+ documentIdPtr.Set(reinterpret_cast<const TUint16*>(documentId));
+
+ const wchar_t* documentAppClass = cpix_Document_getFieldValue(aDocument, LCPIX_APPCLASS_FIELD);
+ SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed);
+
+ TPtrC documentAppClassPtr(KNullDesC);
+ if (documentAppClass)
+ documentAppClassPtr.Set(reinterpret_cast<const TUint16*>(documentAppClass));
+
+ const wchar_t* documentExcerpt = cpix_Document_getFieldValue(aDocument, LCPIX_EXCERPT_FIELD);
+ SearchServerHelper::CheckCpixErrorL(aDocument, KErrDatabaseQueryFailed);
+
+ TPtrC documentExcerptPtr(KNullDesC);
+ if (documentExcerpt)
+ documentExcerptPtr.Set(reinterpret_cast<const TUint16*>(documentExcerpt));
+
+ // Setup the document
+ //
+
+ CSearchDocument* document = CSearchDocument::NewLC(documentIdPtr, documentAppClassPtr, documentExcerptPtr);
+
+ // Enumerate the fields of cpix_Document and add each of them
+ // into the CSearchDocument object.
+ //
+
+ cpix_DocFieldEnum* docFieldEnum = cpix_Document_fields(aDocument);
+ SearchServerHelper::CheckCpixErrorL(aDocument, KErrDocumentAccessFailed);
+
+ // Make sure the enumerator is destroyed if anything below leaves.
+ CleanupStack::PushL( TCleanupItem(CpixDocFieldEnumDestroyer, docFieldEnum) );
+
+ cpix_Field field;
+ while (cpix_DocFieldEnum_hasMore(docFieldEnum))
+ {
+ cpix_DocFieldEnum_next(docFieldEnum, &field);
+ SearchServerHelper::CheckCpixErrorL(docFieldEnum, KErrDatabaseQueryFailed);
+
+ const wchar_t* name = cpix_Field_name(&field);
+ SearchServerHelper::CheckCpixErrorL(&field, KErrDatabaseQueryFailed);
+
+ TPtrC namePtr( reinterpret_cast<const TUint16*>( name ) );
+ if ( namePtr == TPtrC( (TUint16*)LCPIX_DOCUID_FIELD )
+ || namePtr == TPtrC( (TUint16*)LCPIX_APPCLASS_FIELD )
+ || namePtr == TPtrC( (TUint16*)LCPIX_EXCERPT_FIELD ) )
+ {
+ continue; // These fields have already been added
+ }
+
+ const wchar_t* value = cpix_Field_stringValue(&field);
+ SearchServerHelper::CheckCpixErrorL(&field, KErrDatabaseQueryFailed);
+
+ // NOTE(review): unlike the three system fields above, name and
+ // value are wrapped without a NULL check - confirm that CPix
+ // never returns NULL here once the error check has passed.
+ TPtrC stringvalue( reinterpret_cast<const TUint16*>( value ) );
+
+ // The enumeration also yields the system fields, but the name
+ // check above skips the doc-uid, appclass and excerpt fields,
+ // so only the remaining fields are added here.
+ document->AddFieldL(namePtr, stringvalue);
+ }
+
+ CleanupStack::PopAndDestroy(docFieldEnum);
+
+ CleanupStack::Pop(document);
+
+ return document;
+ }
--- a/searcher/searchserver/src/ccpixidxdb.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/src/ccpixidxdb.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -143,6 +143,9 @@
void CCPixIdxDb::InitializeL()
{
+
+ const char* KCPixResourceDirectory = "z:\\resource\\cpix"; // FIXME
+
#ifdef CPIX_LOGGING_ENABLED
_LIT(KCPixLogDirectory, "c:\\logs\\CPix\\OpenC\\");
const char* CPIX_LOG_FILE = "c:\\logs\\CPix\\OpenC\\libcpix";
@@ -186,6 +189,11 @@
SearchServerHelper::CheckCpixErrorL(initParams,
KErrCPixInitializationFailed);
+ cpix_InitParams_setResourceDir( initParams,
+ KCPixResourceDirectory );
+ SearchServerHelper::CheckCpixErrorL(initParams,
+ KErrCPixInitializationFailed);
+
cpix_InitParams_setMaxIdleSec(initParams,
IDXDB_MAXIDLE_SEC);
SearchServerHelper::CheckCpixErrorL(initParams,
@@ -478,6 +486,7 @@
if (!doc)
{
SearchServerHelper::LogErrorL(*result.err_);
+ cpix_ClearError(doc);
User::Leave(KErrCannotCreateDocument);
}
// document created, push to cleanup stack.
--- a/searcher/searchserver/src/ccpixsearch.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/src/ccpixsearch.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -27,18 +27,7 @@
#endif
-namespace {
-/**
- * cpix_DocFieldEnum destroyer for TCleanupItem
- * @param aCpixDocFieldEnum CPix document
- */
-void CpixDocFieldEnumDestroyer(TAny* aCpixDocFieldEnum)
- {
- cpix_DocFieldEnum_destroy( static_cast<cpix_DocFieldEnum*>( aCpixDocFieldEnum ) );
- }
-
-} // namespace
CCPixSearch* CCPixSearch::NewL()
{
@@ -56,7 +45,9 @@
}
CCPixSearch::CCPixSearch()
- : iPendingTask(EPendingTaskNone)
+ : iQueryParserType(EIncrementalQueryParser),
+ iPendingTask(EPendingTaskNone)
+
{
}
@@ -238,7 +229,9 @@
cpix_Hits_asyncDocResults(iHits, iPendingJobId);
SearchServerHelper::CheckCpixErrorL(iHits, KErrDocumentAccessFailed);
-
+
+ return ConvertDocumentL( &iCurrentCpixDocument );
+#if 0 // TODO XXX TIM
const wchar_t* documentId = cpix_Document_getFieldValue(&iCurrentCpixDocument, LCPIX_DOCUID_FIELD);
SearchServerHelper::CheckCpixErrorL(&iCurrentCpixDocument, KErrDatabaseQueryFailed);
@@ -300,6 +293,7 @@
OstTraceFunctionExit0( CCPIXSEARCH_GETDOCUMENTCOMPLETEL_EXIT );
return document;
+#endif // 0
}
void CCPixSearch::SetAnalyzerL(const TDesC& aAnalyzer)
@@ -321,15 +315,43 @@
iAnalyzer = cpix_Analyzer_create(&result, cAnalyzer);
SearchServerHelper::CheckCpixErrorL(&result, KErrCannotCreateAnalyzer);
- CleanupStack::PopAndDestroy( analyzer );
+ CleanupStack::PopAndDestroy( analyzer );
+
+ RefreshQueryParserL();
+ }
+
+/**
+ * Selects which query parser this searcher uses and rebuilds the parser.
+ * @param aQueryParser a TQueryParser value passed as a plain integer
+ * (presumably taken from a client request - confirm
+ * against the caller)
+ * @leave KErrArgument if aQueryParser is not a known TQueryParser value,
+ * or the leave code of RefreshQueryParserL()
+ */
+void CCPixSearch::SetQueryParserL(TInt aQueryParser)
+ {
+ // Validate before storing, so that an unknown value never replaces the
+ // current, valid parser type.
+ if ( aQueryParser != EDatabaseQueryParser
+ && aQueryParser != EIncrementalQueryParser )
+ {
+ User::Leave( KErrArgument );
+ }
+ iQueryParserType = TQueryParser(aQueryParser);
+ RefreshQueryParserL();
+ }
- iQueryParser =
- cpix_QueryParser_create(&result,
- reinterpret_cast<const wchar_t*>(iDefaultSearchFieldZ->Des().PtrZ()),
- iAnalyzer);
+/**
+ * Destroys the current query parser and creates a new one of the type held
+ * in iQueryParserType, using the default search field (and, for the
+ * database parser, the current analyzer).
+ * @leave KErrArgument if iQueryParserType is not a known type;
+ * otherwise whatever CheckCpixErrorL() leaves with when parser
+ * creation fails
+ */
+void CCPixSearch::RefreshQueryParserL()
+ {
+ // Reject an unknown type while the existing parser is still intact.
+ // Without this, neither creation branch would run and `result` would be
+ // inspected below without ever having been written.
+ if ( iQueryParserType != EDatabaseQueryParser
+ && iQueryParserType != EIncrementalQueryParser )
+ {
+ User::Leave( KErrArgument );
+ }
+
+ cpix_QueryParser_destroy( iQueryParser );
+ iQueryParser = NULL;
+ cpix_Result result;
+
+ if ( iQueryParserType == EDatabaseQueryParser )
+ {
+ iQueryParser =
+ cpix_QueryParser_create( &result,
+ reinterpret_cast<const wchar_t*>(
+ iDefaultSearchFieldZ->Des().PtrZ()),
+ iAnalyzer );
+ }
+ else // EIncrementalQueryParser: the only other value that passes the guard
+ {
+ iQueryParser =
+ cpix_CreatePrefixQueryParser( &result,
+ reinterpret_cast<const wchar_t*>(
+ iDefaultSearchFieldZ->Des().PtrZ()) );
+
+ }
SearchServerHelper::CheckCpixErrorL(&result, KErrCannotCreateQueryParser);
}
+
void CCPixSearch::OpenDatabaseL(const TDesC& aBaseAppClass, const TDesC& aDefaultSearchField)
{
// Release data associated with old database
--- a/searcher/searchserver/src/csearchserver.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/src/csearchserver.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -51,7 +51,7 @@
ESearchServerAdd, // Indexing related messages
ESearchServerCancelAll, // Cancellation
ESearchServerDatabaseDefine, // Database define
- ESearchServerSetAnalyzer+1 // Unsupported messages
+ ESearchServerSetQueryParser+1 // Unsupported messages
};
// iElementsIndex of TPolicy
--- a/searcher/searchserver/src/csearchserversession.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/src/csearchserversession.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -207,6 +207,12 @@
RECORDED_EXECUTION_END("setAnalyzer")
break;
+ case ESearchServerSetQueryParser:
+ RECORDED_EXECUTION_BEGIN
+ subsession->SetQueryParserL(aMessage);
+ RECORDED_EXECUTION_END("setQueryParser")
+ break;
+
case ESearchServerSearch:
RECORDED_EXECUTION_BEGIN
subsession->SearchL(aMessage);
--- a/searcher/searchserver/src/csearchserversubsession.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searcher/searchserver/src/csearchserversubsession.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -83,7 +83,7 @@
// CSearchServerSession::CancelAll()
void CSearchServerSubSession::CancelAll(const RMessage2& aMessage)
{
- if (iSearchDb->IsOpen())
+ if (iSearchDb && iSearchDb->IsOpen())
{
// Cancel searching
iSearchDb->CancelAll(aMessage);
@@ -102,6 +102,15 @@
TRAP_IGNORE( LOG_PLAYER_RECORD( CLogPlayerRecorder::LogCancelL( reinterpret_cast<TUint>( this ) ) ) );
}
+void CSearchServerSubSession::OpenSearcherL(const TDesC& aSearchableId, const TDesC& aDefaultField) // replaces iSearchDb with a new searcher opened on aSearchableId
+ {
+ delete iSearchDb; // discard any previously created searcher
+ iSearchDb = NULL; // presumably so the member is not left dangling if NewL() below leaves
+
+ iSearchDb = CCPixSearch::NewL();
+ iSearchDb->OpenDatabaseL( aSearchableId, aDefaultField ); // aDefaultField is handed over as the default search field
+
+ }
void CSearchServerSubSession::OpenDatabaseL(const RMessage2& aMessage)
{
HBufC* baseAppClass = HBufC::NewLC(aMessage.GetDesLength(0));
@@ -122,7 +131,7 @@
}
else
{
- iSearchDb->OpenDatabaseL(*baseAppClass, *defaultSearchField);
+ OpenSearcherL( *baseAppClass, *defaultSearchField );
}
CleanupStack::PopAndDestroy(defaultSearchField);
@@ -134,7 +143,7 @@
void CSearchServerSubSession::SetAnalyzerL(const RMessage2& aMessage)
{
- if (!iSearchDb->IsOpen() && !iIndexDb->IsOpen())
+ if (!(iSearchDb && iSearchDb->IsOpen()) && !iIndexDb->IsOpen())
{
iSession->PanicClient(aMessage, EDatabaseNotOpen);
return;
@@ -144,7 +153,7 @@
TPtr analyzerPtr = analyzer->Des();
aMessage.ReadL(0, analyzerPtr);
- if (iSearchDb->IsOpen()) {
+ if (iSearchDb && iSearchDb->IsOpen()) {
iSearchDb->SetAnalyzerL( *analyzer );
}
if (iIndexDb->IsOpen()) {
@@ -156,13 +165,30 @@
aMessage.Complete(KErrNone);
}
+// Handles the set-query-parser request: forwards the parser type carried in
+// message argument 0 to the searcher and completes the message with KErrNone.
+// Panics the client with EDatabaseNotOpen if no open searcher exists.
+void CSearchServerSubSession::SetQueryParserL(const RMessage2& aMessage)
+ {
+ // Sanity check: a searcher must exist and be open
+ if ( !iSearchDb || !iSearchDb->IsOpen() )
+ {
+ iSession->PanicClient(aMessage, EDatabaseNotOpen);
+ return;
+ }
+
+ // Argument slot 0 holds the requested parser type
+ iSearchDb->SetQueryParserL( aMessage.Int0() );
+
+ // Complete the request
+ aMessage.Complete(KErrNone);
+ }
+
+
void CSearchServerSubSession::SearchL(const RMessage2& aMessage)
{
OstTraceFunctionEntry0( CSEARCHSERVERSUBSESSION_SEARCHL_ENTRY );
PERFORMANCE_LOG_START("CSearchServerSubSession::SearchL");
// Sanity check
- if (!iSearchDb->IsOpen())
+ if (!(iSearchDb && iSearchDb->IsOpen()))
{
iSession->PanicClient(aMessage, EDatabaseNotOpen);
OstTraceFunctionExit0( CSEARCHSERVERSUBSESSION_SEARCHL_EXIT );
@@ -252,7 +278,7 @@
PERFORMANCE_LOG_START("CSearchServerSubSession::GetDocumentObjectL");
// Sanity check
- if (!iSearchDb->IsOpen())
+ if ( !iSearchDb || !iSearchDb->IsOpen() )
{
iSession->PanicClient(aMessage, EDatabaseNotOpen);
OstTraceFunctionExit0( CSEARCHSERVERSUBSESSION_GETDOCUMENTOBJECTL_EXIT );
--- a/searchsrv_plat/cpix_framework_api/inc/ccpixsearcher.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchsrv_plat/cpix_framework_api/inc/ccpixsearcher.h Mon Jun 28 10:34:53 2010 +0530
@@ -33,7 +33,7 @@
class MCPixSearchRequestObserver;
class MCPixNextDocumentRequestObserver;
class MCPixSetAnalyzerRequestObserver;
-
+class MCPixSetQueryParserRequestObserver;
// CLASS DECLARATION
/**
@@ -46,6 +46,32 @@
*/
class CCPixSearcher : public CActive
{
+ public:
+
+ enum TQueryParser {
+ /**
+ * Database query parser provides advanced syntax support
+ * for creating complex and powerful queries. This query parser
+ * is intended for accessing the CPix databases. It is not
+ * localized and it should not be used to form queries
+ * directly from user input. It may simply fail with some
+ * locales.
+ */
+ EDatabaseQueryParser = 0,
+
+ /**
+ * Query parser aimed at incremental queries provided directly
+ * by the user. This query parser is localized and should
+ * work properly for all locales. The language accepted
+ * by this parser is always the language specified by
+ * the current locale. It may behave internally somewhat
+ * differently depending on the language, but as a rule it
+ * should always provide meaningful results for direct
+ * user input.
+ */
+ EIncrementalQueryParser = 1
+ };
+
public: // Constructors and destructors
/**
@@ -122,7 +148,26 @@
/**
* SetAnalyzer. Asynchronous version
*/
- IMPORT_C void SetAnalyzerL( MCPixSetAnalyzerRequestObserver& aObserver, const TDesC& aAnalyzer );
+ IMPORT_C void SetAnalyzerL( MCPixSetAnalyzerRequestObserver& aObserver, const TDesC& aAnalyzer );
+
+ /**
+ * SetQueryParser. Synchronous version
+ *
+ * The query parser defines the query syntax used for searching. Two
+ * different query parsers are supported for two main use cases: one is
+ * powerful access to the CPix databases (EDatabaseQueryParser) and the
+ * other is incremental queries provided directly by the user
+ * (EIncrementalQueryParser).
+ *
+ * @param aQueryParser The query parser type to select
+ * @note SetQueryParserL MUST NOT be used, when searching using discovery services
+ */
+ IMPORT_C void SetQueryParserL( TQueryParser aQueryParser );
+
+ /**
+ * SetQueryParser. Asynchronous version
+ *
+ * @param aObserver Observer for this request; presumably notified through
+ * HandleSetQueryParserResultL() when the request completes
+ * @param aQueryParser The query parser type to select
+ * @note Query parser cannot be set for discovery service
+ */
+ IMPORT_C void SetQueryParserL( MCPixSetQueryParserRequestObserver& aObserver, TQueryParser aQueryParser );
/**
* SearchL. Synchronous version.
@@ -242,7 +287,8 @@
EStateOpenDatabase,
EStateSearch,
EStateGetDocument,
- EStateSetAnalyzer
+ EStateSetAnalyzer,
+ EStateSetQueryParser
};
union TObserver
@@ -252,6 +298,7 @@
MCPixSearchRequestObserver* iSearch;
MCPixNextDocumentRequestObserver* iNextDocument;
MCPixSetAnalyzerRequestObserver* iSetAnalyzer;
+ MCPixSetQueryParserRequestObserver* iSetQueryParser;
};
private:
--- a/searchsrv_plat/cpix_framework_api/inc/mcpixdatabaseobserver.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchsrv_plat/cpix_framework_api/inc/mcpixdatabaseobserver.h Mon Jun 28 10:34:53 2010 +0530
@@ -64,5 +64,25 @@
virtual void HandleSetAnalyzerResultL( TInt aError ) = 0;
};
+/**
+ * @brief Observes completions of CCPixSearcher's asynchronous SetQueryParserL requests
+ * @ingroup ClientAPI
+ *
+ * Mixin class.
+ * Observer to handle completion codes of set query parser operations.
+ *
+ * Link against: CPixSearchClient.lib
+ */
+class MCPixSetQueryParserRequestObserver
+ {
+ public:
+
+ /**
+ * Reports the outcome of a set query parser request.
+ * @param aError Completion code of an asynchronous request. KErrNone if operation
+ * was successful, otherwise system wide error code.
+ */
+ virtual void HandleSetQueryParserResultL( TInt aError ) = 0;
+ };
#endif /* MCPIXDATABASEOBSERVER_H_ */
--- a/searchsrv_plat/cpix_framework_api/inc/rsearchserversession.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchsrv_plat/cpix_framework_api/inc/rsearchserversession.h Mon Jun 28 10:34:53 2010 +0530
@@ -165,6 +165,10 @@
IMPORT_C void SetAnalyzerL(const TDesC& aAnalyzer);
IMPORT_C void SetAnalyzer(const TDesC& aAnalyzer, TRequestStatus& aStatus);
+
+ IMPORT_C void SetQueryParserL(TInt aQueryParser);
+ IMPORT_C void SetQueryParser(TInt aQueryParser, TRequestStatus& aStatus);
+
/**
* Search.
* Issues a request for a search
--- a/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchsrv_plat/cpix_utility_api/inc/cpixmaindefs.h Mon Jun 28 10:34:53 2010 +0530
@@ -196,6 +196,7 @@
*/
#define DEFAULT_CPIX_DIR "c:\\Data\\"
#define DEFAULT_CLUCENE_LOCK_DIR "c:\\system\\temp"
+#define DEFAULT_RESOURCE_DIR "c:\\Data\\"
@@ -223,18 +224,40 @@
***********************************************
*/
-#define CPIX_PIPE L">"
-#define CPIX_SWITCH L"switch"
-#define CPIX_CASE L"case"
-#define CPIX_DEFAULT L"default"
+#define CPIX_PIPE L">"
+#define CPIX_SWITCH L"switch"
+#define CPIX_LOCALE_SWITCH L"locale_switch"
+#define CPIX_CONFIG_SWITCH L"config_switch"
+#define CPIX_CASE L"case"
+#define CPIX_DEFAULT L"default"
+
+// These can be given as parameters for the 'natural' analyzer
+#define CPIX_ID_INDEXING L"indexing"
+#define CPIX_ID_QUERY L"query"
+#define CPIX_ID_PREFIX L"prefix"
+
+#define CPIX_ANALYZER_DEFAULT L"natural"
+
+// Default indexing analyzer
+//#define CPIX_ANALYZER_DEFAULT_QUERY L"natural(indexing)"
+
+// Default term query analyzer
+#define CPIX_ANALYZER_DEFAULT_QUERY L"natural(query)"
+
+// Default prefix analyzer
+#define CPIX_ANALYZER_DEFAULT_PREFIX L"natural(prefix)"
#define CPIX_ANALYZER_STANDARD L"standard"
-#define CPIX_ANALYZER_DEFAULT L"standard"
+//#define CPIX_ANALYZER_DEFAULT L"standard"
#define CPIX_TOKENIZER_STANDARD L"stdtokens"
#define CPIX_TOKENIZER_WHITESPACE L"whitespace"
#define CPIX_TOKENIZER_LETTER L"letter"
#define CPIX_TOKENIZER_KEYWORD L"keyword"
+#define CPIX_TOKENIZER_CJK L"cjk"
+#define CPIX_TOKENIZER_NGRAM L"ngram"
+#define CPIX_TOKENIZER_KOREAN L"korean"
+#define CPIX_TOKENIZER_KOREAN_QUERY L"koreanquery"
#define CPIX_FILTER_STANDARD L"stdfilter"
#define CPIX_FILTER_LOWERCASE L"lowercase"
@@ -243,11 +266,24 @@
#define CPIX_FILTER_STEM L"stem"
#define CPIX_FILTER_LENGTH L"length"
#define CPIX_FILTER_PREFIXES L"prefixes"
+#define CPIX_FILTER_THAI L"thai"
+#define CPIX_FILTER_PREFIX L"prefix"
+#define CPIX_FILTER_ELISION L"elision"
-#define CPIX_WLANG_EN L"en"
-#define CPIX_WLANG_FI L"fi"
-#define CPIX_WLANG_HU L"hu"
-#define CPIX_WLANG_RU L"ru"
+#define CPIX_WLANG_EN L"en" // english
+#define CPIX_WLANG_FI L"fi" // finnish
+#define CPIX_WLANG_HU L"hu" // hungarian
+#define CPIX_WLANG_RU L"ru" // russian
+#define CPIX_WLANG_AR L"ar" // arabic
+#define CPIX_WLANG_CN L"cn" // chinese
+#define CPIX_WLANG_CS L"cs" // czech
+#define CPIX_WLANG_DE L"de" // deutsch - german
+#define CPIX_WLANG_EL L"el" // greek
+#define CPIX_WLANG_FA L"fa" // farsi - persian
+#define CPIX_WLANG_FR L"fr" // french
+#define CPIX_WLANG_HE L"he" // hebrew
+#define CPIX_WLANG_NL L"nl" // dutch
+#define CPIX_WLANG_BN L"bn" // bangla - bengali
#define MAX_EXCERPT_LENGTH 125 //maximum exceprt length
--- a/searchsrv_plat/cpix_utility_api/inc/messageharvesterdefs.h Fri Jun 11 14:43:47 2010 +0300
+++ b/searchsrv_plat/cpix_utility_api/inc/messageharvesterdefs.h Mon Jun 28 10:34:53 2010 +0530
@@ -40,6 +40,7 @@
#define SUBJECT_FIELD "Subject"
#define FOLDER_FIELD "Folder"
#define BODY_FIELD "Body"
+#define ATTACHMENT_FIELD "Attachment"
#define LTO_FIELD L"To"
#define LCC_FIELD L"Cc"
#define LBCC_FIELD L"Bcc"
@@ -47,5 +48,6 @@
#define LSUBJECT_FIELD L"Subject"
#define LFOLDER_FIELD L"Folder"
#define LBODY_FIELD L"Body"
+#define LATTACHMENT_FIELD L"Attachment"
#endif /*__MESSAGEHARVESTERDEFS_H__*/
--- a/sis/cpixsearch.pkg.source Fri Jun 11 14:43:47 2010 +0300
+++ b/sis/cpixsearch.pkg.source Mon Jun 28 10:34:53 2010 +0530
@@ -122,3 +122,12 @@
"\epoc32\release\armv5\$(TARGET)\qcpixsearchclient.dll" - "!:\sys\bin\qcpixsearchclient.dll"
@"centrep.sisx", (0x10202BE9)
+;Email Plugin
+"\epoc32\data\z\resource\plugins\cpixemailplugin.rsc"
+-"c:\resource\plugins\cpixemailplugin.rsc"
+"\epoc32\release\armv5\$(TARGET)\cpixemailplugin.dll"
+-"c:\sys\bin\cpixemailplugin.dll"
+
+;Qt Email fetcher
+"\epoc32\release\armv5\$(TARGET)\qtemailfetcher.dll"
+-"c:\sys\bin\qtemailfetcher.dll"
\ No newline at end of file
--- a/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -32,20 +32,14 @@
#ifdef SBSV2
#if defined(__S60_)
- OS_LAYER_SYSTEMINCLUDE
- #else // __S60_ not defined
- SYSTEMINCLUDE /epoc32/include
- SYSTEMINCLUDE /epoc32/include/internal
+ OS_LAYER_SYSTEMINCLUDE
#endif // __S60_
/* Source files */
SOURCEPATH ../src
#else // SBSV2 not defined
#if defined(__S60_)
- MW_LAYER_SYSTEMINCLUDE
- #else // __S60_ not defined
- SYSTEMINCLUDE /epoc32/include
- SYSTEMINCLUDE /epoc32/include/internal
+ MW_LAYER_SYSTEMINCLUDE
#endif // __S60_
/* Source files */
--- a/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw_nrm.mmp Fri Jun 11 14:43:47 2010 +0300
+++ b/tsrc/cpixmwtester/src/capsmod_mw/group/capsmod_mw_nrm.mmp Mon Jun 28 10:34:53 2010 +0530
@@ -33,9 +33,6 @@
#ifdef SBSV2
#if defined(__S60_)
OSEXT_LAYER_SYSTEMINCLUDE
- #else // __S60_ not defined
- SYSTEMINCLUDE /epoc32/include
- SYSTEMINCLUDE /epoc32/include/internal
#endif // __S60_
/* Source files */
@@ -43,9 +40,6 @@
#else // SBSV2 not defined
#if defined(__S60_)
MW_LAYER_SYSTEMINCLUDE
- #else // __S60_ not defined
- SYSTEMINCLUDE /epoc32/include
- SYSTEMINCLUDE /epoc32/include/internal
#endif // __S60_
/* Source files */
--- a/tsrc/cpixmwtester/src/cpixmwtesterblocks.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/tsrc/cpixmwtester/src/cpixmwtesterblocks.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -442,19 +442,19 @@
// CCPixMWTester::TestAddUnloadlistL
// -----------------------------------------------------------------------------
//
-TInt CCPixMWTester::TestAddUnloadlistL( CStifItemParser& aItem)
+TInt CCPixMWTester::TestAddUnloadlistL( CStifItemParser& /*aItem*/)
{
TInt err = KErrNone;
CBlacklistMgr* blacklistmanager = CBlacklistMgr::NewL();
CleanupStack::PushL( blacklistmanager );
//Add an Uid to Blacklist DB
- blacklistmanager->AddtoUnloadListL( KTestUid );
+ blacklistmanager->AddtoDontloadListL( KTestUid );
//Check if the Uid is added to database or not
- TBool found = blacklistmanager->FindfromUnloadListL(KTestUid );
+ TBool found = blacklistmanager->FindInDontloadListL(KTestUid );
if(!found) err = KErrNotFound;
//clear the UID from the database
- blacklistmanager->RemoveFromUnloadListL(KTestUid);
+ blacklistmanager->RemoveFromDontloadListL(KTestUid);
CleanupStack::PopAndDestroy( blacklistmanager );
doLog( iLog, err, KNoErrorString );
return err;
@@ -464,21 +464,21 @@
// CCPixMWTester::TestRemovefromUnloadlistL
// -----------------------------------------------------------------------------
//
-TInt CCPixMWTester::TestRemovefromUnloadlistL( CStifItemParser& aItem)
+TInt CCPixMWTester::TestRemovefromUnloadlistL( CStifItemParser& /*aItem*/)
{
TInt err = KErrNotFound;
CBlacklistMgr* blacklistmanager = CBlacklistMgr::NewL();
CleanupStack::PushL( blacklistmanager );
//Add an Uid to Blacklist DB
- blacklistmanager->AddtoUnloadListL( KTestUid );
+ blacklistmanager->AddtoDontloadListL( KTestUid );
//Check if the Uid is added to database or not
- TBool found = blacklistmanager->FindfromUnloadListL(KTestUid );
+ TBool found = blacklistmanager->FindInDontloadListL(KTestUid );
if(found)
{
//clear the UID from the database
- blacklistmanager->RemoveFromUnloadListL(KTestUid);
- found = blacklistmanager->FindfromUnloadListL(KTestUid );
+ blacklistmanager->RemoveFromDontloadListL(KTestUid);
+ found = blacklistmanager->FindInDontloadListL(KTestUid );
if ( !found ) err = KErrNone;
}
CleanupStack::PopAndDestroy( blacklistmanager );