--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/cl/clucene/src/clucene/index/indexreader.cpp Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,665 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "clucene/stdheader.h"
+#include "IndexReader.h"
+#include "IndexWriter.h"
+
+#include "clucene/store/directory.h"
+#include "CLucene/store/FSDirectory.h"
+#include "CLucene/store/Lock.h"
+#include "clucene/document/document.h"
+#include "clucene/search/similarity.h"
+#include "SegmentInfos.h"
+#include "MultiReader.h"
+#include "Terms.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_DEF(index)
+
+ // Builds a reader over `dir` that does not own the index directory: no
+ // SegmentInfos is attached and no write lock is held. `dir` is stored via _CL_POINTER.
+ IndexReader::IndexReader(Directory* dir):
+     directory(_CL_POINTER(dir))
+ {
+     // Flags: nothing pending, not stale, directory neither owned nor closed by us.
+     hasChanges     = false;
+     stale          = false;
+     closeDirectory = false;
+     directoryOwner = false;
+     // Pointers that remain unset after construction.
+     segmentInfos = NULL;
+     writeLock    = NULL;
+ }
+
+ // Builds a directory-owning reader (directoryOwner = true) around the given SegmentInfos.
+ IndexReader::IndexReader(Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory) {
+     this->segmentInfos   = segmentInfos;
+     this->closeDirectory = closeDirectory;
+     this->directory      = _CL_POINTER(directory);
+     directoryOwner = true;
+     writeLock      = NULL;
+     hasChanges = stale = false;
+ }
+
+ // Destructor: gives back the write lock when one is still held, then
+ // disposes of the segment infos and drops this reader's directory reference.
+ IndexReader::~IndexReader()
+ {
+     const bool holdsWriteLock = (writeLock != NULL);
+     if (holdsWriteLock) {
+         // release first, destroy the lock object afterwards
+         writeLock->release();
+         _CLDELETE(writeLock);
+     }
+     _CLDELETE(segmentInfos);
+     _CLDECDELETE(directory);
+ }
+
+ // Static. Opens a reader on the index stored in the filesystem directory
+ // named by `path`.
+ // Pre  - path != NULL and names the directory holding the index
+ // Post - the reader produced by open(Directory*, true) has been returned
+ IndexReader* IndexReader::open(const char* path)
+ {
+     CND_PRECONDITION(path != NULL, "path is NULL");
+
+     Directory* fsDir = FSDirectory::getDirectory(path,false);
+     IndexReader* const opened = open(fsDir,true);
+
+     // The FSDirectory now has a refcount one higher than it would have if
+     // the reader had been opened with a Directory object supplied by the
+     // caller, so one reference has to be given back here.
+     _CLDECDELETE(fsDir);
+
+     return opened;
+ }
+
+ // Static. Opens a reader on the index held by `directory`.
+ // Pre  - directory refers to a valid Directory
+ // Post - the reader produced by LockWith::runAndReturn() has been returned
+ //        with its closeDirectory flag set to the `closeDirectory` argument
+ // _CLFINALLY is used so that the lock object is deleted whether or not
+ // runAndReturn() throws.
+ IndexReader* IndexReader::open( Directory* directory, bool closeDirectory){
+     // in- & inter-process sync
+     // CPIXASYNC SCOPED_LOCK_MUTEX(directory->THIS_LOCK)
+     SCOPED_LOCK_CRUCIAL_MUTEX(directory->Directory_THIS_LOCK)
+
+     IndexReader* ret = NULL;
+
+     LuceneLock* lock = directory->makeLock("commit.lock");
+
+     //Instantiate an IndexReader::LockWith which can produce an IndexReader
+     IndexReader::LockWith with(lock,directory);
+
+     try{
+         ret = with.runAndReturn();
+     }_CLFINALLY(
+         _CLDELETE( lock );
+     );
+
+     // Fix: validate the result BEFORE using it. The check used to come
+     // after the dereference below, where it could no longer help.
+     CND_CONDITION(ret != NULL,"ret is NULL");
+     ret->closeDirectory = closeDirectory;
+
+     return ret;
+ }
+
+ // Allocates a Document and fills it via document(n, doc); it is deleted again when that call returns false.
+ CL_NS(document)::Document* IndexReader::document(const int32_t n){
+     CL_NS(document)::Document* doc = _CLNEW CL_NS(document)::Document;
+     if (!document(n,doc)) { _CLDELETE(doc); }
+     return doc;
+ }
+
+ // Builds the reader for the index in `directory`; IndexReader::open() runs
+ // this through LockWith::runAndReturn() with the commit lock object.
+ // Pre  - directory != NULL
+ // Post - exactly one segment : a SegmentReader for that segment is returned
+ //        any other count     : a MultiReader is returned; it receives a
+ //                              NULL-terminated array holding one
+ //                              SegmentReader per segment, or NULL when the
+ //                              index has no segments at all
+ // Anything thrown by SegmentInfos::read() is rethrown after the infos object
+ // has been deleted.
+ IndexReader* IndexReader::LockWith::doBody() {
+     CND_PRECONDITION(directory != NULL, "directory is NULL");
+
+     // Load the segment list of the index.
+     SegmentInfos* segInfos = _CLNEW SegmentInfos;
+     try{
+         segInfos->read(directory);
+     }catch(...){
+         // do not leak the infos object when the read fails
+         _CLDELETE(segInfos);
+         throw;
+     }
+
+     // Directory-closing responsibility, as described by the original authors:
+     // with at least one segment, the last SegmentReader closes the directory
+     // when it is itself closed (see SegmentReader::doClose). With no
+     // segments there is no such "last SegmentReader", so the directory has
+     // to be closed explicitly by the multi-segment reader's close.
+
+     const uint32_t segmentCount = segInfos->size();
+
+     // Optimized index: a single segment needs no aggregating reader.
+     if (segmentCount == 1 )
+         return _CLNEW SegmentReader(segInfos, segInfos->info(0));
+
+     // Zero or several segments: collect the sub-readers for a MultiReader.
+     IndexReader** subReaders = NULL;
+     if (segmentCount > 0){
+         const uint32_t count = segInfos->size();
+         subReaders = _CL_NEWARRAY(IndexReader*,count+1);
+
+         uint32_t idx = 0;
+         while (idx < count) {
+             // reader for the idx-th segment, stored at the same position
+             subReaders[idx] = _CLNEW SegmentReader(segInfos->info(idx));
+             ++idx;
+         }
+
+         // terminator slot
+         subReaders[count] = NULL;
+     }
+
+     return _CLNEW MultiReader(directory, segInfos, subReaders);
+ }
+
+ // Static. Reports when the index in the named directory was last modified,
+ // using the modification time FSDirectory gives for its "segments" file.
+ // Pre  - directory != NULL and names the directory to inspect
+ // Post - the value of FSDirectory::fileModified(directory, "segments") has been returned
+ uint64_t IndexReader::lastModified(const char* directory)
+ {
+     CND_PRECONDITION(directory != NULL, "directory is NULL");
+     const uint64_t stamp = FSDirectory::fileModified(directory,"segments");
+     return stamp;
+ }
+
+ // Static. Reads the current version of the index in `directory` after trying
+ // to obtain the commit lock; the lock object is deleted before returning.
+ int64_t IndexReader::getCurrentVersion(Directory* directory) {
+     // CPIXASYNC SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
+     SCOPED_LOCK_CRUCIAL_MUTEX(directory->Directory_THIS_LOCK) // in- & inter-process sync
+     int64_t version = 0;
+     bool haveLock = false;
+     LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+     try {
+         haveLock = lock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+         version = SegmentInfos::readCurrentVersion(directory);
+     }_CLFINALLY(
+         if (haveLock) { lock->release(); }
+         _CLDELETE(lock);
+     )
+     return version;
+ }
+
+
+ // Static. Path overload: opens the FSDirectory for `directory` and queries its version.
+ int64_t IndexReader::getCurrentVersion(const char* directory){
+     Directory* fsDir = FSDirectory::getDirectory(directory, false);
+     const int64_t current = getCurrentVersion(fsDir);
+     fsDir->close(); _CLDECDELETE(fsDir); // close, then give up the reference obtained above
+     return current;
+ }
+ int64_t IndexReader::getVersion() { // version held by this reader's SegmentInfos
+     return this->segmentInfos->getVersion();
+ }
+
+ // True when the version currently stored in the directory equals the version
+ // of this reader's SegmentInfos; compared after trying to take the commit lock.
+ bool IndexReader::isCurrent() {
+     SCOPED_LOCK_CRUCIAL_MUTEX(directory->Directory_THIS_LOCK) // in- & inter-process sync
+     LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+     bool locked=false;
+     bool ret = false;
+     try {
+         locked=commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+         ret = SegmentInfos::readCurrentVersion(directory) == segmentInfos->getVersion();
+     } _CLFINALLY(
+         if (locked) { commitLock->release(); }
+         _CLDELETE(commitLock); // fix: the object returned by makeLock() used to leak
+     )
+     return ret;
+ }
+
+ // Static. Reports when the index held by `directory` was last modified.
+ // Pre  - directory is a valid Directory
+ // Post - directory->fileModified("segments") has been returned
+ uint64_t IndexReader::lastModified(const Directory* directory)
+ {
+     const uint64_t stamp = directory->fileModified("segments");
+     return stamp;
+ }
+
+
+ // Static. Checks whether an index exists in the named directory.
+ // Pre  - directory != NULL
+ // Post - returns the result of Misc::dir_Exists() for "<directory>/segments";
+ //        paths too long for the buffer are truncated before the suffix is added
+ bool IndexReader::indexExists(const char* directory){
+     CND_PRECONDITION(directory != NULL, "directory is NULL");
+
+     // "/segments" plus its terminating NUL needs 10 bytes of the buffer.
+     const size_t maxDirLen = CL_MAX_PATH-10;
+     char f[CL_MAX_PATH];
+
+     // Fix: strncpy() does not NUL-terminate when the source holds maxDirLen
+     // or more characters, so terminate explicitly; otherwise the strcat()
+     // below would start from an unterminated buffer.
+     strncpy(f,directory,maxDirLen);
+     f[maxDirLen] = '\0';
+     strcat(f, "/segments");
+     return Misc::dir_Exists(f);
+ }
+
+
+ // Passes the norm byte for `field` of document `doc` to doSetNorm(); a reader that
+ // owns its directory takes the write lock first. Marks the reader as changed.
+ void IndexReader::setNorm(int32_t doc, const TCHAR* field, uint8_t value){
+     SCOPED_LOCK_CRUCIAL_MUTEX(IndexReader_THIS_LOCK) // was: CPIXASYNC SCOPED_LOCK_MUTEX(THIS_LOCK)
+     if (directoryOwner) { aquireWriteLock(); }
+     doSetNorm(doc, field, value);
+     hasChanges = true;
+ }
+
+ // Takes the directory's write lock unless already held; throws CL_ERR_IO if stale, locked or outdated.
+ void IndexReader::aquireWriteLock() {
+     if (stale)
+         _CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+     if (writeLock == NULL) {
+         LuceneLock* lock = directory->makeLock("write.lock");
+         if (!lock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) { // obtain write lock
+             _CLDELETE(lock); // fix: the lock object used to leak on this path
+             _CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock
+         }
+         this->writeLock = lock;
+         // If the index changed since this reader was opened, it is no longer valid for deletion.
+         if (SegmentInfos::readCurrentVersion(directory) > segmentInfos->getVersion()) {
+             stale = true;
+             this->writeLock->release();
+             _CLDELETE(this->writeLock);
+             _CLTHROWA(CL_ERR_IO,"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+         }
+     }
+ }
+
+
+ void IndexReader::setNorm(int32_t doc, const TCHAR* field, float_t value){ // float overload
+     this->setNorm(doc, field, CL_NS(search)::Similarity::encodeNorm(value)); // encode, then delegate
+ }
+
+ // Static. Checks whether an index exists in `directory`.
+ // Pre  - directory is a valid Directory
+ // Post - the result of directory->fileExists("segments") has been
+ //        returned
+ bool IndexReader::indexExists(const Directory* directory)
+ {
+     const bool hasSegmentsFile = directory->fileExists("segments");
+
+     return hasSegmentsFile;
+ }
+
+ // Returns an enumeration of all documents that contain `term`. For each
+ // document it supplies the document number and the frequency of the term
+ // in that document (for use in search scoring), i.e. the mapping
+ //
+ //     Term => <docNum, freq>*
+ //
+ // The enumeration is ordered by document number; every number is greater
+ // than all those before it.
+ // Pre  - term != NULL
+ // Post - the TermDocs obtained from termDocs(), after seek(term), has been
+ //        returned
+ TermDocs* IndexReader::termDocs(Term* term) const
+ {
+     CND_PRECONDITION(term != NULL, "term is NULL");
+
+     // Obtain an enumeration from the no-argument overload ...
+     TermDocs* const docsEnum = termDocs();
+     // ... and move it onto the requested term.
+     docsEnum->seek(term);
+
+     return docsEnum;
+ }
+
+ // Returns an enumeration of all documents that contain `term`. Besides the
+ // document number and the term frequency, each entry gives the ordinal
+ // positions of the term within the document, i.e. the mapping
+ //
+ //     Term => <docNum, freq, <pos 1, pos 2, ... pos freq-1>>*
+ //
+ // This positional information facilitates phrase and proximity searching.
+ // The enumeration is ordered by document number; every number is greater
+ // than all those before it.
+ // Pre  - term != NULL
+ // Post - the TermPositions obtained from termPositions(), after seek(term),
+ //        has been returned
+ TermPositions* IndexReader::termPositions(Term* term) const
+ {
+     CND_PRECONDITION(term != NULL, "term is NULL");
+
+     // Obtain an enumeration from the no-argument overload ...
+     TermPositions* const positionsEnum = termPositions();
+
+     // ... and move it onto the requested term.
+     positionsEnum->seek(term);
+
+     return positionsEnum;
+ }
+
+ // Deletes the document numbered docNum. A deleted document no longer shows
+ // up in TermDocs or TermPositions enumerations, and reading its fields with
+ // the document method results in an error. Its presence may still be
+ // reflected in the docFreq statistic until the index is modified further.
+ // Pre  - docNum >= 0
+ // Post - the document has been deleted and the reader is marked as changed;
+ //        if the write lock could not be taken, an exception has been thrown
+ void IndexReader::deleteDocument(const int32_t docNum)
+ {
+     // CPIXASYNC SCOPED_LOCK_MUTEX(THIS_LOCK)
+     SCOPED_LOCK_CRUCIAL_MUTEX(IndexReader_THIS_LOCK)
+
+     CND_PRECONDITION(docNum >= 0, "docNum is negative");
+
+     // Only a reader that owns its directory takes the write lock.
+     if (directoryOwner) { aquireWriteLock(); }
+
+     doDelete(docNum);
+
+     hasChanges = true;
+ }
+
+ /**
+ * Commits changes resulting from delete, undeleteAll, or setNorm operations.
+ * A reader that owns its directory runs CommitLockWith with "commit.lock" and
+ * then gives up the write lock; otherwise doCommit() is called directly.
+ * @throws IOException
+ */
+ void IndexReader::commit(){
+     SCOPED_LOCK_CRUCIAL_MUTEX(IndexReader_THIS_LOCK)
+     if(hasChanges){
+         if(directoryOwner){
+             {
+                 SCOPED_LOCK_CRUCIAL_MUTEX(directory->Directory_THIS_LOCK) // in- & inter-process sync
+                 LuceneLock* commitLock = directory->makeLock("commit.lock");
+                 IndexReader::CommitLockWith cl(commitLock,this);
+                 try{
+                     cl.run();
+                 }_CLFINALLY(
+                     _CLDELETE(commitLock); // fix: now also freed when run() throws
+                 );
+             }
+             if (writeLock != NULL) {
+                 writeLock->release(); // release write lock
+                 _CLDELETE(writeLock);
+             }
+         }else
+             doCommit();
+     }
+     hasChanges = false;
+ }
+
+
+ // Calls doUndeleteAll(); a reader that owns its directory takes the write lock
+ // first. Afterwards the reader is marked as having uncommitted changes.
+ void IndexReader::undeleteAll(){
+     SCOPED_LOCK_CRUCIAL_MUTEX(IndexReader_THIS_LOCK) // was: CPIXASYNC SCOPED_LOCK_MUTEX(THIS_LOCK)
+     if (directoryOwner) { aquireWriteLock(); }
+     doUndeleteAll();
+     hasChanges = true;
+ }
+
+ // Deletes all documents containing `term`. Useful when a document field
+ // holds a unique ID string: build a term from that field and the ID and
+ // pass it here to delete the matching document.
+ // Pre  - term != NULL
+ // Post - every document containing term has been deleted; the number of
+ //        deleted documents has been returned
+ // If iterating or deleting throws, the enumeration is still closed and
+ // freed before the exception propagates.
+ int32_t IndexReader::deleteDocuments(Term* term) {
+     CND_PRECONDITION(term != NULL, "term is NULL");
+
+     //Search for the documents that contain term
+     TermDocs* docs = termDocs(term);
+
+     //Nothing to do when no enumeration is available
+     if ( docs == NULL ){
+         return 0;
+     }
+
+     //initialize
+     int32_t Counter = 0;
+
+     try {
+         //iterate through the found documents
+         while (docs->next()) {
+             deleteDocument(docs->doc());
+             ++Counter;
+         }
+     }_CLFINALLY(
+         //Close the enumeration and then delete it. Deleting inside the
+         //finally block fixes a leak: previously an exception thrown in
+         //the loop skipped the _CLDELETE that followed the block.
+         docs->close();
+         _CLDELETE( docs );
+     );
+
+     //Return the number of deleted documents
+     return Counter;
+ }
+
+ // Returns a NULL-terminated, newly allocated array with the names collected
+ // by getFieldNames(IndexReader::ALL, ...). setDoDelete(false) presumably keeps
+ // the temporary container from freeing the strings the array now refers to.
+ TCHAR** IndexReader::getFieldNames(){
+     CL_NS(util)::StringArrayWithDeletor names;
+     getFieldNames(IndexReader::ALL, names);
+     names.setDoDelete(false);
+
+     TCHAR** result = _CL_NEWARRAY(TCHAR*,names.size()+1);
+     int filled = 0;
+     for (CL_NS(util)::StringArrayWithDeletor::iterator it = names.begin(); it != names.end(); ++it)
+         result[filled++] = *it;
+     result[filled] = NULL;
+     return result;
+ }
+ // Returns a NULL-terminated, newly allocated array with the names collected
+ // for IndexReader::INDEXED (indexed == true) or IndexReader::UNINDEXED.
+ TCHAR** IndexReader::getFieldNames(bool indexed){
+     CL_NS(util)::StringArrayWithDeletor names;
+     getFieldNames(indexed?IndexReader::INDEXED:IndexReader::UNINDEXED, names);
+     // presumably keeps the container from freeing strings the result refers to
+     names.setDoDelete(false);
+
+     TCHAR** result = _CL_NEWARRAY(TCHAR*,names.size()+1);
+     int filled = 0;
+     for (CL_NS(util)::StringArrayWithDeletor::iterator it = names.begin(); it != names.end(); ++it)
+         result[filled++] = *it;
+     result[filled] = NULL;
+     return result;
+ }
+
+
+ // Closes the files associated with this index and saves any new deletions
+ // to disk. No other methods should be called after this one.
+ // Post - close callbacks were invoked, changes committed, doClose() run and,
+ //        when closeDirectory is set, the directory closed and released
+ void IndexReader::close() {
+     SCOPED_LOCK_CRUCIAL_MUTEX(IndexReader_THIS_LOCK)
+
+     // Notify the registered callbacks first, handing each its parameter.
+     CloseCallbackMap::iterator cb = closeCallbacks.begin();
+     while (cb != closeCallbacks.end()) {
+         CloseCallback notify = *cb->first;
+         notify(this,cb->second);
+         ++cb;
+     }
+
+     commit();
+     doClose();
+
+     if (closeDirectory) {
+         directory->close();
+         _CLDECDELETE(directory);
+     }
+ }
+
+ // Static. Tells whether the index in `directory` is currently locked.
+ // Pre  - directory refers to the directory to check
+ // Post - true when its "write.lock" or its "commit.lock" reports isLocked(),
+ //        false otherwise
+ bool IndexReader::isLocked(Directory* directory) {
+     LuceneLock* writeLk  = directory->makeLock("write.lock");
+     LuceneLock* commitLk = directory->makeLock("commit.lock");
+
+     // The commit lock is only queried when the write lock is not locked.
+     bool locked = true;
+     if (!writeLk->isLocked())
+         locked = commitLk->isLocked();
+
+     _CLDELETE(writeLk);
+     _CLDELETE(commitLk);
+     return locked;
+ }
+
+ // Static. Tells whether the index in the named directory is currently locked.
+ // Pre  - directory != NULL and names the directory to check
+ // Post - returns isLocked(Directory*) for the FSDirectory opened on that
+ //        path; the directory has been closed and released again
+ //
+ // Fix: the original also assembled "<directory>/write.lock" in a stack
+ // buffer that was never used afterwards. strncpy() leaves such a buffer
+ // without a terminating NUL when the path fills it, so the strcat() that
+ // followed could run past the end (the old "potential buffer overflow"
+ // todo). The dead buffer has been removed.
+ bool IndexReader::isLocked(const char* directory) {
+     CND_PRECONDITION(directory != NULL, "directory is NULL");
+
+     Directory* dir = FSDirectory::getDirectory(directory,false);
+     bool ret = isLocked(dir);
+
+     // close the directory, then give up the reference obtained above
+     dir->close();
+     _CLDECDELETE(dir);
+
+     return ret;
+ }
+
+/** Returns true if there are norms stored for this field. */
+bool IndexReader::hasNorms(const TCHAR* field)
+{
+    // Backward compatible default (SegmentReader has an efficient one): norms(field) is non-NULL.
+    return NULL != norms(field);
+}
+
+void IndexReader::unlock(const char* path){ // path overload of unlock(Directory*)
+    FSDirectory* fsDir = FSDirectory::getDirectory(path,false);
+    unlock(fsDir);
+    // done with the directory: close it, then give up the reference obtained above
+    fsDir->close(); _CLDECDELETE(fsDir);
+}
+ // Static. Forcibly unlocks the index in `directory`.
+ // Caution: this should only be used by failure recovery code, when it is
+ // known that no other process nor thread is in fact currently accessing
+ // this index.
+ // Pre  - directory is a valid reference to a directory
+ // Post - release() has been called on its "write.lock" and "commit.lock"
+ void IndexReader::unlock(Directory* directory){
+     // processed in this order: write lock first, commit lock second
+     const char* const lockNames[] = { "write.lock", "commit.lock" };
+     const size_t lockCount = sizeof(lockNames) / sizeof(lockNames[0]);
+
+     for (size_t i = 0; i < lockCount; ++i) {
+         // create the lock object, force its release, dispose of it
+         LuceneLock* lock = directory->makeLock(lockNames[i]);
+         lock->release();
+         _CLDELETE(lock);
+     }
+ }
+
+// Tells whether `filename` looks like a file belonging to a Lucene index.
+//
+// Returns false for NULL and for names shorter than six characters.
+// Otherwise returns true when
+//   - the part starting at the last '.' (the whole name if it has no '.')
+//     is one of the known index extensions listed below, or
+//   - the whole name is "segments", "segments.new" or "deletable", or
+//   - that last part starts with ".f" followed by a character accepted by
+//     _istdigit().
+// Any other name yields false.
+bool IndexReader::isLuceneFile(const char* filename){
+    if ( !filename )
+        return false;
+    // shortest accepted names have six characters
+    const size_t nameLen = strlen(filename);
+    if ( nameLen < 6 )
+        return false;
+
+    // Walk back from the terminating NUL to the last '.'; stops at the
+    // first character when the name contains no '.'.
+    const char* ext = filename + nameLen;
+    while ( ext != filename && *ext != '.' )
+        --ext;
+
+    // known extensions, compared with the part found above
+    const char* const knownExtensions[] = {
+        ".cfs", ".fnm", ".fdx", ".fdt", ".tii", ".tis", ".frq",
+        ".prx", ".del", ".tvx", ".tvd", ".tvf", ".tvp"
+    };
+    const size_t extCount = sizeof(knownExtensions) / sizeof(knownExtensions[0]);
+    for ( size_t i = 0; i < extCount; ++i ) {
+        if ( strcmp(ext, knownExtensions[i]) == 0 )
+            return true;
+    }
+
+    // known file names, compared with the complete name
+    const char* const knownNames[] = { "segments", "segments.new", "deletable" };
+    const size_t nameCount = sizeof(knownNames) / sizeof(knownNames[0]);
+    for ( size_t i = 0; i < nameCount; ++i ) {
+        if ( strcmp(filename, knownNames[i]) == 0 )
+            return true;
+    }
+
+    // ".f" followed by a digit
+    if ( strncmp(ext,".f",2)==0 ){
+        const char* afterF = ext+2;
+        if ( *afterF && _istdigit(*afterF) )
+            return true;
+    }
+
+    return false;
+}
+
+ void IndexReader::addCloseCallback(CloseCallback callback, void* parameter){
+     this->closeCallbacks.put(callback, parameter); // close() later calls callback(this, parameter)
+ }
+
+
+ // Constructor: passes the lock and IndexWriter::COMMIT_LOCK_TIMEOUT on to LuceneLockWith.
+ IndexReader::LockWith::LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir):
+     CL_NS(store)::LuceneLockWith<IndexReader*>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT)
+ {
+     directory = dir; // directory that doBody() reads the index from
+ }
+ //Constructor: stores the reader that doBody() operates on; lock and IndexWriter::COMMIT_LOCK_TIMEOUT go to the base class
+ IndexReader::CommitLockWith::CommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r ):
+ CL_NS(store)::LuceneLockWith<void>(lock,IndexWriter::COMMIT_LOCK_TIMEOUT),
+ reader(r)
+ {
+ }
+ void IndexReader::CommitLockWith::doBody(){ // invoked by IndexReader::commit() through run()
+ reader->doCommit(); // first the reader's own pending changes
+ reader->segmentInfos->write(reader->getDirectory()); // then its segment infos, written to its directory
+ }
+
+CL_NS_END