searchengine/cpix/cpix/src/cpixhits.cpp
changeset 0 671dee74050a
child 14 8bd192d47aaa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/cpixhits.cpp	Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,351 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+
+#include "cpixhits.h"
+#include "idxdb.h"
+#include "cpixexc.h"
+#include "iqrytype.h"
+#include "document.h"
+
+#include "common/cpixlog.h"
+
+namespace Cpix
+{
+
+    /**
+     * Interface IHits
+     */
+    IHits::~IHits() 
+    {
+	;
+    }
+
+
+    void HitsBase::destroyWrappers()
+    {
+        WrapperMap::iterator
+            i = wrappers_.begin(),
+            end = wrappers_.end();
+
+        for (; i != end; ++i)
+            {
+                delete i->second;
+            }
+
+        wrappers_.clear();
+    }
+
+
+    HitsBase::HitsBase(bool docsOwnedByClucene)
+        : docsOwnedByClucene_(docsOwnedByClucene)
+    {
+        ;
+    }
+
+
+    void HitsBase::wrapDocument(int32_t                      index,
+                                lucene::document::Document * doc)
+    {
+        if (wrappers_.find(index) != wrappers_.end())
+            {
+                delete wrappers_[index];
+                wrappers_[index] = NULL;
+            }
+
+        Cpix::Document
+            * cpixDoc = new Cpix::Document(doc,
+                                           docsOwnedByClucene_);
+        wrappers_[index] = cpixDoc;
+    }
+
+
+    Cpix::Document * HitsBase::getDocument(int32_t index)
+    {
+        Cpix::Document
+            * rv = NULL;
+
+        if (wrappers_.find(index) == wrappers_.end())
+            {
+                getDocument_(index);
+            }
+
+        rv = wrappers_[index];
+
+        return rv;
+    }
+
+
+
+    /**
+     * Class ClhDocumentConsumer
+     */
+    int32_t ClhDocumentConsumer::beginIndex() const
+    {
+        return beginIndex_;
+    }
+
+
+    int32_t ClhDocumentConsumer::endIndex() const
+    {
+        return endIndex_;
+    }
+
+
+    void ClhDocumentConsumer::useDocument(int32_t                      index,
+                                          lucene::document::Document * document)
+    {
+        clHits_.wrapDocument(index,
+                             document);
+    }
+
+
+    void ClhDocumentConsumer::failedDocument(int32_t        index,
+                                             int            clErrorNumber)
+    {
+        failedDocErrors_[index - beginIndex_] = clErrorNumber;
+    }
+
+
+    ClhDocumentConsumer::ClhDocumentConsumer(LuceneHits & clHits)
+        : pageSize_(IdxDbMgr::instance()->getClHitsPageSize()),
+          beginIndex_(UNUSED),
+          endIndex_(UNUSED),
+          clHits_(clHits)
+    {
+        failedDocErrors_.reserve(pageSize_);
+        for (int i = 0; i < pageSize_; ++i)
+            {
+                failedDocErrors_.push_back(CL_ERR_UNKNOWN);
+            }
+    }
+
+
+    void ClhDocumentConsumer::setPageBoundaryForIndex(int32_t index)
+    {
+        if (index >= clHits_.length())
+            {
+                THROW_CPIXEXC("Not a valid hit doc: %d (hits length is %d)",
+                              index,
+                              clHits_.length());
+            }
+
+        beginIndex_ = index - (index % pageSize_);
+        endIndex_ = beginIndex_ + pageSize_;
+
+        endIndex_ = std::min(clHits_.length(),
+                             endIndex_);
+
+        for (int i = 0; i < pageSize_; ++i)
+            {
+                failedDocErrors_[i] = CL_ERR_UNKNOWN;
+            }
+    }
+
+        
+    void ClhDocumentConsumer::throwIfFailedDocIndex(int32_t index)
+    {
+        if (beginIndex_ == UNUSED)
+            {
+                // nothing has been fetched with this instance, so
+                // nothing has failed yet
+                return;
+            }
+
+        if (index < beginIndex_ || index >= endIndex_)
+            {
+                // we have not tried to load this document in this
+                // batch
+                return;
+            }
+
+        int32_t
+            idx = index - beginIndex_;
+
+        if (failedDocErrors_[idx] != CL_ERR_Success)
+            {
+                THROW_CPIXEXC("Failed to fetch hit document %d, CL error code: %d (cf. clucene debug/error.h)",
+                              index,
+                              failedDocErrors_[idx]);
+            }
+        else
+            {
+                THROW_CPIXEXC(PL_ERROR "Reloading a document should not be necessary %d",
+                              index);
+            }
+    }
+
+
+
+
+
+    /**
+     * Class LuceneHits wrapping lucene::document::Hits
+     */
+    LuceneHits::LuceneHits(lucene::search::Hits  * hits, 
+                             lucene::search::Query * query,
+                             IdxDbHndl               idxDb,
+                             Version                 idxDbVersion)
+	: HitsBase(true), // docs in here are owned by clucene
+          hits_(hits),
+          length_(hits != NULL ? hits->length() : 0),
+	  query_(query),  // increase reference count
+          idxDb_(idxDb),
+          idxDbVersion_(idxDbVersion),
+          docConsumer_(NULL)
+    {
+        docConsumer_ = new ClhDocumentConsumer(*this);
+
+        IdxDbMgr::instance()->incRefHndl(idxDb_);
+
+        // pre-fetching the first page - assumption is that first
+        // document to get will be #0 anyway, and no need for IPC +
+        // ctxt switch with client by which time an idx operation may
+        // have upset everything.
+        //
+        // TODO the dependent idxdb locks are released by this point,
+        // so getDocument_ will have to re-acquire all locks, which is
+        // not only OS operation but may bump into already modified
+        // idx-es. Idea: IIdxDb::search could be reorganized into
+        // fetchRecommitting() instead - the first fetch operations
+        // would be performed while locks are still in place...
+        if (length_ > 0)
+            {
+                getDocument_(0);
+            }
+    }
+	
+    LuceneHits::~LuceneHits() 
+    {
+        delete hits_; 
+        hits_ = 0; 
+        
+        destroyWrappers();
+
+        delete docConsumer_;
+
+        IdxDbMgr::instance()->releaseHndl(idxDb_);
+    }
+
+
+    void LuceneHits::getDocument_(int32_t index) 
+    {
+        // check if we have tried to fetch it before and failed
+        docConsumer_->throwIfFailedDocIndex(index);
+
+        // common usage pattern is to enumerate hits, never to access
+        // them at true random manner. That means that we never need
+        // wrappers for more than the current page - ie when about to
+        // fetch the next page we can discard all other wrapped
+        // documents. Merely an memory consumption optimization
+        // measure, can be commented out.
+        destroyWrappers();
+
+        // if we get here, then it means we don't have the page of hit
+        // docs we need, but it may still throw if the hits is an
+        // empty object or the hits itself was NULL
+        docConsumer_->setPageBoundaryForIndex(index);
+
+        IIdxDb* idx = IIdxDb::getPtr( idxDb_ );
+
+        lucene::search::Hits
+            * currentHits = hits_;
+
+        hits_ = 0;
+
+        // the version number of idx that produced currentHits is
+        // either still the same in which case we get back
+        // currentHits, or it has changed and in that case currentHits
+        // will be destroyed by this call and a new one will be
+        // returned - rv will recieve the pointer to the fetched document
+        hits_ = idx->fetchRecommitting(currentHits,
+                                       &idxDbVersion_,
+                                       query_,
+                                       *docConsumer_);
+        
+        length_ = hits_->length();
+
+        if (wrappers_.find(index) == wrappers_.end())
+            {
+                THROW_CPIXEXC("Failed to fetch doc %d: TODO reason",
+                              index);
+            }
+    }
+
+
+    int32_t LuceneHits::length()
+    {
+        return length_;
+    }
+
+    
+    /**
+     * HitDocumentList (for suggestion feature)
+     */
+    HitDocumentList::HitDocumentList()
+	: HitsBase(false) // docs owned by this
+    {
+        ;
+    }
+	
+    HitDocumentList::~HitDocumentList()
+    {
+        destroyWrappers();
+        for (std::vector<lucene::document::Document*>::iterator i = documents_.begin(); 
+             i != documents_.end(); 
+             i++) 
+            {
+                _CLDELETE(*i); 
+            }
+    }
+    
+    void HitDocumentList::getDocument_(int32_t index)
+    {
+        if (index >= 0 && index < documents_.size()) 
+            {
+                wrapDocument(index,
+                             documents_[index]);
+            }
+        else 
+            {
+                THROW_CPIXEXC(L"Hit document index '%d' out of bounds",
+                              index);
+            }
+    }
+
+    int32_t HitDocumentList::length()
+    {
+        return documents_.size(); 
+    }
+
+
+    void HitDocumentList::add(lucene::document::Document* document)
+    {
+        documents_.push_back(document); 
+    }
+	
+
+    void HitDocumentList::remove(int index)
+    {
+        documents_.erase(documents_.begin()+index); 
+    }
+
+
+
+
+
+}