searchengine/cpix/cpix/src/cpixhits.cpp
changeset 0 671dee74050a
child 14 8bd192d47aaa
equal deleted inserted replaced
-1:000000000000 0:671dee74050a
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include "cpixhits.h"
       
    20 #include "idxdb.h"
       
    21 #include "cpixexc.h"
       
    22 #include "iqrytype.h"
       
    23 #include "document.h"
       
    24 
       
    25 #include "common/cpixlog.h"
       
    26 
       
    27 namespace Cpix
       
    28 {
       
    29 
       
    30     /**
       
    31      * Interface IHits
       
    32      */
       
    33     IHits::~IHits() 
       
    34     {
       
    35 	;
       
    36     }
       
    37 
       
    38 
       
    39     void HitsBase::destroyWrappers()
       
    40     {
       
    41         WrapperMap::iterator
       
    42             i = wrappers_.begin(),
       
    43             end = wrappers_.end();
       
    44 
       
    45         for (; i != end; ++i)
       
    46             {
       
    47                 delete i->second;
       
    48             }
       
    49 
       
    50         wrappers_.clear();
       
    51     }
       
    52 
       
    53 
       
    54     HitsBase::HitsBase(bool docsOwnedByClucene)
       
    55         : docsOwnedByClucene_(docsOwnedByClucene)
       
    56     {
       
    57         ;
       
    58     }
       
    59 
       
    60 
       
    61     void HitsBase::wrapDocument(int32_t                      index,
       
    62                                 lucene::document::Document * doc)
       
    63     {
       
    64         if (wrappers_.find(index) != wrappers_.end())
       
    65             {
       
    66                 delete wrappers_[index];
       
    67                 wrappers_[index] = NULL;
       
    68             }
       
    69 
       
    70         Cpix::Document
       
    71             * cpixDoc = new Cpix::Document(doc,
       
    72                                            docsOwnedByClucene_);
       
    73         wrappers_[index] = cpixDoc;
       
    74     }
       
    75 
       
    76 
       
    77     Cpix::Document * HitsBase::getDocument(int32_t index)
       
    78     {
       
    79         Cpix::Document
       
    80             * rv = NULL;
       
    81 
       
    82         if (wrappers_.find(index) == wrappers_.end())
       
    83             {
       
    84                 getDocument_(index);
       
    85             }
       
    86 
       
    87         rv = wrappers_[index];
       
    88 
       
    89         return rv;
       
    90     }
       
    91 
       
    92 
       
    93 
       
    94     /**
       
    95      * Class ClhDocumentConsumer
       
    96      */
       
    97     int32_t ClhDocumentConsumer::beginIndex() const
       
    98     {
       
    99         return beginIndex_;
       
   100     }
       
   101 
       
   102 
       
   103     int32_t ClhDocumentConsumer::endIndex() const
       
   104     {
       
   105         return endIndex_;
       
   106     }
       
   107 
       
   108 
       
   109     void ClhDocumentConsumer::useDocument(int32_t                      index,
       
   110                                           lucene::document::Document * document)
       
   111     {
       
   112         clHits_.wrapDocument(index,
       
   113                              document);
       
   114     }
       
   115 
       
   116 
       
   117     void ClhDocumentConsumer::failedDocument(int32_t        index,
       
   118                                              int            clErrorNumber)
       
   119     {
       
   120         failedDocErrors_[index - beginIndex_] = clErrorNumber;
       
   121     }
       
   122 
       
   123 
       
   124     ClhDocumentConsumer::ClhDocumentConsumer(LuceneHits & clHits)
       
   125         : pageSize_(IdxDbMgr::instance()->getClHitsPageSize()),
       
   126           beginIndex_(UNUSED),
       
   127           endIndex_(UNUSED),
       
   128           clHits_(clHits)
       
   129     {
       
   130         failedDocErrors_.reserve(pageSize_);
       
   131         for (int i = 0; i < pageSize_; ++i)
       
   132             {
       
   133                 failedDocErrors_.push_back(CL_ERR_UNKNOWN);
       
   134             }
       
   135     }
       
   136 
       
   137 
       
   138     void ClhDocumentConsumer::setPageBoundaryForIndex(int32_t index)
       
   139     {
       
   140         if (index >= clHits_.length())
       
   141             {
       
   142                 THROW_CPIXEXC("Not a valid hit doc: %d (hits length is %d)",
       
   143                               index,
       
   144                               clHits_.length());
       
   145             }
       
   146 
       
   147         beginIndex_ = index - (index % pageSize_);
       
   148         endIndex_ = beginIndex_ + pageSize_;
       
   149 
       
   150         endIndex_ = std::min(clHits_.length(),
       
   151                              endIndex_);
       
   152 
       
   153         for (int i = 0; i < pageSize_; ++i)
       
   154             {
       
   155                 failedDocErrors_[i] = CL_ERR_UNKNOWN;
       
   156             }
       
   157     }
       
   158 
       
   159         
       
   160     void ClhDocumentConsumer::throwIfFailedDocIndex(int32_t index)
       
   161     {
       
   162         if (beginIndex_ == UNUSED)
       
   163             {
       
   164                 // nothing has been fetched with this instance, so
       
   165                 // nothing has failed yet
       
   166                 return;
       
   167             }
       
   168 
       
   169         if (index < beginIndex_ || index >= endIndex_)
       
   170             {
       
   171                 // we have not tried to load this document in this
       
   172                 // batch
       
   173                 return;
       
   174             }
       
   175 
       
   176         int32_t
       
   177             idx = index - beginIndex_;
       
   178 
       
   179         if (failedDocErrors_[idx] != CL_ERR_Success)
       
   180             {
       
   181                 THROW_CPIXEXC("Failed to fetch hit document %d, CL error code: %d (cf. clucene debug/error.h)",
       
   182                               index,
       
   183                               failedDocErrors_[idx]);
       
   184             }
       
   185         else
       
   186             {
       
   187                 THROW_CPIXEXC(PL_ERROR "Reloading a document should not be necessary %d",
       
   188                               index);
       
   189             }
       
   190     }
       
   191 
       
   192 
       
   193 
       
   194 
       
   195 
       
   196     /**
       
   197      * Class LuceneHits wrapping lucene::document::Hits
       
   198      */
       
   199     LuceneHits::LuceneHits(lucene::search::Hits  * hits, 
       
   200                              lucene::search::Query * query,
       
   201                              IdxDbHndl               idxDb,
       
   202                              Version                 idxDbVersion)
       
   203 	: HitsBase(true), // docs in here are owned by clucene
       
   204           hits_(hits),
       
   205           length_(hits != NULL ? hits->length() : 0),
       
   206 	  query_(query),  // increase reference count
       
   207           idxDb_(idxDb),
       
   208           idxDbVersion_(idxDbVersion),
       
   209           docConsumer_(NULL)
       
   210     {
       
   211         docConsumer_ = new ClhDocumentConsumer(*this);
       
   212 
       
   213         IdxDbMgr::instance()->incRefHndl(idxDb_);
       
   214 
       
   215         // pre-fetching the first page - assumption is that first
       
   216         // document to get will be #0 anyway, and no need for IPC +
       
   217         // ctxt switch with client by which time an idx operation may
       
   218         // have upset everything.
       
   219         //
       
   220         // TODO the dependent idxdb locks are released by this point,
       
   221         // so getDocument_ will have to re-acquire all locks, which is
       
   222         // not only OS operation but may bump into already modified
       
   223         // idx-es. Idea: IIdxDb::search could be reorganized into
       
   224         // fetchRecommitting() instead - the first fetch operations
       
   225         // would be performed while locks are still in place...
       
   226         if (length_ > 0)
       
   227             {
       
   228                 getDocument_(0);
       
   229             }
       
   230     }
       
   231 	
       
   232     LuceneHits::~LuceneHits() 
       
   233     {
       
   234         delete hits_; 
       
   235         hits_ = 0; 
       
   236         
       
   237         destroyWrappers();
       
   238 
       
   239         delete docConsumer_;
       
   240 
       
   241         IdxDbMgr::instance()->releaseHndl(idxDb_);
       
   242     }
       
   243 
       
   244 
       
   245     void LuceneHits::getDocument_(int32_t index) 
       
   246     {
       
   247         // check if we have tried to fetch it before and failed
       
   248         docConsumer_->throwIfFailedDocIndex(index);
       
   249 
       
   250         // common usage pattern is to enumerate hits, never to access
       
   251         // them at true random manner. That means that we never need
       
   252         // wrappers for more than the current page - ie when about to
       
   253         // fetch the next page we can discard all other wrapped
       
   254         // documents. Merely an memory consumption optimization
       
   255         // measure, can be commented out.
       
   256         destroyWrappers();
       
   257 
       
   258         // if we get here, then it means we don't have the page of hit
       
   259         // docs we need, but it may still throw if the hits is an
       
   260         // empty object or the hits itself was NULL
       
   261         docConsumer_->setPageBoundaryForIndex(index);
       
   262 
       
   263         IIdxDb* idx = IIdxDb::getPtr( idxDb_ );
       
   264 
       
   265         lucene::search::Hits
       
   266             * currentHits = hits_;
       
   267 
       
   268         hits_ = 0;
       
   269 
       
   270         // the version number of idx that produced currentHits is
       
   271         // either still the same in which case we get back
       
   272         // currentHits, or it has changed and in that case currentHits
       
   273         // will be destroyed by this call and a new one will be
       
   274         // returned - rv will recieve the pointer to the fetched document
       
   275         hits_ = idx->fetchRecommitting(currentHits,
       
   276                                        &idxDbVersion_,
       
   277                                        query_,
       
   278                                        *docConsumer_);
       
   279         
       
   280         length_ = hits_->length();
       
   281 
       
   282         if (wrappers_.find(index) == wrappers_.end())
       
   283             {
       
   284                 THROW_CPIXEXC("Failed to fetch doc %d: TODO reason",
       
   285                               index);
       
   286             }
       
   287     }
       
   288 
       
   289 
       
   290     int32_t LuceneHits::length()
       
   291     {
       
   292         return length_;
       
   293     }
       
   294 
       
   295     
       
   296     /**
       
   297      * HitDocumentList (for suggestion feature)
       
   298      */
       
   299     HitDocumentList::HitDocumentList()
       
   300 	: HitsBase(false) // docs owned by this
       
   301     {
       
   302         ;
       
   303     }
       
   304 	
       
   305     HitDocumentList::~HitDocumentList()
       
   306     {
       
   307         destroyWrappers();
       
   308         for (std::vector<lucene::document::Document*>::iterator i = documents_.begin(); 
       
   309              i != documents_.end(); 
       
   310              i++) 
       
   311             {
       
   312                 _CLDELETE(*i); 
       
   313             }
       
   314     }
       
   315     
       
   316     void HitDocumentList::getDocument_(int32_t index)
       
   317     {
       
   318         if (index >= 0 && index < documents_.size()) 
       
   319             {
       
   320                 wrapDocument(index,
       
   321                              documents_[index]);
       
   322             }
       
   323         else 
       
   324             {
       
   325                 THROW_CPIXEXC(L"Hit document index '%d' out of bounds",
       
   326                               index);
       
   327             }
       
   328     }
       
   329 
       
   330     int32_t HitDocumentList::length()
       
   331     {
       
   332         return documents_.size(); 
       
   333     }
       
   334 
       
   335 
       
   336     void HitDocumentList::add(lucene::document::Document* document)
       
   337     {
       
   338         documents_.push_back(document); 
       
   339     }
       
   340 	
       
   341 
       
   342     void HitDocumentList::remove(int index)
       
   343     {
       
   344         documents_.erase(documents_.begin()+index); 
       
   345     }
       
   346 
       
   347 
       
   348 
       
   349 
       
   350 
       
   351 }