searchengine/cpix/cpix/src/multiIdxDb.cpp
changeset 0 671dee74050a
child 23 d4d56f5e7c55
equal deleted inserted replaced
-1:000000000000 0:671dee74050a
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #include <fstream>
       
    19 #include <map>
       
    20 #include <memory>
       
    21 #include <vector>
       
    22 
       
    23 #include "CLucene.h"
       
    24 #include "CLucene/queryParser/MultiFieldQueryParser.h"
       
    25 
       
    26 #include "cpixtools.h"
       
    27 
       
    28 #include "cpixsearch.h"
       
    29 #include "cpixidxdb.h"
       
    30 #include "multiidxdb.h"
       
    31 #include "initparams.h"
       
    32 #include "cpixutil.h"
       
    33 #include "rotlogger.h"
       
    34 #include "idxdbmgr.h"
       
    35 #include "cpixhits.h"
       
    36 #include "ireaderowner.h"
       
    37 
       
    38 #include "common/cpixlog.h"
       
    39 
       
    40 
       
    41 // from idxdb.cpp
       
    42 Cpix::IHits * GetTerms(lucene::index::IndexReader * reader,
       
    43                        const wchar_t              * field,
       
    44                        const wchar_t              * wildcard,
       
    45                        const wchar_t 		  * appclassPrefix,
       
    46                        int                          maxHits);
       
    47 
       
    48 
       
    49 namespace Cpix
       
    50 {
       
    51 
       
    52     //
       
    53     // OriginatorInfo
       
    54     //
       
    55     OriginatorInfo::OriginatorInfo()
       
    56         : version_(0),
       
    57           readerOwner_(NULL)
       
    58     {
       
    59         ;
       
    60     }
       
    61 
       
    62 
       
    63     OriginatorInfo::OriginatorInfo(IIdxDb * idxDb)
       
    64         : version_(0),
       
    65           readerOwner_(NULL)
       
    66     {
       
    67         readerOwner_ = dynamic_cast<IReaderOwner*>(idxDb);
       
    68 
       
    69         if (readerOwner_ == NULL)
       
    70             {
       
    71                 THROW_CPIXEXC(PL_ERROR "IIdxDb obj here must be IReaderOwner");
       
    72             }
       
    73     }
       
    74 
       
    75 
       
    76     Version OriginatorInfo::version() const
       
    77     {
       
    78         return version_;
       
    79     }
       
    80 
       
    81 
       
    82 
       
    83     void OriginatorInfo::setVersion(Version version)
       
    84     {
       
    85         version_ = version;
       
    86     }
       
    87 
       
    88 
       
    89     IReaderOwner & OriginatorInfo::readerOwner() const
       
    90     {
       
    91         if (readerOwner_ == NULL)
       
    92             {
       
    93                 THROW_CPIXEXC(PL_ERROR "Reader owner pointer is NULL");
       
    94             }
       
    95 
       
    96         return *readerOwner_;
       
    97     }
       
    98 
       
    99 
       
   100 
       
   101     //
       
   102     // MultiIdxDb
       
   103     //
       
   104 
       
   105     using namespace lucene::search;
       
   106     using namespace lucene::index;
       
   107 
       
   108 
       
   109     lucene::search::Hits * MultiIdxDb::search(lucene::search::Query * query,
       
   110                                               Version               * version)
       
   111     {
       
   112         logDbgMsg("MultiIdxDb::search BEGIN");
       
   113         Cpt::StopperWatch
       
   114             stopperWatch;
       
   115 
       
   116         Cpt::SyncRegion
       
   117             sr(mutex_);
       
   118 
       
   119         const size_t
       
   120             size = originators_.size();
       
   121 
       
   122         lucene::search::Hits
       
   123             * rv = NULL;
       
   124 
       
   125         if (size > 0)
       
   126             {
       
   127                 // all the IdxDb instances must be locked, in order to be able
       
   128                 // to safely search
       
   129                 //
       
   130                 // note that the handles are iterated in an ascending order
       
   131                 // always (benefit of using tree-based set implementation),
       
   132                 // thus the handles are always locked in the ascending order
       
   133                 Cpt::MultiSyncRegion
       
   134                     msr(originators_.size());
       
   135 
       
   136                 rv = doSearch(query,
       
   137                               version,
       
   138                               &msr); 
       
   139 
       
   140                 // msr dies and unlocks all idxDb here
       
   141             }
       
   142 
       
   143         logDbgMsg("MultiIdxDb::search END (elapsed: %ld ms)",
       
   144                   stopperWatch.elapsedMSecs());
       
   145 
       
   146         return rv;
       
   147 
       
   148         // sr dies and unlocks this.mutex_
       
   149     }
       
   150 
       
   151 
       
   152 
       
   153     lucene::search::Hits * 
       
   154     MultiIdxDb::fetchRecommitting(lucene::search::Hits     * currentHits,
       
   155                                   Version                  * version,
       
   156                                   lucene::search::Query    * query,
       
   157                                   DocumentConsumer         & documentConsumer)
       
   158     {
       
   159         logDbgMsg("MultiIdxDb::recommitIfNecessary BEGIN");
       
   160         Cpt::StopperWatch
       
   161             stopperWatch;
       
   162 
       
   163         Cpt::SyncRegion
       
   164             sr(mutex_);
       
   165 
       
   166         const size_t 
       
   167             size = originators_.size();
       
   168 
       
   169         std::auto_ptr<lucene::search::Hits>
       
   170             rv(currentHits);
       
   171 
       
   172         if (size > 0)
       
   173             {
       
   174                 // all the IdxDb instances must be locked, in order to
       
   175                 // be able to safely search
       
   176                 //
       
   177                 // note that the handles are iterated in an ascending
       
   178                 // order always (benefit of using tree-based set
       
   179                 // implementation), thus the handles are always locked
       
   180                 // in the ascending order
       
   181                 Cpt::MultiSyncRegion
       
   182                     msr(size);
       
   183 
       
   184                 bool
       
   185                     recommitNeeded = (currentHits == NULL 
       
   186                                       || *version != version_),
       
   187                     hasCheckedOriginators = false;
       
   188 
       
   189                 if (!recommitNeeded)
       
   190                     {
       
   191                         // so far we don't know of any change - but we
       
   192                         // have to check it and actually lock the
       
   193                         // originators
       
   194 
       
   195                         hasCheckedOriginators = true;
       
   196 
       
   197                         OriginatorMap::iterator
       
   198                             i = originators_.begin(),
       
   199                             end = originators_.end();
       
   200 
       
   201                         for (; i != end; ++i)
       
   202                             {
       
   203                                 IReaderOwner
       
   204                                     & ro(i->second.readerOwner());
       
   205                         
       
   206                                 // Gets the version number of the
       
   207                                 // index AND locks it - once we are
       
   208                                 // told that the version number has
       
   209                                 // not changed, it MUST NOT change
       
   210                                 // until we release the originator
       
   211                                 Version
       
   212                                     idxDbVersion = ro.getVersion(msr);
       
   213                         
       
   214                                 if (idxDbVersion != i->second.version())
       
   215                                     {
       
   216                                         i->second.setVersion(idxDbVersion);
       
   217                                         recommitNeeded = true;
       
   218                                     }
       
   219                             }
       
   220                     }
       
   221 
       
   222                 if (recommitNeeded)
       
   223                     {
       
   224                         // we already have the originator idx-es
       
   225                         // locked if we have checked the versions, so
       
   226                         // msr instance is not given anymore (no
       
   227                         // reason to lock the very same instances
       
   228                         // again)
       
   229                         rv.reset(doSearch(query,
       
   230                                           version,
       
   231                                           hasCheckedOriginators ? NULL : &msr));
       
   232                     }
       
   233 
       
   234                 documentConsumer.fetchDocuments(rv.get());
       
   235 
       
   236                 // msr dies and unlocks all idxDb here
       
   237             }
       
   238         else
       
   239             {
       
   240                 // there are no handles this instance refers to (they
       
   241                 // have been undefined apparently): old hits are dead,
       
   242                 // there is no new one
       
   243                 rv.reset(NULL);
       
   244             }
       
   245 
       
   246         logDbgMsg("MultiIdxDb::recommitIfNecessary END (elapsed: %ld ms)",
       
   247                   stopperWatch.elapsedMSecs());
       
   248 
       
   249         return rv.release();
       
   250 
       
   251         // sr dies and unlocks this.mutex_
       
   252     }
       
   253 
       
   254 
       
   255     IHits * MultiIdxDb::getTerms(const wchar_t * field, 
       
   256                                  const wchar_t * wildcard, 
       
   257                                  const wchar_t * appclassPrefix, 
       
   258                                  int             maxHits)
       
   259     {
       
   260         logDbgMsg("MultiIdxDb::getTerms BEGIN");
       
   261         Cpt::StopperWatch
       
   262             stopperWatch;
       
   263 
       
   264         using namespace lucene::index;
       
   265 
       
   266         Cpt::SyncRegion
       
   267             sr(mutex_);
       
   268 
       
   269         cleanup();
       
   270 
       
   271         IHits
       
   272             * rv = NULL;
       
   273 
       
   274         const size_t
       
   275             size = originators_.size();
       
   276 
       
   277         if (size > 0)
       
   278             {
       
   279         
       
   280                 // all the IdxDb instances must be locked, in order to
       
   281                 // be able to safely search
       
   282                 //
       
   283                 // note that the handles are iterated in an ascending
       
   284                 // order always (benefit of using tree-based set
       
   285                 // implementation), thus the handles are always locked
       
   286                 // in the ascending order
       
   287                 Cpt::MultiSyncRegion
       
   288                     msr(size);
       
   289 
       
   290                 std::vector<IndexReader*>
       
   291                     readers;
       
   292                 readers.reserve(size + 1);
       
   293 
       
   294                 // this operation (getTerms()) does not need the
       
   295                 // version number foolery to decide if hits are valid,
       
   296                 // but we get and maintain that sort of information
       
   297                 // because
       
   298                 //
       
   299                 // (a) getting the version along with the reader and
       
   300                 // maintaining it is near-zero cost operations
       
   301                 //
       
   302                 // (b) it helps a lot during fetchRecommitting()
       
   303                 // operations if we already know that we need to
       
   304                 // recommit
       
   305                 bool
       
   306                     hasAnyOriginatorChanged = false;
       
   307 
       
   308                 OriginatorMap::iterator
       
   309                     i = originators_.begin(),
       
   310                     end = originators_.end();
       
   311                 for (; i != end; ++i)
       
   312                     {
       
   313                         IReaderOwner
       
   314                             & ro(i->second.readerOwner());
       
   315 
       
   316                         Version
       
   317                             version;
       
   318 
       
   319                         // idx is locked now (lock information is in
       
   320                         // msr)
       
   321 
       
   322                         readers.push_back(ro.getReader(&msr,
       
   323                                                        &version));
       
   324 
       
   325                         if (version != i->second.version())
       
   326                             {
       
   327                                 i->second.setVersion(version);
       
   328                                 hasAnyOriginatorChanged = true;
       
   329                             }
       
   330                     }
       
   331 
       
   332                 if (hasAnyOriginatorChanged)
       
   333                     {
       
   334                         version_ = IdxDbMgr::instance()->getNextVersion();
       
   335                     }
       
   336 
       
   337                 readers.push_back(NULL);
       
   338         
       
   339                 MultiReader
       
   340                     mr(&readers[0],
       
   341                        false); // mr does not own the reader array and
       
   342                                // ptr-s
       
   343         
       
   344                 /* TEMP
       
   345                    logDbgMsg("MultiIdxDb::getTerms MIDDLE, haoc(%d), size(%d)",
       
   346                    hasAnyOriginatorChanged,
       
   347                    size);
       
   348                 */
       
   349 
       
   350                 rv = GetTerms(&mr,
       
   351                               field,
       
   352                               wildcard,
       
   353                               appclassPrefix,
       
   354                               maxHits);
       
   355 
       
   356                 // msr dies and unlocks all idxDb here
       
   357             }
       
   358         else
       
   359             {
       
   360                 // no handles to depend upon - we return an empty list
       
   361                 rv = new HitDocumentList();
       
   362             }
       
   363         
       
   364         // readers in reader are not owned by us - nothing to clean
       
   365         // reader itself is automaticly cleaned
       
   366         // mir is automatically cleaned
       
   367 
       
   368         logDbgMsg("MultiIdxDb::getTerms END (elapsed: %ld ms)",
       
   369                   stopperWatch.elapsedMSecs());
       
   370 
       
   371         return rv;
       
   372 
       
   373         // sr dies and unlocks this.mutex_
       
   374     }
       
   375 
       
   376 
       
   377     SchemaId MultiIdxDb::addSchema(const cpix_FieldDesc * fieldDescs,
       
   378                                    size_t                 count) 
       
   379     {
       
   380         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   381     }
       
   382     
       
   383     void MultiIdxDb::add(Document * doc,
       
   384                          lucene::analysis::Analyzer * analyzer)
       
   385     {
       
   386         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   387     }
       
   388 
       
   389     void MultiIdxDb::add2(SchemaId                      schemaId,
       
   390                           const wchar_t               * docUid,
       
   391                           const char                  * appClass,
       
   392                           const wchar_t               * excerpt,
       
   393                           const wchar_t               * mimeType,
       
   394                           const wchar_t              ** fieldValues,
       
   395                           lucene::analysis::Analyzer  * analyzer)
       
   396     {
       
   397         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   398     }
       
   399 
       
   400     int32_t MultiIdxDb::deleteDocuments(const wchar_t  * docUid) 
       
   401     {
       
   402         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   403     }
       
   404 
       
   405     int32_t MultiIdxDb::deleteDocuments2(lucene::index::Term * term)
       
   406     {
       
   407         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   408     }
       
   409 
       
   410     void MultiIdxDb::update(Document * doc,
       
   411                             lucene::analysis::Analyzer * analyzer)
       
   412     {
       
   413         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   414     }
       
   415     
       
   416     void MultiIdxDb::update2(SchemaId                     schemaId,
       
   417                              const wchar_t              * docUid,
       
   418                              const char                 * appClass,
       
   419                              const wchar_t              * excerpt,
       
   420                              const wchar_t              * mimeType,
       
   421                              const wchar_t             ** fieldValues,
       
   422                              lucene::analysis::Analyzer * analyzer)
       
   423     {
       
   424         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   425     }
       
   426     
       
   427 
       
   428     void MultiIdxDb::setMaxInsertBufSize(size_t /* value */)
       
   429     {
       
   430         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   431     }
       
   432 
       
   433 
       
   434     void MultiIdxDb::flush()
       
   435     {
       
   436         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   437     }
       
   438 
       
   439     
       
   440     void MultiIdxDb::close()
       
   441     {
       
   442         Cpt::SyncRegion
       
   443             sr(mutex_);
       
   444 
       
   445         releaseHndls();
       
   446         cleanup();
       
   447     }
       
   448 
       
   449     void MultiIdxDb::brutalClose() throw ()
       
   450     {
       
   451         try
       
   452             {
       
   453                 close();
       
   454             }
       
   455         catch (...)
       
   456             {
       
   457                 logMsg(CPIX_LL_ERROR,
       
   458                        "Could not cleanup multiidxdb instance");
       
   459             }
       
   460     }
       
   461 
       
   462 
       
   463     void MultiIdxDb::doHousekeeping()
       
   464     {
       
   465         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   466     }
       
   467 
       
   468 
       
   469     void MultiIdxDb::dbgDumpState()
       
   470     {
       
   471         logTestMsg(CPIX_LL_TRACE,
       
   472                    "    DUMPING MultiIdxDb instance: BEGIN",
       
   473                    reinterpret_cast<long>(this));
       
   474         
       
   475         logTestMsg(CPIX_LL_TRACE,
       
   476                    "    o domainSelector_: %s",
       
   477                    domainSelector_.c_str());
       
   478 
       
   479         Cpt::SyncRegion
       
   480             sr(mutex_);
       
   481 
       
   482         OriginatorMap::iterator
       
   483             i = originators_.begin(),
       
   484             end = originators_.end();
       
   485 
       
   486         for (; i != end; ++i)
       
   487             {
       
   488                 logTestMsg(CPIX_LL_TRACE,
       
   489                            "    o idxDbHndl %d",
       
   490                            i->first);
       
   491             }
       
   492 
       
   493         logTestMsg(CPIX_LL_TRACE,
       
   494                    "    DUMPING MultiIdxDb instance: END.",
       
   495                    reinterpret_cast<long>(this));
       
   496         
       
   497     }
       
   498 
       
   499     
       
   500     MultiIdxDb::MultiIdxDb(std::set<IdxDbHndl> & idxDbHndls,
       
   501                            const char          * domainSelector)
       
   502         : refCount_(1),
       
   503           domainSelector_(domainSelector),
       
   504           searcher_(NULL),
       
   505           mutex_(true), // recursive
       
   506           version_(IdxDbMgr::instance()->getNextVersion())
       
   507     {
       
   508         std::set<IdxDbHndl>::const_iterator
       
   509             i = idxDbHndls.begin(),
       
   510             end = idxDbHndls.end();
       
   511 
       
   512         for (; i != end; ++i)
       
   513             {
       
   514                 originators_[*i] = OriginatorInfo(IIdxDb::getPtr(*i));
       
   515             }
       
   516     }
       
   517 
       
   518 
       
   519     MultiIdxDb::~MultiIdxDb()
       
   520     {
       
   521         brutalClose();
       
   522     }
       
   523 
       
   524 
       
   525     void MultiIdxDb::recreateIdx()
       
   526     {
       
   527         THROW_CPIXEXC(PL_ERROR "should not have been called");
       
   528     }
       
   529 
       
   530 
       
   531     bool MultiIdxDb::removeHndl(IdxDbHndl droppedHndl)
       
   532     {
       
   533         bool
       
   534             rv = false;
       
   535 
       
   536         Cpt::SyncRegion
       
   537             sr(mutex_);
       
   538 
       
   539         version_ = IdxDbMgr::instance()->getNextVersion();
       
   540         
       
   541         rv = originators_.find(droppedHndl) != originators_.end();
       
   542 
       
   543         originators_.erase(droppedHndl);
       
   544 
       
   545         return rv;
       
   546     }
       
   547     
       
   548     
       
   549     void MultiIdxDb::suggestHndl(IdxDbHndl    newHndl,
       
   550                                  const char * baseAppClass)
       
   551     {
       
   552         Cpt::SyncRegion
       
   553             sr(mutex_);
       
   554 
       
   555         version_ = IdxDbMgr::instance()->getNextVersion();
       
   556 
       
   557         if (IdxDbMgr::match(domainSelector_.c_str(), baseAppClass))
       
   558             {
       
   559                 IdxDbMgr
       
   560                     * idxDbMgr = IdxDbMgr::instance();
       
   561 
       
   562                 // this private method of IdxDbMgr is not thread-safe,
       
   563                 // but this function is called only from a function
       
   564                 // (defineVolume) of IdxDbMgr, which already does that
       
   565                 idxDbMgr->incIdxDbRefCount(newHndl);
       
   566                 
       
   567                 originators_[newHndl] = OriginatorInfo(IIdxDb::getPtr(newHndl));
       
   568             }
       
   569     }
       
   570     
       
   571 
       
   572 
       
   573     void MultiIdxDb::cleanup()
       
   574     {
       
   575         std::vector<Searchable*>::iterator
       
   576             i = searchers_.begin(),
       
   577             end = searchers_.end();
       
   578 
       
   579         for (; i != end; ++i)
       
   580             {
       
   581                 delete *i;
       
   582             }
       
   583 
       
   584         searchers_.clear();
       
   585                 
       
   586         if (searcher_ != NULL)
       
   587             {
       
   588                 delete searcher_;
       
   589                 searcher_ = NULL;
       
   590             }
       
   591     }
       
   592 
       
   593 
       
   594     void MultiIdxDb::releaseHndls()
       
   595     {
       
   596         OriginatorMap::iterator
       
   597             i = originators_.begin(),
       
   598             end = originators_.end();
       
   599 
       
   600         for (; i != end; ++i)
       
   601             {
       
   602                 IIdxDb::release(i->first);
       
   603             }
       
   604 
       
   605         originators_.clear();
       
   606     }
       
   607 
       
   608 
       
   609     lucene::search::Hits *
       
   610     MultiIdxDb::doSearch(lucene::search::Query * query,
       
   611                          Version               * version,
       
   612                          Cpt::MultiSyncRegion  * msr)
       
   613     {
       
   614         cleanup();
       
   615 
       
   616         // TODO
       
   617         //
       
   618         // This current implementation builds up a multisearcher from
       
   619         // scratch for each search invocation and tears it down once
       
   620         // the search is done. Consider optimizing it.
       
   621         //
       
   622         const size_t size = originators_.size();
       
   623 
       
   624         searchers_.reserve(size + 1);
       
   625 
       
   626         bool
       
   627             hasAnyOriginatorChanged = false;
       
   628 
       
   629         OriginatorMap::iterator
       
   630             i = originators_.begin(),
       
   631             end = originators_.end();
       
   632 
       
   633         for (; i != end; ++i)
       
   634             {
       
   635                 IReaderOwner
       
   636                     & ro(i->second.readerOwner());
       
   637 
       
   638                 Version
       
   639                     idxDbVersion;
       
   640 
       
   641                 // idx is locked now (lock information is in msr)
       
   642                 searchers_.push_back(new IndexSearcher(ro.getReader(msr,
       
   643                                                                     &idxDbVersion)));
       
   644 
       
   645                 if (idxDbVersion != i->second.version())
       
   646                     {
       
   647                         i->second.setVersion(idxDbVersion);
       
   648                         hasAnyOriginatorChanged = true;
       
   649                     }
       
   650             }
       
   651 
       
   652         if (hasAnyOriginatorChanged)
       
   653             {
       
   654                 version_ = IdxDbMgr::instance()->getNextVersion();
       
   655             }
       
   656 
       
   657         *version = version_;
       
   658 
       
   659         searchers_.push_back(NULL);
       
   660         
       
   661         searcher_ = new MultiSearcher(&searchers_[0]);
       
   662 
       
   663         lucene::search::Hits
       
   664             * rv = searcher_->search(query); 
       
   665 
       
   666         return rv;
       
   667     }
       
   668 
       
   669 
       
   670 }
       
   671