searchengine/cpix/cpix/inc/private/cpixhits.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Wed, 18 Aug 2010 10:53:26 +0300
changeset 15 cf5c74390b98
parent 0 671dee74050a
permissions -rw-r--r--
Revision: 201031 Kit: 201033

/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: 
*
*/

#ifndef CPIX_CPIXHITS_H
#define CPIX_CPIXHITS_H


#include <stdlib.h>

#include <map>

#include "cpixinit.h"
#include "cpixerror.h"
#include "cpixdoc.h"
#include "cpixidxdb.h"

#include "CLucene.h"

#include "cpixsynctools.h"

#include "fwdtypes.h"
#include "common/refcountedbase.h"
#include "documentconsumer.h"


namespace Cpix
{
    class Document;
    class Document;

    /**
     * Hits represents a ranked list of documents, matching some query
     * or condition. This object is used for both representing
     * documents matching a search query but also for representing
     * terms returned by suggest request. Note that the hits object
     * may become invalid (and need recommit) if their originating idx
     * db has been modified.
     */
    class IHits : public RefCountedBase
    {
    public:
		
        /**
         *  Destructor 
         */
        virtual ~IHits(); 
		
        // Note: There is no const version of getDocument,
        // because getting document may change the internal
        // state of the object (references are converted into
        // real objects)
        /**
         * Provides access to the document at given
         * index. Note that the documents are ranked with
         * index 0 containing best match and index at length-1
         * having the worst match.
         */
        virtual Document * getDocument(int32_t index) = 0;
        
        virtual int resetDocumentCache(int32_t index, int32_t count) = 0;
	
        /**
         * Returns the amount of hits
         */
        virtual int32_t length() = 0;

    };
    


    /**
     * There are two "real" implementations to interface IHits, but
     * they do share a bit of common functionality, hence this base
     * class.
     *
     * The common functionality is this: neither of the IHits real
     * implementations will own the lucene::document::Document
     * instance they keep returning wrapped by Cpix::Document wrapper
     * instances. But, someone has to own the wrappers, and it has to
     * be the hits instance. So there is a common code for managing
     * the lifetime of Cpix::Document wrappers in both
     * implementations.
     */
    class HitsBase : public IHits
    {
    private:
        //
        // private members
        //
        bool       docsOwnedByClucene_;


    protected:
        //
        // Someone has to own the Cpix::Document wrapper instances,
        // and this someone can be only this instance
        //
        typedef std::map<int32_t, Document*> WrapperMap;
        WrapperMap                           wrappers_;

        void destroyWrappers(int32_t index);

        
        /**
         * Implementors just have to make sure that the correct lucene
         * document is found and wrapped with a call to wrapDocument.
         *
         * They are allowed to fetch more and wrap them.
         *
         * If the call succeeds, the wrapped document MUST be in the
         * wrappers_ collection. Otherwise, it MUST throw.
         */
        virtual void getDocument_(int32_t index) = 0;


    public:
        /**
         * Wraps and stores the wrapped document at a given index.
         */
        void wrapDocument(int32_t                      index,
                          lucene::document::Document * doc);


        Document * getDocument(int32_t index);
        
        int resetDocumentCache(int32_t index, int32_t count);


        /**
         * The documents this hitsbase will keep wrapping can be
         * (a) owned by clucene
         * (b) owned by CPix
         */
        HitsBase(bool docsOwnedByClucene);
    };



    /**
     * DocumentConsumer for LuceneHits class:
     * ClhDocumentConsumer. This class helps LuceneHits instance to
     * pump pages of documents in one go (because locking idxdb /
     * multiidxdb instances can be very timeconsuming and recommitting
     * a search is just plain horrible).
     */
    class ClhDocumentConsumer : public DocumentConsumer
    {
    public:
        enum 
            { 
                UNUSED    = -1
            };

    private:
        //
        // private
        //
        const size_t       pageSize_;

        int32_t            beginIndex_;
        int32_t            endIndex_;

        LuceneHits      & clHits_;

        std::vector<int>   failedDocErrors_;

    public:
        //
        // public operations
        //


        //
        // from base class DocumentConsumer
        //
        virtual int32_t beginIndex() const;
        virtual int32_t endIndex() const;
        virtual void useDocument(int32_t                      index,
                                 lucene::document::Document * document);
        virtual void failedDocument(int32_t        index,
                                    int            clErrorNumber);


        //
        // own operations
        //
        
        /**
         * Constructor
         *
         * @param clHits the LuceneHits instance that this object
         * will pump documents to.
         */
        ClhDocumentConsumer(LuceneHits & clHits);


        /**
         * The owner LuceneHits is forced to load hit doc at index
         * 'index'. This instance needs to initialize the page
         * boundaries and other internal variables accordingly. Sanity
         * of index is also checked here (should not be ge then length
         * of hits).
         *
         * @param index the index to load, plus a number of other hit
         * docs around it in the same page (defined by PAGE_SIZE).
         */
        void setPageBoundaryForIndex(int32_t index);

        
        /**
         * When a document is not found wrapped by HitsBase, it will
         * try loading it. However, since this instance loads multiple
         * docs at the same time (in pages), it can happen that a
         * document load has been attempted, but failed. Such failure
         * is reported by this call, if there was any.
         */
        void throwIfFailedDocIndex(int32_t index);
    };



    /**
     * Repsesents hits, that is a ranked list of documents
     * matching a search query. The implementation encapsulates
     * CLucene Hits object, and the object merely provides an
     * interface to those results.
     * 
     * Note that CLucene Hits object contains references to the
     * underlying database in the filesystem.  If the underlying
     * database changes, the references contained by this Hits
     * object becames invalid thus invalidating the object.
     */
    class LuceneHits : public HitsBase 
    {
    private:
        //
        // private members
        //
        /** pointer to native (CLucene) impl, owned by the idxDb_
    	 */
    	lucene::search::Hits         * hits_; 

        int32_t                        length_;

        lucene::search::Query        * query_;
        
        //
        // In order to be able to sensibly invalidate actions on
        // objects that may not exist or are in an insensible state,
        // we track the Cpix::IdxDb instance these hits were yielded
        // by and whether the hits instance pointer to by ptr_ is
        // still valid.
        //
        IdxDbHndl                      idxDb_;

        Version                        idxDbVersion_;

        // the class that will pump batches of documents from hits
        friend class ClhDocumentConsumer;

        // the document consumer instance through which batches of hit
        // docs will be pumped to this hits instance to be stored in
        // wrapped form.
        ClhDocumentConsumer          * docConsumer_;

    public: 
        /**
         * Constructor. 
         */
        LuceneHits(lucene::search::Hits  * hits,
                    lucene::search::Query * query,
                    IdxDbHndl               idxDb,
                    Version                 idxDbVersion); 

        /**
         * ~LuceneHits
         */
        virtual ~LuceneHits(); 
		
    public: // from Hits
		
        virtual int32_t length();

    protected: 
    	
        virtual void getDocument_(int32_t index);        
    private: 

        /**
         * Releases used resources. Called on invalidation and destruction
         */
        void release(); 
    };

    /**
     * Represents hits that is a ranked list of documents. This
     * implementation owns the document list and it does contain
     * external references thus it cannot become invalid.
     */
    class HitDocumentList : public HitsBase
    {
    public: 

        /**
         * Constructs an empty hit document
         */
        HitDocumentList(); 

        /**
         * Destroys the object
         */
        virtual ~HitDocumentList(); 
		
    public: // from Hits
		
        virtual int32_t length();

    public: // For manipulating the object
		
        /**
         * Appends the object to the end of the hits list. Transfers
         * ownership
         */
        void add(lucene::document::Document* document); 
		
        /**
         * Destroys the document at given index
         */
        void remove(int index); 

    protected:
        virtual void getDocument_(int32_t index);

		
    private: 
		
        /**
         * The list of documents. All the listed documents are owned
         * by this object
         */
        std::vector<lucene::document::Document*> documents_; 
    };



}

#endif // CPIX_CPIXHITS_H