--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/inc/private/cpixhits.h Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,358 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#ifndef CPIX_CPIXHITS_H
+#define CPIX_CPIXHITS_H
+
+
+#include <stdlib.h>
+
+#include <map>
+
+#include "cpixinit.h"
+#include "cpixerror.h"
+#include "cpixdoc.h"
+#include "cpixidxdb.h"
+
+#include "CLucene.h"
+
+#include "cpixsynctools.h"
+
+#include "fwdtypes.h"
+#include "common/refcountedbase.h"
+#include "documentconsumer.h"
+
+
+namespace Cpix
+{
+ class Document;
+ class Document;
+
+ /**
+ * Hits represents a ranked list of documents, matching some query
+ * or condition. This object is used for both representing
+ * documents matching a search query but also for representing
+ * terms returned by suggest request. Note that the hits object
+ * may become invalid (and need recommit) if their originating idx
+ * db has been modified.
+ */
+ class IHits : public RefCountedBase
+ {
+ public:
+
+ /**
+ * Destructor
+ */
+ virtual ~IHits();
+
+ // Note: There is no const version of getDocument,
+ // because getting document may change the internal
+ // state of the object (references are converted into
+ // real objects)
+ /**
+ * Provides access to the document at given
+ * index. Note that the documents are ranked with
+ * index 0 containing best match and index at length-1
+ * having the worst match.
+ */
+ virtual Document * getDocument(int32_t index) = 0;
+
+ /**
+ * Returns the amount of hits
+ */
+ virtual int32_t length() = 0;
+
+ };
+
+
+
+ /**
+ * There are two "real" implementations to interface IHits, but
+ * they do share a bit of common functionality, hence this base
+ * class.
+ *
+ * The common functionality is this: neither of the IHits real
+ * implementations will own the lucene::document::Document
+ * instance they keep returning wrapped by Cpix::Document wrapper
+ * instances. But, someone has to own the wrappers, and it has to
+ * be the hits instance. So there is a common code for managing
+ * the lifetime of Cpix::Document wrappers in both
+ * implementations.
+ */
+ class HitsBase : public IHits
+ {
+ private:
+ //
+ // private members
+ //
+ bool docsOwnedByClucene_;
+
+
+ protected:
+ //
+ // Someone has to own the Cpix::Document wrapper instances,
+ // and this someone can be only this instance
+ //
+ typedef std::map<int32_t, Document*> WrapperMap;
+ WrapperMap wrappers_;
+
+ void destroyWrappers();
+
+
+ /**
+ * Implementors just have to make sure that the correct lucene
+ * document is found and wrapped with a call to wrapDocument.
+ *
+ * They are allowed to fetch more and wrap them.
+ *
+ * If the call succeeds, the wrapped document MUST be in the
+ * wrappers_ collection. Otherwise, it MUST throw.
+ */
+ virtual void getDocument_(int32_t index) = 0;
+
+
+ public:
+ /**
+ * Wraps and stores the wrapped document at a given index.
+ */
+ void wrapDocument(int32_t index,
+ lucene::document::Document * doc);
+
+
+ Document * getDocument(int32_t index);
+
+
+ /**
+ * The documents this hitsbase will keep wrapping can be
+ * (a) owned by clucene
+ * (b) owned by CPix
+ */
+ HitsBase(bool docsOwnedByClucene);
+ };
+
+
+
+ /**
+ * DocumentConsumer for LuceneHits class:
+ * ClhDocumentConsumer. This class helps LuceneHits instance to
+ * pump pages of documents in one go (because locking idxdb /
+ * multiidxdb instances can be very timeconsuming and recommitting
+ * a search is just plain horrible).
+ */
+ class ClhDocumentConsumer : public DocumentConsumer
+ {
+ public:
+ enum
+ {
+ UNUSED = -1
+ };
+
+ private:
+ //
+ // private
+ //
+ const size_t pageSize_;
+
+ int32_t beginIndex_;
+ int32_t endIndex_;
+
+ LuceneHits & clHits_;
+
+ std::vector<int> failedDocErrors_;
+
+ public:
+ //
+ // public operations
+ //
+
+
+ //
+ // from base class DocumentConsumer
+ //
+ virtual int32_t beginIndex() const;
+ virtual int32_t endIndex() const;
+ virtual void useDocument(int32_t index,
+ lucene::document::Document * document);
+ virtual void failedDocument(int32_t index,
+ int clErrorNumber);
+
+
+ //
+ // own operations
+ //
+
+ /**
+ * Constructor
+ *
+ * @param clHits the LuceneHits instance that this object
+ * will pump documents to.
+ */
+ ClhDocumentConsumer(LuceneHits & clHits);
+
+
+ /**
+ * The owner LuceneHits is forced to load hit doc at index
+ * 'index'. This instance needs to initialize the page
+ * boundaries and other internal variables accordingly. Sanity
+ * of index is also checked here (should not be ge then length
+ * of hits).
+ *
+ * @param index the index to load, plus a number of other hit
+ * docs around it in the same page (defined by PAGE_SIZE).
+ */
+ void setPageBoundaryForIndex(int32_t index);
+
+
+ /**
+ * When a document is not found wrapped by HitsBase, it will
+ * try loading it. However, since this instance loads multiple
+ * docs at the same time (in pages), it can happen that a
+ * document load has been attempted, but failed. Such failure
+ * is reported by this call, if there was any.
+ */
+ void throwIfFailedDocIndex(int32_t index);
+ };
+
+
+
+ /**
+ * Repsesents hits, that is a ranked list of documents
+ * matching a search query. The implementation encapsulates
+ * CLucene Hits object, and the object merely provides an
+ * interface to those results.
+ *
+ * Note that CLucene Hits object contains references to the
+ * underlying database in the filesystem. If the underlying
+ * database changes, the references contained by this Hits
+ * object becames invalid thus invalidating the object.
+ */
+ class LuceneHits : public HitsBase
+ {
+ private:
+ //
+ // private members
+ //
+ /** pointer to native (CLucene) impl, owned by the idxDb_
+ */
+ lucene::search::Hits * hits_;
+
+ int32_t length_;
+
+ lucene::search::Query * query_;
+
+ //
+ // In order to be able to sensibly invalidate actions on
+ // objects that may not exist or are in an insensible state,
+ // we track the Cpix::IdxDb instance these hits were yielded
+ // by and whether the hits instance pointer to by ptr_ is
+ // still valid.
+ //
+ IdxDbHndl idxDb_;
+
+ Version idxDbVersion_;
+
+ // the class that will pump batches of documents from hits
+ friend class ClhDocumentConsumer;
+
+ // the document consumer instance through which batches of hit
+ // docs will be pumped to this hits instance to be stored in
+ // wrapped form.
+ ClhDocumentConsumer * docConsumer_;
+
+ public:
+ /**
+ * Constructor.
+ */
+ LuceneHits(lucene::search::Hits * hits,
+ lucene::search::Query * query,
+ IdxDbHndl idxDb,
+ Version idxDbVersion);
+
+ /**
+ * ~LuceneHits
+ */
+ virtual ~LuceneHits();
+
+ public: // from Hits
+
+ virtual int32_t length();
+
+ protected:
+
+ virtual void getDocument_(int32_t index);
+ private:
+
+ /**
+ * Releases used resources. Called on invalidation and destruction
+ */
+ void release();
+ };
+
+ /**
+ * Represents hits that is a ranked list of documents. This
+ * implementation owns the document list and it does contain
+ * external references thus it cannot become invalid.
+ */
+ class HitDocumentList : public HitsBase
+ {
+ public:
+
+ /**
+ * Constructs an empty hit document
+ */
+ HitDocumentList();
+
+ /**
+ * Destroys the object
+ */
+ virtual ~HitDocumentList();
+
+ public: // from Hits
+
+ virtual int32_t length();
+
+ public: // For manipulating the object
+
+ /**
+ * Appends the object to the end of the hits list. Transfers
+ * ownership
+ */
+ void add(lucene::document::Document* document);
+
+ /**
+ * Destroys the document at given index
+ */
+ void remove(int index);
+
+ protected:
+ virtual void getDocument_(int32_t index);
+
+
+ private:
+
+ /**
+ * The list of documents. All the listed documents are owned
+ * by this object
+ */
+ std::vector<lucene::document::Document*> documents_;
+ };
+
+
+
+}
+
+#endif // CPIX_CPIXHITS_H