searchengine/cpix/cpix/src/document.cpp
changeset 0 671dee74050a
child 7 a5fbfefd615f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/document.cpp	Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,343 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+#include "document.h"
+#include "cpixexc.h"
+#include "cpixdoc.h"
+
+#include "idxdb.h"
+#include "idxdbmgr.h"
+
+#include "CLucene.h"
+
+#include "indevicecfg.h"
+#include "cpixstrtools.h"
+
+#include "common/cloners.h"
+
+namespace Cpix
+{
+    Field::Field(const wchar_t * name,
+                 const wchar_t * value,
+                 int             configs) 
+        :	own_(true), field_(0) {
+        resolveConfig(configs); 
+        field_ = _CLNEW lucene::document::Field(name, value, configs); 		 
+    }
+	
+
+    Field::Field(const wchar_t * name,
+                 lucene::util::Reader* stream,
+                 int             configs) 
+	:  own_(true), field_(0) {
+        resolveConfig(configs); 
+        field_ = _CLNEW lucene::document::Field(name, stream, configs); 		 
+    }
+
+
+    void Field::resolveConfig(int& configs)
+    {
+        if (configs & cpix_AGGREGATE_NO & cpix_AGGREGATE_YES) {
+            THROW_CPIXEXC("Field is marked to be both aggregated and not aggregated" );
+        } else if (configs & cpix_AGGREGATE_NO) {
+            aggregate_ = false; 
+            configs &= ~cpix_AGGREGATE_NO; // erase it from config
+        } else if (configs & cpix_AGGREGATE_YES) {
+            aggregate_ = true; 
+            configs &= ~cpix_AGGREGATE_YES; // erase it from config
+        } else {
+            // Aggregate indexed fields by default. 
+            aggregate_ = !(configs & cpix_INDEX_NO);
+        }
+    }
+				    
+
+    Field::Field(lucene::document::Field* field,
+                 bool aggregate,
+                 bool own)	
+	: own_(own),
+      field_( field ), 
+	  aggregate_( aggregate ) 
+	   {
+    }
+	
+    Field::~Field() {
+        if (own_) {
+            _CLDELETE( field_ );
+        }
+    }
+	
+    const wchar_t* Field::name() {
+        return field_->name(); 
+    }
+
+    lucene::document::Field* Field::release() {
+        if (own_) {
+			own_ = false; 
+            return field_; 
+        } else {
+            THROW_CPIXEXC("Native CLucene field, which is not owned by CPix::Field, cannot be released");
+        }
+    }
+
+    lucene::document::Field& Field::native() {
+        return *field_;; 
+    }
+
+    int Field::isBinary() const {
+        return field_->isBinary(); 
+    }
+	
+    int Field::isIndexed() const {
+        return field_->isIndexed(); 
+    }
+
+    bool Field::isAggregated() const {
+        return aggregate_;
+    }
+	
+    float_t Field::boost() const {
+        return field_->getBoost(); 
+    }
+
+    void Field::setBoost(float_t boost) {
+        field_->setBoost(boost); 
+    }
+
+    const wchar_t* Field::stringValue() const {
+        return field_->stringValue();
+    }
+	
+    DocumentFieldIterator::DocumentFieldIterator(lucene::document::DocumentFieldEnumeration* fields, Document &document) 
+	: 	fields_(fields), 
+                document_(document){
+    }
+	
+    DocumentFieldIterator::~DocumentFieldIterator() {
+        _CLDELETE( fields_ );
+    }
+		
+    Field* DocumentFieldIterator::operator++(int) {
+        return document_.wrapField( fields_->nextElement() );
+    }
+	
+    DocumentFieldIterator::operator bool() {
+        return fields_->hasMoreElements();
+    }
+
+	
+    Document::Document(const wchar_t * docUid,
+                       const char    * appClass,
+                       const wchar_t * excerpt,
+                       const wchar_t * mimeType) 
+	: document_( 0 ),
+	  fieldWrappers_(), 
+      own_(true)
+    {
+        if (docUid == NULL || *docUid == 0) {
+            THROW_CPIXEXC("Document UID must be defined");
+        }
+		
+        document_ = _CLNEW lucene::document::Document(); 
+
+        setDocUid(docUid);
+		
+        if (appClass != NULL && *appClass != 0) {
+            setAppClass(appClass); 
+        }
+		
+        if (excerpt != NULL && *excerpt != 0) {
+            setExcerpt(excerpt); 
+        }
+
+        if (mimeType != NULL && *mimeType != 0) {
+            setMimeType(mimeType); 
+        }
+    }
+
+    Document::Document(lucene::document::Document * document,
+                       bool                         ownedByClucene) 
+	: document_( document ), 
+	  fieldWrappers_(), 
+	  own_(ownedByClucene) {
+        if (own_)
+            {
+                std::auto_ptr<lucene::document::Document>
+                    clone(Clone(document));
+                document_ = clone.get();
+                clone.release();
+            }
+    }
+
+    Document::~Document() 
+    {
+		// Release document, if owned (e.g. not owned by hit list) 
+        if (own_)
+        {
+			_CLDELETE(document_);
+        }
+        
+        
+        // Clean up CLucene field wrappers
+        FieldWrapperMap::iterator
+            i = fieldWrappers_.begin(),
+            end = fieldWrappers_.end();
+
+        for (; i != end; ++i)
+        {
+            delete i->second;
+        }
+    }
+	
+    bool Document::isSystemField(const wchar_t* name) {
+        return wcscmp(name, LCPIX_DOCUID_FIELD) == 0
+            || wcscmp(name, LCPIX_APPCLASS_FIELD) == 0
+            || wcscmp(name, LCPIX_UNTOKENIZED_APPCLASS_FIELD) == 0
+            || wcscmp(name, LCPIX_EXCERPT_FIELD) == 0
+            || wcscmp(name, LCPIX_MIMETYPE_FIELD) == 0;
+    }
+	
+    void Document::setDocUid(const wchar_t* docUid) {
+        removeField(LCPIX_DOCUID_FIELD); 
+        document_->add(*_CLNEW lucene::document::Field( LCPIX_DOCUID_FIELD,
+                                                        docUid,
+                                                        lucene::document::Field::STORE_YES | 
+                                                        lucene::document::Field::INDEX_UNTOKENIZED));
+    }
+	
+    void Document::setAppClass(const char* appClass) {
+        removeField(LCPIX_APPCLASS_FIELD);
+        removeField(LCPIX_UNTOKENIZED_APPCLASS_FIELD); 
+        if (Cpix::IdxDbMgr::isQualBaseAppClass(appClass)) {
+            THROW_CPIXEXC("Qualification on app class field '%s'",
+                          appClass);
+        }
+	
+        size_t
+            wAppClassBufSize = strlen(appClass) + 1;
+        /* OBS
+        Cpt::auto_array<wchar_t>
+            wAppClass(new wchar_t[wAppClassBufSize]);
+        mbstowcs(wAppClass.get(),
+                 appClass,
+                 wAppClassBufSize);
+        */
+        Cpt::auto_array<wchar_t>
+            wAppClass(appClass);
+
+        document_->add(*_CLNEW lucene::document::Field(LCPIX_APPCLASS_FIELD,
+                                                       wAppClass.get(),
+                                                       lucene::document::Field::STORE_YES 
+                                                       | lucene::document::Field::INDEX_TOKENIZED));
+		
+        document_->add(*_CLNEW lucene::document::Field(LCPIX_UNTOKENIZED_APPCLASS_FIELD,
+                                                       wAppClass.get(),
+                                                       lucene::document::Field::STORE_NO
+                                                       | lucene::document::Field::INDEX_UNTOKENIZED));
+    }
+
+    void Document::setExcerpt(const wchar_t* excerpt) {
+        removeField(LCPIX_EXCERPT_FIELD);
+        document_->add(*_CLNEW lucene::document::Field(LCPIX_EXCERPT_FIELD,
+                                                       excerpt,
+                                                       lucene::document::Field::STORE_YES 
+                                                       | lucene::document::Field::INDEX_NO));
+    }
+	
+    void Document::setMimeType(const wchar_t* mimeType) {
+        removeField(LCPIX_MIMETYPE_FIELD); 
+        document_->add(*_CLNEW lucene::document::Field(LCPIX_MIMETYPE_FIELD,
+                                                       mimeType,
+                                                       lucene::document::Field::STORE_YES 
+                                                       | lucene::document::Field::INDEX_UNTOKENIZED));
+    }
+
+    lucene::document::Document& Document::native() {
+        return *document_;
+    }
+
+    void Document::removeField( const wchar_t* fieldName ) {
+        lucene::document::Field* field = document_->getField( fieldName ); 
+        if ( field )
+            {
+                if (fieldWrappers_.find(field) != fieldWrappers_.end())
+                    {
+                        delete fieldWrappers_[field];
+                        fieldWrappers_.erase(field); 
+                    }
+            }
+        
+        document_->removeField( fieldName ); 
+        if (wcscmp(fieldName, LCPIX_APPCLASS_FIELD) == 0) {
+            document_->removeField( LCPIX_UNTOKENIZED_APPCLASS_FIELD ); 
+        }
+    }
+	
+    void Document::add(Field* field) {
+        if (isSystemField(field->name())) {
+            THROW_CPIXEXC("Document::add() must not used to modify system field %s", field->name());
+        }
+        if (document_->getField(field->name())) {
+            removeField(field->name()); 
+        }
+        if (field->isAggregated()) {
+            aggregate_.insert(std::wstring(field->name()));
+        }
+        
+        // store the field as a wrapper
+        fieldWrappers_[&field->native()] = field; 
+
+        // add the native field 
+		document_->add( *field->release() );
+    }
+	
+    const wchar_t* Document::get(const wchar_t* fieldName) const {
+        return document_->get(fieldName); 
+    }
+
+    DocumentFieldIterator* Document::fields() {
+        return new DocumentFieldIterator( document_->fields(), *this ); 
+    }
+	
+    float_t Document::boost() const {
+        return document_->getBoost(); 
+    }
+	
+    void Document::setBoost(float_t boost) {
+        document_->setBoost( boost ); 
+    }
+	
+    void Document::clear() {
+        document_->clear(); 
+    }
+	
+    bool Document::isAggregated(const std::wstring& fieldName) const {
+        return aggregate_.count(fieldName);
+    }
+	
+    Field* Document::wrapField(lucene::document::Field* field) {
+		if (fieldWrappers_.find(field) == fieldWrappers_.end())
+			{
+			fieldWrappers_[field] =         		
+				new Field(field, 
+						  isAggregated(field->name()));
+			}
+
+		return fieldWrappers_[field];
+    }
+
+}