searchengine/cpix/cpix/inc/public/cpixanalyzer.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Wed, 18 Aug 2010 10:53:26 +0300
changeset 15 cf5c74390b98
parent 0 671dee74050a
permissions -rw-r--r--
Revision: 201031 Kit: 201033

/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: CPix API for analyzers.
*
*/

#ifndef CPIX_CPIXANALYZER_H
#define CPIX_CPIXANALYZER_H


#include "cpixinit.h"
#include "cpixerror.h"


/**
 * CPix API for analyzers.
 *
 * See cpixsearch.h for notes and info.
 */


#ifdef __cplusplus
extern "C" {
#endif


    /**
     * Language descriptor: identifies a language by its code.
     *
     * code_ points to the text of the language code. See ISO 639-1:
     * http://www.loc.gov/standards/iso639-2/php/English_list.php
     *
     * The typedef spells out the 'struct' keyword so that this header
     * compiles as C as well as C++ (in C a struct tag on its own is
     * not a type name).
     */
    struct cpix_LangCode_
    {
        const char * code_;
    };
    typedef struct cpix_LangCode_ cpix_LangCode;

    /*
     * Predefined language descriptors. They are only declared here
     * (extern); their definitions are not part of this header.
     * NOTE(review): the name suffixes look like ISO 639-1 codes
     * (en, fi, hu, ru) - confirm against the definitions.
     */
    extern const cpix_LangCode cpix_LANG_EN;
    extern const cpix_LangCode cpix_LANG_FI;
    extern const cpix_LangCode cpix_LANG_HU;
    extern const cpix_LangCode cpix_LANG_RU;

    
    /**
     * Gets the language descriptors for supported languages.
     *
     * Takes no arguments. The parameter list is written as (void) so
     * that this is a real prototype in C too; an empty list "()" would
     * leave the arguments unspecified in C (it means the same as
     * (void) only in C++).
     *
     * @return the NULL-terminated array of (pointers-to) supported
     * language descriptors. DO NOT MODIFY THESE.
     */
    const cpix_LangCode ** cpix_supportedLanguages(void);

    /************************************************************************
     * Interface cpix_Analyzer
     *
     * Handle type for analyzers. Instances are obtained from the
     * factory functions declared in this header and released with
     * cpix_Analyzer_destroy.
     *
     * The typedef spells out the 'struct' keyword so that this header
     * compiles as C as well as C++ (in C a struct tag on its own is
     * not a type name).
     */
    struct cpix_Analyzer_
    {
        /* Pointer to the native (CLucene) implementation. */
        void       * ptr_;

        /*
         * Last error, if any, that resulted from executing the last
         * operation on this cpix_XXX object.
         * Use macros cpix_Failed, cpix_Succeeded and cpix_ClearError.
         * Do not attempt releasing it.
         */
        cpix_Error * err_;
    };
    typedef struct cpix_Analyzer_ cpix_Analyzer;
    

    /**
     * Factory method to create a simple analyzer.
     *
     * @param result result object supplied by the caller.
     * NOTE(review): presumably failure details are reported through
     * it, which would conflict with "no more info is given" below -
     * confirm against the implementation.
     *
     * @return a new analyzer object, ownership is transferred to the
     * caller. NULL if creation failed - no more info is given why.
     */
    cpix_Analyzer * cpix_CreateSimpleAnalyzer(cpix_Result * result);
    
    /**
     * Factory method to create an analyzer that uses the snowball
     * library (libstemmer) to perform stemming.
     *
     * @param result any details on failure are communicated through
     * this
     *
     * @param langCode the language code specifying the stemmer to use
     *
     * @return the new analyzer object, ownership is transferred to
     * the caller. NULL if creation failed - no more info is given
     * why.
     * NOTE(review): "no more info is given" reads as contradicting
     * the description of 'result' above - confirm which one holds.
     */
    cpix_Analyzer * cpix_CreateSnowballAnalyzer(cpix_Result   * result,
                                                cpix_LangCode   langCode);
    
    /**
     * Factory method for creating a complex analyzer described by a
     * definition string.
     *
     * The basic syntax consists of a sequence that starts with a
     * tokenizer definition and is followed by a sequence of filter
     * definitions:
     *
     * tokenizer>filter>filter>filter
     *
     * In the syntax, tokenizers and filters are separated by the pipe
     * symbol '>', which symbolizes piping the token stream output of
     * the previous item into the token stream input of the following
     * item.
     *
     * Filters may take constructor arguments, as the length filter,
     * stop filter and stemmer do here:
     *
     * stdtokens>length(2, 8)>stop(en)>stop('foo', 'bar')>stem(en)
     *
     * There is also a special syntax for switching the filter chain
     * depending on the analyzed field:
     *
     * switch {
     *   case 'field1':            tokenizer1>filter1; 
     *   case 'field2', 'field3':  tokenizer2>filter2;
     *   default:                  analyzer;
     * }
     *
     * See the documentation for the exact list of analyzers,
     * tokenizers and filters that are supported, and for their
     * parameters.
     *
     * @param result result object supplied by the caller.
     * NOTE(review): presumably failure details are reported through
     * it, as for cpix_CreateSnowballAnalyzer - confirm.
     *
     * @param definition the analyzer definition string in the syntax
     * described above
     *
     * @return the created analyzer.
     * NOTE(review): ownership transfer to the caller and a NULL return
     * on failure are presumed from the other factory functions in this
     * header - confirm.
     */
    cpix_Analyzer * cpix_Analyzer_create(cpix_Result   * result,
                                         const wchar_t * definition);


    /**
     * Destroys this analyzer instance. Never fails.
     *
     * @param thisAnalyzer the analyzer instance to destroy.
     * NOTE(review): whether NULL is accepted is not stated here -
     * confirm against the implementation.
     */
    void cpix_Analyzer_destroy(cpix_Analyzer * thisAnalyzer);
 
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */



#endif /* CPIX_CPIXANALYZER_H */