// MCL/sf/app/speechsrv: srsf/devasr/src/devasrsrsalgorithmmanager.h@e36f3802f733


/*
* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies). 
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:  This file contains definition of the DevASR private interface.
*
*/


#ifndef DEVASRSRSALGORITHMMANAGER_H
#define DEVASRSRSALGORITHMMANAGER_H

//#define AUDIOBUFFER_TO_FILE

//  INCLUDES
#include <sounddevice.h>
#include <nsssispeechrecognitiondatadevasr.h>
#include <nssdevasrcommon.h>
#include "devasrutil.h"
#include "devasrrecognitionalgmgr.h"
#include "devasrresourcehandler.h"
#include "devasrvmalgorithmmanager.h"


#ifdef AUDIOBUFFER_TO_FILE
#include <f32file.h>
#endif

// DATA TYPES

// States that DevASR can be in.
// The numeric values are the ones the compiler assigns implicitly
// (sequential from zero); they are only spelled out here.
enum TDevASRState
    {
    EDASRIdle                = 0,
    EDASRInitTrain           = 1,
    EDASRTrain               = 2,
    EDASRSampling            = 3,
    EDASRTrainSampling       = 4,
    EDASRPlayback            = 5,
    EDASRRecognitionPlayback = 6,
    EDASRPreRecognition      = 7,
    EDASRInitRecognition     = 8,
    EDASRRecognition         = 9,
    EDASRRecognitionSampling = 10,
    EDASRCancel              = 11
    };

// States that the DevSound instance can be in.
// The numeric values are the ones the compiler assigns implicitly
// (sequential from zero); they are only spelled out here.
enum TDevSoundState
    {
    ESoundDeviceNotInitialized = 0,
    ESoundDeviceInitialized    = 1,
    ESoundDeviceRecord         = 2,
    ESoundDevicePlayback       = 3,
    ESoundDeviceStopped        = 4
    };

// FORWARD DECLARATIONS
// Observer interface; used in this header only by reference and by pointer
// (constructor / NewL / NewLC argument and the iDevASRObserver member).
class MDevASRObserver;
// Recognition algorithm manager; used in this header only as a pointer member
// (iRecognitionAlgMgr). Presumably also declared by devasrrecognitionalgmgr.h,
// which is included above - confirm.
class CRecognitionAlgMgr;

// CLASS DECLARATION

/**
*  Interface Class to be used by the DevASR thin client implementation.
*
*  Derives from CActive and implements the MDevSoundObserver,
*  MRecAlgMgrObserver and MVmAlgMgrObserver observer interfaces.
*
*  @lib NssDevASR.lib
*/
class CSRSAlgorithmManager : public CActive, 
                             public MDevSoundObserver, 
                             public MRecAlgMgrObserver,
                             public MVmAlgMgrObserver
    {
    public: // Constructors and destructor

        /**
        * Two-phased constructors.
        *
        * NOTE(review): NewLC presumably leaves the created instance on the
        * cleanup stack, as the Symbian naming convention suggests - confirm
        * in the implementation.
        *
        * @param "MDevASRObserver& aObserver" Reference to observer.
        * @return Pointer to the created instance.
        */
        static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver);
        static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver);

        /**
        * Destructor.
        */
        virtual ~CSRSAlgorithmManager();

    public: // New functions


        /**
        * Does grammar compilation.
        *
        * @param "CSICompiledGrammar& aGrammar" Grammar to be compiled.
        */
        void CompileGrammarL( CSICompiledGrammar& aGrammar );

        /**
        * Computes new grammar size with given number of variants.
        *
        * NOTE(review): the individual parameters are not documented in this
        * header; see the implementation for their exact meaning.
        */
        void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar, 
                                     const TUint32 aTargetNRuleVariants, 
                                     const TUint32 aMaxNRuleVariants, 
                                     const RArray<TUint>& aNewRuleScores, 
                                     RArray<TUint>& aNNeNRuleVariants, 
                                     TUint32& aNPrune );

        /**
        * Prunes unnecessary rule variants away from grammar.
        *
        * NOTE(review): the meaning of the TBool return value is not
        * documented in this header - confirm in the implementation.
        */
        TBool PruneGrammar( const CSIGrammar& aGrammar, 
                            const TUint32 aMinNumber, 
                            RArray<TSIRuleVariantInfo>& aPrunedRuleVariants );

        /**
        * Starts speaker independent training from text.
        */
        void StartTrainingFromTextL( CSITtpWordList& aWordList, 
                                     const RArray<TLanguage>& aDefaultLanguage, 
                                     const RArray<TUint32>& aMaxNPronunsForWord );



        /**
        * Adapts recognition models
        *
        * @param "CSIResultSet& aResultSet" Result set
        * @param "TInt aResultIndex" Index of correct result
        */
        void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex );

        /**
        * Cancels the current or started tasks.
        *
        * NOTE(review): CActive also declares a non-virtual Cancel(); this
        * declaration hides it for callers that use a CSRSAlgorithmManager
        * pointer or reference. Confirm that the implementation still
        * invokes CActive::Cancel() where an outstanding request must be
        * cancelled.
        */
        void Cancel();

        /**
        * This method is used to indicate the end of a recognition session.
        */
        void EndRecSession();

        /**
        * Retrieve the properties of the underlying speech recognition engine.
        *
        * @param "RArray<TInt>& aPropertyId" An array of identifiers being
        *         queried.
        * @param "RArray<TInt>& aPropertyValue" An array of values corresponding
        *        to the queried identifiers
        */
        void GetEnginePropertiesL( const RArray<TInt>& aPropertyId,
                                   RArray<TInt>& aPropertyValue );

        /**
        * Initializes the front-end module in the speech recognition engine.
        * The frontend module used during training/recognition functions is started
        * as a result. This method is intended to be used in conjunction with InitTrainBE().
        *
        * NOTE(review): InitTrainBE() is not declared in this header; the
        * back-end initializer declared here is InitRecognizerBE().
        *
        * @param "TRecognizerMode aFeMode" Mode of the recognizer.
        */
        void InitFrontEnd( TRecognizerMode aFeMode );

        /**
        * Initialize the recognition engine back-end. The module responsible for recognition
        * function is started as a result. This method must be used before any recognition
        * operations and intended to be used in conjunction with InitFrontEnd().
        *
        * @param "CSIResultSet& aResult" A reference to an object where the
        *        recognition result will be written
        */
        void InitRecognizerBE( CSIResultSet& aResult );

        /**
        * Load the specified recognizer parameter(s). These parameters are used to alter
        * the recognizer's default parameters. The parameters are specified as attribute-value
        * pairs.
        *
        * @param "RArray<TInt>& aParameterId" An array of parameter identifiers.
        * @param "RArray<TInt>& aParameterValue" An array of parameter values.
        */
        void LoadEnginePropertiesL( const RArray<TInt>& aParameterId,
                                    const RArray<TInt>& aParameterValue );

        /**
        * Load the specified grammar into the recognizer.
        * Overloaded for plain (CSIGrammar) and compiled (CSICompiledGrammar)
        * grammars.
        *
        * @param "aGrammar" A reference to a grammar in an internal format.
        */
        void LoadGrammarL( const CSIGrammar& aGrammar );
        void LoadGrammarL( const CSICompiledGrammar& aGrammar );

        /**
        * Unloads the specified grammar from the recognizer.
        * Overloaded for plain (CSIGrammar) and compiled (CSICompiledGrammar)
        * grammars.
        *
        * @param "aGrammar" A reference to a grammar in an internal format.
        */
        void UnloadGrammarL( const CSIGrammar& aGrammar );
        void UnloadGrammarL( const CSICompiledGrammar& aGrammar );

        /**
        * Activates a grammar
        *
        * @param "TSIGrammarID aGrammarID" A grammar identifier.
        */
        void ActivateGrammarL( TSIGrammarID aGrammarID );

        /**
        * Deactivates a grammar
        *
        * @param "TSIGrammarID aGrammarID" A grammar identifier.
        */
        void DeactivateGrammarL( TSIGrammarID aGrammarID );

        /**
        * Load the specified lexicon into the recognizer.
        *
        * @param "aLexicon" A reference to a lexicon.
        */
        void LoadLexiconL( const CSILexicon& aLexicon );

        /**
        * Load the specified models into the recognizer.
        *
        * @param "aModels" A reference to a model bank.
        */
        void LoadModelsL( const CSIModelBank& aModels );

        /**
        * Request to begin recording.
        *
        * @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time
        *        to record, expressed in microseconds.
        */
        void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration );

        /**
        * Ends recording process
        */
        void StopRecognition();

        /**
        * Use to set the priority of the sound device
        *
        * @param "TMMFPrioritySettings& aPrioritySettings" Priority settings
        *        structure.
        */
        void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings );

        /**
        * Request to start a recognition session.
        *
        * @param "TRecognizerMode aMode" Recognizer mode.
        * @return result code of request
        */
        TInt StartRecSession( TRecognizerMode aMode );

        /**
        * Call to give speech data to recognizer.
        *
        * @param "TPtrC8& aBuffer" Filled buffer (passed by non-const
        *        reference).
        * @param "TBool aEnd" Flag to tell if this is the last buffer.
        */
        void SendSpeechData( TPtrC8& aBuffer, TBool aEnd );

        /**
        * Request to unload the specified rule in the grammar from recognizer.
        *
        * @param "TSIGrammarID aGrammarID" Identifier of the grammar to operate on.
        * @param "TSIRuleID aRuleID" Identifier of the rule to remove.
        */
        void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID );

        /**
        * Retrieve the raw audio data accumulated during recording.
        *
        * @param "TDes8* aBuffer" Pointer to a buffer containing the data.
        * @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the
        *        utterance.
        */
        void GetUtteranceData( TDes8* aBuffer, 
                               TTimeIntervalMicroSeconds32& aDuration );

        /**
        * Checks if grammar is loaded or not
        *
        * @since 2.8
        * @param "TSIGrammarID aGrammarID" Grammar ID
        * @return ETrue if grammar is currently loaded, EFalse otherwise.
        */
        TBool IsGrammarLoaded( TSIGrammarID aGrammarID );

        /**
        * Checks if grammar is active or not
        *
        * @since 2.8
        * @param "TSIGrammarID aGrammarID" Grammar ID
        * @return ETrue if grammar is active, EFalse otherwise.
        */
        TBool IsGrammarActive( TSIGrammarID aGrammarID );

        /**
        * Prints DevASR state information for debugging
        */
        void DevASRState();

        /**
        * Pre-starts recording before StartRecording call.
        *
        * NOTE(review): no StartRecording() is declared in this header;
        * this presumably refers to StartRecognition() - confirm.
        *
        * @since 3.2
        */
        void PreStartSamplingL();

        // ===================================
        // DevSound Observer MIXIN begins
        // ===================================

        /**
        * Called by DevSound when a buffer is available to be read.
        *
        * @param "CMMFBuffer* aBuffer" Buffer containing data to be processed
        */
        void BufferToBeEmptied( CMMFBuffer* aBuffer );

        /**
        * Called by DevSound when a buffer is available to be written.
        *
        * @param "CMMFBuffer* aBuffer" Buffer to which data is written
        */
        void BufferToBeFilled( CMMFBuffer* aBuffer );

        /**
        * Not used by DevASR
        */
        void ConvertError( TInt aError );

        /**
        * Handles device event.
        *
        * @param "TUid aMessageType" Type of the message.
        * @param "TDesC8& aMsg" A message packed in the descriptor format.
        */
        void DeviceMessage( TUid aMessageType, const TDesC8& aMsg );

        /**
        * Called by DevSound when initialization is completed.
        *
        * @param "TInt aError" Error code, KErrNone if successful.
        */
        void InitializeComplete( TInt aError );

        /**
        * Handles record completion or cancel event from DevSound.
        *
        * @param "TInt aError" Error code, KErrNone if successful.
        */
        void RecordError( TInt aError );

        /**
        * Handles play completion or cancel event from DevSound.
        *
        * @param "TInt aError" Error code, KErrNone if successful.
        */
        void PlayError( TInt aError );

        /**
        * Handles tone play completion or cancel event from DevSound
        * Tone is not used by DevASR so this event is not expected.
        *
        * @param "TInt aError" Error code, KErrNone if successful.
        */
        void ToneFinished( TInt aError );

        /**
        * Handles audio policy events from DevSound.
        *
        * @param "TMMFEvent& aEvent" An audio policy event.
        */
        void SendEventToClient( const TMMFEvent& aEvent );

        // ===================================
        // DevSound Observer MIXIN ends
        // ===================================

        // ============================================
        // RecognitionAlgMgr Observer MIXIN begins
        // ============================================

        /**
        * Invoked by the adaptation hw device when model adaptation has
        * finished its processing.
        *
        * @since 2.8
        * @param "TInt aResultCode" Error code of operation.
        */
        void AdaptComplete( TInt aResultCode );

        /**
        * Invoked by the front-end when EOU has been detected.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void EouDetected( TInt aResultCode );

        /**
        * Invoked by the algorithm manager when the frontend initialization
        * is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void InitFEComplete( TInt aResultCode );

        /**
        * Invoked by the algorithm manager when the recognition backend
        * initialization is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void InitRecognizerBEComplete( TInt aResultCode );

        /**
        * Invoked by the algorithm manager when loading grammar
        * is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void LoadGrammarComplete( TInt aResultCode );

        /**
        * Invoked when grammar has been unloaded.
        *
        * @param "TInt aResultCode" Result of grammar unloading
        */
        void UnloadGrammarComplete( TInt aResultCode );

        /**
        * Invoked when grammar has been activated.
        *
        * @param "TInt aResultCode" Result of grammar activation
        */
        void ActivateGrammarComplete( TInt aResultCode );

        /**
        * Invoked when grammar has been deactivated.
        *
        * @param "TInt aResultCode" Result of grammar deactivation
        */
        void DeActivateGrammarComplete( TInt aResultCode );

        /**
        * Invoked by the algorithm manager when loading lexicon
        * is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void LoadLexiconComplete( TInt aResultCode );

        /**
        * Invoked by the algorithm manager when loading models
        * is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void LoadModelsComplete( TInt aResultCode );

        /**
        * Invoked when the recognition process is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void RecognitionComplete( TInt aResultCode );

        /**
        * Invoked when the training process is completed.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void TrainComplete( TInt aResultCode );

        /**
        * Invoked when rule unloading has been done.
        *
        * @param "TInt aResultCode" Result code, KErrNone if successful.
        */
        void UnloadRuleComplete( TInt aResultCode );

        /**
        * Invoked when a feature vector is available.
        *
        * @param "TDesC8& aFV" Buffer containing a feature vector.
        * @param "TInt32 aSNR" Signal-to-noise ratio.
        * @param "TInt32 aPosition" Indicates whether this is the first,
        *         subsequent or last feature vector.
        */
        void FeatureVectorDataRcvd( const TDesC8& aFV, 
                                    TInt32 aSNR, 
                                    TInt32 aPosition );

        /**
        * Invoked by the front-end when utterance data is needed.
        *
        * @since 2.8
        */
        void RequestSpeechData();


        /**
        * Resolves result.
        *
        * NOTE(review): the individual parameters are not documented in this
        * header; see the implementation for their exact meaning.
        */
        void ResolveResult( const RArray<TUint>& aNBestIDs,
                            CSIResultSet& aSIResultSet,
                            const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar,
                            const TDesC8& aCombinedData );

        /**
        * Invoked when grammar combining is needed
        *
        * @since 2.8
        * @param "RPointerArray<CSICompiledGrammar>& aCompiledGrammars"
        *        Array of previously compiled grammar
        * @param "const RPointerArray<TSIRuleVariantInfo>& aExcludedRules"
        *        Rules to be blacklisted.
        */
        void CombineGrammarL( const RPointerArray<CSICompiledGrammar>& aCompiledGrammars,
                              const RPointerArray<TSIRuleVariantInfo>& aExcludedRules );

        // ============================================
        // RecognitionAlgMgr Observer MIXIN ends
        // ============================================

        // ============================================
        // MVmAlgMgrObserver Observer MIXIN begins
        // ============================================

        /**
        * Notifies that grammar combination has been done.
        *
        * NOTE(review): ownership of aResult is not stated in this header -
        * confirm against MVmAlgMgrObserver and the implementation.
        *
        * @param "HBufC8* aResult" Result of the combination.
        * @param "TInt aError" Error code of the operation.
        */
        void CombineComplete( HBufC8* aResult, TInt aError );

        /**
        * Get the specified grammar.
        *
        * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found.
        * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI
        *        active grammar, NULL if not found.
        * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI
        *        non-active grammar, NULL if not found
        */
        void GetGrammarL( const TSIGrammarID aGrammarID, 
                          CSICompiledGrammar** aSIActiveGrammar,
                          CSICompiledGrammar** aSIDeActivatedGrammar );

        // ============================================
        // MVmAlgMgrObserver Observer MIXIN ends
        // ============================================

    private:

        /**
        * C++ constructor.
        *
        * @param "MDevASRObserver& aObserver" Reference to observer.
        */
        CSRSAlgorithmManager( MDevASRObserver& aObserver );

        /**
        * By default Symbian 2nd phase constructor is private.
        */
        void ConstructL();

        /**
        * From CActive.
        */
        void RunL();
        void DoCancel();

        /**
        * Empties and copies the data to holding buffer.
        *
        * NOTE(review): the meaning of the TInt return value is not
        * documented in this header - confirm in the implementation.
        *
        * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
        * @param "TInt aBufferLength" Length of buffer
        */
        TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );

        /**
        * Fill the received buffer with utterance data up to the specified length.
        *
        * NOTE(review): the meaning of the TInt return value is not
        * documented in this header - confirm in the implementation.
        *
        * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
        * @param "TInt aBufferLength" Length of buffer
        */
        TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );

        /**
        * Transition from current state to target state if possible.
        *
        * @param "TDevASRState aState" A target state
        * @return ETrue if successful.
        */
        TBool StateTransition( TDevASRState aState );

        /**
        * Translate system error codes to DevASR cause codes.
        *
        * @param "TInt aError" System error constant
        * @return A DevASR error code
        */
        TDevASRError TranslateError( TInt aError );

        /**
        * Async handler for a play request.
        */
        void HandlePlayL();

        /**
        * Async handler for a record request.
        */
        void HandleRecordL();

        /**
        * Async handler for a UtteranceDataProcessed event.
        */
        void HandleUtteranceDataProcessed();

        /**
        * Completes a request with specified status.
        *
        * @param "TInt aStatus" Operation status.
        */
        void Ready( const TInt aStatus );

        /**
        * Configure the sound device according to parameters in the resource file.
        */
        void ConfigureSoundDeviceL();

        /**
        * Initializes DevSound for sampling
        */
        void InitializeDevSoundL();

        /**
        * Starts sampling with DevSound
        */
        void StartDevSoundL();

    private:    // Data

        // DevASR Observer, the SRS Plugin.
        // Stored as a pointer although the constructor receives a reference.
        MDevASRObserver* iDevASRObserver;

        // ----- DevSound related data -----

        // Pointer to DevSound
        CMMFDevSound* iDevSound;

        // structure of capabilities for DevSound instance
        TMMFCapabilities iDevSoundCapabilities;

        // structure of priorities for DevSound instance
        TMMFPrioritySettings iPrioritySettings;

        // stores the current volume and gain
        TInt iCurrentVolume;
        TInt iCurrentGain;

        // ----- Algorithm managers ----

        // Pointer to Recognition Algorithm Manager
        CRecognitionAlgMgr* iRecognitionAlgMgr;

        // Pointer to Vocabulary algorithm manager
        CVMAlgorithmManager* iVMAlgorithmManager;

        // Flag to tell if recognition should be started when first audio
        // buffer has been sampled.
        TBool iStartRecognition;

        // Flag to tell if there is a pending RequestSpeechData() call from
        // recognition hw device.
        TBool iPendingRequestSpeechData;

        // --- SD Training audio buffer management ---
        // Start and stop points in the audio buffer, used during recording
        TUint32 iStartPoint;
        TUint32 iStopPoint;
        // Audio buffer to hold PCM data coming from DevSound
        TUint8* iAudioBuffer;
        // NOTE(review): undocumented in the original; presumably a
        // descriptor over iAudioBuffer - confirm in the implementation.
        TPtr8 iPtr;


        // ----- Buffer management -----

        // Length of time to record. The type is TTimeIntervalMicroSeconds32,
        // i.e. the value is in microseconds (see StartRecognition()).
        TTimeIntervalMicroSeconds32 iRecordDuration;


        // Overall number of bytes that need to be sampled.
        // Zero if we should sample until EndRecord() is called.
        // NOTE(review): no EndRecord() is declared in this header; this
        // presumably refers to StopRecognition() - confirm.
        TInt iOverallLength;

        // Number of samples that are recorded.
        TInt iOverallSampled;

        // Indicates if utterance processing is currently being done
        TBool iProcessingUtterance;


        // Queue of audio buffer descriptors to be processed
        TSglQue<CQueItem>* iAudioBufferQue;

        // An item in the queue of audio buffer descriptors
        CQueItem* iQueItem;

        // Flag to tell if recording should be ended
        TBool iEndFlag;

        // Recognizer mode
        TRecognizerMode iMode;


        // ----- End-pointing usage -----

        // Start and end frames in audio buffer
        TUint32 iStartFrame;
        TUint32 iEndFrame;
        // NOTE(review): unit of the frame length is not stated here - confirm.
        TReal iFrameLength;

        // Start and end point in the audio buffer after end-pointing.
        TUint32 iBufferStartPoint;
        TUint32 iBufferEndPoint;

        // ----- State management -----

        // State of DevASR
        TDevASRState iDevASRState;

        // State of DevSound
        TDevSoundState iDevSoundState;

        // ----- Active object handling -----
        // NOTE(review): presumably identifies which asynchronous handler
        // RunL() should dispatch to - confirm in the implementation.
        TInt iRequestFunction;

        // resource file handler
        CDevASRResourceHandler* iResourceHandler;


        // NOTE(review): undocumented in the original; judging by the names,
        // a buffer being assembled and the current write position in it -
        // confirm in the implementation.
        TUint8* iBufferUnderConstruction;
        TInt iConstructionPoint;

        // Pointers to const model bank and lexicon.
        // NOTE(review): presumably the objects passed to LoadModelsL() and
        // LoadLexiconL(), not owned by this class - confirm.
		const CSIModelBank* iSIModelBank;
		const CSILexicon* iSILexicon;

        // Flag which tells if pre-sampling has been started
        TBool iPreSamplingStarted;

        // Wait loop to wait for audio initialization callback
        CActiveSchedulerWait iAudioWait;

        // The following are used when dumping captured audio to file for analysis
        // (compiled in only when AUDIOBUFFER_TO_FILE is defined)
#ifdef AUDIOBUFFER_TO_FILE
        RFs iFs;
        RFile iBufferDataFile;
        TBool iFileCreated;
#endif

    };
    
#endif // DEVASRSRSALGORITHMMANAGER_H
    
// End of file
// author       Pat Downey <patd@symbian.org>
//              Wed, 01 Sep 2010 12:29:17 +0100
// branch       RCL_3
// changeset 19 e36f3802f733
// parent 0     bf1d17376201
// permissions  -rw-r--r--