/*
* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: This file contains definition of the DevASR private interface.
*
*/
#ifndef DEVASRSRSALGORITHMMANAGER_H
#define DEVASRSRSALGORITHMMANAGER_H
//#define AUDIOBUFFER_TO_FILE
// INCLUDES
#include <sounddevice.h>
#include <nsssispeechrecognitiondatadevasr.h>
#include <nssdevasrcommon.h>
#include "devasrutil.h"
#include "devasrrecognitionalgmgr.h"
#include "devasrresourcehandler.h"
#include "devasrvmalgorithmmanager.h"
#ifdef AUDIOBUFFER_TO_FILE
#include <f32file.h>
#endif
// DATA TYPES
/**
* Possible states of DevASR.
*
* The numeric values are the ones the compiler would assign implicitly
* (consecutive, starting from zero); they are only spelled out here.
*/
enum TDevASRState
    {
    EDASRIdle                = 0,
    EDASRInitTrain           = 1,
    EDASRTrain               = 2,
    EDASRSampling            = 3,
    EDASRTrainSampling       = 4,
    EDASRPlayback            = 5,
    EDASRRecognitionPlayback = 6,
    EDASRPreRecognition      = 7,
    EDASRInitRecognition     = 8,
    EDASRRecognition         = 9,
    EDASRRecognitionSampling = 10,
    EDASRCancel              = 11
    };
/**
* Possible states of DevSound.
*
* The numeric values are the ones the compiler would assign implicitly
* (consecutive, starting from zero); they are only spelled out here.
*/
enum TDevSoundState
    {
    ESoundDeviceNotInitialized = 0,
    ESoundDeviceInitialized    = 1,
    ESoundDeviceRecord         = 2,
    ESoundDevicePlayback       = 3,
    ESoundDeviceStopped        = 4
    };
// FORWARD DECLARATIONS
// Observer type: passed by reference to CSRSAlgorithmManager::NewL()/NewLC()
// and kept in the iDevASRObserver member.
class MDevASRObserver;
// Recognition algorithm manager type: held by pointer in the
// iRecognitionAlgMgr member.
class CRecognitionAlgMgr;
// CLASS DECLARATION
/**
* Interface Class to be used by the DevASR thin client implementation.
*
* The class is an active object (derives from CActive) and implements three
* callback interfaces: MDevSoundObserver, MRecAlgMgrObserver and
* MVmAlgMgrObserver. The callback functions are grouped below under the
* corresponding "MIXIN begins/ends" banners.
*
* Functions whose names end in 'L' follow the Symbian naming convention for
* functions that may leave.
*
* @lib NssDevASR.lib
*/
class CSRSAlgorithmManager : public CActive,
public MDevSoundObserver,
public MRecAlgMgrObserver,
public MVmAlgMgrObserver
{
public: // Constructors and destructor
/**
* Two-phased constructors.
*
* NOTE(review): by Symbian naming convention NewLC presumably leaves the
* new instance on the cleanup stack - confirm in the implementation.
*
* @param "MDevASRObserver& aObserver" Reference to observer.
* @return Pointer to the created CSRSAlgorithmManager instance.
*/
static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver);
static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver);
/**
* Destructor.
*/
virtual ~CSRSAlgorithmManager();
public: // New functions
/**
* Does grammar compilation.
*
* @param "CSICompiledGrammar& aGrammar" Grammar to be compiled
* (passed as a non-const reference).
*/
void CompileGrammarL( CSICompiledGrammar& aGrammar );
/**
* Computes new grammar size with given number of variants.
*
* NOTE(review): the exact meaning of each parameter is not visible in this
* header - confirm against the implementation. Only the direction implied
* by const-ness is documented here.
*
* @param "const CSIGrammar& aGrammar" Grammar to examine (read-only).
* @param "const TUint32 aTargetNRuleVariants" Input value.
* @param "const TUint32 aMaxNRuleVariants" Input value.
* @param "const RArray<TUint>& aNewRuleScores" Input array (read-only).
* @param "RArray<TUint>& aNNeNRuleVariants" Writable array, presumably
* an output.
* @param "TUint32& aNPrune" Writable value, presumably an output.
*/
void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar,
const TUint32 aTargetNRuleVariants,
const TUint32 aMaxNRuleVariants,
const RArray<TUint>& aNewRuleScores,
RArray<TUint>& aNNeNRuleVariants,
TUint32& aNPrune );
/**
* Prunes unnecessary rule variants away from grammar.
*
* @param "const CSIGrammar& aGrammar" Grammar to examine (read-only).
* @param "const TUint32 aMinNumber" Input value;
* NOTE(review): exact meaning not visible here - confirm.
* @param "RArray<TSIRuleVariantInfo>& aPrunedRuleVariants" Writable
* array, presumably receives the pruned rule variants.
* @return TBool; NOTE(review): the meaning of the return value is not
* stated in this header - confirm against the implementation.
*/
TBool PruneGrammar( const CSIGrammar& aGrammar,
const TUint32 aMinNumber,
RArray<TSIRuleVariantInfo>& aPrunedRuleVariants );
/**
* Starts speaker independent training from text.
*
* @param "CSITtpWordList& aWordList" Word list (non-const reference).
* @param "const RArray<TLanguage>& aDefaultLanguage" Array of languages
* (read-only).
* @param "const RArray<TUint32>& aMaxNPronunsForWord" Array of values
* (read-only); NOTE(review): presumably the maximum
* number of pronunciations per word - confirm.
*/
void StartTrainingFromTextL( CSITtpWordList& aWordList,
const RArray<TLanguage>& aDefaultLanguage,
const RArray<TUint32>& aMaxNPronunsForWord );
/**
* Adapts recognition models
*
* @param "const CSIResultSet& aResultSet" Result set
* @param "TInt aResultIndex" Index of correct result
*/
void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex );
/**
* Cancels the current or started tasks.
*
* NOTE(review): the base class CActive also provides a Cancel() function;
* this declaration hides it for callers using CSRSAlgorithmManager -
* confirm that this is intended.
*/
void Cancel();
/**
* This method is used to indicate the end of a recognition session.
*/
void EndRecSession();
/**
* Retrieve the properties of the underlying speech recognition engine.
*
* @param "const RArray<TInt>& aPropertyId" An array of identifiers being
* queried.
* @param "RArray<TInt>& aPropertyValue" An array of values corresponding
* to the queried identifiers
*/
void GetEnginePropertiesL( const RArray<TInt>& aPropertyId,
RArray<TInt>& aPropertyValue );
/**
* Initializes the front-end module in the speech recognition engine.
* The frontend module used during training/recognition functions is started
* as a result. This method is intended to be used in conjunction with InitTrainBE().
*
* NOTE(review): this class declares no InitTrainBE(); the back-end
* initializer declared here is InitRecognizerBE() - confirm which function
* the sentence above should refer to.
*
* @param "TRecognizerMode aFeMode" Mode of the recognizer.
*/
void InitFrontEnd( TRecognizerMode aFeMode );
/**
* Initialize the recognition engine back-end. The module responsible for recognition
* function is started as a result. This method must be used before any recognition
* operations and intended to be used in conjunction with InitFrontEnd().
*
* @param "CSIResultSet& aResult" A reference to an object where the
* recognition result will be written
*/
void InitRecognizerBE( CSIResultSet& aResult );
/**
* Load the specified recognizer parameter(s). These parameters are used to alter
* the recognizer's default parameters. The parameters are specified as attribute-value
* pairs.
*
* @param "const RArray<TInt>& aParameterId" An array of parameter identifiers.
* @param "const RArray<TInt>& aParameterValue" An array of parameter values.
*/
void LoadEnginePropertiesL( const RArray<TInt>& aParameterId,
const RArray<TInt>& aParameterValue );
/**
* Load the specified grammar into the recognizer.
* Overloaded for CSIGrammar and CSICompiledGrammar.
*
* @param "aGrammar" A reference to a grammar in an internal format.
*/
void LoadGrammarL( const CSIGrammar& aGrammar );
void LoadGrammarL( const CSICompiledGrammar& aGrammar );
/**
* Unloads the specified grammar from the recognizer.
* Overloaded for CSIGrammar and CSICompiledGrammar.
*
* @param "aGrammar" A reference to a grammar in an internal format.
*/
void UnloadGrammarL( const CSIGrammar& aGrammar );
void UnloadGrammarL( const CSICompiledGrammar& aGrammar );
/**
* Activates a grammar
*
* @param "TSIGrammarID aGrammarID" A grammar identifier.
*/
void ActivateGrammarL( TSIGrammarID aGrammarID );
/**
* Deactivates a grammar
*
* @param "TSIGrammarID aGrammarID" A grammar identifier.
*/
void DeactivateGrammarL( TSIGrammarID aGrammarID );
/**
* Load the specified lexicon into the recognizer.
*
* @param "aLexicon" A reference to a lexicon.
*/
void LoadLexiconL( const CSILexicon& aLexicon );
/**
* Load the specified models into the recognizer.
*
* @param "aModels" A reference to a model bank.
*/
void LoadModelsL( const CSIModelBank& aModels );
/**
* Request to begin recording.
*
* @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time
* to record, expressed in microseconds.
*/
void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration );
/**
* Ends recording process
*/
void StopRecognition();
/**
* Use to set the priority of the sound device
*
* @param "const TMMFPrioritySettings& aPrioritySettings" Priority settings
* structure.
*/
void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings );
/**
* Request to start a recognition session.
*
* @param "TRecognizerMode aMode" Recognizer mode.
* @return result code of request
*/
TInt StartRecSession( TRecognizerMode aMode );
/**
* Call to give speech data to recognizer.
*
* @param "TPtrC8& aBuffer" Filled buffer (passed as a non-const
* reference to a TPtrC8 descriptor).
* @param "TBool aEnd" Flag to tell if this is the last buffer.
*/
void SendSpeechData( TPtrC8& aBuffer, TBool aEnd );
/**
* Request to unload the specified rule in the grammar from recognizer.
*
* @param "TSIGrammarID aGrammarID" Identifier of the grammar to operate on.
* @param "TSIRuleID aRuleID" Identifier of the rule to remove.
*/
void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID );
/**
* Retrieve the raw audio data accumulated during recording.
*
* @param "TDes8* aBuffer" Pointer to a buffer containing the data.
* @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the
* utterance.
*/
void GetUtteranceData( TDes8* aBuffer,
TTimeIntervalMicroSeconds32& aDuration );
/**
* Checks if grammar is loaded or not
*
* @since 2.8
* @param "TSIGrammarID aGrammarID" Grammar ID
* @return ETrue if grammar is currently loaded, EFalse otherwise.
*/
TBool IsGrammarLoaded( TSIGrammarID aGrammarID );
/**
* Checks if grammar is active or not
*
* @since 2.8
* @param "TSIGrammarID aGrammarID" Grammar ID
* @return ETrue if grammar is active, EFalse otherwise.
*/
TBool IsGrammarActive( TSIGrammarID aGrammarID );
/**
* Prints DevASR state information for debugging
*/
void DevASRState();
/**
* Pre-starts recording before StartRecording call.
*
* NOTE(review): this class declares no StartRecording(); the function
* documented as "Request to begin recording" is StartRecognition() -
* confirm which call is meant.
*
* @since 3.2
*/
void PreStartSamplingL();
// ===================================
// DevSound Observer MIXIN begins
// ===================================
/**
* Called by DevSound when a buffer is available to be read.
*
* @param "CMMFBuffer* aBuffer" Buffer containing data to be processed
*/
void BufferToBeEmptied( CMMFBuffer* aBuffer );
/**
* Called by DevSound when a buffer is available to be written.
*
* @param "CMMFBuffer* aBuffer" Buffer to which data is written
*/
void BufferToBeFilled( CMMFBuffer* aBuffer );
/**
* Not used by DevASR
*
* @param "TInt aError" Error code.
*/
void ConvertError( TInt aError );
/**
* Handles device event.
*
* @param "TUid aMessageType" Message type identifier.
* @param "const TDesC8& aMsg" A message packed in the descriptor format.
*/
void DeviceMessage( TUid aMessageType, const TDesC8& aMsg );
/**
* Called by DevSound when initialization is completed.
*
* @param "TInt aError" Error code, KErrNone if successful.
*/
void InitializeComplete( TInt aError );
/**
* Handles record completion or cancel event from DevSound.
*
* @param "TInt aError" Error code, KErrNone if successful.
*/
void RecordError( TInt aError );
/**
* Handles play completion or cancel event from DevSound.
*
* @param "TInt aError" Error code, KErrNone if successful.
*/
void PlayError( TInt aError );
/**
* Handles tone play completion or cancel event from DevSound
* Tone is not used by DevASR so this event is not expected.
*
* @param "TInt aError" Error code, KErrNone if successful.
*/
void ToneFinished( TInt aError );
/**
* Handles audio policy events from DevSound.
*
* @param "const TMMFEvent& aEvent" An audio policy event.
*/
void SendEventToClient( const TMMFEvent& aEvent );
// ===================================
// DevSound Observer MIXIN ends
// ===================================
// ============================================
// RecognitionAlgMgr Observer MIXIN begins
// ============================================
/**
* Invoked by the adaptation hw device when model adaptation has
* finished its processing.
*
* @since 2.8
* @param "TInt aResultCode" Error code of operation.
*/
void AdaptComplete( TInt aResultCode );
/**
* Invoked by the front-end when EOU has been detected.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void EouDetected( TInt aResultCode );
/**
* Invoked by the algorithm manager when the frontend initialization
* is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void InitFEComplete( TInt aResultCode );
/**
* Invoked by the algorithm manager when the recognition backend
* initialization is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void InitRecognizerBEComplete( TInt aResultCode );
/**
* Invoked by the algorithm manager when loading grammar
* is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void LoadGrammarComplete( TInt aResultCode );
/**
* Invoked when grammar has been unloaded.
*
* @param "TInt aResultCode" Result of grammar unloading
*/
void UnloadGrammarComplete( TInt aResultCode );
/**
* Invoked when grammar has been activated.
*
* @param "TInt aResultCode" Result of grammar activation
*/
void ActivateGrammarComplete( TInt aResultCode );
/**
* Invoked when grammar has been deactivated.
*
* @param "TInt aResultCode" Result of grammar deactivation
*/
void DeActivateGrammarComplete( TInt aResultCode );
/**
* Invoked by the algorithm manager when loading lexicon
* is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void LoadLexiconComplete( TInt aResultCode );
/**
* Invoked by the algorithm manager when loading models
* is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void LoadModelsComplete( TInt aResultCode );
/**
* Invoked when the recognition process is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void RecognitionComplete( TInt aResultCode );
/**
* Invoked when the training process is completed.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void TrainComplete( TInt aResultCode );
/**
* Invoked when rule unloading has been done.
*
* @param "TInt aResultCode" Result code, KErrNone if successful.
*/
void UnloadRuleComplete( TInt aResultCode );
/**
* Invoked when a feature vector is available.
*
* @param "const TDesC8& aFV" Buffer containing a feature vector.
* @param "TInt32 aSNR" Signal-to-noise ratio.
* @param "TInt32 aPosition" Indicates whether this is the first,
* subsequent or last feature vector.
*/
void FeatureVectorDataRcvd( const TDesC8& aFV,
TInt32 aSNR,
TInt32 aPosition );
/**
* Invoked by the front-end when utterance data is needed.
*
* @since 2.8
*/
void RequestSpeechData();
/**
* Resolves result.
*
* NOTE(review): parameter semantics are not described in this header;
* the notes below only reflect the declared types - confirm details
* against the implementation.
*
* @param "const RArray<TUint>& aNBestIDs" Array of IDs (read-only).
* @param "CSIResultSet& aSIResultSet" Result set (non-const reference,
* presumably filled in by this call).
* @param "const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar"
* Array of compiled grammars (read-only).
* @param "const TDesC8& aCombinedData" Descriptor with combined data
* (read-only).
*/
void ResolveResult( const RArray<TUint>& aNBestIDs,
CSIResultSet& aSIResultSet,
const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar,
const TDesC8& aCombinedData );
/**
* Invoked when grammar combining is needed
*
* @since 2.8
* @param "const RPointerArray<CSICompiledGrammar>& aCompiledGrammars"
* Array of previously compiled grammar
* @param "const RPointerArray<TSIRuleVariantInfo>& aExcludedRules"
* Rules to be blacklisted.
*/
void CombineGrammarL( const RPointerArray<CSICompiledGrammar>& aCompiledGrammars,
const RPointerArray<TSIRuleVariantInfo>& aExcludedRules );
// ============================================
// RecognitionAlgMgr Observer MIXIN ends
// ============================================
// ============================================
// MVmAlgMgrObserver Observer MIXIN begins
// ============================================
/**
* Notifies that grammar combination has been done.
*
* @param "HBufC8* aResult" Pointer to the result buffer;
* NOTE(review): ownership of the buffer is not stated
* here - confirm against the implementation.
* @param "TInt aError" Error code.
*/
void CombineComplete( HBufC8* aResult, TInt aError );
/**
* Get the specified grammar.
*
* @param "TSIGrammarID aGrammarID" Identifier of grammar to be found.
* @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI
* active grammar, NULL if not found.
* @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI
* non-active grammar, NULL if not found
*/
void GetGrammarL( const TSIGrammarID aGrammarID,
CSICompiledGrammar** aSIActiveGrammar,
CSICompiledGrammar** aSIDeActivatedGrammar );
// ============================================
// MVmAlgMgrObserver Observer MIXIN ends
// ============================================
private:
/**
* C++ default constructor.
*
* @param "MDevASRObserver& aObserver" Reference to observer.
*/
CSRSAlgorithmManager( MDevASRObserver& aObserver );
/**
* By default Symbian 2nd phase constructor is private.
*/
void ConstructL();
/**
* From CActive.
*/
void RunL();
void DoCancel();
/**
* Empties and copies the data to holding buffer.
*
* @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
* @param "TInt aBufferLength" Length of buffer
* @return TInt; NOTE(review): presumably a system error code - confirm.
*/
TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );
/**
* Fill the received buffer with utterance data up to the specified length.
*
* @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
* @param "TInt aBufferLength" Length of buffer
* @return TInt; NOTE(review): presumably a system error code - confirm.
*/
TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );
/**
* Transition from current state to target state if possible.
*
* @param "TDevASRState aState" A target state
* @return ETrue if successful.
*/
TBool StateTransition( TDevASRState aState );
/**
* Translate system error codes to DevASR cause codes.
*
* @param "TInt aError" System error constant
* @return A DevASR error code
*/
TDevASRError TranslateError( TInt aError );
/**
* Async handler for a play request.
*/
void HandlePlayL();
/**
* Async handler for a record request.
*/
void HandleRecordL();
/**
* Async handler for a UtteranceDataProcessed event.
*/
void HandleUtteranceDataProcessed();
/**
* Completes a request with specified status.
*
* @param "const TInt aStatus" Operation status.
*/
void Ready( const TInt aStatus );
/**
* Configure the sound device according to parameters in the resource file.
*/
void ConfigureSoundDeviceL();
/**
* Initializes DevSound for sampling
*/
void InitializeDevSoundL();
/**
* Starts sampling with DevSound
*/
void StartDevSoundL();
private: // Data
// DevASR Observer, the SRS Plugin
MDevASRObserver* iDevASRObserver;
// ----- DevSound related data -----
// Pointer to DevSound
CMMFDevSound* iDevSound;
// structure of capabilities for DevSound instance
TMMFCapabilities iDevSoundCapabilities;
// structure of priorities for DevSound instance
TMMFPrioritySettings iPrioritySettings;
// stores the current volume and gain
TInt iCurrentVolume;
TInt iCurrentGain;
// ----- Algorithm managers ----
// Pointer to Recognition Algorithm Manager
CRecognitionAlgMgr* iRecognitionAlgMgr;
// Pointer to Vocabulary algorithm manager
CVMAlgorithmManager* iVMAlgorithmManager;
// Flag to tell if recognition should be started when first audio
// buffer has been sampled.
TBool iStartRecognition;
// Flag to tell if there is a pending RequestSpeechData() call from
// recognition hw device.
TBool iPendingRequestSpeechData;
// --- SD Training audio buffer management ---
// Start and stop points in the audio buffer, used during recording
TUint32 iStartPoint;
TUint32 iStopPoint;
// Audio buffer to hold PCM data coming from DevSound
TUint8* iAudioBuffer;
// NOTE(review): presumably a descriptor over iAudioBuffer - confirm in
// the implementation.
TPtr8 iPtr;
// ----- Buffer management -----
// Length of time to record (the type holds microseconds)
TTimeIntervalMicroSeconds32 iRecordDuration;
// Overall number of bytes that need to be sampled.
// Zero if we should sample until EndRecord() is called.
// NOTE(review): this class declares no EndRecord(); StopRecognition() is
// the function documented as ending the recording process - confirm.
TInt iOverallLength;
// Number of samples that are recorded.
TInt iOverallSampled;
// Indicates if utterance processing is currently being done
TBool iProcessingUtterance;
// Queue of audio buffer descriptors to be processed
TSglQue<CQueItem>* iAudioBufferQue;
// An item in the queue of audio buffer descriptors
CQueItem* iQueItem;
// Flag to tell if recording should be ended
TBool iEndFlag;
// Recognizer mode
TRecognizerMode iMode;
// ----- End-pointing usage -----
// Start and end frames in audio buffer
TUint32 iStartFrame;
TUint32 iEndFrame;
// NOTE(review): unit of the frame length is not stated here - confirm.
TReal iFrameLength;
// Start and end point in the audio buffer after end-pointing.
TUint32 iBufferStartPoint;
TUint32 iBufferEndPoint;
// ----- State management -----
// State of DevASR
TDevASRState iDevASRState;
// State of DevSound
TDevSoundState iDevSoundState;
// ----- Active object handling -----
// NOTE(review): presumably identifies which asynchronous handler RunL()
// should invoke - confirm in the implementation.
TInt iRequestFunction;
// resource file handler
CDevASRResourceHandler* iResourceHandler;
// NOTE(review): the two members below are undocumented; by their names
// they presumably hold a buffer being assembled and the current write
// position in it - confirm in the implementation.
TUint8* iBufferUnderConstruction;
TInt iConstructionPoint;
// Pointers to a model bank and a lexicon (pointees are const).
// NOTE(review): presumably set from LoadModelsL()/LoadLexiconL();
// ownership is not stated here - confirm in the implementation.
const CSIModelBank* iSIModelBank;
const CSILexicon* iSILexicon;
// Flag which tells if pre-sampling has been started
TBool iPreSamplingStarted;
// Wait loop to wait for audio initialization callback
CActiveSchedulerWait iAudioWait;
// The following are used when dumping captured audio to file for analysis
// (compiled in only when AUDIOBUFFER_TO_FILE is defined)
#ifdef AUDIOBUFFER_TO_FILE
RFs iFs;
RFile iBufferDataFile;
TBool iFileCreated;
#endif
};
#endif // DEVASRSRSALGORITHMMANAGER_H
// End of file