diff -r b13cd05eeb2f -r 57b735022c18 srsf/devasr/src/devasrsrsalgorithmmanager.h --- a/srsf/devasr/src/devasrsrsalgorithmmanager.h Mon Jan 18 20:20:30 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,773 +0,0 @@ -/* -* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies). -* All rights reserved. -* This component and the accompanying materials are made available -* under the terms of "Eclipse Public License v1.0" -* which accompanies this distribution, and is available -* at the URL "http://www.eclipse.org/legal/epl-v10.html". -* -* Initial Contributors: -* Nokia Corporation - initial contribution. -* -* Contributors: -* -* Description: This file contains definition of the DevASR private interface. -* -*/ - - -#ifndef DEVASRSRSALGORITHMMANAGER_H -#define DEVASRSRSALGORITHMMANAGER_H - -// Uncomment to compile in the extra include and the data members that are -// used when dumping captured audio to file for analysis. -//#define AUDIOBUFFER_TO_FILE - -// INCLUDES -#include -#include -#include -#include "devasrutil.h" -#include "devasrrecognitionalgmgr.h" -#include "devasrresourcehandler.h" -#include "devasrvmalgorithmmanager.h" - - -#ifdef AUDIOBUFFER_TO_FILE -#include -#endif - -// DATA TYPES - -// Possible DevASR states (type of CSRSAlgorithmManager::iDevASRState) -enum TDevASRState - { - EDASRIdle, - EDASRInitTrain, - EDASRTrain, - EDASRSampling, - EDASRTrainSampling, - EDASRPlayback, - EDASRRecognitionPlayback, - EDASRPreRecognition, - EDASRInitRecognition, - EDASRRecognition, - EDASRRecognitionSampling, - EDASRCancel - }; - -// Possible DevSound states (type of CSRSAlgorithmManager::iDevSoundState) -enum TDevSoundState - { - ESoundDeviceNotInitialized, - ESoundDeviceInitialized, - ESoundDeviceRecord, - ESoundDevicePlayback, - ESoundDeviceStopped - }; - -// FORWARD DECLARATIONS -class MDevASRObserver; -class CRecognitionAlgMgr; - -// CLASS DECLARATION - -/** -* Interface Class to be used by the DevASR thin client implementation. 
-* -* Publicly derives from CActive, MDevSoundObserver, MRecAlgMgrObserver and -* MVmAlgMgrObserver. -* -* @lib NssDevASR.lib -*/ -class CSRSAlgorithmManager : public CActive, - public MDevSoundObserver, - public MRecAlgMgrObserver, - public MVmAlgMgrObserver - { - public: // Constructors and destructor - - /** - * Two-phased constructors. - */ - static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver); - static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver); - - /** - * Destructor. - */ - virtual ~CSRSAlgorithmManager(); - - public: // New functions - - - /** - * Does grammar compilation. - */ - void CompileGrammarL( CSICompiledGrammar& aGrammar ); - - /** - * Computes new grammar size with given number of variants. - */ - void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar, - const TUint32 aTargetNRuleVariants, - const TUint32 aMaxNRuleVariants, - const RArray& aNewRuleScores, - RArray& aNNeNRuleVariants, - TUint32& aNPrune ); - - /** - * Prunes unnecessary rule variants away from the grammar. - */ - TBool PruneGrammar( const CSIGrammar& aGrammar, - const TUint32 aMinNumber, - RArray& aPrunedRuleVariants ); - - /** - * Starts speaker independent training from text. - */ - void StartTrainingFromTextL( CSITtpWordList& aWordList, - const RArray& aDefaultLanguage, - const RArray& aMaxNPronunsForWord ); - - - - /** - * Adapts recognition models - * - * @param "const CSIResultSet& aResultSet" Result set - * @param "TInt aResultIndex" Index of correct result - */ - void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex ); - - /** - * Cancels the current or started tasks. - */ - void Cancel(); - - /** - * This method is used to indicate the end of a recognition session. - */ - void EndRecSession(); - - /** - * Retrieve the properties of the underlying speech recognition engine. - * - * @param "RArray& aPropertyId" An array of identifiers being - * queried. 
- * @param "RArray& aPropertyValue" An array of values corresponding - * to the queried identifiers - */ - void GetEnginePropertiesL( const RArray& aPropertyId, - RArray& aPropertyValue ); - - /** - * Initializes the front-end module in the speech recognition engine. - * The frontend module used during training/recognition functions is started - * as a result. This method is intended to be used in conjunction with InitTrainBE(). - * NOTE(review): InitTrainBE() is not declared in this class; presumably - * InitRecognizerBE() is the intended counterpart (confirm). - * - * @param "TRecognizerMode aFeMode" Mode of the recognizer. - */ - void InitFrontEnd( TRecognizerMode aFeMode ); - - /** - * Initialize the recognition engine back-end. The module responsible for recognition - * function is started as a result. This method must be used before any recognition - * operations and is intended to be used in conjunction with InitFrontEnd(). - * - * @param "CSIResultSet& aResult" A reference to an object where the - * recognition result will be written - */ - void InitRecognizerBE( CSIResultSet& aResult ); - - /** - * Load the specified recognizer parameter(s). These parameters are used to alter - * the recognizer's default parameters. The parameters are specified as attribute-value - * pairs. - * - * @param "RArray& aParameterId" An array of parameter identifiers. - * @param "RArray& aParameterValue" An array of parameter values. - */ - void LoadEnginePropertiesL( const RArray& aParameterId, - const RArray& aParameterValue ); - - /** - * Load the specified grammar into the recognizer. - * - * @param "aGrammar" A reference to a grammar in an internal format. - */ - void LoadGrammarL( const CSIGrammar& aGrammar ); - void LoadGrammarL( const CSICompiledGrammar& aGrammar ); - - /** - * Unloads the specified grammar from the recognizer. - * - * @param "aGrammar" A reference to a grammar in an internal format. 
- */ - void UnloadGrammarL( const CSIGrammar& aGrammar ); - void UnloadGrammarL( const CSICompiledGrammar& aGrammar ); - - /** - * Activates a grammar - * - * @param "TSIGrammarID aGrammarID" A grammar identifier. - */ - void ActivateGrammarL( TSIGrammarID aGrammarID ); - - /** - * Deactivates a grammar - * - * @param "TSIGrammarID aGrammarID" A grammar identifier. - */ - void DeactivateGrammarL( TSIGrammarID aGrammarID ); - - /** - * Load the specified lexicon into the recognizer. - * - * @param "aLexicon" A reference to a lexicon. - */ - void LoadLexiconL( const CSILexicon& aLexicon ); - - /** - * Load the specified models into the recognizer. - * - * @param "aModels" A reference to a model bank. - */ - void LoadModelsL( const CSIModelBank& aModels ); - - /** - * Request to begin recording. - * - * @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time - * to record, expressed in microseconds. - */ - void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration ); - - /** - * Ends the recording process. - */ - void StopRecognition(); - - /** - * Use to set the priority of the sound device - * - * @param "TMMFPrioritySettings& aPrioritySettings" Priority settings - * structure. - */ - void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings ); - - /** - * Request to start a recognition session. - * - * @param "TRecognizerMode aMode" Recognizer mode. - * @return result code of request - */ - TInt StartRecSession( TRecognizerMode aMode ); - - /** - * Call to give speech data to recognizer. - * - * @param "TPtrC8& aBuffer" Filled buffer. - * @param "TBool aEnd" Flag to tell if this is the last buffer. - */ - void SendSpeechData( TPtrC8& aBuffer, TBool aEnd ); - - /** - * Request to unload the specified rule in the grammar from recognizer. - * - * @param "TSIGrammarID aGrammarID" Identifier of the grammar to operate on. - * @param "TSIRuleID aRuleID" Identifier of the rule to remove. 
- */ - void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID ); - - /** - * Retrieve the raw audio data accumulated during recording. - * - * @param "TDes8* aBuffer" Pointer to a buffer containing the data. - * @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the - * utterance. - */ - void GetUtteranceData( TDes8* aBuffer, - TTimeIntervalMicroSeconds32& aDuration ); - - /** - * Checks if grammar is loaded or not - * - * @since 2.8 - * @param "TSIGrammarID aGrammarID" Grammar ID - * @return ETrue if grammar is currently loaded, EFalse otherwise. - */ - TBool IsGrammarLoaded( TSIGrammarID aGrammarID ); - - /** - * Checks if grammar is active or not - * - * @since 2.8 - * @param "TSIGrammarID aGrammarID" Grammar ID - * @return ETrue if grammar is active, EFalse otherwise. - */ - TBool IsGrammarActive( TSIGrammarID aGrammarID ); - - /** - * Prints DevASR state information for debugging - */ - void DevASRState(); - - /** - * Pre-starts recording before StartRecording call. - * NOTE(review): no StartRecording() is declared in this class; presumably - * StartRecognition() is meant (confirm). - * - * @since 3.2 - */ - void PreStartSamplingL(); - - // =================================== - // DevSound Observer MIXIN begins - // =================================== - - /** - * Called by DevSound when a buffer is available to be read. - * - * @param "CMMFBuffer* aBuffer" Buffer containing data to be processed - */ - void BufferToBeEmptied( CMMFBuffer* aBuffer ); - - /** - * Called by DevSound when a buffer is available to be written. - * - * @param "CMMFBuffer* aBuffer" Buffer to which data is written - */ - void BufferToBeFilled( CMMFBuffer* aBuffer ); - - /** - * Not used by DevASR - */ - void ConvertError( TInt aError ); - - /** - * Handles device event. - * - * @param "TUid aMessageType" Message type UID. - * @param "const TDesC8& aMsg" A message packed in the descriptor format. - */ - void DeviceMessage( TUid aMessageType, const TDesC8& aMsg ); - - /** - * Called by DevSound when initialization is completed. - * - * @param aError Error code, KErrNone if successful. 
- */ - void InitializeComplete( TInt aError ); - - /** - * Handles record completion or cancel event from DevSound. - * - * @param aError Error code, KErrNone if successful. - */ - void RecordError( TInt aError ); - - /** - * Handles play completion or cancel event from DevSound. - * - * @param "TInt aError" Error code, KErrNone if successful. - */ - void PlayError( TInt aError ); - - /** - * Handles tone play completion or cancel event from DevSound. - * Tone is not used by DevASR so this event is not expected. - * - * @param "TInt aError" Error code, KErrNone if successful. - */ - void ToneFinished( TInt aError ); - - /** - * Handles audio policy events from DevSound. - * - * @param "const TMMFEvent& aEvent" An audio policy event. - */ - void SendEventToClient( const TMMFEvent& aEvent ); - - // =================================== - // DevSound Observer MIXIN ends - // =================================== - - // ============================================ - // RecognitionAlgMgr Observer MIXIN begins - // ============================================ - - /** - * Invoked by the adaptation hw device when model adaptation has - * finished its processing. - * - * @since 2.8 - * @param "TInt aResultCode" Error code of operation. - */ - void AdaptComplete( TInt aResultCode ); - - /** - * Invoked by the front-end when EOU has been detected. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void EouDetected( TInt aResultCode ); - - /** - * Invoked by the algorithm manager when the frontend initialization - * is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void InitFEComplete( TInt aResultCode ); - - /** - * Invoked by the algorithm manager when the recognition backend - * initialization is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void InitRecognizerBEComplete( TInt aResultCode ); - - /** - * Invoked by the algorithm manager when loading grammar - * is completed. 
- * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void LoadGrammarComplete( TInt aResultCode ); - - /** - * Invoked when grammar has been unloaded. - * - * @param "TInt aResultCode" Result of grammar unloading - */ - void UnloadGrammarComplete( TInt aResultCode ); - - /** - * Invoked when grammar has been activated. - * - * @param "TInt aResultCode" Result of grammar activation - */ - void ActivateGrammarComplete( TInt aResultCode ); - - /** - * Invoked when grammar has been deactivated. - * - * @param "TInt aResultCode" Result of grammar deactivation - */ - void DeActivateGrammarComplete( TInt aResultCode ); - - /** - * Invoked by the algorithm manager when loading lexicon - * is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void LoadLexiconComplete( TInt aResultCode ); - - /** - * Invoked by the algorithm manager when loading models - * is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void LoadModelsComplete( TInt aResultCode ); - - /** - * Invoked when the recognition process is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void RecognitionComplete( TInt aResultCode ); - - /** - * Invoked when the training process is completed. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void TrainComplete( TInt aResultCode ); - - /** - * Invoked when rule unloading has been done. - * - * @param "TInt aResultCode" Result code, KErrNone if successful. - */ - void UnloadRuleComplete( TInt aResultCode ); - - /** - * Invoked when a feature vector is available. - * - * @param "TDesC8& aFV" Buffer containing a feature vector. - * @param "TInt32 aSNR" Signal-to-noise ratio. - * @param "TInt32 aPosition" Indicates whether this is the first, - * subsequent or last feature vector. 
- */ - void FeatureVectorDataRcvd( const TDesC8& aFV, - TInt32 aSNR, - TInt32 aPosition ); - - /** - * Invoked by the front-end when utterance data is needed. - * - * @since 2.8 - */ - void RequestSpeechData(); - - - /** - * Resolves result. - */ - void ResolveResult( const RArray& aNBestIDs, - CSIResultSet& aSIResultSet, - const RPointerArray& aSICompiledGrammar, - const TDesC8& aCombinedData ); - - /** - * Invoked when grammar combining is needed - * - * @since 2.8 - * @param "RPointerArray& aCompiledGrammars" - * Array of previously compiled grammars - * @param "const RPointerArray& aExcludedRules" - * Rules to be blacklisted. - */ - void CombineGrammarL( const RPointerArray& aCompiledGrammars, - const RPointerArray& aExcludedRules ); - - // ============================================ - // RecognitionAlgMgr Observer MIXIN ends - // ============================================ - - // ============================================ - // MVmAlgMgrObserver Observer MIXIN begins - // ============================================ - - /** - * Notifies that grammar combination has been done. - */ - void CombineComplete( HBufC8* aResult, TInt aError ); - - /** - * Get the specified grammar. - * - * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found. - * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI - * active grammar, NULL if not found. - * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI - * non-active grammar, NULL if not found - */ - void GetGrammarL( const TSIGrammarID aGrammarID, - CSICompiledGrammar** aSIActiveGrammar, - CSICompiledGrammar** aSIDeActivatedGrammar ); - - // ============================================ - // MVmAlgMgrObserver Observer MIXIN ends - // ============================================ - - private: - - /** - * C++ constructor. - * - * @param "MDevASRObserver& aObserver" Reference to observer. 
- */ - CSRSAlgorithmManager( MDevASRObserver& aObserver ); - - /** - * By default Symbian 2nd phase constructor is private. - */ - void ConstructL(); - - /** - * From CActive. - */ - void RunL(); - void DoCancel(); - - /** - * Empties and copies the data to the holding buffer. - * - * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data - * @param "TInt aBufferLength" Length of buffer - */ - TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); - - /** - * Fill the received buffer with utterance data up to the specified length. - * - * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data - * @param "TInt aBufferLength" Length of buffer - */ - TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); - - /** - * Transition from current state to target state if possible. - * - * @param "TDevASRState aState" A target state - * @return ETrue if successful. - */ - TBool StateTransition( TDevASRState aState ); - - /** - * Translate system error codes to DevASR cause codes. - * - * @param "TInt aError" System error constant - * @return A DevASR error code - */ - TDevASRError TranslateError( TInt aError ); - - /** - * Async handler for a play request. - */ - void HandlePlayL(); - - /** - * Async handler for a record request. - */ - void HandleRecordL(); - - /** - * Async handler for a UtteranceDataProcessed event. - */ - void HandleUtteranceDataProcessed(); - - /** - * Completes a request with specified status. - * - * @param "TInt aStatus" Operation status. - */ - void Ready( const TInt aStatus ); - - /** - * Configure the sound device according to parameters in the resource file. 
- */ - void ConfigureSoundDeviceL(); - - /** - * Initializes DevSound for sampling - */ - void InitializeDevSoundL(); - - /** - * Starts sampling with DevSound - */ - void StartDevSoundL(); - - private: // Data - - // DevASR Observer, the SRS Plugin - MDevASRObserver* iDevASRObserver; - - // ----- DevSound related data ----- - - // Pointer to DevSound - CMMFDevSound* iDevSound; - - // structure of capabilities for DevSound instance - TMMFCapabilities iDevSoundCapabilities; - - // structure of priorities for DevSound instance - TMMFPrioritySettings iPrioritySettings; - - // stores the current volume and gain - TInt iCurrentVolume; - TInt iCurrentGain; - - // ----- Algorithm managers ---- - - // Pointer to Recognition Algorithm Manager - CRecognitionAlgMgr* iRecognitionAlgMgr; - - // Pointer to Vocabulary algorithm manager - CVMAlgorithmManager* iVMAlgorithmManager; - - // Flag to tell if recognition should be started when first audio - // buffer has been sampled. - TBool iStartRecognition; - - // Flag to tell if there is a pending RequestSpeechData() call from - // recognition hw device. - TBool iPendingRequestSpeechData; - - // --- SD Training audio buffer management --- - // Start and stop points in the audio buffer, used during recording - TUint32 iStartPoint; - TUint32 iStopPoint; - // Audio buffer to hold PCM data coming from DevSound - TUint8* iAudioBuffer; - TPtr8 iPtr; - - - // ----- Buffer management ----- - - // Length of time to record, in microseconds - TTimeIntervalMicroSeconds32 iRecordDuration; - - - // Overall number of bytes that need to be sampled. - // Zero if we should sample until EndRecord() is called. - // NOTE(review): no EndRecord() is declared in this class; presumably - // StopRecognition() is meant (confirm). - TInt iOverallLength; - - // Number of samples that are recorded. 
- TInt iOverallSampled; - - // Indicates if utterance processing is currently being done - TBool iProcessingUtterance; - - - // Queue of audio buffer descriptors to be processed - TSglQue* iAudioBufferQue; - - // An item in the queue of audio buffer descriptors - CQueItem* iQueItem; - - // Flag to tell if recording should be ended - TBool iEndFlag; - - // Recognizer mode - TRecognizerMode iMode; - - - // ----- End-pointing usage ----- - - // Start and end frames in audio buffer - TUint32 iStartFrame; - TUint32 iEndFrame; - TReal iFrameLength; - - // Start and end point in the audio buffer after end-pointing. - TUint32 iBufferStartPoint; - TUint32 iBufferEndPoint; - - // ----- State management ----- - - // State of DevASR - TDevASRState iDevASRState; - - // State of DevSound - TDevSoundState iDevSoundState; - - // ----- Active object handling ----- - TInt iRequestFunction; - - // resource file handler - CDevASRResourceHandler* iResourceHandler; - - - // NOTE(review): the next two members are undocumented in this header; - // presumably a buffer being assembled and the current offset into it (confirm). - TUint8* iBufferUnderConstruction; - TInt iConstructionPoint; - - // NOTE(review): undocumented; presumably the model bank and lexicon given to - // LoadModelsL() / LoadLexiconL() - confirm, including who owns them. - const CSIModelBank* iSIModelBank; - const CSILexicon* iSILexicon; - - // Flag which tells if pre-sampling has been started - TBool iPreSamplingStarted; - - // Wait loop to wait for audio initialization callback - CActiveSchedulerWait iAudioWait; - - // The following are used when dumping captured audio to file for analysis -#ifdef AUDIOBUFFER_TO_FILE - RFs iFs; - RFile iBufferDataFile; - TBool iFileCreated; -#endif - - }; - -#endif // DEVASRSRSALGORITHMMANAGER_H - -// End of file