diff -r 000000000000 -r bf1d17376201 srsf/devasr/src/devasrsrsalgorithmmanager.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srsf/devasr/src/devasrsrsalgorithmmanager.h Thu Dec 17 08:46:30 2009 +0200 @@ -0,0 +1,773 @@ +/* +* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: This file contains the definition of the DevASR private interface. +* +*/ + + +#ifndef DEVASRSRSALGORITHMMANAGER_H +#define DEVASRSRSALGORITHMMANAGER_H + +//#define AUDIOBUFFER_TO_FILE + +// INCLUDES +#include +#include +#include +#include "devasrutil.h" +#include "devasrrecognitionalgmgr.h" +#include "devasrresourcehandler.h" +#include "devasrvmalgorithmmanager.h" + + +#ifdef AUDIOBUFFER_TO_FILE +#include +#endif + +// DATA TYPES + +// Possible DevASR states +enum TDevASRState + { + EDASRIdle, + EDASRInitTrain, + EDASRTrain, + EDASRSampling, + EDASRTrainSampling, + EDASRPlayback, + EDASRRecognitionPlayback, + EDASRPreRecognition, + EDASRInitRecognition, + EDASRRecognition, + EDASRRecognitionSampling, + EDASRCancel + }; + +// Possible DevSound states +enum TDevSoundState + { + ESoundDeviceNotInitialized, + ESoundDeviceInitialized, + ESoundDeviceRecord, + ESoundDevicePlayback, + ESoundDeviceStopped + }; + +// FORWARD DECLARATIONS +class MDevASRObserver; +class CRecognitionAlgMgr; + +// CLASS DECLARATION + +/** +* Interface Class to be used by the DevASR thin client implementation. 
+* +* @lib NssDevASR.lib +*/ +class CSRSAlgorithmManager : public CActive, + public MDevSoundObserver, + public MRecAlgMgrObserver, + public MVmAlgMgrObserver + { + public: // Constructors and destructor + + /** + * Two-phased constructors. + */ + static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver); + static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver); + + /** + * Destructor. + */ + virtual ~CSRSAlgorithmManager(); + + public: // New functions + + + /** + * Does grammar compilation. + */ + void CompileGrammarL( CSICompiledGrammar& aGrammar ); + + /** + * Computes new grammar size with given number of variants. + */ + void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar, + const TUint32 aTargetNRuleVariants, + const TUint32 aMaxNRuleVariants, + const RArray& aNewRuleScores, + RArray& aNNeNRuleVariants, + TUint32& aNPrune ); + + /** + * Prunes unnecessary rule variants away from grammar. + */ + TBool PruneGrammar( const CSIGrammar& aGrammar, + const TUint32 aMinNumber, + RArray& aPrunedRuleVariants ); + + /** + * Starts speaker independent training from text. + */ + void StartTrainingFromTextL( CSITtpWordList& aWordList, + const RArray& aDefaultLanguage, + const RArray& aMaxNPronunsForWord ); + + + + /** + * Adapts recognition models + * + * @param "CSIResultSet& aResultSet" Result set + * @param "TInt aResultIndex" Index of correct result + */ + void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex ); + + /** + * Cancels the current or started tasks. NOTE(review): shares its name with CActive::Cancel() and hides it in this class - confirm this is intended. + */ + void Cancel(); + + /** + * This method is used to indicate the end of a recognition session. + */ + void EndRecSession(); + + /** + * Retrieve the properties of the underlying speech recognition engine. + * + * @param "RArray& aPropertyId" An array of identifiers being + * queried. 
+ * @param "RArray& aPropertyValue" An array of values corresponding + * to the querried identifiers + */ + void GetEnginePropertiesL( const RArray& aPropertyId, + RArray& aPropertyValue ); + + /** + * Initializes the front-end module in the speech recognition engine. + * The frontend module used during training/recognition functions is started + * as a result. This method is intended to be used in conjunction with InitTrainBE(). + * + * @param "TRecognizerMode aFeMode" Mode of the recognizer. + */ + void InitFrontEnd( TRecognizerMode aFeMode ); + + /** + * Initialize the recognition engine back-end. The module responsible for recognition + * function is started as a result. This method must be used before any recognition + * operations and intended to be used in conjunction with InitFrontEnd(). + * + * @param "CSIResultSet& aResult" A reference to an object where the + * recognition result will be written + */ + void InitRecognizerBE( CSIResultSet& aResult ); + + /** + * Load the specified recognizer parameter(s). These parameters are used to alter + * the recognizer's default parameters. The parameters are specified as attribute-value + * pairs. + * + * @param "RArray& aParameterId" An array of parameter identifiers. + * @param "RArray& aParameterValue" An array of parameter values. + */ + void LoadEnginePropertiesL( const RArray& aParameterId, + const RArray& aParameterValue ); + + /** + * Load the specified grammar into the recognizer. + * + * @param "aGrammar" A reference to a grammar in an internal format. + */ + void LoadGrammarL( const CSIGrammar& aGrammar ); + void LoadGrammarL( const CSICompiledGrammar& aGrammar ); + + /** + * Unloads the specified grammar from the recognizer. + * + * @param "aGrammar" A reference to a grammar in an internal format. 
+ */ + void UnloadGrammarL( const CSIGrammar& aGrammar ); + void UnloadGrammarL( const CSICompiledGrammar& aGrammar ); + + /** + * Activates a grammar + * + * @param "TSIGrammarID aGrammarID" A grammar identifier. + */ + void ActivateGrammarL( TSIGrammarID aGrammarID ); + + /** + * Deactivates a grammar + * + * @param "TSIGrammarID aGrammarID" A grammar identifier. + */ + void DeactivateGrammarL( TSIGrammarID aGrammarID ); + + /** + * Load the specified lexicon into the recognizer. + * + * @param "aLexicon" A reference to a lexicon. + */ + void LoadLexiconL( const CSILexicon& aLexicon ); + + /** + * Load the specified models into the recognizer. + * + * @param "aModels" A reference to a model bank. + */ + void LoadModelsL( const CSIModelBank& aModels ); + + /** + * Request to begin recording. + * + * @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time + * to record, expressed in microseconds. + */ + void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration ); + + /** + * Ends recording process + */ + void StopRecognition(); + + /** + * Use to set the priority of the sound device + * + * @param "TMMFPrioritySettings& aPrioritySettings" Priority settings + * structure. + */ + void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings ); + + /** + * Request to start a recognition session. + * + * @param "TRecognizerMode aMode" Recognizer mode. + * @return result code of request + */ + TInt StartRecSession( TRecognizerMode aMode ); + + /** + * Call to give speech data to recognizer. + * + * @param "TPtrC8& aBuffer" Filled buffer. + * @param "TBool aEnd" Flag to tell if this is the last buffer. + */ + void SendSpeechData( TPtrC8& aBuffer, TBool aEnd ); + + /** + * Request to unload the specified rule in the grammar from recognizer. + * + * @param "TSIGrammarID aGrammarID" Identifier of the grammar to operate on. + * @param "TSIRuleID aRuleID" Identifier of the rule to remove. 
+ */ + void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID ); + + /** + * Retrieve the raw audio data accumulated during recording. + * + * @param "TDes8* aBuffer" Pointer to a buffer containing the data. + * @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the + * utterance. + */ + void GetUtteranceData( TDes8* aBuffer, + TTimeIntervalMicroSeconds32& aDuration ); + + /** + * Checks if grammar is loaded or not + * + * @since 2.8 + * @param "TSIGrammarID aGrammarID" Grammar ID + * @return ETrue if grammar is currently loaded, EFalse otherwise. + */ + TBool IsGrammarLoaded( TSIGrammarID aGrammarID ); + + /** + * Checks if grammar is active or not + * + * @since 2.8 + * @param "TSIGrammarID aGrammarID" Grammar ID + * @return ETrue if grammar is active, EFalse otherwise. + */ + TBool IsGrammarActive( TSIGrammarID aGrammarID ); + + /** + * Prints DevASR state information for debugging + */ + void DevASRState(); + + /** + * Pre-starts recording before StartRecording call. + * + * @since 3.2 + */ + void PreStartSamplingL(); + + // =================================== + // DevSound Observer MIXIN begins + // =================================== + + /** + * Called by DevSound when a buffer is available to be read. + * + * @param "CMMFBuffer* aBuffer" Buffer containing data to be processed + */ + void BufferToBeEmptied( CMMFBuffer* aBuffer ); + + /** + * Called by DevSound when a buffer is available to be written. + * + * @param "CMMFBuffer* aBuffer" Buffer to which data is written + */ + void BufferToBeFilled( CMMFBuffer* aBuffer ); + + /** + * Not used by DevASR + */ + void ConvertError( TInt aError ); + + /** + * Handles device event. + * + * @param "TDesC8& aMsg" A message packed in the descriptor format. + */ + void DeviceMessage( TUid aMessageType, const TDesC8& aMsg ); + + /** + * Called by DevSound when initialization is completed. + * + * @param aError Error code, KErrNone if successful. 
+ */ + void InitializeComplete( TInt aError ); + + /** + * Handles record completion or cancel event from DevSound. + * + * @param aError Error code, KErrNone if successful. + */ + void RecordError( TInt aError ); + + /** + * Handles play completion or cancel event from DevSound. + * + * @param "TInt aError" Error code, KErrNone if successful. + */ + void PlayError( TInt aError ); + + /** + * Handles tone play completion or cancel event from DevSound + * Tone is not used by DevASR so this event is not expected. + * + * @param "TInt aError" Error code, KErrNone if successful. + */ + void ToneFinished( TInt aError ); + + /** + * Handles audio policy events from DevSound. + * + * @param "TMMFEvent& aEvent" An audio policy event. + */ + void SendEventToClient( const TMMFEvent& aEvent ); + + // =================================== + // DevSound Observer MIXIN ends + // =================================== + + // ============================================ + // RecognitionAlgMgr Observer MIXIN begins + // ============================================ + + /** + * Invoked by the adaptation hw device when model adaptation has + * finished its processing. + * + * @since 2.8 + * @param "TInt aResultCode" Error code of operation. + */ + void AdaptComplete( TInt aResultCode ); + + /** + * Invoked by the front-end when EOU has been detected. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void EouDetected( TInt aResultCode ); + + /** + * Invoked by the algorithm manager when the frontend initialization + * is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void InitFEComplete( TInt aResultCode ); + + /** + * Invoked by the algorithm manager when the recognition backend + * initialization is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void InitRecognizerBEComplete( TInt aResultCode ); + + /** + * Invoked by the algorithm manager when loading grammar + * is completed. 
+ * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void LoadGrammarComplete( TInt aResultCode ); + + /** + * Invoked when grammar has been unloaded. + * + * @param "TInt aResultCode" Result of grammar unloading + */ + void UnloadGrammarComplete( TInt aResultCode ); + + /** + * Invoked when grammar has been activated. + * + * @param "TInt aResultCode" Result of grammar activation + */ + void ActivateGrammarComplete( TInt aResultCode ); + + /** + * Invoked when grammar has been deactivated. + * + * @param "TInt aResultCode" Result of grammar deactivation + */ + void DeActivateGrammarComplete( TInt aResultCode ); + + /** + * Invoked by the algorithm manager when loading lexicon + * is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void LoadLexiconComplete( TInt aResultCode ); + + /** + * Invoked by the algorithm manager when loading models + * is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void LoadModelsComplete( TInt aResultCode ); + + /** + * Invoked when the recognition process is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void RecognitionComplete( TInt aResultCode ); + + /** + * Invoked when the training process is completed. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void TrainComplete( TInt aResultCode ); + + /** + * Invoked when rule unloading has been done. + * + * @param "TInt aResultCode" Result code, KErrNone if successful. + */ + void UnloadRuleComplete( TInt aResultCode ); + + /** + * Invoked when a feature vector is available. + * + * @param "TDesC8& aFV" Buffer containing a feature vector. + * @param "TInt32 aSNR" Signal-to-noise ratio. + * @param "TInt32 aPosition" Indicates whether this is the first, + * subsequent or last feature vector. 
+ */ + void FeatureVectorDataRcvd( const TDesC8& aFV, + TInt32 aSNR, + TInt32 aPosition ); + + /** + * Invoked by the front-end when utterance data is needed. + * + * @since 2.8 + */ + void RequestSpeechData(); + + + /** + * Resolves result. + */ + void ResolveResult( const RArray& aNBestIDs, + CSIResultSet& aSIResultSet, + const RPointerArray& aSICompiledGrammar, + const TDesC8& aCombinedData ); + + /** + * Invoked when grammar combining is needed + * + * @since 2.8 + * @param "RPointerArray& aCompiledGrammars" + * Array of previously compiled grammars + * @param "const RPointerArray& aExcludedRules" + * Rules to be blacklisted. + */ + void CombineGrammarL( const RPointerArray& aCompiledGrammars, + const RPointerArray& aExcludedRules ); + + // ============================================ + // RecognitionAlgMgr Observer MIXIN ends + // ============================================ + + // ============================================ + // MVmAlgMgrObserver Observer MIXIN begins + // ============================================ + + /** + * Notifies that grammar combination has been done. + */ + void CombineComplete( HBufC8* aResult, TInt aError ); + + /** + * Get the specified grammar. + * + * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found. + * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI + * active grammar, NULL if not found. + * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI + * non-active grammar, NULL if not found + */ + void GetGrammarL( const TSIGrammarID aGrammarID, + CSICompiledGrammar** aSIActiveGrammar, + CSICompiledGrammar** aSIDeActivatedGrammar ); + + // ============================================ + // MVmAlgMgrObserver Observer MIXIN ends + // ============================================ + + private: + + /** + * C++ default constructor. + * + * @param "MDevASRObserver& aObserver" Reference to observer. 
+ */ + CSRSAlgorithmManager( MDevASRObserver& aObserver ); + + /** + * By default Symbian 2nd phase constructor is private. + */ + void ConstructL(); + + /** + * From CActive. + */ + void RunL(); + void DoCancel(); + + /** + * Empties and copies the data to holding buffer. + * + * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data + * @param "TInt aBufferLength" Length of buffer + */ + TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); + + /** + * Fill the received buffer with utterance data up to the specified length. + * + * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data + * @param "TInt aBufferLength" Length of buffer + */ + TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); + + /** + * Transition from current state to target state if possible. + * + * @param "TDevASRState aState" A target state + * @return ETrue if successful. + */ + TBool StateTransition( TDevASRState aState ); + + /** + * Translate system error codes to DevASR cause codes. + * + * @param "TInt aError" System error constant + * @return A DevASR error code + */ + TDevASRError TranslateError( TInt aError ); + + /** + * Async handler for a play request. + */ + void HandlePlayL(); + + /** + * Async handler for a record request. + */ + void HandleRecordL(); + + /** + * Async handler for a UtteranceDataProcessed event. + */ + void HandleUtteranceDataProcessed(); + + /** + * Completes a request with specified status. + * + * @param "TInt aStatus" Operation status. + */ + void Ready( const TInt aStatus ); + + /** + * Configure the sound device according to parameters in the resource file. 
+ */ + void ConfigureSoundDeviceL(); + + /** + * Initializes DevSound for sampling + */ + void InitializeDevSoundL(); + + /** + * Starts sampling with DevSound + */ + void StartDevSoundL(); + + private: // Data + + // DevASR Observer, the SRS Plugin + MDevASRObserver* iDevASRObserver; + + // ----- DevSound related data ----- + + // Pointer to DevSound + CMMFDevSound* iDevSound; + + // structure of capabilities for DevSound instance + TMMFCapabilities iDevSoundCapabilities; + + // structure of priorities for DevSound instance + TMMFPrioritySettings iPrioritySettings; + + // stores the current volume and gain + TInt iCurrentVolume; + TInt iCurrentGain; + + // ----- Algorithm managers ---- + + // Pointer to Recognition Algorithm Manager + CRecognitionAlgMgr* iRecognitionAlgMgr; + + // Pointer to Vocabulary algorithm manager + CVMAlgorithmManager* iVMAlgorithmManager; + + // Flag to tell if recognition should be started when first audio + // buffer has been sampled. + TBool iStartRecognition; + + // Flag to tell if there is a pending RequestSpeechData() call from + // recognition hw device. + TBool iPendingRequestSpeechData; + + // --- SD Training audio buffer management --- + // Start and stop points in the audio buffer, used during recording + TUint32 iStartPoint; + TUint32 iStopPoint; + // Audio buffer to hold PCM data coming from DevSound + TUint8* iAudioBuffer; + TPtr8 iPtr; + + + // ----- Buffer management ----- + + // Length of time to record (microseconds) + TTimeIntervalMicroSeconds32 iRecordDuration; + + + // Overall number of bytes that need to be sampled. + // Zero if we should sample until EndRecord() is called. + TInt iOverallLength; + + // Number of samples that are recorded. 
+ TInt iOverallSampled; + + // Indicates if utterance processing is currently being done + TBool iProcessingUtterance; + + + // Queue of audio buffer descriptors to be processed + TSglQue* iAudioBufferQue; + + // An item in the queue of audio buffer descriptors + CQueItem* iQueItem; + + // Flag to tell if recording should be ended + TBool iEndFlag; + + // Recognizer mode + TRecognizerMode iMode; + + + // ----- End-pointing usage ----- + + // Start and end frames in audio buffer + TUint32 iStartFrame; + TUint32 iEndFrame; + TReal iFrameLength; + + // Start and end point in the audio buffer after end-pointing. + TUint32 iBufferStartPoint; + TUint32 iBufferEndPoint; + + // ----- State management ----- + + // State of DevASR + TDevASRState iDevASRState; + + // State of DevSound + TDevSoundState iDevSoundState; + + // ----- Active object handling ----- + TInt iRequestFunction; + + // resource file handler + CDevASRResourceHandler* iResourceHandler; + + + TUint8* iBufferUnderConstruction; + TInt iConstructionPoint; + + const CSIModelBank* iSIModelBank; + const CSILexicon* iSILexicon; + + // Flag which tells if pre-sampling has been started + TBool iPreSamplingStarted; + + // Wait loop to wait for audio initialization callback + CActiveSchedulerWait iAudioWait; + + // The following are used when dumping captured audio to file for analysis +#ifdef AUDIOBUFFER_TO_FILE + RFs iFs; + RFile iBufferDataFile; + TBool iFileCreated; +#endif + + }; + +#endif // DEVASRSRSALGORITHMMANAGER_H + +// End of file