--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/srsf/devasr/src/devasrsrsalgorithmmanager.h Thu Dec 17 08:46:30 2009 +0200
@@ -0,0 +1,773 @@
+/*
+* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: This file contains the definition of the DevASR private interface.
+*
+*/
+
+
+#ifndef DEVASRSRSALGORITHMMANAGER_H
+#define DEVASRSRSALGORITHMMANAGER_H
+
+//#define AUDIOBUFFER_TO_FILE
+
+// INCLUDES
+#include <sounddevice.h>
+#include <nsssispeechrecognitiondatadevasr.h>
+#include <nssdevasrcommon.h>
+#include "devasrutil.h"
+#include "devasrrecognitionalgmgr.h"
+#include "devasrresourcehandler.h"
+#include "devasrvmalgorithmmanager.h"
+
+
+#ifdef AUDIOBUFFER_TO_FILE
+#include <f32file.h>
+#endif
+
+// DATA TYPES
+
+// Possible DevASR states; CSRSAlgorithmManager keeps the current one in iDevASRState.
+enum TDevASRState
+ { // No explicit values: enumerators are numbered 0..11 in declaration order, so do not reorder.
+ EDASRIdle,
+ EDASRInitTrain,
+ EDASRTrain,
+ EDASRSampling,
+ EDASRTrainSampling,
+ EDASRPlayback,
+ EDASRRecognitionPlayback,
+ EDASRPreRecognition,
+ EDASRInitRecognition,
+ EDASRRecognition,
+ EDASRRecognitionSampling,
+ EDASRCancel
+ };
+
+// Possible DevSound states; CSRSAlgorithmManager keeps the current one in iDevSoundState.
+enum TDevSoundState
+ { // No explicit values: enumerators are numbered 0..4 in declaration order, so do not reorder.
+ ESoundDeviceNotInitialized,
+ ESoundDeviceInitialized,
+ ESoundDeviceRecord,
+ ESoundDevicePlayback,
+ ESoundDeviceStopped
+ };
+
+// FORWARD DECLARATIONS
+class MDevASRObserver;
+class CRecognitionAlgMgr;
+
+// CLASS DECLARATION
+
+/**
+* Interface Class to be used by the DevASR thin client implementation.
+* Active object (CActive) that also implements the DevSound, RecAlgMgr and VmAlgMgr observer mixins.
+* @lib NssDevASR.lib
+*/
+class CSRSAlgorithmManager : public CActive,
+ public MDevSoundObserver,
+ public MRecAlgMgrObserver,
+ public MVmAlgMgrObserver
+ {
+ public: // Constructors and destructor
+
+ /**
+ * Two-phased constructors taking the DevASR observer.
+ */
+ static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver);
+ static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver);
+
+ /**
+ * Destructor.
+ */
+ virtual ~CSRSAlgorithmManager();
+
+ public: // New functions
+
+
+ /**
+ * Does grammar compilation.
+ */
+ void CompileGrammarL( CSICompiledGrammar& aGrammar );
+
+ /**
+ * Computes new grammar size with given number of variants.
+ */
+ void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar,
+ const TUint32 aTargetNRuleVariants,
+ const TUint32 aMaxNRuleVariants,
+ const RArray<TUint>& aNewRuleScores,
+ RArray<TUint>& aNNeNRuleVariants, // NOTE(review): name looks like a typo for aNNewRuleVariants - confirm
+ TUint32& aNPrune );
+
+ /**
+ * Prunes unnecessary rule variants away from grammar.
+ */
+ TBool PruneGrammar( const CSIGrammar& aGrammar,
+ const TUint32 aMinNumber,
+ RArray<TSIRuleVariantInfo>& aPrunedRuleVariants );
+
+ /**
+ * Starts speaker independent training from text.
+ */
+ void StartTrainingFromTextL( CSITtpWordList& aWordList,
+ const RArray<TLanguage>& aDefaultLanguage,
+ const RArray<TUint32>& aMaxNPronunsForWord );
+
+
+
+ /**
+ * Adapts recognition models
+ *
+ * @param "const CSIResultSet& aResultSet" Result set
+ * @param "TInt aResultIndex" Index of correct result
+ */
+ void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex );
+
+ /**
+ * Cancels the current or started tasks.
+ */
+ void Cancel(); // NOTE(review): same name as CActive::Cancel(); confirm the hiding is intended
+
+ /**
+ * This method is used to indicate the end of a recognition session.
+ */
+ void EndRecSession();
+
+ /**
+ * Retrieve the properties of the underlying speech recognition engine.
+ *
+ * @param "const RArray<TInt>& aPropertyId" An array of identifiers being
+ * queried.
+ * @param "RArray<TInt>& aPropertyValue" An array of values corresponding
+ * to the queried identifiers
+ */
+ void GetEnginePropertiesL( const RArray<TInt>& aPropertyId,
+ RArray<TInt>& aPropertyValue );
+
+ /**
+ * Initializes the front-end module in the speech recognition engine.
+ * The frontend module used during training/recognition functions is started
+ * as a result. This method is intended to be used in conjunction with InitRecognizerBE().
+ *
+ * @param "TRecognizerMode aFeMode" Mode of the recognizer.
+ */
+ void InitFrontEnd( TRecognizerMode aFeMode );
+
+ /**
+ * Initialize the recognition engine back-end. The module responsible for recognition
+ * function is started as a result. This method must be used before any recognition
+ * operations and intended to be used in conjunction with InitFrontEnd().
+ *
+ * @param "CSIResultSet& aResult" A reference to an object where the
+ * recognition result will be written
+ */
+ void InitRecognizerBE( CSIResultSet& aResult );
+
+ /**
+ * Load the specified recognizer parameter(s). These parameters are used to alter
+ * the recognizer's default parameters. The parameters are specified as attribute-value
+ * pairs.
+ *
+ * @param "const RArray<TInt>& aParameterId" An array of parameter identifiers.
+ * @param "const RArray<TInt>& aParameterValue" An array of parameter values.
+ */
+ void LoadEnginePropertiesL( const RArray<TInt>& aParameterId,
+ const RArray<TInt>& aParameterValue );
+
+ /**
+ * Load the specified grammar into the recognizer.
+ *
+ * @param "aGrammar" A reference to a grammar in an internal format.
+ */
+ void LoadGrammarL( const CSIGrammar& aGrammar );
+ void LoadGrammarL( const CSICompiledGrammar& aGrammar );
+
+ /**
+ * Unloads the specified grammar from the recognizer.
+ *
+ * @param "aGrammar" A reference to a grammar in an internal format.
+ */
+ void UnloadGrammarL( const CSIGrammar& aGrammar );
+ void UnloadGrammarL( const CSICompiledGrammar& aGrammar );
+
+ /**
+ * Activates a grammar
+ *
+ * @param "TSIGrammarID aGrammarID" A grammar identifier.
+ */
+ void ActivateGrammarL( TSIGrammarID aGrammarID );
+
+ /**
+ * Deactivates a grammar
+ *
+ * @param "TSIGrammarID aGrammarID" A grammar identifier.
+ */
+ void DeactivateGrammarL( TSIGrammarID aGrammarID );
+
+ /**
+ * Load the specified lexicon into the recognizer.
+ *
+ * @param "aLexicon" A reference to a lexicon.
+ */
+ void LoadLexiconL( const CSILexicon& aLexicon );
+
+ /**
+ * Load the specified models into the recognizer.
+ *
+ * @param "aModels" A reference to a model bank.
+ */
+ void LoadModelsL( const CSIModelBank& aModels );
+
+ /**
+ * Request to begin recording.
+ *
+ * @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time
+ * to record, expressed in microseconds.
+ */
+ void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration );
+
+ /**
+ * Ends recording process
+ */
+ void StopRecognition();
+
+ /**
+ * Use to set the priority of the sound device
+ *
+ * @param "const TMMFPrioritySettings& aPrioritySettings" Priority settings
+ * structure.
+ */
+ void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings );
+
+ /**
+ * Request to start a recognition session.
+ *
+ * @param "TRecognizerMode aMode" Recognizer mode.
+ * @return result code of request
+ */
+ TInt StartRecSession( TRecognizerMode aMode );
+
+ /**
+ * Call to give speech data to recognizer.
+ *
+ * @param "TPtrC8& aBuffer" Filled buffer.
+ * @param "TBool aEnd" Flag to tell if this is the last buffer.
+ */
+ void SendSpeechData( TPtrC8& aBuffer, TBool aEnd );
+
+ /**
+ * Request to unload the specified rule in the grammar from recognizer.
+ *
+ * @param "TSIGrammarID aGrammarID" Identifier of the grammar to operate on.
+ * @param "TSIRuleID aRuleID" Identifier of the rule to remove.
+ */
+ void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID );
+
+ /**
+ * Retrieve the raw audio data accumulated during recording.
+ *
+ * @param "TDes8* aBuffer" Pointer to a buffer containing the data.
+ * @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the
+ * utterance.
+ */
+ void GetUtteranceData( TDes8* aBuffer,
+ TTimeIntervalMicroSeconds32& aDuration );
+
+ /**
+ * Checks if grammar is loaded or not
+ *
+ * @since 2.8
+ * @param "TSIGrammarID aGrammarID" Grammar ID
+ * @return ETrue if grammar is currently loaded, EFalse otherwise.
+ */
+ TBool IsGrammarLoaded( TSIGrammarID aGrammarID );
+
+ /**
+ * Checks if grammar is active or not
+ *
+ * @since 2.8
+ * @param "TSIGrammarID aGrammarID" Grammar ID
+ * @return ETrue if grammar is active, EFalse otherwise.
+ */
+ TBool IsGrammarActive( TSIGrammarID aGrammarID );
+
+ /**
+ * Prints DevASR state information for debugging
+ */
+ void DevASRState();
+
+ /**
+ * Pre-starts recording before StartRecognition() call.
+ *
+ * @since 3.2
+ */
+ void PreStartSamplingL();
+
+ // ===================================
+ // DevSound Observer MIXIN begins
+ // ===================================
+
+ /**
+ * Called by DevSound when a buffer is available to be read.
+ *
+ * @param "CMMFBuffer* aBuffer" Buffer containing data to be processed
+ */
+ void BufferToBeEmptied( CMMFBuffer* aBuffer );
+
+ /**
+ * Called by DevSound when a buffer is available to be written.
+ *
+ * @param "CMMFBuffer* aBuffer" Buffer to which data is written
+ */
+ void BufferToBeFilled( CMMFBuffer* aBuffer );
+
+ /**
+ * Not used by DevASR
+ */
+ void ConvertError( TInt aError );
+
+ /**
+ * Handles device event.
+ * @param "TUid aMessageType" Type of the message.
+ * @param "const TDesC8& aMsg" A message packed in the descriptor format.
+ */
+ void DeviceMessage( TUid aMessageType, const TDesC8& aMsg );
+
+ /**
+ * Called by DevSound when initialization is completed.
+ *
+ * @param aError Error code, KErrNone if successful.
+ */
+ void InitializeComplete( TInt aError );
+
+ /**
+ * Handles record completion or cancel event from DevSound.
+ *
+ * @param aError Error code, KErrNone if successful.
+ */
+ void RecordError( TInt aError );
+
+ /**
+ * Handles play completion or cancel event from DevSound.
+ *
+ * @param "TInt aError" Error code, KErrNone if successful.
+ */
+ void PlayError( TInt aError );
+
+ /**
+ * Handles tone play completion or cancel event from DevSound
+ * Tone is not used by DevASR so this event is not expected.
+ *
+ * @param "TInt aError" Error code, KErrNone if successful.
+ */
+ void ToneFinished( TInt aError );
+
+ /**
+ * Handles audio policy events from DevSound.
+ *
+ * @param "const TMMFEvent& aEvent" An audio policy event.
+ */
+ void SendEventToClient( const TMMFEvent& aEvent );
+
+ // ===================================
+ // DevSound Observer MIXIN ends
+ // ===================================
+
+ // ============================================
+ // RecognitionAlgMgr Observer MIXIN begins
+ // ============================================
+
+ /**
+ * Invoked by the adaptation hw device when model adaptation has
+ * finished its processing.
+ *
+ * @since 2.8
+ * @param "TInt aResultCode" Error code of operation.
+ */
+ void AdaptComplete( TInt aResultCode );
+
+ /**
+ * Invoked by the front-end when EOU has been detected.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void EouDetected( TInt aResultCode );
+
+ /**
+ * Invoked by the algorithm manager when the frontend initialization
+ * is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void InitFEComplete( TInt aResultCode );
+
+ /**
+ * Invoked by the algorithm manager when the recognition backend
+ * initialization is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void InitRecognizerBEComplete( TInt aResultCode );
+
+ /**
+ * Invoked by the algorithm manager when loading grammar
+ * is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void LoadGrammarComplete( TInt aResultCode );
+
+ /**
+ * Invoked when grammar has been unloaded.
+ *
+ * @param "TInt aResultCode" Result of grammar unloading
+ */
+ void UnloadGrammarComplete( TInt aResultCode );
+
+ /**
+ * Invoked when grammar has been activated.
+ *
+ * @param "TInt aResultCode" Result of grammar activation
+ */
+ void ActivateGrammarComplete( TInt aResultCode );
+
+ /**
+ * Invoked when grammar has been deactivated.
+ *
+ * @param "TInt aResultCode" Result of grammar deactivation
+ */
+ void DeActivateGrammarComplete( TInt aResultCode );
+
+ /**
+ * Invoked by the algorithm manager when loading lexicon
+ * is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void LoadLexiconComplete( TInt aResultCode );
+
+ /**
+ * Invoked by the algorithm manager when loading models
+ * is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void LoadModelsComplete( TInt aResultCode );
+
+ /**
+ * Invoked when the recognition process is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void RecognitionComplete( TInt aResultCode );
+
+ /**
+ * Invoked when the training process is completed.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void TrainComplete( TInt aResultCode );
+
+ /**
+ * Invoked when rule unloading has been done.
+ *
+ * @param "TInt aResultCode" Result code, KErrNone if successful.
+ */
+ void UnloadRuleComplete( TInt aResultCode );
+
+ /**
+ * Invoked when a feature vector is available.
+ *
+ * @param "const TDesC8& aFV" Buffer containing a feature vector.
+ * @param "TInt32 aSNR" Signal-to-noise ratio.
+ * @param "TInt32 aPosition" Indicates whether this is the first,
+ * subsequent or last feature vector.
+ */
+ void FeatureVectorDataRcvd( const TDesC8& aFV,
+ TInt32 aSNR,
+ TInt32 aPosition );
+
+ /**
+ * Invoked by the front-end when utterance data is needed.
+ *
+ * @since 2.8
+ */
+ void RequestSpeechData();
+
+
+ /**
+ * Resolves result.
+ */
+ void ResolveResult( const RArray<TUint>& aNBestIDs,
+ CSIResultSet& aSIResultSet,
+ const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar,
+ const TDesC8& aCombinedData );
+
+ /**
+ * Invoked when grammar combining is needed
+ *
+ * @since 2.8
+ * @param "const RPointerArray<CSICompiledGrammar>& aCompiledGrammars"
+ * Array of previously compiled grammar
+ * @param "const RPointerArray<TSIRuleVariantInfo>& aExcludedRules"
+ * Rules to be blacklisted.
+ */
+ void CombineGrammarL( const RPointerArray<CSICompiledGrammar>& aCompiledGrammars,
+ const RPointerArray<TSIRuleVariantInfo>& aExcludedRules );
+
+ // ============================================
+ // RecognitionAlgMgr Observer MIXIN ends
+ // ============================================
+
+ // ============================================
+ // MVmAlgMgrObserver Observer MIXIN begins
+ // ============================================
+
+ /**
+ * Notifies that grammar combination has been done.
+ */
+ void CombineComplete( HBufC8* aResult, TInt aError );
+
+ /**
+ * Get the specified grammar.
+ *
+ * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found.
+ * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI
+ * active grammar, NULL if not found.
+ * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI
+ * non-active grammar, NULL if not found
+ */
+ void GetGrammarL( const TSIGrammarID aGrammarID,
+ CSICompiledGrammar** aSIActiveGrammar,
+ CSICompiledGrammar** aSIDeActivatedGrammar );
+
+ // ============================================
+ // MVmAlgMgrObserver Observer MIXIN ends
+ // ============================================
+
+ private:
+
+ /**
+ * C++ constructor (first phase).
+ *
+ * @param "MDevASRObserver& aObserver" Reference to observer.
+ */
+ CSRSAlgorithmManager( MDevASRObserver& aObserver );
+
+ /**
+ * By default Symbian 2nd phase constructor is private.
+ */
+ void ConstructL();
+
+ /**
+ * From CActive.
+ */
+ void RunL();
+ void DoCancel();
+
+ /**
+ * Empties and copies the data to holding buffer.
+ *
+ * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
+ * @param "TInt aBufferLength" Length of buffer
+ */
+ TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );
+
+ /**
+ * Fill the received buffer with utterance data up to the specified length.
+ *
+ * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data
+ * @param "TInt aBufferLength" Length of buffer
+ */
+ TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength );
+
+ /**
+ * Transition from current state to target state if possible.
+ *
+ * @param "TDevASRState aState" A target state
+ * @return ETrue if successful.
+ */
+ TBool StateTransition( TDevASRState aState );
+
+ /**
+ * Translate system error codes to DevASR cause codes.
+ *
+ * @param "TInt aError" System error constant
+ * @return A DevASR error code
+ */
+ TDevASRError TranslateError( TInt aError );
+
+ /**
+ * Async handler for a play request.
+ */
+ void HandlePlayL();
+
+ /**
+ * Async handler for a record request.
+ */
+ void HandleRecordL();
+
+ /**
+ * Async handler for a UtteranceDataProcessed event.
+ */
+ void HandleUtteranceDataProcessed();
+
+ /**
+ * Completes a request with specified status.
+ *
+ * @param "TInt aStatus" Operation status.
+ */
+ void Ready( const TInt aStatus );
+
+ /**
+ * Configure the sound device according to parameters in the resource file.
+ */
+ void ConfigureSoundDeviceL();
+
+ /**
+ * Initializes DevSound for sampling
+ */
+ void InitializeDevSoundL();
+
+ /**
+ * Starts sampling with DevSound
+ */
+ void StartDevSoundL();
+
+ private: // Data
+
+ // DevASR Observer, the SRS Plugin
+ MDevASRObserver* iDevASRObserver;
+
+ // ----- DevSound related data -----
+
+ // Pointer to DevSound
+ CMMFDevSound* iDevSound;
+
+ // structure of capabilities for DevSound instance
+ TMMFCapabilities iDevSoundCapabilities;
+
+ // structure of priorities for DevSound instance
+ TMMFPrioritySettings iPrioritySettings;
+
+ // stores the current volume and gain
+ TInt iCurrentVolume;
+ TInt iCurrentGain;
+
+ // ----- Algorithm managers ----
+
+ // Pointer to Recognition Algorithm Manager
+ CRecognitionAlgMgr* iRecognitionAlgMgr;
+
+ // Pointer to Vocabulary algorithm manager
+ CVMAlgorithmManager* iVMAlgorithmManager;
+
+ // Flag to tell if recognition should be started when first audio
+ // buffer has been sampled.
+ TBool iStartRecognition;
+
+ // Flag to tell if there is a pending RequestSpeechData() call from
+ // recognition hw device.
+ TBool iPendingRequestSpeechData;
+
+ // --- SD Training audio buffer management ---
+ // Start and stop points in the audio buffer, used during recording
+ TUint32 iStartPoint;
+ TUint32 iStopPoint;
+ // Audio buffer to hold PCM data coming from DevSound
+ TUint8* iAudioBuffer;
+ TPtr8 iPtr;
+
+
+ // ----- Buffer management -----
+
+ // Length of time to record, in microseconds
+ TTimeIntervalMicroSeconds32 iRecordDuration;
+
+
+ // Overall number of bytes that need to be sampled.
+ // Zero if we should sample until EndRecord() is called.
+ TInt iOverallLength;
+
+ // Number of samples that are recorded.
+ TInt iOverallSampled;
+
+ // Indicates if utterance processing is currently being done
+ TBool iProcessingUtterance;
+
+
+ // Queue of audio buffer descriptors to be processed
+ TSglQue<CQueItem>* iAudioBufferQue;
+
+ // An item in the queue of audio buffer descriptors
+ CQueItem* iQueItem;
+
+ // Flag to tell if recording should be ended
+ TBool iEndFlag;
+
+ // Recognizer mode
+ TRecognizerMode iMode;
+
+
+ // ----- End-pointing usage -----
+
+ // Start and end frames in audio buffer
+ TUint32 iStartFrame;
+ TUint32 iEndFrame;
+ TReal iFrameLength;
+
+ // Start and end point in the audio buffer after end-pointing.
+ TUint32 iBufferStartPoint;
+ TUint32 iBufferEndPoint;
+
+ // ----- State management -----
+
+ // State of DevASR
+ TDevASRState iDevASRState;
+
+ // State of DevSound
+ TDevSoundState iDevSoundState;
+
+ // ----- Active object handling -----
+ TInt iRequestFunction;
+
+ // resource file handler
+ CDevASRResourceHandler* iResourceHandler;
+
+
+ TUint8* iBufferUnderConstruction; // NOTE(review): undocumented; presumably a buffer being assembled - confirm ownership
+ TInt iConstructionPoint; // NOTE(review): presumably the write position in iBufferUnderConstruction - confirm
+
+ const CSIModelBank* iSIModelBank; // NOTE(review): presumably the model bank given to LoadModelsL() - confirm
+ const CSILexicon* iSILexicon; // NOTE(review): presumably the lexicon given to LoadLexiconL() - confirm
+
+ // Flag which tells if pre-sampling has been started
+ TBool iPreSamplingStarted;
+
+ // Wait loop to wait for audio initialization callback
+ CActiveSchedulerWait iAudioWait;
+
+ // The following are used when dumping captured audio to file for analysis
+#ifdef AUDIOBUFFER_TO_FILE
+ RFs iFs;
+ RFile iBufferDataFile;
+ TBool iFileCreated;
+#endif
+
+ };
+
+#endif // DEVASRSRSALGORITHMMANAGER_H
+
+// End of file