/*
* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:  This file contains definition of the DevASR private interface.
*
*/


19 #ifndef DEVASRSRSALGORITHMMANAGER_H |
|
20 #define DEVASRSRSALGORITHMMANAGER_H |
|
21 |
|
22 //#define AUDIOBUFFER_TO_FILE |
|
23 |
|
24 // INCLUDES |
|
25 #include <sounddevice.h> |
|
26 #include <nsssispeechrecognitiondatadevasr.h> |
|
27 #include <nssdevasrcommon.h> |
|
28 #include "devasrutil.h" |
|
29 #include "devasrrecognitionalgmgr.h" |
|
30 #include "devasrresourcehandler.h" |
|
31 #include "devasrvmalgorithmmanager.h" |
|
32 |
|
33 |
|
34 #ifdef AUDIOBUFFER_TO_FILE |
|
35 #include <f32file.h> |
|
36 #endif |
|
37 |
|
// DATA TYPES

/**
* Possible DevASR states.
*
* This is the type of CSRSAlgorithmManager::iDevASRState and the argument
* type of CSRSAlgorithmManager::StateTransition().
* The enumerators carry no explicit values, so they are numbered
* consecutively from 0 (EDASRIdle) to 11 (EDASRCancel); keep the order
* stable if the numeric values are relied upon anywhere.
*/
enum TDevASRState
    {
    EDASRIdle,
    EDASRInitTrain,
    EDASRTrain,
    EDASRSampling,
    EDASRTrainSampling,
    EDASRPlayback,
    EDASRRecognitionPlayback,
    EDASRPreRecognition,
    EDASRInitRecognition,
    EDASRRecognition,
    EDASRRecognitionSampling,
    EDASRCancel
    };

/**
* Possible DevSound states.
*
* This is the type of CSRSAlgorithmManager::iDevSoundState.
* The enumerators carry no explicit values, so they are numbered
* consecutively from 0 (ESoundDeviceNotInitialized) to
* 4 (ESoundDeviceStopped).
*/
enum TDevSoundState
    {
    ESoundDeviceNotInitialized,
    ESoundDeviceInitialized,
    ESoundDeviceRecord,
    ESoundDevicePlayback,
    ESoundDeviceStopped
    };

// FORWARD DECLARATIONS
// Only pointers and references to these types are used in the class
// declaration below, so incomplete types are sufficient here.
class MDevASRObserver;
class CRecognitionAlgMgr;

71 // CLASS DECLARATION |
|
72 |
|
73 /** |
|
74 * Interface Class to be used by the DevASR thin client implementation. |
|
75 * |
|
76 * @lib NssDevASR.lib |
|
77 */ |
|
78 class CSRSAlgorithmManager : public CActive, |
|
79 public MDevSoundObserver, |
|
80 public MRecAlgMgrObserver, |
|
81 public MVmAlgMgrObserver |
|
82 { |
|
83 public: // Constructors and destructor |
|
84 |
|
85 /** |
|
86 * Two-phased constructors. |
|
87 */ |
|
88 static CSRSAlgorithmManager* NewL(MDevASRObserver& aObserver); |
|
89 static CSRSAlgorithmManager* NewLC(MDevASRObserver& aObserver); |
|
90 |
|
91 /** |
|
92 * Destructor. |
|
93 */ |
|
94 virtual ~CSRSAlgorithmManager(); |
|
95 |
|
96 public: // New functions |
|
97 |
|
98 |
|
99 /** |
|
100 * Does grammar compilation. |
|
101 */ |
|
102 void CompileGrammarL( CSICompiledGrammar& aGrammar ); |
|
103 |
|
104 /** |
|
105 * Computes new grammar size with given number of variants. |
|
106 */ |
|
107 void ComputeNewGrammarSizeL( const CSIGrammar& aGrammar, |
|
108 const TUint32 aTargetNRuleVariants, |
|
109 const TUint32 aMaxNRuleVariants, |
|
110 const RArray<TUint>& aNewRuleScores, |
|
111 RArray<TUint>& aNNeNRuleVariants, |
|
112 TUint32& aNPrune ); |
|
113 |
|
114 /** |
|
115 * Prunes unnecessay rule variants away from grammar. |
|
116 */ |
|
117 TBool PruneGrammar( const CSIGrammar& aGrammar, |
|
118 const TUint32 aMinNumber, |
|
119 RArray<TSIRuleVariantInfo>& aPrunedRuleVariants ); |
|
120 |
|
121 /** |
|
122 * Starts speaker independent training from text. |
|
123 */ |
|
124 void StartTrainingFromTextL( CSITtpWordList& aWordList, |
|
125 const RArray<TLanguage>& aDefaultLanguage, |
|
126 const RArray<TUint32>& aMaxNPronunsForWord ); |
|
127 |
|
128 |
|
129 |
|
130 /** |
|
131 * Adapts recognition models |
|
132 * |
|
133 * @param "CSIResultSet& aResultSet" Result set |
|
134 * @param "TInt aResultIndex" Index of correct result |
|
135 */ |
|
136 void AdaptL( const CSIResultSet& aResultSet, TInt aResultIndex ); |
|
137 |
|
138 /** |
|
139 * Cancels the current or started tasks. |
|
140 */ |
|
141 void Cancel(); |
|
142 |
|
143 /** |
|
144 * This method is used to indicate the end of a recognition session. |
|
145 */ |
|
146 void EndRecSession(); |
|
147 |
|
148 /** |
|
149 * Retreive the properties of the underlying speech recognition engine. |
|
150 * |
|
151 * @param "RArray<TInt>& aPropertyId" An array of identifiers being |
|
152 * querried. |
|
153 * @param "RArray<TInt>& aPropertyValue" An array of values corresponding |
|
154 * to the querried identifiers |
|
155 */ |
|
156 void GetEnginePropertiesL( const RArray<TInt>& aPropertyId, |
|
157 RArray<TInt>& aPropertyValue ); |
|
158 |
|
159 /** |
|
160 * Initializes the front-end module in the speech recognition engine. |
|
161 * The frontend module used during training/recognition functions is started |
|
162 * as a result. This method is intended to be used in conjunction with InitTrainBE(). |
|
163 * |
|
164 * @param "TRecognizerMode aFeMode" Mode of the recognizer. |
|
165 */ |
|
166 void InitFrontEnd( TRecognizerMode aFeMode ); |
|
167 |
|
168 /** |
|
169 * Initialize the recognition engine back-end. The module responsible for recognition |
|
170 * function is started as a result. This method must be used before any recognition |
|
171 * operations and intended to be used in conjunction with InitFrontEnd(). |
|
172 * |
|
173 * @param "CSIResultSet& aResult" A reference to an object where the |
|
174 * recognition result will be written |
|
175 */ |
|
176 void InitRecognizerBE( CSIResultSet& aResult ); |
|
177 |
|
178 /** |
|
179 * Load the specified recognizer parameter(s). These parameters are used to alter |
|
180 * the recognizer's default parameters. The parameters are specified as attribute-value |
|
181 * pairs. |
|
182 * |
|
183 * @param "RArray<TInt>& aParameterId" An array of parameter identifiers. |
|
184 * @param "RArray<TInt>& aParameterValue" An array of parameter values. |
|
185 */ |
|
186 void LoadEnginePropertiesL( const RArray<TInt>& aParameterId, |
|
187 const RArray<TInt>& aParameterValue ); |
|
188 |
|
189 /** |
|
190 * Load the specified grammar into the recognizer. |
|
191 * |
|
192 * @param "aGrammar" A reference to a grammar in an internal format. |
|
193 */ |
|
194 void LoadGrammarL( const CSIGrammar& aGrammar ); |
|
195 void LoadGrammarL( const CSICompiledGrammar& aGrammar ); |
|
196 |
|
197 /** |
|
198 * Unloads the specified grammar from the recognizer. |
|
199 * |
|
200 * @param "aGrammar" A reference to a grammar in an internal format. |
|
201 */ |
|
202 void UnloadGrammarL( const CSIGrammar& aGrammar ); |
|
203 void UnloadGrammarL( const CSICompiledGrammar& aGrammar ); |
|
204 |
|
205 /** |
|
206 * Activates a grammar |
|
207 * |
|
208 * @param "TSIGrammarID aGrammarID" A grammar identifier. |
|
209 */ |
|
210 void ActivateGrammarL( TSIGrammarID aGrammarID ); |
|
211 |
|
212 /** |
|
213 * Deactivates a grammar |
|
214 * |
|
215 * @param "TSIGrammarID aGrammarID" A grammar identifier. |
|
216 */ |
|
217 void DeactivateGrammarL( TSIGrammarID aGrammarID ); |
|
218 |
|
219 /** |
|
220 * Load the specified lexicon into the recognizer. |
|
221 * |
|
222 * @param "aLexicon" A reference to a lexicon. |
|
223 */ |
|
224 void LoadLexiconL( const CSILexicon& aLexicon ); |
|
225 |
|
226 /** |
|
227 * Load the specified models into the recognizer. |
|
228 * |
|
229 * @param "aModels" A reference to a model bank. |
|
230 */ |
|
231 void LoadModelsL( const CSIModelBank& aModels ); |
|
232 |
|
233 /** |
|
234 * Request to begin recording. |
|
235 * |
|
236 * @param "TTimeIntervalMicroSeconds32 aRecordDuration" Length of time |
|
237 * to record, expressed in microseconds. |
|
238 */ |
|
239 void StartRecognition( TTimeIntervalMicroSeconds32 aRecordDuration ); |
|
240 |
|
241 /** |
|
242 * Ends recording process |
|
243 */ |
|
244 void StopRecognition(); |
|
245 |
|
246 /** |
|
247 * Use to set the priority of the sound device |
|
248 * |
|
249 * @param "TMMFPrioritySettings& aPrioritySettings" Priority settings |
|
250 * structure. |
|
251 */ |
|
252 void SetPrioritySettings( const TMMFPrioritySettings& aPrioritySettings ); |
|
253 |
|
254 /** |
|
255 * Request to start a recognition session. |
|
256 * |
|
257 * @param "TRecognizerMode aMode" Recognizer mode. |
|
258 * @return result code of request |
|
259 */ |
|
260 TInt StartRecSession( TRecognizerMode aMode ); |
|
261 |
|
262 /** |
|
263 * Call to give speech data to recognizer. |
|
264 * |
|
265 * @param "TDesC8& aBuffer" Filled buffer. |
|
266 * @param "TBool aEnd" Flag to tell if this is the last buffer. |
|
267 */ |
|
268 void SendSpeechData( TPtrC8& aBuffer, TBool aEnd ); |
|
269 |
|
270 /** |
|
271 * Request to unload the specified rule in the grammar from recognizer. |
|
272 * |
|
273 * @param "TSIGrammarID aGrammarID" Identifer of the grammar to operate on. |
|
274 * @param "TSIRuleID aRuleID" Identifier of the rule to remove. |
|
275 */ |
|
276 void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID ); |
|
277 |
|
278 /** |
|
279 * Retreive the raw audio data accumulated during recording. |
|
280 * |
|
281 * @param "TDes8* aBuffer" Reference to a buffer containing the data. |
|
282 * @param "TTimeIntervalMicroSeconds32& aDuration" Duration of the |
|
283 * utterance. |
|
284 */ |
|
285 void GetUtteranceData( TDes8* aBuffer, |
|
286 TTimeIntervalMicroSeconds32& aDuration ); |
|
287 |
|
288 /** |
|
289 * Checks if grammar is loaded or not |
|
290 * |
|
291 * @since 2.8 |
|
292 * @param "TSIGrammarID aGrammarID" Grammar ID |
|
293 * @return ETrue if grammar is currently loaded, EFalse otherwise. |
|
294 */ |
|
295 TBool IsGrammarLoaded( TSIGrammarID aGrammarID ); |
|
296 |
|
297 /** |
|
298 * Checks if grammar is active or not |
|
299 * |
|
300 * @since 2.8 |
|
301 * @param "TSIGrammarID aGrammarID" Grammar ID |
|
302 * @return ETrue if grammar is active, EFalse otherwise. |
|
303 */ |
|
304 TBool IsGrammarActive( TSIGrammarID aGrammarID ); |
|
305 |
|
306 /** |
|
307 * Prints DevASR state information for debugging |
|
308 */ |
|
309 void DevASRState(); |
|
310 |
|
311 /** |
|
312 * Pre-starts recording before StartRecording call. |
|
313 * |
|
314 * @since 3.2 |
|
315 */ |
|
316 void PreStartSamplingL(); |
|
317 |
|
318 // =================================== |
|
319 // DevSound Observer MIXIN begins |
|
320 // =================================== |
|
321 |
|
322 /** |
|
323 * Called by DevSound when a buffer is available to be read. |
|
324 * |
|
325 * @param "CMMFBuffer* aBuffer" Buffer containing data to be processed |
|
326 */ |
|
327 void BufferToBeEmptied( CMMFBuffer* aBuffer ); |
|
328 |
|
329 /** |
|
330 * Called by DevSound when a buffer is available to be written. |
|
331 * |
|
332 * @param "CMMFBuffer* aBuffer" Buffer to which data is written |
|
333 */ |
|
334 void BufferToBeFilled( CMMFBuffer* aBuffer ); |
|
335 |
|
336 /** |
|
337 * Not used by DevASR |
|
338 */ |
|
339 void ConvertError( TInt aError ); |
|
340 |
|
341 /** |
|
342 * Handles device event. |
|
343 * |
|
344 * @param "TDesC8& aMsg" A message packed in the descriptor format. |
|
345 */ |
|
346 void DeviceMessage( TUid aMessageType, const TDesC8& aMsg ); |
|
347 |
|
348 /** |
|
349 * Called by DevSound when initialization is completed. |
|
350 * |
|
351 * @param aError Error code, KErrNone if successful. |
|
352 */ |
|
353 void InitializeComplete( TInt aError ); |
|
354 |
|
355 /** |
|
356 * Handles record completion or cancel event from DevSound. |
|
357 * |
|
358 * @param aError Error code, KErrNone if successful. |
|
359 */ |
|
360 void RecordError( TInt aError ); |
|
361 |
|
362 /** |
|
363 * Handles play completion or cancel event from DevSound. |
|
364 * |
|
365 * @param "TInt aError" Error code, KErrNone if successful. |
|
366 */ |
|
367 void PlayError( TInt aError ); |
|
368 |
|
369 /** |
|
370 * Handles tone play completion or cancel event from DevSound |
|
371 * Tone is not used by DevASR so this event is not expected. |
|
372 * |
|
373 * @param "TInt aError" Error code, KErrNone if successful. |
|
374 */ |
|
375 void ToneFinished( TInt aError ); |
|
376 |
|
377 /** |
|
378 * Handles audio policy events from DevSound. |
|
379 * |
|
380 * @param "TMMFEvent&" aEvent An audio policy event. |
|
381 */ |
|
382 void SendEventToClient( const TMMFEvent& aEvent ); |
|
383 |
|
384 // =================================== |
|
385 // DevSound Observer MIXIN ends |
|
386 // =================================== |
|
387 |
|
388 // ============================================ |
|
389 // RecognitionAlgMgr Observer MIXIN begins |
|
390 // ============================================ |
|
391 |
|
392 /** |
|
393 * Invoked by the adaptation hw device when model adaptation has |
|
394 * finished its processing. |
|
395 * |
|
396 * @since 2.8 |
|
397 * @param "TInt aResultCode" Error code of operation. |
|
398 */ |
|
399 void AdaptComplete( TInt aResultCode ); |
|
400 |
|
401 /** |
|
402 * Invoked by the front-end when EOU has been detected. |
|
403 * |
|
404 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
405 */ |
|
406 void EouDetected( TInt aResultCode ); |
|
407 |
|
408 /** |
|
409 * Invoked by the algorithm manager when the frontend initialization |
|
410 * is completed. |
|
411 * |
|
412 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
413 */ |
|
414 void InitFEComplete( TInt aResultCode ); |
|
415 |
|
416 /** |
|
417 * Invoked by the algorithm manager when the recognition backend |
|
418 * initialization is completed. |
|
419 * |
|
420 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
421 */ |
|
422 void InitRecognizerBEComplete( TInt aResultCode ); |
|
423 |
|
424 /** |
|
425 * Invoked by the algorithm manager when loading grammar |
|
426 * is completed. |
|
427 * |
|
428 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
429 */ |
|
430 void LoadGrammarComplete( TInt aResultCode ); |
|
431 |
|
432 /** |
|
433 * Invoked when grammar has been unloaded. |
|
434 * |
|
435 * @param "TInt aResultCode" Result of grammar loading |
|
436 */ |
|
437 void UnloadGrammarComplete( TInt aResultCode ); |
|
438 |
|
439 /** |
|
440 * Invoked when grammar has been activated. |
|
441 * |
|
442 * @param "TInt aResultCode" Result of grammar activation |
|
443 */ |
|
444 void ActivateGrammarComplete( TInt aResultCode ); |
|
445 |
|
446 /** |
|
447 * Invoked when grammar has been deactivated. |
|
448 * |
|
449 * @param "TInt aResultCode" Result of grammar deactivation |
|
450 */ |
|
451 void DeActivateGrammarComplete( TInt aResultCode ); |
|
452 |
|
453 /** |
|
454 * Invoked by the algorithm manager when loading lexicon |
|
455 * is completed. |
|
456 * |
|
457 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
458 */ |
|
459 void LoadLexiconComplete( TInt aResultCode ); |
|
460 |
|
461 /** |
|
462 * Invoked by the algorithm manager when loading models |
|
463 * is completed. |
|
464 * |
|
465 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
466 */ |
|
467 void LoadModelsComplete( TInt aResultCode ); |
|
468 |
|
469 /** |
|
470 * Invoked when the recognition process is completed. |
|
471 * |
|
472 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
473 */ |
|
474 void RecognitionComplete( TInt aResultCode ); |
|
475 |
|
476 /** |
|
477 * Invoked when the training process is completed. |
|
478 * |
|
479 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
480 */ |
|
481 void TrainComplete( TInt aResultCode ); |
|
482 |
|
483 /** |
|
484 * Invoked when rule unloading has been done. |
|
485 * |
|
486 * @param "TInt aResultCode" Result code, KErrNone if successful. |
|
487 */ |
|
488 void UnloadRuleComplete( TInt aResultCode ); |
|
489 |
|
490 /** |
|
491 * Invoked when a feature vector is available. |
|
492 * |
|
493 * @param "TDesC8& aFV" Buffer containing a feature vector. |
|
494 * @param "TInt32 aSNR" Signal-to-noise ratio. |
|
495 * @param "TInt32 aPosition" Indicates whether this is the first, |
|
496 * subsequent or last feature vector. |
|
497 */ |
|
498 void FeatureVectorDataRcvd( const TDesC8& aFV, |
|
499 TInt32 aSNR, |
|
500 TInt32 aPosition ); |
|
501 |
|
502 /** |
|
503 * Invoked by the front-end when utterance data is needed. |
|
504 * |
|
505 * @since 2.8 |
|
506 */ |
|
507 void RequestSpeechData(); |
|
508 |
|
509 |
|
510 /** |
|
511 * Resolves result. |
|
512 */ |
|
513 void ResolveResult( const RArray<TUint>& aNBestIDs, |
|
514 CSIResultSet& aSIResultSet, |
|
515 const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar, |
|
516 const TDesC8& aCombinedData ); |
|
517 |
|
518 /** |
|
519 * Invoked when grammar combining is needed |
|
520 * |
|
521 * @since 2.8 |
|
522 * @param "RPointerArray<CSICompiledGrammar>& aCompiledGrammars" |
|
523 * Array of previously compiled grammar |
|
524 * @param "const RPointerArray<TSIRuleVariantInfo>& aExcludedRules" |
|
525 * Rules to be blacklisted. |
|
526 */ |
|
527 void CombineGrammarL( const RPointerArray<CSICompiledGrammar>& aCompiledGrammars, |
|
528 const RPointerArray<TSIRuleVariantInfo>& aExcludedRules ); |
|
529 |
|
530 // ============================================ |
|
531 // RecognitionAlgMgr Observer MIXIN ends |
|
532 // ============================================ |
|
533 |
|
534 // ============================================ |
|
535 // MVmAlgMgrObserver Observer MIXIN begins |
|
536 // ============================================ |
|
537 |
|
538 /** |
|
539 * Notifies that grammar combination has been done. |
|
540 */ |
|
541 void CombineComplete( HBufC8* aResult, TInt aError ); |
|
542 |
|
543 /** |
|
544 * Get the specified grammar. |
|
545 * |
|
546 * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found. |
|
547 * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI |
|
548 * active grammar, NULL if not found. |
|
549 * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI |
|
550 * non-active grammar, NULL if not found |
|
551 */ |
|
552 void GetGrammarL( const TSIGrammarID aGrammarID, |
|
553 CSICompiledGrammar** aSIActiveGrammar, |
|
554 CSICompiledGrammar** aSIDeActivatedGrammar ); |
|
555 |
|
556 // ============================================ |
|
557 // MVmAlgMgrObserver Observer MIXIN begins |
|
558 // ============================================ |
|
559 |
|
560 private: |
|
561 |
|
562 /** |
|
563 * C++ default constructor. |
|
564 * |
|
565 * @param "MDevASRObserver& aObserver" Reference to observer. |
|
566 */ |
|
567 CSRSAlgorithmManager( MDevASRObserver& aObserver ); |
|
568 |
|
569 /** |
|
570 * By default Symbian 2nd phase constructor is private. |
|
571 */ |
|
572 void ConstructL(); |
|
573 |
|
574 /** |
|
575 * From CActive. |
|
576 */ |
|
577 void RunL(); |
|
578 void DoCancel(); |
|
579 |
|
580 /** |
|
581 * Empties and copies the data to holding buffer. |
|
582 * |
|
583 * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data |
|
584 * @param "TInt aBufferLength" Length of buffer |
|
585 */ |
|
586 TInt EmptyBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); |
|
587 |
|
588 /** |
|
589 * Fill the received buffer with utterance data upto the specified length. |
|
590 * |
|
591 * @param "CMMFDataBuffer& aBuffer" Buffer containing utterance data |
|
592 * @param "TInt aBufferLength" Length of buffer |
|
593 */ |
|
594 TInt FillBuffer( CMMFDataBuffer& aBuffer, TInt aBufferLength ); |
|
595 |
|
596 /** |
|
597 * Transition from current state to target state if possible. |
|
598 * |
|
599 * @param "TDevASRState aState" A target state |
|
600 * @return ETrue if successful. |
|
601 */ |
|
602 TBool StateTransition( TDevASRState aState ); |
|
603 |
|
604 /** |
|
605 * Translate system error codes to DevASR cause codes. |
|
606 * |
|
607 * @param "TInt aError" System error constant |
|
608 * @return A DevASR error code |
|
609 */ |
|
610 TDevASRError TranslateError( TInt aError ); |
|
611 |
|
612 /** |
|
613 * Async handler for a play request. |
|
614 */ |
|
615 void HandlePlayL(); |
|
616 |
|
617 /** |
|
618 * Async handler for a record request. |
|
619 */ |
|
620 void HandleRecordL(); |
|
621 |
|
622 /** |
|
623 * Async handler for a UtteranceDataProcessed event. |
|
624 */ |
|
625 void HandleUtteranceDataProcessed(); |
|
626 |
|
627 /** |
|
628 * Completes a request with specified status. |
|
629 * |
|
630 * @param "TInt aStatus" Operation status. |
|
631 */ |
|
632 void Ready( const TInt aStatus ); |
|
633 |
|
634 /** |
|
635 * Configure the sound device according to parameters in the resource file. |
|
636 */ |
|
637 void ConfigureSoundDeviceL(); |
|
638 |
|
639 /** |
|
640 * Initializes DevSound for sampling |
|
641 */ |
|
642 void InitializeDevSoundL(); |
|
643 |
|
644 /** |
|
645 * Starts sampling with DevSound |
|
646 */ |
|
647 void StartDevSoundL(); |
|
648 |
|
649 private: // Data |
|
650 |
|
651 // DevASR Observer, the SRS Plugin |
|
652 MDevASRObserver* iDevASRObserver; |
|
653 |
|
654 // ----- DevSound related data ----- |
|
655 |
|
656 // Pointer to DevSound |
|
657 CMMFDevSound* iDevSound; |
|
658 |
|
659 // structure of capabilities for DevSound instance |
|
660 TMMFCapabilities iDevSoundCapabilities; |
|
661 |
|
662 // structure of priorities for DevSound instance |
|
663 TMMFPrioritySettings iPrioritySettings; |
|
664 |
|
665 // stores the current volume and gain |
|
666 TInt iCurrentVolume; |
|
667 TInt iCurrentGain; |
|
668 |
|
669 // ----- Algorithm managers ---- |
|
670 |
|
671 // Pointer to Recognition Algorithm Manager |
|
672 CRecognitionAlgMgr* iRecognitionAlgMgr; |
|
673 |
|
674 // Pointer to Vocabulary algorithm manager |
|
675 CVMAlgorithmManager* iVMAlgorithmManager; |
|
676 |
|
677 // Flag to tell if recognition should be started when first audio |
|
678 // buffer has been sampled. |
|
679 TBool iStartRecognition; |
|
680 |
|
681 // Flag to tell if there is a pending RequestSpeechData() call from |
|
682 // recognition hw device. |
|
683 TBool iPendingRequestSpeechData; |
|
684 |
|
685 // --- SD Training audio buffer managemet --- |
|
686 // Start and stop points in the audio buffer, used during recording |
|
687 TUint32 iStartPoint; |
|
688 TUint32 iStopPoint; |
|
689 // Audio buffer to hold PCM data coming from DevSound |
|
690 TUint8* iAudioBuffer; |
|
691 TPtr8 iPtr; |
|
692 |
|
693 |
|
694 // ----- Buffer management ----- |
|
695 |
|
696 // Number of ms to record |
|
697 TTimeIntervalMicroSeconds32 iRecordDuration; |
|
698 |
|
699 |
|
700 // Overall number of bytes that need to be sampled. |
|
701 // Zero if we should sample until EndRecord() is called. |
|
702 TInt iOverallLength; |
|
703 |
|
704 // Number of samples that are recorded. |
|
705 TInt iOverallSampled; |
|
706 |
|
707 // Indicates if utterance processing is currently being done |
|
708 TBool iProcessingUtterance; |
|
709 |
|
710 |
|
711 // Queue of audio buffer descriptors to be processed |
|
712 TSglQue<CQueItem>* iAudioBufferQue; |
|
713 |
|
714 // An item in the queue of audio buffer descriptors |
|
715 CQueItem* iQueItem; |
|
716 |
|
717 // Flag to tell if recording should be ended |
|
718 TBool iEndFlag; |
|
719 |
|
720 // Recognizer mode |
|
721 TRecognizerMode iMode; |
|
722 |
|
723 |
|
724 // ----- End-pointing usage ----- |
|
725 |
|
726 // Start and end frames in audio buffer |
|
727 TUint32 iStartFrame; |
|
728 TUint32 iEndFrame; |
|
729 TReal iFrameLength; |
|
730 |
|
731 // Start and end point in the audio buffer after end-pointing. |
|
732 TUint32 iBufferStartPoint; |
|
733 TUint32 iBufferEndPoint; |
|
734 |
|
735 // ----- State management ----- |
|
736 |
|
737 // State of DevASR |
|
738 TDevASRState iDevASRState; |
|
739 |
|
740 // State of DevSound |
|
741 TDevSoundState iDevSoundState; |
|
742 |
|
743 // ----- Active object handling ----- |
|
744 TInt iRequestFunction; |
|
745 |
|
746 // resource file handler |
|
747 CDevASRResourceHandler* iResourceHandler; |
|
748 |
|
749 |
|
750 TUint8* iBufferUnderConstruction; |
|
751 TInt iConstructionPoint; |
|
752 |
|
753 const CSIModelBank* iSIModelBank; |
|
754 const CSILexicon* iSILexicon; |
|
755 |
|
756 // Flag which tells if pre-sampling has been started |
|
757 TBool iPreSamplingStarted; |
|
758 |
|
759 // Wait loop to wait for audio initialization callback |
|
760 CActiveSchedulerWait iAudioWait; |
|
761 |
|
762 // The following are used when dumping captured audio to file for analysis |
|
763 #ifdef AUDIOBUFFER_TO_FILE |
|
764 RFs iFs; |
|
765 RFile iBufferDataFile; |
|
766 TBool iFileCreated; |
|
767 #endif |
|
768 |
|
769 }; |
|
770 |
|
771 #endif // DEVASRSRSALGORITHMMANAGER_H |
|
772 |
|
773 // End of file |