/*
* Copyright (c) 2004-2007 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:  This class implements the recognition algorithm manager.
*
*/
|
17 |
|
18 |
|
19 #ifndef DEVASRRECOGNITIONALGMGR_H |
|
20 #define DEVASRRECOGNITIONALGMGR_H |
|
21 |
|
22 // INCLUDES |
|
23 #include <nsssispeechrecognitiondatadevasr.h> |
|
24 #include <asrsrecognitionhwdevice.h> |
|
25 #include <nssdevasrcommon.h> |
|
26 #include <nssdevasr.h> |
|
27 #include <asrsadaptationhwdevice.h> |
|
28 |
|
29 // CLASS DECLARATION |
|
30 |
|
31 /** |
|
32 * Interface class to be implemented by observer of the recognition algorithm manager. |
|
33 * |
|
34 * @since 2.0 |
|
35 */ |
|
36 class MRecAlgMgrObserver |
|
37 { |
|
38 public: |
|
39 |
|
40 /** |
|
41 * Invoked by the adaptation hw device when model adaptation has |
|
42 * finished its processing. |
|
43 * |
|
44 * @since 2.8 |
|
45 * @param "TInt aResultCode" Error code of operation. |
|
46 */ |
|
47 virtual void AdaptComplete( TInt aResultCode ) = 0; |
|
48 |
|
49 |
|
50 /** |
|
51 * Feature vector received event from the front-end algorithm. |
|
52 * |
|
53 * @since 2.0 |
|
54 * @param "TDesC8& aFV" Buffer containing a feature vector. |
|
55 * @param "TInt32 aSNR" Signal-to-noise ratio. |
|
56 * @param "TInt32 aPosition" Indicates whether this is the first, |
|
57 * subsequent or last feature vector. |
|
58 */ |
|
59 virtual void FeatureVectorDataRcvd( const TDesC8& aFV, |
|
60 TInt32 aSNR, |
|
61 TInt32 aPosition ) = 0; |
|
62 |
|
63 /** |
|
64 * Invoked by the front-end when EOU has been detected |
|
65 * |
|
66 * @since 2.8 |
|
67 * @param "TInt aResultCode" Result of front-end processing. |
|
68 */ |
|
69 virtual void EouDetected( TInt aResultCode ) = 0; |
|
70 |
|
71 /** |
|
72 * Invoked by the front-end when initialization has completed. |
|
73 * |
|
74 * @since 2.0 |
|
75 * @param "TInt aResultCode" Result of front-end initialization. |
|
76 */ |
|
77 virtual void InitFEComplete( TInt aResultCode ) = 0; |
|
78 |
|
79 /** |
|
80 * Invoked by the recognizer back-end when initialization has completed. |
|
81 * |
|
82 * @since 2.0 |
|
83 * @param "TInt aResultCode" Result of recognizer back-end initialization. |
|
84 */ |
|
85 virtual void InitRecognizerBEComplete( TInt aResultCode ) = 0; |
|
86 |
|
87 /** |
|
88 * Invoked by the recognizer back-end when grammar loading has completed. |
|
89 * |
|
90 * @since 2.0 |
|
91 * @param "TInt aResultCode" Result of grammar loading |
|
92 */ |
|
93 virtual void LoadGrammarComplete( TInt aResultCode ) = 0; |
|
94 |
|
95 /** |
|
96 * Invoked when grammar has been unloaded. |
|
97 * |
|
98 * @since 2.8 |
|
99 * @param "TInt aResultCode" Result of grammar loading |
|
100 */ |
|
101 virtual void UnloadGrammarComplete( TInt aResultCode ) = 0; |
|
102 |
|
103 /** |
|
104 * Invoked when grammar has been activated. |
|
105 * |
|
106 * @since 2.0 |
|
107 * @param "TInt aResultCode" Result of grammar activation |
|
108 */ |
|
109 virtual void ActivateGrammarComplete( TInt aResultCode ) = 0; |
|
110 |
|
111 /** |
|
112 * Invoked when grammar has been deactivated. |
|
113 * |
|
114 * @since 2.0 |
|
115 * @param "TInt aResultCode" Result of grammar deactivation |
|
116 */ |
|
117 virtual void DeActivateGrammarComplete( TInt aResultCode ) = 0; |
|
118 |
|
119 /** |
|
120 * Invoked by the recognizer back-end when lexicon loading has completed. |
|
121 * |
|
122 * @since 2.0 |
|
123 * @param "TInt aResultCode" Result of lexicon loading. |
|
124 */ |
|
125 virtual void LoadLexiconComplete( TInt aResultCode ) = 0; |
|
126 |
|
127 /** |
|
128 * Invoked by the recognizer back-end when model loading has completed. |
|
129 * |
|
130 * @since 2.0 |
|
131 * @param "TInt aResultCode" Result of model loading. |
|
132 */ |
|
133 virtual void LoadModelsComplete( TInt aResultCode ) = 0; |
|
134 |
|
135 /** |
|
136 * Invoked by the recognizer when recognition process has completed. |
|
137 * |
|
138 * @since 2.0 |
|
139 * @param "TInt aResultCode" Result of the recognition process. |
|
140 */ |
|
141 virtual void RecognitionComplete( TInt aResultCode ) = 0; |
|
142 |
|
143 /** |
|
144 * Invoked by the training module when the training process has completed. |
|
145 * |
|
146 * @since 2.0 |
|
147 * @param "TInt aResultCode" Result of training process. |
|
148 */ |
|
149 virtual void TrainComplete( TInt aResultCode ) = 0; |
|
150 |
|
151 /** |
|
152 * Invoked when rule unloading has been done. |
|
153 * |
|
154 * @since 2.8 |
|
155 * @param "TInt aResultCode" Result of training process. |
|
156 */ |
|
157 virtual void UnloadRuleComplete( TInt aResultCode ) = 0; |
|
158 |
|
159 /** |
|
160 * Invoked when speech data is needed. |
|
161 * |
|
162 * @since 2.8 |
|
163 */ |
|
164 virtual void RequestSpeechData() = 0; |
|
165 |
|
166 /** |
|
167 * Invoked when result resolving is needed. |
|
168 * |
|
169 * @since 2.8 |
|
170 * @param "RArray<TUint>& aNBestIDs" |
|
171 * @param "CSIResultSet& aSIResultSet" |
|
172 * @param "RPointerArray<CSICompiledGrammar>& aSICompiledGrammar" |
|
173 * @param "TDesC8& aCombinedData" |
|
174 */ |
|
175 virtual void ResolveResult( const RArray<TUint>& aNBestIDs, |
|
176 CSIResultSet& aSIResultSet, |
|
177 const RPointerArray<CSICompiledGrammar>& aSICompiledGrammar, |
|
178 const TDesC8& aCombinedData/*, |
|
179 CSIModelBank& iSIModelBank*/ ) = 0; |
|
180 |
|
181 /** |
|
182 * Invoked when grammar combining is needed |
|
183 * |
|
184 * @since 2.8 |
|
185 * @param "RPointerArray<CSICompiledGrammar>& aCompiledGrammars" |
|
186 * Array of previously compiled grammar |
|
187 * @param "const RPointerArray<TSIRuleVariantInfo>& aExcludedRules" |
|
188 * Rules to be blacklisted. |
|
189 */ |
|
190 virtual void CombineGrammarL( const RPointerArray<CSICompiledGrammar>& aCompiledGrammars, |
|
191 const RPointerArray<TSIRuleVariantInfo>& aExcludedRules ) = 0; |
|
192 |
|
193 }; |
|
194 |
|
195 |
|
196 /** |
|
197 * This class is an aggregation of all related algorithms. It manages the algorithms |
|
198 * and coordinate transfer of data. |
|
199 * |
|
200 * @lib NssDevASR.lib |
|
201 * @since 2.0 |
|
202 */ |
|
203 class CRecognitionAlgMgr : public CActive, |
|
204 public MASRSRecognitionHwDeviceObserver, |
|
205 public MASRAdaptationHwDeviceObserver |
|
206 { |
|
207 public: // Constructors and destructor |
|
208 |
|
209 /** |
|
210 * Two-phased constructor. |
|
211 */ |
|
212 static CRecognitionAlgMgr* NewL( MRecAlgMgrObserver& aObserver ); |
|
213 |
|
214 /** |
|
215 * Destructor. |
|
216 */ |
|
217 virtual ~CRecognitionAlgMgr(); |
|
218 |
|
219 public: // New functions |
|
220 |
|
221 /** |
|
222 * Adapts models |
|
223 * |
|
224 * @sinxe 2.8 |
|
225 * @param "CSIResultSet& aResultSet" Result set reference. |
|
226 * @param "TInt aResultIndex" Index of correct result. |
|
227 * @param "TLanguage aLanguage" Language of correct result. |
|
228 */ |
|
229 void AdaptModelsL( const CSIResultSet& aResultSet, TInt aResultIndex, |
|
230 TLanguage aLanguage ); |
|
231 |
|
232 /** |
|
233 * Cancels the current or started tasks. |
|
234 * |
|
235 * @since 2.0 |
|
236 */ |
|
237 void Cancel(); |
|
238 |
|
239 /** |
|
240 * This method is used to indicate the end of a recognition session. |
|
241 * The EndRecSession() method should be used to end the session. |
|
242 * |
|
243 * @since 2.0 |
|
244 */ |
|
245 void EndRecSession(); |
|
246 |
|
247 /** |
|
248 * Grammar combination has been completed. |
|
249 */ |
|
250 void CombineComplete( HBufC8* aResult, TInt aError ); |
|
251 |
|
252 /** |
|
253 * Retreive the properties of the underlying speech recognition engine. |
|
254 * |
|
255 * @since 2.0 |
|
256 * @param "RArray<TInt>& aPropertyId" An array of identifiers being querried. |
|
257 * @param "RArray<TInt>& aPropertyValue" An array of values corresponding |
|
258 * to the querried identifiers. |
|
259 */ |
|
260 void GetEnginePropertiesL( const RArray<TInt>& aPropertyId, |
|
261 RArray<TInt>& aPropertyValue ); |
|
262 |
|
263 /** |
|
264 * Load the specified parameter(s) to the engines. |
|
265 * |
|
266 * @param "RArray<TInt>& aParameterId" An array of parameter identifiers. |
|
267 * @param "RArray<TInt>& aParameterValue" An array of parameter values. |
|
268 */ |
|
269 void LoadEnginePropertiesL( const RArray<TInt>& aParameterId, |
|
270 const RArray<TInt>& aParameterValue ); |
|
271 |
|
272 /** |
|
273 * Retreive the duration of the utterance detected by the algorithm. |
|
274 * |
|
275 * @since 2.0 |
|
276 * @param "TUint32& aStartFrame" Indicates the start frame of feature vector. |
|
277 * @param "TUint32& aEndFrame" Indicates the end frame of the feature vector. |
|
278 * @param "TReal& aFrameLength" The length of each frame |
|
279 * @return ETrue if successful |
|
280 */ |
|
281 TBool GetUtteranceDuration( TUint32& aStartFrame, TUint32& aEndFrame, |
|
282 TReal& aFrameLength ); |
|
283 |
|
284 /** |
|
285 * Initializes the front-end module in the speech recognition engine. |
|
286 * The frontend module used during training/recognition functions is started |
|
287 * as a result. This method is intended to be used in conjunction with InitTrainBE(). |
|
288 * |
|
289 * @since 2.0 |
|
290 * @param "TRecognizerMode aFeMode" Mode of the recognizer. |
|
291 */ |
|
292 void InitFrontEnd( TRecognizerMode aFeMode ); |
|
293 |
|
294 /** |
|
295 * Initialize the recognition engine back-end. The module responsible for recognition |
|
296 * function is started as a result. This method must be used before any recognition |
|
297 * operations and intended to be used in conjunction with InitFrontEnd(). |
|
298 * |
|
299 * @since 2.0 |
|
300 * @param "aResult" A reference to an object where the recognition |
|
301 * result will be written. |
|
302 */ |
|
303 void InitRecognizerBE( CSIResultSet& aResult ); |
|
304 |
|
305 /** |
|
306 * Load the specified grammar into the recognizer. |
|
307 * |
|
308 * @since 2.8 |
|
309 * @param "aGrammar" A reference to a grammar in an internal format. |
|
310 */ |
|
311 void LoadGrammarL( const CSIGrammar& aGrammar ); |
|
312 void LoadGrammarL( const CSICompiledGrammar& aGrammar ); |
|
313 |
|
314 /** |
|
315 * Unloads the specified grammar from the recognizer. |
|
316 * |
|
317 * @since 2.8 |
|
318 * @param "aGrammar" A reference to a grammar. |
|
319 */ |
|
320 void UnloadGrammarL( const CSIGrammar& aGrammar ); |
|
321 void UnloadGrammarL( const CSICompiledGrammar& aGrammar ); |
|
322 |
|
323 /** |
|
324 * Get the specified grammar. |
|
325 * |
|
326 * @since 2.8 |
|
327 * @param "TSIGrammarID aGrammarID" Identifier of grammar to be found. |
|
328 * @param "CSDGrammar** aSDGrammar" Pointer to found SD grammar, |
|
329 * NULL if not found. |
|
330 * @param "CSICompiledGrammar** aSIActiveGrammar" Pointer to found SI |
|
331 * active grammar, NULL if not found. |
|
332 * @param "CSICompiledGrammar** aSIDeActivatedGrammar" Pointer to found SI |
|
333 * non-active grammar, NULL if not found |
|
334 */ |
|
335 void GetGrammarL( const TSIGrammarID aGrammarID, |
|
336 CSICompiledGrammar** aSIActiveGrammar, |
|
337 CSICompiledGrammar** aSIDeActivatedGrammar ); |
|
338 |
|
339 /** |
|
340 * Activates a grammar. |
|
341 * |
|
342 * @since 2.8 |
|
343 * @param "TSIGrammarID aGrammarID" A grammar identifier. |
|
344 */ |
|
345 void ActivateGrammarL( const TSIGrammarID aGrammarID ); |
|
346 |
|
347 /** |
|
348 * Deactivates a grammar |
|
349 * |
|
350 * @since 2.8 |
|
351 * @param "TSIGrammarID aGrammarID" A grammar identifier |
|
352 */ |
|
353 void DeactivateGrammarL( const TSIGrammarID aGrammarID ); |
|
354 |
|
355 /** |
|
356 * Load the specified lexicon into the recognizer. |
|
357 * |
|
358 * @since 2.0 |
|
359 * @param "aLexicon" A reference to a lexicon in an internal format. |
|
360 */ |
|
361 void LoadLexiconL( const CSILexicon& aLexicon ); |
|
362 |
|
363 /** |
|
364 * Load the specified model bank into the recognizer. |
|
365 * |
|
366 * @since 2.0 |
|
367 * @param "aModels" A reference to a model bank. |
|
368 */ |
|
369 void LoadModelsL( const CSDModelBank& aModels ); |
|
370 void LoadModelsL( const CSIModelBank& aModels ); |
|
371 |
|
372 /** |
|
373 * Use to send the utterance data as a response to the RequestSpeechData(). |
|
374 * |
|
375 * @since 2.8 |
|
376 * @param "TDesC8& aBuffer" Buffer containing utterance data. |
|
377 * @param "TBool aEnd" Flag to tell if given buffer is the last one. |
|
378 */ |
|
379 void SendSpeechData( TPtrC8& aBuffer, TBool aEnd ); |
|
380 |
|
381 /** |
|
382 * Request to start a recognition session. |
|
383 * |
|
384 * @since 2.0 |
|
385 * @param "TRecognizerMode aMode" Mode of recognizer. |
|
386 * @return result code of request. |
|
387 */ |
|
388 TInt StartRecSession( TRecognizerMode aMode ); |
|
389 |
|
390 /** |
|
391 * Starts recognition. |
|
392 */ |
|
393 void StartRecognitionL(); |
|
394 |
|
395 /** |
|
396 * Request to unload the specified rule in the grammar from recognizer. |
|
397 * |
|
398 * @param "TSIGrammarID aGrammarID" Identifer of the grammar to operate on. |
|
399 * @param "TSIRuleID aRuleID" Identifier of the rule to remove. |
|
400 */ |
|
401 void UnloadRule( TSIGrammarID aGrammarID, TSIRuleID aRuleID ); |
|
402 |
|
403 /** |
|
404 * Used during unit testing to print the state of the algorithm |
|
405 */ |
|
406 void AlgorithmState(); |
|
407 |
|
408 /** |
|
409 * Sets rejection value |
|
410 * |
|
411 * @param "TUint32 aRejection" Rejection threshold value. |
|
412 */ |
|
413 void SetRejection( TUint32 aRejection ); |
|
414 |
|
415 public: // MASRAdaptationHwDeviceObserver |
|
416 /** |
|
417 * Called when adaptation has been done |
|
418 */ |
|
419 void MaahdAdaptationComplete( TInt aError ); |
|
420 |
|
421 public: // From MASRSRecognitionHwDeviceObserver |
|
422 |
|
423 /** |
|
424 * Called by the hardware device when the InitializeL() method has completed. |
|
425 * @since Series60 2.8 |
|
426 * @param aError Initialization result code. |
|
427 * KErrNone if successful otherwise a system-wide error code. |
|
428 * @return none |
|
429 */ |
|
430 void MarhdoInitializationComplete( TInt aError ); |
|
431 |
|
432 /** |
|
433 * Called by the hardware device when the InitializeL() method has completed. |
|
434 * @since Series60 2.8 |
|
435 * @param aError Initialization result code. |
|
436 * KErrNone if successful otherwise a system-wide error code. |
|
437 * @return none |
|
438 */ |
|
439 void MarhdoInitRecognizerFEComplete( TInt aError ); |
|
440 |
|
441 /** |
|
442 * Called by the hardware device when backend initialization is completed. |
|
443 * @since Series60 2.8 |
|
444 * @param aError Initialization result code. |
|
445 * KErrNone if successful otherwise a system-wide error code. |
|
446 * @return none |
|
447 */ |
|
448 void MarhdoInitRecognizerBEComplete( TInt aError ); |
|
449 |
|
450 /** |
|
451 * Called by the hardware device when recognition result is available. |
|
452 * @since Series60 2.8 |
|
453 * @param aError Recognition result code. KErrNone if successful. |
|
454 * KErrRejected if the recognition result is rejected, |
|
455 * otherwise a system-wide error code |
|
456 * @return none |
|
457 */ |
|
458 void MarhdoRecognitionComplete( TInt aError ); |
|
459 |
|
460 /** |
|
461 * Called by the hardware device when end-of-utterance is detected by the |
|
462 * acoustic frontend. This method must be called before MarhdoRecognitionComplete(). |
|
463 * @since Series60 2.8 |
|
464 * @param aError Recognition result code. KErrNone if successful, otherwise KErrTooLong, |
|
465 * KErrTooShort, KErrAsrSpeechTooEarly, KErrNoSpeech |
|
466 * @return none |
|
467 */ |
|
468 void MarhdoEouDetected( TInt aError ); |
|
469 |
|
470 /** |
|
471 * Called by the hardware device when a feature vector is extracted by acoustic frontend. |
|
472 * @since Series60 2.8 |
|
473 * @param aFV A buffer containing a feature vector. |
|
474 * @param aSNR Signal-to-noise ratio. |
|
475 * @param aPosition Indicates whether this is the first, subsequent or last feature vector in a series. |
|
476 * @return none |
|
477 */ |
|
478 void MarhdoFeatureVector( const TDesC8& aFV, TInt32 aSNR, TInt32 aPosition ); |
|
479 |
|
480 /** |
|
481 * Called by the hardware device to request for utterance data. |
|
482 * The response to the request is sent in SendSpeechData(). |
|
483 * @since Series60 2.8 |
|
484 * @param none |
|
485 * @return none |
|
486 */ |
|
487 void MarhdoRequestSpeechData(); |
|
488 |
|
489 |
|
490 private: |
|
491 |
|
492 /** |
|
493 * C++ default constructor. |
|
494 */ |
|
495 CRecognitionAlgMgr(MRecAlgMgrObserver& aObserver); |
|
496 |
|
497 /** |
|
498 * Symbian 2nd phase constructor |
|
499 */ |
|
500 void ConstructL(); |
|
501 |
|
502 void RunL(); |
|
503 void DoCancel(); |
|
504 |
|
505 /** |
|
506 * The following methods are asynchronous handlers for the corresponding |
|
507 * synchronous calls. |
|
508 */ |
|
509 void HandleInitFrontEnd(); |
|
510 void HandleInitTrainBE(); |
|
511 void HandleInitRecognizerBE(); |
|
512 void HandleLoadGrammar(); |
|
513 void HandleLoadLexicon(); |
|
514 void HandleLoadModels(); |
|
515 void HandleUtteranceDataRcvd(); |
|
516 void HandleActivateGrammar(); |
|
517 void HandleDeActivateGrammar(); |
|
518 void HandleUnloadRule(); |
|
519 void HandleUnloadGrammar(); |
|
520 |
|
521 /** |
|
522 * Used to complete a request |
|
523 */ |
|
524 void Ready(const TInt aStatus); |
|
525 |
|
526 |
|
527 private: // Data |
|
528 |
|
529 /** |
|
530 * Current working state of the recognizer |
|
531 */ |
|
532 enum TAlgState |
|
533 { |
|
534 EIdle=0, |
|
535 EProcessing, |
|
536 ECancel |
|
537 }; |
|
538 |
|
539 // Pointer to Algorithm Manager Observer |
|
540 MRecAlgMgrObserver* iRecAlgMgrObserver; |
|
541 |
|
542 // Grammar handling |
|
543 RPointerArray<CSICompiledGrammar> iSIActiveGrammars; |
|
544 RPointerArray<CSICompiledGrammar> iSIDeActivatedGrammars; |
|
545 |
|
546 // Blacklisted rules |
|
547 RPointerArray<TSIRuleVariantInfo> iBlackList; |
|
548 |
|
549 // Result IDs |
|
550 RArray<TUint> iNBestList; |
|
551 |
|
552 // Result scores |
|
553 RArray<TInt> iScores; |
|
554 |
|
555 //CSICompiledGrammar* iCombinedGrammar; |
|
556 HBufC8* iCombinedGrammar; |
|
557 |
|
558 // Flag to tell if combining is needed for active grammars. |
|
559 TBool iCombineNeeded; |
|
560 |
|
561 // Flag to inform if recognition hw device has been successfully |
|
562 // initialized. |
|
563 TBool iInitialized; |
|
564 |
|
565 // Pointer to Algorithms |
|
566 CASRSRecognitionHwDevice* iRecoHw; |
|
567 CASRSAdaptHwDevice* iAdaptHw; |
|
568 |
|
569 // Begin: data used in async handling of requests |
|
570 TRecognizerMode iMode; |
|
571 void* iModel; |
|
572 |
|
573 CSDModelBank* iSDModelBank; |
|
574 CSIModelBank* iSIModelBank; |
|
575 void* iSDResult; |
|
576 |
|
577 CSIResultSet* iSIResult; |
|
578 const void* iSDGrammar; |
|
579 const CSIGrammar* iSIGrammar; |
|
580 const CSICompiledGrammar* iSICompGrammar; |
|
581 const void* iSDCompGrammar; |
|
582 const void* iSDLexicon; |
|
583 |
|
584 const CSILexicon* iSILexicon; |
|
585 TSIGrammarID iGrammarID; |
|
586 TSIRuleID iRuleID; |
|
587 TUint32 iStartFrame; |
|
588 TUint32 iEndFrame; |
|
589 TReal iFrameLength; |
|
590 TPtrC8 iBuffer; |
|
591 TBool iEnd; |
|
592 TInt iRequestFunction; |
|
593 // End: data used in async handling of requests |
|
594 |
|
595 // Algorithm states for front-end, back-end, and train back-end. |
|
596 TAlgState iFEState; |
|
597 TAlgState iBEState; |
|
598 |
|
599 // Should we send feature vectors to upper layers or not |
|
600 TBool iFeatures; |
|
601 |
|
602 // Should we include adaptation data to result set or not |
|
603 TBool iAdaptation; |
|
604 |
|
605 // Adaptation data |
|
606 HBufC8* iAdaptationData; |
|
607 }; |
|
608 |
|
609 #endif // DEVASRRECOGNITIONALGMGR_H |
|
610 |
|
611 // End of File |
|