Skip to content

Commit 6214c95

Browse files
Stephen HodgsonStephen Hodgson
authored and committed
Initial version of dictation manager.
1 parent fec9506 commit 6214c95

File tree

2 files changed

+251
-0
lines changed

2 files changed

+251
-0
lines changed
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License. See LICENSE in the project root for license information.
3+
4+
using System;
5+
using System.Text;
6+
using UnityEngine;
7+
using UnityEngine.Windows.Speech;
8+
9+
namespace HoloToolkit.Unity.InputModule
10+
{
11+
/// <summary>
/// Singleton that drives a <see cref="DictationRecognizer"/> and records audio from the
/// default microphone while dictation is running. The recognized text is exposed through
/// <see cref="DictationResult"/> and the recorded clip through <see cref="DictationAudioSource"/>.
/// </summary>
public class DictationManager : Singleton<DictationManager>
{
    /// <summary>
    /// Sampling rate used when the device reports no maximum frequency.
    /// </summary>
    /// <remarks>
    /// Microphone.GetDeviceCaps reports zero when the device supports any frequency;
    /// zero must not be passed on to Microphone.Start.
    /// </remarks>
    private const int FallbackSamplingRate = 44100;

    /// <summary>
    /// Dictation Audio Source. Useful for dictation playback.
    /// </summary>
    /// <remarks>Null when the GameObject has no AudioSource component attached.</remarks>
    public AudioSource DictationAudioSource { get; private set; }

    /// <summary>
    /// String result of the current dictation.
    /// </summary>
    public string DictationResult { get; private set; }

    /// <summary>
    /// Initial value for InitialSilenceTimeout. Only used to initialize the DictationRecognizer's InitialSilenceTimeout value during Awake.
    /// </summary>
    [SerializeField]
    [Tooltip("The default timeout with initial silence is 5 seconds.")]
    [Range(0.1f, 30f)]
    private float initialSilenceTimeout = 5f;

    /// <summary>
    /// Seconds of silence tolerated before any speech has been heard.
    /// Reads from and writes through to the recognizer once it exists.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The assigned value is zero or negative.</exception>
    public float InitialSilenceTimeout
    {
        get
        {
            return dictationRecognizer != null ? dictationRecognizer.InitialSilenceTimeoutSeconds : initialSilenceTimeout;
        }

        set
        {
            if (value <= 0)
            {
                throw new ArgumentOutOfRangeException("value");
            }

            initialSilenceTimeout = value;

            if (dictationRecognizer != null)
            {
                dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
            }
        }
    }

    /// <summary>
    /// Initial value for AutoSilenceTimeout. Only used to initialize the DictationRecognizer's AutoSilenceTimeout value during Awake.
    /// </summary>
    [SerializeField]
    [Tooltip("The default timeout after a recognition is 20 seconds.")]
    [Range(5f, 60f)]
    private float autoSilenceTimeout = 20f;

    /// <summary>
    /// Seconds of silence tolerated after a recognition.
    /// Reads from and writes through to the recognizer once it exists.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The assigned value is zero or negative.</exception>
    public float AutoSilenceTimeout
    {
        get
        {
            return dictationRecognizer != null ? dictationRecognizer.AutoSilenceTimeoutSeconds : autoSilenceTimeout;
        }

        set
        {
            if (value <= 0)
            {
                throw new ArgumentOutOfRangeException("value");
            }

            autoSilenceTimeout = value;

            if (dictationRecognizer != null)
            {
                dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;
            }
        }
    }

    /// <summary>
    /// Length in seconds for the manager to listen.
    /// </summary>
    [SerializeField]
    [Tooltip("Length in seconds for the manager to listen.")]
    [Range(1f, 60f)]
    private int recordingTime = 10;

    /// <summary>
    /// Caches the text currently being displayed in dictation display text.
    /// </summary>
    private StringBuilder textSoFar;

    /// <summary>
    /// Name of the microphone device to record from.
    /// </summary>
    /// <remarks>Using an empty string specifies the default microphone.</remarks>
    private static readonly string DeviceName = string.Empty;

    /// <summary>
    /// The device audio sampling rate.
    /// </summary>
    /// <remarks>Set by UnityEngine.Microphone.<see cref="Microphone.GetDeviceCaps"/></remarks>
    private int samplingRate;

    /// <summary>
    /// True between StartRecording and the end of that session.
    /// Use this to reset the UI once the Mic is done recording.
    /// </summary>
    private bool recordingStarted;

    private DictationRecognizer dictationRecognizer;

    /// <summary>
    /// Creates the recognizer, hooks up its events, applies the serialized timeouts
    /// and queries the microphone's sampling rate.
    /// </summary>
    protected override void Awake()
    {
        base.Awake();

        DictationResult = string.Empty;

        // Use this string to cache the text currently displayed.
        textSoFar = new StringBuilder();

        // Use this to reset once the Microphone is done recording after it was started.
        recordingStarted = false;

        DictationAudioSource = gameObject.GetComponent<AudioSource>();

        dictationRecognizer = new DictationRecognizer();
        dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;
        dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;
        dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;
        dictationRecognizer.DictationError += DictationRecognizer_DictationError;

        // Initialize our timeout values.
        dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
        dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;

        // Query the maximum frequency of the default microphone.
        int minSamplingRate; // Unused.
        Microphone.GetDeviceCaps(DeviceName, out minSamplingRate, out samplingRate);

        // A reported maximum of zero means "any frequency"; pick a concrete rate for Microphone.Start.
        if (samplingRate <= 0)
        {
            samplingRate = FallbackSamplingRate;
        }
    }

    /// <summary>
    /// Detects the microphone running out of recording time and ends the session.
    /// </summary>
    private void Update()
    {
        if (!recordingStarted || dictationRecognizer == null)
        {
            return;
        }

        if (!Microphone.IsRecording(DeviceName) && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            // If the microphone stops as a result of timing out, make sure to manually stop the dictation recognizer.
            StopRecording();
        }
    }

    /// <summary>
    /// Detaches the event handlers and releases the recognizer.
    /// </summary>
    protected override void OnDestroy()
    {
        if (dictationRecognizer != null)
        {
            dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
            dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
            dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
            dictationRecognizer.DictationError -= DictationRecognizer_DictationError;

            dictationRecognizer.Dispose();
            dictationRecognizer = null;
        }

        base.OnDestroy();
    }

    /// <summary>
    /// Turns on the dictation recognizer and begins recording audio from the default microphone.
    /// The recorded clip is assigned to <see cref="DictationAudioSource"/> when one is available.
    /// </summary>
    public void StartRecording()
    {
        // The phrase recognition system is shut down for the duration of the dictation session.
        PhraseRecognitionSystem.Shutdown();

        dictationRecognizer.Start();

        recordingStarted = true;

        // Start recording from the microphone.
        var recordedClip = Microphone.Start(DeviceName, false, recordingTime, samplingRate);

        // The AudioSource is optional; without one the audio is recorded but not kept for playback.
        if (DictationAudioSource != null)
        {
            DictationAudioSource.clip = recordedClip;
        }
    }

    /// <summary>
    /// Ends the recording session.
    /// </summary>
    public void StopRecording()
    {
        // Clear the flag first so Update does not trigger a second stop for the same session.
        recordingStarted = false;

        if (dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }

        Microphone.End(DeviceName);

        PhraseRecognitionSystem.Restart();
    }

    #region Dictation Recognizer Callbacks

    /// <summary>
    /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
    /// </summary>
    /// <param name="text">The currently hypothesized recognition.</param>
    private void DictationRecognizer_DictationHypothesis(string text)
    {
        // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event.
        DictationResult = textSoFar.ToString() + " " + text + "...";
    }

    /// <summary>
    /// This event is fired after the user pauses, typically at the end of a sentence. The full recognized string is returned here.
    /// </summary>
    /// <param name="text">The text that was heard by the recognizer.</param>
    /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
    private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
    {
        textSoFar.Append(text).Append(". ");

        DictationResult = textSoFar.ToString();
    }

    /// <summary>
    /// This event is fired when the recognizer stops, whether from StopRecording() being called, a timeout occurring, or some other error.
    /// Typically, this will simply return "Complete". In this case, we check to see if the recognizer timed out.
    /// </summary>
    /// <param name="cause">An enumerated reason for the session completing.</param>
    private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
    {
        if (cause != DictationCompletionCause.TimeoutExceeded)
        {
            return;
        }

        // If Timeout occurs, the user has been silent for too long.
        Microphone.End(DeviceName);

        DictationResult = "Dictation has timed out. Please try again.";

        // StopRecording was never reached for this session, so finish its cleanup here:
        // clear the flag and bring back the phrase recognition system that StartRecording shut down.
        if (recordingStarted)
        {
            recordingStarted = false;

            PhraseRecognitionSystem.Restart();
        }
    }

    /// <summary>
    /// This event is fired when an error occurs.
    /// </summary>
    /// <param name="error">The string representation of the error reason.</param>
    /// <param name="hresult">The int representation of the hresult.</param>
    private void DictationRecognizer_DictationError(string error, int hresult)
    {
        DictationResult = error + "\nHRESULT: " + hresult.ToString();
    }

    #endregion // Dictation Recognizer Callbacks
}
239+
}

Assets/HoloToolkit/Input/Scripts/Microphone/DictationManager.cs.meta

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)