|
| 1 | +// Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | +// Licensed under the MIT License. See LICENSE in the project root for license information. |
| 3 | + |
| 4 | +using System; |
| 5 | +using System.Text; |
| 6 | +using UnityEngine; |
| 7 | +using UnityEngine.Windows.Speech; |
| 8 | + |
namespace HoloToolkit.Unity.InputModule
{
    /// <summary>
    /// Singleton that drives a <see cref="DictationRecognizer"/> together with a recording from the default
    /// microphone. The recognized text is exposed through <see cref="DictationResult"/> and the recorded audio
    /// is assigned to <see cref="DictationAudioSource"/> when an AudioSource is present on this GameObject.
    /// </summary>
    public class DictationManager : Singleton<DictationManager>
    {
        /// <summary>
        /// Sampling rate used when the device reports no upper bound.
        /// <remarks>Microphone.GetDeviceCaps reports 0 for both bounds when the device supports any frequency.</remarks>
        /// </summary>
        private const int DefaultSamplingRate = 44100;

        /// <summary>
        /// Dictation Audio Source. Useful for dictation playback.
        /// <remarks>Null when this GameObject has no AudioSource component.</remarks>
        /// </summary>
        public AudioSource DictationAudioSource { get; private set; }

        /// <summary>
        /// String result of the current dictation.
        /// </summary>
        public string DictationResult { get; private set; }

        /// <summary>
        /// Initial value for InitialSilenceTimeout. Only used to initialize the DictationRecognizer's InitialSilenceTimeout value during Awake.
        /// </summary>
        [SerializeField]
        [Tooltip("The default timeout with initial silence is 5 seconds.")]
        [Range(0.1f, 30f)]
        private float initialSilenceTimeout = 5f;

        /// <summary>
        /// Initial silence timeout in seconds. Reads from and writes through to the recognizer once it exists.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when the assigned value is not greater than zero.</exception>
        public float InitialSilenceTimeout
        {
            get
            {
                return dictationRecognizer != null ? dictationRecognizer.InitialSilenceTimeoutSeconds : initialSilenceTimeout;
            }

            set
            {
                if (value <= 0)
                {
                    throw new ArgumentOutOfRangeException("value");
                }

                initialSilenceTimeout = value;

                if (dictationRecognizer != null)
                {
                    dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
                }
            }
        }

        /// <summary>
        /// Initial value for AutoSilenceTimeout. Only used to initialize the DictationRecognizer's AutoSilenceTimeout value during Awake.
        /// </summary>
        [SerializeField]
        [Tooltip("The default timeout after a recognition is 20 seconds.")]
        [Range(5f, 60f)]
        private float autoSilenceTimeout = 20f;

        /// <summary>
        /// Auto silence timeout in seconds. Reads from and writes through to the recognizer once it exists.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when the assigned value is not greater than zero.</exception>
        public float AutoSilenceTimeout
        {
            get
            {
                return dictationRecognizer != null ? dictationRecognizer.AutoSilenceTimeoutSeconds : autoSilenceTimeout;
            }

            set
            {
                if (value <= 0)
                {
                    throw new ArgumentOutOfRangeException("value");
                }

                autoSilenceTimeout = value;

                if (dictationRecognizer != null)
                {
                    dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;
                }
            }
        }

        /// <summary>
        /// Length in seconds for the manager to listen.
        /// </summary>
        [SerializeField]
        [Tooltip("Length in seconds for the manager to listen.")]
        [Range(1f, 60f)]
        private int recordingTime = 10;

        /// <summary>
        /// Accumulates the text of every completed recognition result.
        /// </summary>
        private StringBuilder textSoFar;

        /// <summary>
        /// Name of the microphone device to record from.
        /// <remarks>Using an empty string specifies the default microphone.</remarks>
        /// </summary>
        private static readonly string DeviceName = string.Empty;

        /// <summary>
        /// The device audio sampling rate.
        /// <remarks>Set by UnityEngine.Microphone.<see cref="Microphone.GetDeviceCaps"/></remarks>
        /// </summary>
        private int samplingRate;

        /// <summary>
        /// True between StartRecording and the matching StopRecording. Lets Update detect that the microphone
        /// stopped on its own so the session can be cleaned up.
        /// </summary>
        private bool recordingStarted;

        private DictationRecognizer dictationRecognizer;

        /// <summary>
        /// Creates and configures the dictation recognizer, caches the AudioSource and queries the microphone's sampling rate.
        /// </summary>
        protected override void Awake()
        {
            base.Awake();

            DictationResult = string.Empty;

            DictationAudioSource = gameObject.GetComponent<AudioSource>();

            dictationRecognizer = new DictationRecognizer();
            dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;
            dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;
            dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;
            dictationRecognizer.DictationError += DictationRecognizer_DictationError;

            // Initialize our timeout values.
            dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
            dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;

            // Query the maximum frequency of the default microphone.
            int minSamplingRate; // Unused.
            Microphone.GetDeviceCaps(DeviceName, out minSamplingRate, out samplingRate);

            // A reported maximum of 0 means "any frequency is supported"; it is not a usable rate for Microphone.Start.
            if (samplingRate <= 0)
            {
                samplingRate = DefaultSamplingRate;
            }

            // Use this string builder to cache the text recognized so far.
            textSoFar = new StringBuilder();

            // Use this to reset once the Microphone is done recording after it was started.
            recordingStarted = false;
        }

        /// <summary>
        /// Ends the session once the microphone is no longer recording.
        /// </summary>
        private void Update()
        {
            // The microphone stops by itself after recordingTime seconds (the clip is not looped) and is also ended
            // by the timeout handler. In both cases the session still has to be closed so that the dictation
            // recognizer is stopped (if it is still running) and the phrase recognition system is restarted.
            if (recordingStarted && !Microphone.IsRecording(DeviceName))
            {
                StopRecording();
            }
        }

        /// <summary>
        /// Ends any active microphone recording, detaches the recognizer callbacks and disposes the recognizer.
        /// </summary>
        protected override void OnDestroy()
        {
            if (recordingStarted)
            {
                recordingStarted = false;
                Microphone.End(DeviceName);
            }

            if (dictationRecognizer != null)
            {
                // Detach the handlers first so no callback reaches this object while it is being torn down.
                dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
                dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
                dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
                dictationRecognizer.DictationError -= DictationRecognizer_DictationError;

                dictationRecognizer.Dispose();
                dictationRecognizer = null;
            }

            base.OnDestroy();
        }

        /// <summary>
        /// Turns on the dictation recognizer and begins recording audio from the default microphone.
        /// The recorded clip is assigned to <see cref="DictationAudioSource"/> when one is available.
        /// </summary>
        public void StartRecording()
        {
            // The phrase recognition system is shut down for the duration of the dictation session;
            // StopRecording restarts it.
            PhraseRecognitionSystem.Shutdown();

            dictationRecognizer.Start();

            recordingStarted = true;

            // Start recording from the microphone.
            AudioClip recording = Microphone.Start(DeviceName, false, recordingTime, samplingRate);

            // Playback is optional: without an AudioSource the dictation itself still works.
            if (DictationAudioSource != null)
            {
                DictationAudioSource.clip = recording;
            }
        }

        /// <summary>
        /// Ends the recording session.
        /// </summary>
        public void StopRecording()
        {
            // Mark the session as closed so Update does not end it a second time.
            recordingStarted = false;

            if (dictationRecognizer.Status == SpeechSystemStatus.Running)
            {
                dictationRecognizer.Stop();
            }

            Microphone.End(DeviceName);

            // NOTE(review): this assumes the recognizer has finished stopping by the time Restart is called - confirm.
            PhraseRecognitionSystem.Restart();
        }

        #region Dictation Recognizer Callbacks

        /// <summary>
        /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
        /// </summary>
        /// <param name="text">The currently hypothesized recognition.</param>
        private void DictationRecognizer_DictationHypothesis(string text)
        {
            // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event.
            DictationResult = textSoFar.ToString() + " " + text + "...";
        }

        /// <summary>
        /// This event is fired after the user pauses, typically at the end of a sentence. The full recognized string is returned here.
        /// </summary>
        /// <param name="text">The text that was heard by the recognizer.</param>
        /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
        private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
        {
            textSoFar.Append(text + ". ");

            DictationResult = textSoFar.ToString();
        }

        /// <summary>
        /// This event is fired when the recognizer stops, whether from StopRecording() being called, a timeout occurring, or some other error.
        /// Typically, this will simply return "Complete". In this case, we check to see if the recognizer timed out.
        /// </summary>
        /// <param name="cause">An enumerated reason for the session completing.</param>
        private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
        {
            // If Timeout occurs, the user has been silent for too long.
            if (cause == DictationCompletionCause.TimeoutExceeded)
            {
                // Ending the microphone here lets the next Update close the session.
                Microphone.End(DeviceName);

                DictationResult = "Dictation has timed out. Please try again.";
            }
        }

        /// <summary>
        /// This event is fired when an error occurs.
        /// </summary>
        /// <param name="error">The string representation of the error reason.</param>
        /// <param name="hresult">The int representation of the hresult.</param>
        private void DictationRecognizer_DictationError(string error, int hresult)
        {
            DictationResult = error + "\nHRESULT: " + hresult.ToString();
        }

        #endregion // Dictation Recognizer Callbacks
    }
}
0 commit comments