Fix PySpeechService communicator not combining item tracking

MattEqualsCoder · MattEqualsCoder · commit b317a4bc7953 · 2025-04-01T22:26:01.000-04:00
diff --git a/src/TrackerCouncil.Smz3.Tracking/Services/ICommunicator.cs b/src/TrackerCouncil.Smz3.Tracking/Services/ICommunicator.cs
@@ -5,7 +5,7 @@ namespace TrackerCouncil.Smz3.Tracking.Services;
 /// <summary>
 /// Defines a mechanism to communicate with the player.
 /// </summary>
-public interface ICommunicator
+public interface ICommunicator : IDisposable
 {
     /// <summary>
     /// Communicates the specified text to the player
diff --git a/src/TrackerCouncil.Smz3.Tracking/Services/PyTextToSpeechCommunicator.cs b/src/TrackerCouncil.Smz3.Tracking/Services/PyTextToSpeechCommunicator.cs
@@ -4,6 +4,7 @@
 using System.IO;
 using System.Runtime.Versioning;
 using System.Threading.Tasks;
+using Microsoft.Extensions.Logging;
 using PySpeechService.Client;
 using PySpeechService.TextToSpeech;
 using TrackerCouncil.Smz3.Data.Options;
@@ -15,6 +16,8 @@ namespace TrackerCouncil.Smz3.Tracking.Services;
 internal class PyTextToSpeechCommunicator : ICommunicator
 {
     private readonly IPySpeechService _pySpeechService;
+    private readonly ILogger<PyTextToSpeechCommunicator> _logger;
+    private DateTime? _startSpeakingTime;
     private (string onnxPath, string jsonPath)? _defaultPrimaryVoice;
     private (string onnxPath, string jsonPath)? _defaultAltVoice;
     private (string onnxPath, string jsonPath)? _primaryVoice;
@@ -25,9 +28,10 @@ internal class PyTextToSpeechCommunicator : ICommunicator
     private int volume;
     private ConcurrentDictionary<string, SpeechRequest> _pendingRequests = [];
 
-    public PyTextToSpeechCommunicator(IPySpeechService pySpeechService, TrackerOptionsAccessor trackerOptionsAccessor)
+    public PyTextToSpeechCommunicator(IPySpeechService pySpeechService, TrackerOptionsAccessor trackerOptionsAccessor, ILogger<PyTextToSpeechCommunicator> logger)
     {
         _pySpeechService = pySpeechService;
+        _logger = logger;
 
         // Check to see if the user has the tracker voice files to use
         var piperPath = Path.Combine(Directories.AppDataFolder, "PiperModels");
@@ -42,47 +46,70 @@ public PyTextToSpeechCommunicator(IPySpeechService pySpeechService, TrackerOptio
         volume = trackerOptionsAccessor.Options?.TextToSpeechVolume ?? 100;
         _ = Initialize();
 
-        _pySpeechService.Initialized += (_, _) =>
+        _pySpeechService.Initialized += PySpeechServiceOnInitialized;
+        _pySpeechService.SpeakCommandResponded += PySpeechServiceOnSpeakCommandResponded;
+
+        _isEnabled = trackerOptionsAccessor.Options?.VoiceFrequency != Shared.Enums.TrackerVoiceFrequency.Disabled;
+    }
+
+    private void PySpeechServiceOnSpeakCommandResponded(object? sender, SpeakCommandResponseEventArgs args)
+    {
+        SpeechRequest? request = null;
+
+        _logger.LogInformation("Response: {Id}", args.Response.MessageId);
+
+        if (string.IsNullOrEmpty(args.Response.MessageId) || !_pendingRequests.TryGetValue(args.Response.MessageId, out request))
         {
-            _ = Initialize();
-        };
+            _logger.LogError("Received PySpeechService SpeakCommandResponse with no valid message id");
+        }
 
-        _pySpeechService.SpeakCommandResponded += (_, args) =>
+        if (args.Response.IsStartOfChunk)
+        {
+            VisemeReached?.Invoke(this, new SpeakingUpdatedEventArgs(true, request));
+        }
+        else if (args.Response.IsEndOfChunk)
         {
-            _pendingRequests.TryGetValue(args.Response.FullMessage, out var request);
+            VisemeReached?.Invoke(this, new SpeakingUpdatedEventArgs(false, request));
+        }
 
-            if (args.Response.IsStartOfChunk)
-            {
-                VisemeReached?.Invoke(this, new SpeakingUpdatedEventArgs(true, request));
-            }
-            else if (args.Response.IsEndOfChunk)
+        if (args.Response.IsStartOfMessage)
+        {
+            if (_startSpeakingTime == null)
             {
-                VisemeReached?.Invoke(this, new SpeakingUpdatedEventArgs(false, request));
+                _startSpeakingTime = DateTime.Now;
             }
 
-            if (args.Response.IsStartOfMessage)
+            _isSpeaking = true;
+            SpeakStarted?.Invoke(this, EventArgs.Empty);
+        }
+        else if (args.Response.IsEndOfMessage)
+        {
+            if (request != null)
             {
-                _isSpeaking = true;
-                SpeakStarted?.Invoke(this, EventArgs.Empty);
+                _pendingRequests.TryRemove(
+                    new KeyValuePair<string, SpeechRequest>(args.Response.MessageId!, request));
             }
-            else if (args.Response.IsEndOfMessage)
+
+            if (!args.Response.HasAnotherRequest)
             {
-                SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(TimeSpan.FromSeconds(3)));
-
-                if (request != null)
-                {
-                    _pendingRequests.TryRemove(
-                        new KeyValuePair<string, SpeechRequest>(args.Response.FullMessage, request));
-                }
-
-                if (!args.Response.HasAnotherRequest)
-                {
-                    _isSpeaking = false;
-                }
+                var duration = DateTime.Now - _startSpeakingTime;
+                _startSpeakingTime = null;
+                SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(duration ?? TimeSpan.Zero, request));
+                _isSpeaking = false;
             }
-        };
+        }
+    }
 
-        _isEnabled = trackerOptionsAccessor.Options?.VoiceFrequency != Shared.Enums.TrackerVoiceFrequency.Disabled;
+    private async void PySpeechServiceOnInitialized(object? sender, EventArgs args)
+    {
+        try
+        {
+            await Initialize();
+        }
+        catch (Exception e)
+        {
+            _logger.LogError(e, "Error initializing PySpeechService");
+        }
     }
 
     public void UseAlternateVoice(bool useAlt = true)
@@ -133,15 +160,18 @@ public void Say(SpeechRequest request)
     {
         if (!_isEnabled || !_pySpeechService.IsSpeechEnabled) return;
 
-        _pendingRequests.TryAdd(request.Text, request);
+        var messageId = Guid.NewGuid().ToString();
+        _pendingRequests.TryAdd(messageId, request);
+
+        _logger.LogInformation("Request: {Id}", messageId);
 
         if (request.Wait)
         {
-            _pySpeechService.Speak(request.Text, GetSpeechSettings());
+            _pySpeechService.Speak(request.Text, GetSpeechSettings(), messageId);
         }
         else
         {
-            _pySpeechService.SpeakAsync(request.Text, GetSpeechSettings());
+            _pySpeechService.SpeakAsync(request.Text, GetSpeechSettings(), messageId);
         }
     }
 
@@ -188,4 +218,10 @@ public void UpdateVolume(int newVolume)
     public event EventHandler? SpeakStarted;
     public event EventHandler<SpeakCompletedEventArgs>? SpeakCompleted;
     public event EventHandler<SpeakingUpdatedEventArgs>? VisemeReached;
+
+    public void Dispose()
+    {
+        _pySpeechService.Initialized -= PySpeechServiceOnInitialized;
+        _pySpeechService.SpeakCommandResponded -= PySpeechServiceOnSpeakCommandResponded;
+    }
 }
diff --git a/src/TrackerCouncil.Smz3.Tracking/Services/SpeakCompletedEventArgs.cs b/src/TrackerCouncil.Smz3.Tracking/Services/SpeakCompletedEventArgs.cs
@@ -6,10 +6,16 @@ namespace TrackerCouncil.Smz3.Tracking.Services;
 /// Event for when the communicator has finished speaking
 /// </summary>
 /// <param name="speechDuration">How long the speech was going on for</param>
-public class SpeakCompletedEventArgs(TimeSpan speechDuration) : EventArgs
+/// <param name="speechRequest">The speech request that just finished, or null if it could not be determined</param>
+public class SpeakCompletedEventArgs(TimeSpan speechDuration, SpeechRequest? speechRequest) : EventArgs
 {
     /// <summary>
     /// How long the speech was going on for
     /// </summary>
     public TimeSpan SpeechDuration => speechDuration;
+
+    /// <summary>
+    /// The speech request that just finished, or null if it could not be determined
+    /// </summary>
+    public SpeechRequest? SpeechRequest => speechRequest;
 }
diff --git a/src/TrackerCouncil.Smz3.Tracking/Services/TextToSpeechCommunicator.cs b/src/TrackerCouncil.Smz3.Tracking/Services/TextToSpeechCommunicator.cs
@@ -10,7 +10,7 @@ namespace TrackerCouncil.Smz3.Tracking.Services;
 /// Facilitates communication with the player using Windows' built-in
 /// text-to-speech engine.
 /// </summary>
-public class TextToSpeechCommunicator : ICommunicator, IDisposable
+public class TextToSpeechCommunicator : ICommunicator
 {
     private readonly SpeechSynthesizer _tts = null!;
     private bool _canSpeak;
@@ -53,8 +53,9 @@ public TextToSpeechCommunicator(TrackerOptionsAccessor trackerOptionsAccessor, I
             {
                 IsSpeaking = false;
                 var duration = DateTime.Now - _startSpeakingTime;
+                var request = _currentSpeechRequest;
                 _currentSpeechRequest = null;
-                SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(duration));
+                SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(duration, request));
             }
             else
             {
diff --git a/src/TrackerCouncil.Smz3.Tracking/TrackerCouncil.Smz3.Tracking.csproj b/src/TrackerCouncil.Smz3.Tracking/TrackerCouncil.Smz3.Tracking.csproj
@@ -9,7 +9,7 @@
   <ItemGroup>
     <PackageReference Include="BunLabs.Common" Version="1.0.4" />
     <PackageReference Include="MattEqualsCoder.MSURandomizer.Library" Version="3.1.1" />
-    <PackageReference Include="MattEqualsCoder.PySpeechService.Client" Version="0.1.2" />
+    <PackageReference Include="MattEqualsCoder.PySpeechService.Client" Version="0.1.4" />
     <PackageReference Include="MattEqualsCoder.PySpeechService.Recognition" Version="0.1.0" />
     <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.3" />
     <PackageReference Include="NAudio.Wasapi" Version="2.2.1" />

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@ namespace TrackerCouncil.Smz3.Tracking.Services;`
`5`	`5`	`/// <summary>`
`6`	`6`	`/// Defines a mechanism to communicate with the player.`
`7`	`7`	`/// </summary>`
`8`		`-public interface ICommunicator`
	`8`	`+public interface ICommunicator : IDisposable`
`9`	`9`	`{`
`10`	`10`	`/// <summary>`
`11`	`11`	`/// Communicates the specified text to the player`
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ namespace TrackerCouncil.Smz3.Tracking.Services;`
`10`	`10`	`/// Facilitates communication with the player using Windows' built-in`
`11`	`11`	`/// text-to-speech engine.`
`12`	`12`	`/// </summary>`
`13`		`-public class TextToSpeechCommunicator : ICommunicator, IDisposable`
	`13`	`+public class TextToSpeechCommunicator : ICommunicator`
`14`	`14`	`{`
`15`	`15`	`private readonly SpeechSynthesizer _tts = null!;`
`16`	`16`	`private bool _canSpeak;`
`@@ -53,8 +53,9 @@ public TextToSpeechCommunicator(TrackerOptionsAccessor trackerOptionsAccessor, I`
`53`	`53`	`{`
`54`	`54`	`IsSpeaking = false;`
`55`	`55`	`var duration = DateTime.Now - _startSpeakingTime;`
	`56`	`+ var request = _currentSpeechRequest;`
`56`	`57`	`_currentSpeechRequest = null;`
`57`		`- SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(duration));`
	`58`	`+ SpeakCompleted?.Invoke(this, new SpeakCompletedEventArgs(duration, request));`
`58`	`59`	`}`
`59`	`60`	`else`
`60`	`61`	`{`