Skip to content
16 changes: 11 additions & 5 deletions src/Translumo.Processing/TranslationProcessingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ public class TranslationProcessingService : IProcessingService, IDisposable
private long _lastTranslatedTextTicks;

private const float MIN_SCORE_THRESHOLD = 2.1f;

public TranslationProcessingService(ICapturerFactory capturerFactory, IChatTextMediator chatTextMediator, OcrEnginesFactory ocrEnginesFactory,
TranslatorFactory translationFactory, TtsFactory ttsFactory, TtsConfiguration ttsConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextResultCacheService textResultCacheService, TextProcessingConfiguration textConfiguration, ILogger<TranslationProcessingService> logger)
{
_logger = logger;
Expand Down Expand Up @@ -236,7 +236,7 @@ void CapturerEnsureInitialized()
continue;
}

if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
bestDetected.Language.Asian, out iterationId))
{
sequentialText = false;
Expand All @@ -257,7 +257,7 @@ void CapturerEnsureInitialized()
}

_logger.LogError(ex, $"Screen capture failed (code: {ex.ErrorCode})");

_capturer.Dispose();
_capturer = null;
CapturerEnsureInitialized();
Expand Down Expand Up @@ -407,9 +407,15 @@ private void TtsConfigurationOnPropertyChanged(object sender, PropertyChangedEve
if (e.PropertyName == nameof(_ttsConfiguration.TtsLanguage)
|| e.PropertyName == nameof(_ttsConfiguration.TtsSystem))
{
_ttsEngine.Dispose();
_ttsEngine?.Dispose();
_ttsEngine = null;
_ttsEngine = _ttsFactory.CreateTtsEngine(_ttsConfiguration);
}
else if (e.PropertyName == nameof(_ttsConfiguration.CurrentVoice)
&& _ttsEngine != null && _ttsConfiguration.CurrentVoice != null)
{
_ttsEngine.SetVoice(_ttsConfiguration.CurrentVoice);
}
}

private void OcrGeneralConfigurationOnPropertyChanged(object sender, PropertyChangedEventArgs e)
Expand Down
6 changes: 5 additions & 1 deletion src/Translumo.TTS/Engines/ITTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
namespace Translumo.TTS.Engines;

public interface ITTSEngine: IDisposable
/// <summary>
/// Contract for the text-to-speech engines (e.g. SileroTTSEngine, WindowsTTSEngine).
/// Engines are disposable and expose a list of selectable voices.
/// </summary>
public interface ITTSEngine : IDisposable
{
/// <summary>
/// Hands <paramref name="text"/> to the engine.
/// NOTE(review): presumably synthesizes and plays the text; implementations are not shown here - confirm.
/// </summary>
void SpeechText(string text);

/// <summary>
/// Returns the names of the voices this engine offers.
/// TtsFactory forwards this list to IObserverAvailableVoices right after creating an engine.
/// </summary>
string[] GetVoices();

/// <summary>
/// Selects the voice to use by name. The SileroTTSEngine and WindowsTTSEngine
/// implementations compare the name case-insensitively against their voice list.
/// </summary>
void SetVoice(string voice);
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/Engines/NoneTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ public void Dispose()
{
}

/// <summary>
/// Returns a one-element list containing the placeholder entry "None".
/// </summary>
public string[] GetVoices()
{
    string[] placeholder = { "None" };
    return placeholder;
}

/// <summary>
/// Intentionally does nothing: the argument is ignored.
/// </summary>
public void SetVoice(string voice) { }

/// <summary>
/// Intentionally does nothing: the text is discarded.
/// </summary>
public void SpeechText(string text) { }
Expand Down
8 changes: 7 additions & 1 deletion src/Translumo.TTS/Engines/SileroTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class SileroTTSEngine : ITTSEngine
{
private dynamic _ipython;
private dynamic _model;
private string[] _voices;
private string _voice;
private readonly string _modelPath;
private readonly PythonEngineWrapper _pythonEngine;
Expand Down Expand Up @@ -51,7 +52,8 @@ private void Init()
_pyObjects.Add(_ipython);
});

_voice = ((string[])_model.speakers).First();
_voices = (string[])_model.speakers;
_voice = _voices.First();
}

public void SpeechText(string text)
Expand Down Expand Up @@ -146,5 +148,9 @@ private string GetModelFullPath(string langCode)
_ => null
};

/// <summary>
/// Returns the voice list stored in <c>_voices</c>.
/// </summary>
public string[] GetVoices()
{
    return _voices;
}

/// <summary>
/// Selects the voice whose name equals <paramref name="voice"/> (case-insensitive).
/// </summary>
/// <param name="voice">Requested voice name; may be null or a name this engine does not offer.</param>
/// <remarks>
/// When no voice matches, the current voice is kept instead of throwing. A configured voice
/// name can belong to a different engine (for example right after the TTS system is switched),
/// and that must not crash the caller.
/// </remarks>
public void SetVoice(string voice)
{
    // FirstOrDefault yields null for "no match"; string.Equals(null, ...) is simply false.
    var match = _voices?.FirstOrDefault(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase));
    if (match is not null)
    {
        _voice = match;
    }
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a bug: an exception occurs when a TTS engine with a voice is selected and the user then switches to another TTS engine. TtsConfiguration.CurrentVoice needs to be set after (or before) this update.


// Positional record pairing a model's FileUrl with its WarmUpText.
// NOTE(review): usages are outside this view - presumably one entry per downloadable model; confirm.
private sealed record ModelDescription(string FileUrl, string WarmUpText);
}
97 changes: 93 additions & 4 deletions src/Translumo.TTS/Engines/WindowsTTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
using System.Globalization;
using System.Collections;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Reflection;
using System.Speech.Synthesis;

namespace Translumo.TTS.Engines;

public class WindowsTTSEngine : ITTSEngine
{
private readonly VoiceInfo _voiceInfo;
private VoiceInfo _voiceInfo;
private readonly SpeechSynthesizer _synthesizer;
private readonly ReadOnlyDictionary<string, VoiceInfo> _voices;

/// <summary>
/// Creates a synthesizer bound to the default audio device, adds the OneCore voices to it
/// and collects the voices installed for <paramref name="languageCode"/>.
/// </summary>
/// <param name="languageCode">Culture name passed to <see cref="CultureInfo"/>.</param>
/// <remarks>
/// The first installed voice becomes the current one. When no voice is installed for the
/// culture, the current voice stays null instead of throwing.
/// NOTE(review): confirm SpeechText tolerates a null voice, as it had to before voice selection was added.
/// </remarks>
public WindowsTTSEngine(string languageCode)
{
    _synthesizer = new SpeechSynthesizer();
    try
    {
        _synthesizer.SetOutputToDefaultAudioDevice();
        _synthesizer.Rate = 1;

        SpeechApiReflectionHelper.InjectOneCoreVoices(_synthesizer);

        // Built by hand rather than with ToDictionary so that a repeated voice name
        // (assumed possible once OneCore voices are injected) keeps the first entry
        // instead of throwing ArgumentException.
        var voicesByName = new Dictionary<string, VoiceInfo>();
        foreach (var installedVoice in _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)))
        {
            voicesByName.TryAdd(installedVoice.VoiceInfo.Name, installedVoice.VoiceInfo);
        }

        _voices = new ReadOnlyDictionary<string, VoiceInfo>(voicesByName);

        // FirstOrDefault: an empty voice list must not abort engine creation.
        _voiceInfo = _voices.Values.FirstOrDefault();
    }
    catch
    {
        // Do not leak the native synthesizer when initialisation fails.
        _synthesizer.Dispose();
        throw;
    }
}

public void SpeechText(string text)
Expand All @@ -36,4 +41,88 @@ public void Dispose()
{
_synthesizer.Dispose();
}

/// <summary>
/// Returns the names (dictionary keys) of the collected voices as a new array.
/// </summary>
public string[] GetVoices()
{
    var names = _voices.Select(entry => entry.Key);
    return names.ToArray();
}

/// <summary>
/// Selects the installed voice whose name equals <paramref name="voice"/> (case-insensitive).
/// </summary>
/// <param name="voice">Requested voice name; may be null or a name this engine does not offer.</param>
/// <remarks>
/// When no voice matches, the current voice is kept instead of throwing. A configured voice
/// name can belong to a different engine or language (for example right after the TTS system
/// is switched).
/// </remarks>
public void SetVoice(string voice)
{
    // A missing entry yields default(KeyValuePair), whose Value is null.
    var match = _voices
        .FirstOrDefault(x => x.Key.Equals(voice, StringComparison.OrdinalIgnoreCase))
        .Value;

    if (match is not null)
    {
        _voiceInfo = match;
    }
}

// By default SpeechSynthesizer does not show all available voices. This helper
// appends the voices found under the Speech_OneCore registry category by
// reflecting over non-public System.Speech members.
// https://stackoverflow.com/a/71198211
private static class SpeechApiReflectionHelper
{
    private const string PROP_VOICE_SYNTHESIZER = "VoiceSynthesizer";
    private const string FIELD_INSTALLED_VOICES = "_installedVoices";

    private const string ONE_CORE_VOICES_REGISTRY = @"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech_OneCore\Voices";

    // Flags shared by every non-public instance lookup in this helper.
    private const BindingFlags NON_PUBLIC_INSTANCE = BindingFlags.Instance | BindingFlags.NonPublic;

    private static readonly Assembly _speechAssembly = typeof(SpeechSynthesizer).Assembly;

    private static readonly Type _objectTokenCategoryType =
        _speechAssembly.GetType("System.Speech.Internal.ObjectTokens.ObjectTokenCategory")!;

    private static readonly Type _voiceInfoType =
        _speechAssembly.GetType("System.Speech.Synthesis.VoiceInfo")!;

    private static readonly Type _installedVoiceType =
        _speechAssembly.GetType("System.Speech.Synthesis.InstalledVoice")!;

    /// <summary>
    /// Adds an InstalledVoice entry to the synthesizer's internal voice list for every
    /// token of the OneCore voices category that exposes an "Attributes" property.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// A required non-public member is missing or an internal type could not be instantiated.
    /// </exception>
    public static void InjectOneCoreVoices(SpeechSynthesizer synthesizer)
    {
        var voiceSynthesizer = GetProperty(synthesizer, PROP_VOICE_SYNTHESIZER);
        if (voiceSynthesizer == null)
        {
            throw new NotSupportedException($"Property not found: {PROP_VOICE_SYNTHESIZER}");
        }

        if (GetField(voiceSynthesizer, FIELD_INSTALLED_VOICES) is not IList installedVoices)
        {
            throw new NotSupportedException($"Field not found or null: {FIELD_INSTALLED_VOICES}");
        }

        var createCategory = _objectTokenCategoryType
            .GetMethod("Create", BindingFlags.Static | BindingFlags.NonPublic);
        var category = createCategory?.Invoke(null, new object?[] { ONE_CORE_VOICES_REGISTRY }) as IDisposable;
        if (category == null)
        {
            throw new NotSupportedException($"Failed to call Create on {_objectTokenCategoryType} instance");
        }

        using (category)
        {
            var findTokens = _objectTokenCategoryType.GetMethod("FindMatchingTokens", NON_PUBLIC_INSTANCE);
            if (findTokens?.Invoke(category, new object?[] { null, null }) is not IList tokens)
            {
                throw new NotSupportedException($"Failed to list matching tokens");
            }

            foreach (var token in tokens)
            {
                // Tokens without attributes are skipped.
                if (token == null)
                {
                    continue;
                }

                if (GetProperty(token, "Attributes") == null)
                {
                    continue;
                }

                var voiceInfo = Instantiate(_voiceInfoType, token);
                var installedVoice = Instantiate(_installedVoiceType, voiceSynthesizer, voiceInfo);

                installedVoices.Add(installedVoice);
            }
        }
    }

    // Invokes a non-public constructor of a System.Speech type; throws when no instance is produced.
    private static object Instantiate(Type type, params object[] constructorArgs)
    {
        var instance = _speechAssembly.CreateInstance(
            type.FullName!, true, NON_PUBLIC_INSTANCE, null, constructorArgs, null, null);

        if (instance == null)
        {
            throw new NotSupportedException($"Failed to instantiate {type}");
        }

        return instance;
    }

    // Reads a non-public instance property; null when the property does not exist.
    private static object? GetProperty(object target, string propName)
    {
        var property = target.GetType().GetProperty(propName, NON_PUBLIC_INSTANCE);
        return property?.GetValue(target);
    }

    // Reads a non-public instance field; null when the field does not exist.
    private static object? GetField(object target, string propName)
    {
        var field = target.GetType().GetField(propName, NON_PUBLIC_INSTANCE);
        return field?.GetValue(target);
    }
}
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/IObserverAvailableVoices.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Translumo.TTS;

/// <summary>
/// Receives the list of voices offered by a TTS engine.
/// TtsFactory.CreateTtsEngine calls <see cref="UpdateVoice"/> with the result of
/// ITTSEngine.GetVoices() each time it creates an engine.
/// </summary>
public interface IObserverAvailableVoices
{
/// <summary>
/// Called with the voice names of the newly created engine.
/// </summary>
/// <param name="currentVoices">Voice names reported by the engine.</param>
void UpdateVoice(IList<string> currentVoices);
}
14 changes: 11 additions & 3 deletions src/Translumo.TTS/TtsConfiguration.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Translumo.Infrastructure.Language;
using System.Collections.ObjectModel;
using Translumo.Infrastructure.Language;
using Translumo.Utils;
using Windows.Security.EnterpriseData;

namespace Translumo.TTS;

Expand All @@ -12,12 +12,14 @@ public class TtsConfiguration : BindableBase
{
TtsLanguage = Languages.English,
TtsSystem = TTSEngines.None,
InstalledWinTtsLanguages = new List<Languages>()
InstalledWinTtsLanguages = new List<Languages>(),
_currentVoice = string.Empty,
};

private TTSEngines _ttsSystem;
private Languages _ttsLanguage;
private List<Languages> _installedWinTtsLanguages;
private string _currentVoice;

public TTSEngines TtsSystem
{
Expand All @@ -37,6 +39,12 @@ public Languages TtsLanguage
}
}

/// <summary>
/// Name of the selected TTS voice. The setter assigns through <c>SetProperty</c>.
/// NOTE(review): SetProperty is defined outside this view - presumably it raises PropertyChanged; confirm.
/// </summary>
public string CurrentVoice
{
    get
    {
        return _currentVoice;
    }
    set
    {
        SetProperty(ref _currentVoice, value);
    }
}

public List<Languages> InstalledWinTtsLanguages
{
get => _installedWinTtsLanguages;
Expand Down
15 changes: 12 additions & 3 deletions src/Translumo.TTS/TtsFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,33 @@ public class TtsFactory
{
private readonly LanguageService _languageService;
private readonly PythonEngineWrapper _pythonEngine;
private readonly IObserverAvailableVoices _observerAvailableVoices;
private readonly ILogger _logger;

public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, ILogger<TtsFactory> logger)
/// <summary>
/// Stores the collaborators used when creating TTS engines.
/// </summary>
/// <param name="languageService">Resolves language descriptors (used for the language code).</param>
/// <param name="pythonEngine">Passed to SileroTTSEngine instances.</param>
/// <param name="observerAvailableVoices">Notified with the voice list of each created engine.</param>
/// <param name="logger">Logger for this factory.</param>
public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, IObserverAvailableVoices observerAvailableVoices, ILogger<TtsFactory> logger)
{
    (_languageService, _pythonEngine, _observerAvailableVoices, _logger) =
        (languageService, pythonEngine, observerAvailableVoices, logger);
}

public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) =>
ttsConfiguration.TtsSystem switch
/// <summary>
/// Creates the engine selected by <c>ttsConfiguration.TtsSystem</c> and publishes its
/// voice list to the registered <see cref="IObserverAvailableVoices"/>.
/// </summary>
/// <param name="ttsConfiguration">Supplies the TTS system and, via GetLangCode, the language.</param>
/// <returns>The newly created engine; the caller owns it and must dispose it.</returns>
/// <exception cref="NotSupportedException">The configured TTS system is not handled.</exception>
public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration)
{
    ITTSEngine ttsEngine = ttsConfiguration.TtsSystem switch
    {
        TTSEngines.None => new NoneTTSEngine(),
        TTSEngines.WindowsTTS => new WindowsTTSEngine(GetLangCode(ttsConfiguration)),
        TTSEngines.SileroTTS => new SileroTTSEngine(_pythonEngine, GetLangCode(ttsConfiguration)),
        _ => throw new NotSupportedException()
    };

    try
    {
        var voices = ttsEngine.GetVoices();
        _observerAvailableVoices.UpdateVoice(voices);
    }
    catch
    {
        // The caller never receives the engine on failure, so release it here
        // rather than leaking it.
        ttsEngine.Dispose();
        throw;
    }

    return ttsEngine;
}

/// <summary>
/// Returns the <c>Code</c> of the language descriptor for <c>ttsConfiguration.TtsLanguage</c>.
/// </summary>
private string GetLangCode(TtsConfiguration ttsConfiguration)
{
    var descriptor = _languageService.GetLanguageDescriptor(ttsConfiguration.TtsLanguage);
    return descriptor.Code;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/Translumo/App.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ private void ConfigureServices(ServiceCollection services)
services.AddScoped<HotkeysSettingsViewModel>();
services.AddScoped<LanguagesSettingsViewModel>();
services.AddScoped<OcrSettingsViewModel>();
services.AddScoped<IObserverAvailableVoices, LanguagesSettingsViewModel>();

var chatWindowConfiguration = ChatWindowConfiguration.Default;
services.AddSingleton<OcrGeneralConfiguration>(OcrGeneralConfiguration.Default);
Expand Down
Loading