diff --git a/src/Translumo.Processing/TranslationProcessingService.cs b/src/Translumo.Processing/TranslationProcessingService.cs index 11cb38b..9c5c840 100644 --- a/src/Translumo.Processing/TranslationProcessingService.cs +++ b/src/Translumo.Processing/TranslationProcessingService.cs @@ -52,10 +52,10 @@ public class TranslationProcessingService : IProcessingService, IDisposable private long _lastTranslatedTextTicks; private const float MIN_SCORE_THRESHOLD = 2.1f; - + public TranslationProcessingService(ICapturerFactory capturerFactory, IChatTextMediator chatTextMediator, OcrEnginesFactory ocrEnginesFactory, TranslatorFactory translationFactory, TtsFactory ttsFactory, TtsConfiguration ttsConfiguration, - TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration, + TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration, TextResultCacheService textResultCacheService, TextProcessingConfiguration textConfiguration, ILogger logger) { _logger = logger; @@ -236,7 +236,7 @@ void CapturerEnsureInitialized() continue; } - if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText, + if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText, bestDetected.Language.Asian, out iterationId)) { sequentialText = false; @@ -257,7 +257,7 @@ void CapturerEnsureInitialized() } _logger.LogError(ex, $"Screen capture failed (code: {ex.ErrorCode})"); - + _capturer.Dispose(); _capturer = null; CapturerEnsureInitialized(); @@ -407,9 +407,15 @@ private void TtsConfigurationOnPropertyChanged(object sender, PropertyChangedEve if (e.PropertyName == nameof(_ttsConfiguration.TtsLanguage) || e.PropertyName == nameof(_ttsConfiguration.TtsSystem)) { - _ttsEngine.Dispose(); + _ttsEngine?.Dispose(); + _ttsEngine = null; _ttsEngine = _ttsFactory.CreateTtsEngine(_ttsConfiguration); } + 
else if (e.PropertyName == nameof(_ttsConfiguration.CurrentVoice) + && _ttsEngine != null && _ttsConfiguration.CurrentVoice != null) + { + _ttsEngine.SetVoice(_ttsConfiguration.CurrentVoice); + } } private void OcrGeneralConfigurationOnPropertyChanged(object sender, PropertyChangedEventArgs e) diff --git a/src/Translumo.TTS/Engines/ITTSEngine.cs b/src/Translumo.TTS/Engines/ITTSEngine.cs index e919f9d..191bf73 100644 --- a/src/Translumo.TTS/Engines/ITTSEngine.cs +++ b/src/Translumo.TTS/Engines/ITTSEngine.cs @@ -1,6 +1,10 @@ namespace Translumo.TTS.Engines; -public interface ITTSEngine: IDisposable +public interface ITTSEngine : IDisposable { void SpeechText(string text); + + string[] GetVoices(); + + void SetVoice(string voice); } diff --git a/src/Translumo.TTS/Engines/NoneTTSEngine.cs b/src/Translumo.TTS/Engines/NoneTTSEngine.cs index 6ef3e2a..e08424d 100644 --- a/src/Translumo.TTS/Engines/NoneTTSEngine.cs +++ b/src/Translumo.TTS/Engines/NoneTTSEngine.cs @@ -10,6 +10,12 @@ public void Dispose() { } + public string[] GetVoices() => new[] { "None" }; + + public void SetVoice(string voice) + { + } + public void SpeechText(string text) { } diff --git a/src/Translumo.TTS/Engines/SileroTTSEngine.cs b/src/Translumo.TTS/Engines/SileroTTSEngine.cs index f5e69bb..ee55113 100644 --- a/src/Translumo.TTS/Engines/SileroTTSEngine.cs +++ b/src/Translumo.TTS/Engines/SileroTTSEngine.cs @@ -8,6 +8,7 @@ public class SileroTTSEngine : ITTSEngine { private dynamic _ipython; private dynamic _model; + private string[] _voices; private string _voice; private readonly string _modelPath; private readonly PythonEngineWrapper _pythonEngine; @@ -51,7 +52,8 @@ private void Init() _pyObjects.Add(_ipython); }); - _voice = ((string[])_model.speakers).First(); + _voices = (string[])_model.speakers; + _voice = _voices.First(); } public void SpeechText(string text) @@ -146,5 +148,9 @@ private string GetModelFullPath(string langCode) _ => null }; + public string[] GetVoices() => _voices; + + public 
void SetVoice(string voice) => _voice = _voices.First(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase)); + private sealed record ModelDescription(string FileUrl, string WarmUpText); } \ No newline at end of file diff --git a/src/Translumo.TTS/Engines/WindowsTTSEngine.cs b/src/Translumo.TTS/Engines/WindowsTTSEngine.cs index ef8b2f4..5b58d31 100644 --- a/src/Translumo.TTS/Engines/WindowsTTSEngine.cs +++ b/src/Translumo.TTS/Engines/WindowsTTSEngine.cs @@ -1,20 +1,25 @@ -using System.Globalization; +using System.Collections; +using System.Collections.ObjectModel; +using System.Globalization; +using System.Reflection; using System.Speech.Synthesis; namespace Translumo.TTS.Engines; public class WindowsTTSEngine : ITTSEngine { - private readonly VoiceInfo _voiceInfo; + private VoiceInfo _voiceInfo; private readonly SpeechSynthesizer _synthesizer; + private readonly ReadOnlyDictionary _voices; public WindowsTTSEngine(string languageCode) { _synthesizer = new SpeechSynthesizer(); _synthesizer.SetOutputToDefaultAudioDevice(); _synthesizer.Rate = 1; - - _voiceInfo = _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)).FirstOrDefault()?.VoiceInfo; + SpeechApiReflectionHelper.InjectOneCoreVoices(_synthesizer); + _voices = _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)).ToDictionary(x => x.VoiceInfo.Name, x => x.VoiceInfo).AsReadOnly(); + _voiceInfo = _voices.Values.FirstOrDefault(); /* stays null when no voice is installed for languageCode, as it did before this change */ } public void SpeechText(string text) @@ -36,4 +41,88 @@ public void Dispose() { _synthesizer.Dispose(); } + + public string[] GetVoices() => _voices.Keys.ToArray(); + + public void SetVoice(string voice) => _voiceInfo = _voices.First(x => x.Key.Equals(voice, StringComparison.OrdinalIgnoreCase)).Value; + + // By default SpeechSynthesizer does not list every installed voice; the OneCore voices are injected through reflection + // https://stackoverflow.com/a/71198211 + private static class SpeechApiReflectionHelper + { + private const string PROP_VOICE_SYNTHESIZER = "VoiceSynthesizer"; + private const string FIELD_INSTALLED_VOICES = 
"_installedVoices"; + + private const string ONE_CORE_VOICES_REGISTRY = @"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech_OneCore\Voices"; + + private static readonly Type _objectTokenCategoryType = typeof(SpeechSynthesizer).Assembly + .GetType("System.Speech.Internal.ObjectTokens.ObjectTokenCategory")!; + + private static readonly Type _voiceInfoType = typeof(SpeechSynthesizer).Assembly + .GetType("System.Speech.Synthesis.VoiceInfo")!; + + private static readonly Type _installedVoiceType = typeof(SpeechSynthesizer).Assembly + .GetType("System.Speech.Synthesis.InstalledVoice")!; + + + public static void InjectOneCoreVoices(SpeechSynthesizer synthesizer) + { + var voiceSynthesizer = GetProperty(synthesizer, PROP_VOICE_SYNTHESIZER); + if (voiceSynthesizer == null) + throw new NotSupportedException($"Property not found: {PROP_VOICE_SYNTHESIZER}"); + + var installedVoices = GetField(voiceSynthesizer, FIELD_INSTALLED_VOICES) as IList; + if (installedVoices == null) + throw new NotSupportedException($"Field not found or null: {FIELD_INSTALLED_VOICES}"); + + if (_objectTokenCategoryType + .GetMethod("Create", BindingFlags.Static | BindingFlags.NonPublic)? + .Invoke(null, new object?[] { ONE_CORE_VOICES_REGISTRY }) is not IDisposable otc) + throw new NotSupportedException($"Failed to call Create on {_objectTokenCategoryType} instance"); + + using (otc) + { + if (_objectTokenCategoryType + .GetMethod("FindMatchingTokens", BindingFlags.Instance | BindingFlags.NonPublic)? 
+ .Invoke(otc, new object?[] { null, null }) is not IList tokens) + throw new NotSupportedException($"Failed to list matching tokens"); + + foreach (var token in tokens) + { + if (token == null || GetProperty(token, "Attributes") == null) + continue; + + var voiceInfo = + typeof(SpeechSynthesizer).Assembly + .CreateInstance(_voiceInfoType.FullName!, true, + BindingFlags.Instance | BindingFlags.NonPublic, null, + new object[] { token }, null, null); + + if (voiceInfo == null) + throw new NotSupportedException($"Failed to instantiate {_voiceInfoType}"); + + var installedVoice = + typeof(SpeechSynthesizer).Assembly + .CreateInstance(_installedVoiceType.FullName!, true, + BindingFlags.Instance | BindingFlags.NonPublic, null, + new object[] { voiceSynthesizer, voiceInfo }, null, null); + + if (installedVoice == null) + throw new NotSupportedException($"Failed to instantiate {_installedVoiceType}"); + + installedVoices.Add(installedVoice); + } + } + } + + private static object? GetProperty(object target, string propName) + { + return target.GetType().GetProperty(propName, BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(target); + } + + private static object? 
GetField(object target, string propName) + { + return target.GetType().GetField(propName, BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(target); + } + } } \ No newline at end of file diff --git a/src/Translumo.TTS/IObserverAvailableVoices.cs b/src/Translumo.TTS/IObserverAvailableVoices.cs new file mode 100644 index 0000000..5ad5f20 --- /dev/null +++ b/src/Translumo.TTS/IObserverAvailableVoices.cs @@ -0,0 +1,6 @@ +namespace Translumo.TTS; + +public interface IObserverAvailableVoices +{ + void UpdateVoice(IList currentVoices); +} diff --git a/src/Translumo.TTS/TtsConfiguration.cs b/src/Translumo.TTS/TtsConfiguration.cs index 98fff8b..b529591 100644 --- a/src/Translumo.TTS/TtsConfiguration.cs +++ b/src/Translumo.TTS/TtsConfiguration.cs @@ -1,6 +1,6 @@ -using Translumo.Infrastructure.Language; +using System.Collections.ObjectModel; +using Translumo.Infrastructure.Language; using Translumo.Utils; -using Windows.Security.EnterpriseData; namespace Translumo.TTS; @@ -12,12 +12,14 @@ public class TtsConfiguration : BindableBase { TtsLanguage = Languages.English, TtsSystem = TTSEngines.None, - InstalledWinTtsLanguages = new List() + InstalledWinTtsLanguages = new List(), + _currentVoice = string.Empty, }; private TTSEngines _ttsSystem; private Languages _ttsLanguage; private List _installedWinTtsLanguages; + private string _currentVoice; public TTSEngines TtsSystem { @@ -37,6 +39,12 @@ public Languages TtsLanguage } } + public string CurrentVoice + { + get => _currentVoice; + set => SetProperty(ref _currentVoice, value); + } + public List InstalledWinTtsLanguages { get => _installedWinTtsLanguages; diff --git a/src/Translumo.TTS/TtsFactory.cs b/src/Translumo.TTS/TtsFactory.cs index 1b5d0ad..d6cff33 100644 --- a/src/Translumo.TTS/TtsFactory.cs +++ b/src/Translumo.TTS/TtsFactory.cs @@ -9,17 +9,20 @@ public class TtsFactory { private readonly LanguageService _languageService; private readonly PythonEngineWrapper _pythonEngine; + private readonly 
IObserverAvailableVoices _observerAvailableVoices; private readonly ILogger _logger; - public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, ILogger logger) + public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, IObserverAvailableVoices observerAvailableVoices, ILogger logger) { _languageService = languageService; _pythonEngine = pythonEngine; + _observerAvailableVoices = observerAvailableVoices; _logger = logger; } - public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) => - ttsConfiguration.TtsSystem switch + public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) + { + ITTSEngine ttsEngine = ttsConfiguration.TtsSystem switch { TTSEngines.None => new NoneTTSEngine(), TTSEngines.WindowsTTS => new WindowsTTSEngine(GetLangCode(ttsConfiguration)), @@ -27,6 +30,12 @@ public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) => _ => throw new NotSupportedException() }; + var voices = ttsEngine.GetVoices(); + _observerAvailableVoices.UpdateVoice(voices); + + return ttsEngine; + } + private string GetLangCode(TtsConfiguration ttsConfiguration) => _languageService.GetLanguageDescriptor(ttsConfiguration.TtsLanguage).Code; } diff --git a/src/Translumo/App.xaml.cs b/src/Translumo/App.xaml.cs index 767acce..5572711 100644 --- a/src/Translumo/App.xaml.cs +++ b/src/Translumo/App.xaml.cs @@ -97,6 +97,7 @@ private void ConfigureServices(ServiceCollection services) services.AddScoped(); services.AddScoped(); services.AddScoped(); + services.AddScoped(); var chatWindowConfiguration = ChatWindowConfiguration.Default; services.AddSingleton(OcrGeneralConfiguration.Default); diff --git a/src/Translumo/MVVM/ViewModels/LanguagesSettingsViewModel.cs b/src/Translumo/MVVM/ViewModels/LanguagesSettingsViewModel.cs index 27458e7..f6e865e 100644 --- a/src/Translumo/MVVM/ViewModels/LanguagesSettingsViewModel.cs +++ b/src/Translumo/MVVM/ViewModels/LanguagesSettingsViewModel.cs @@ -2,7 +2,10 @@ 
using System.Collections.Generic; using System.Collections.ObjectModel; using System.Linq; +using System.Threading; using System.Threading.Tasks; +using System.Windows; +using System.Windows.Data; using System.Windows.Input; using Microsoft.Extensions.Logging; using Microsoft.Toolkit.Mvvm.Input; @@ -23,7 +26,7 @@ namespace Translumo.MVVM.ViewModels { - public sealed class LanguagesSettingsViewModel : BindableBase, IAdditionalPanelController, IDisposable + public sealed class LanguagesSettingsViewModel : BindableBase, IAdditionalPanelController, IDisposable, IObserverAvailableVoices { public event EventHandler PanelStateIsChanged; @@ -34,6 +37,14 @@ public sealed class LanguagesSettingsViewModel : BindableBase, IAdditionalPanelC public TtsConfiguration TtsSettings { get; set; } + // NOTE: WPF does not refresh the ComboBox source for non-static properties or for properties of a non-singleton class; no other workaround was found, so the collection is static + public static ObservableCollection AvailableVoices { get; } = new(); + + public string CurrentVoice + { + get => TtsSettings.CurrentVoice; + set => ChangeCurrentVoice(value); /* the Task returned by ChangeCurrentVoice is not awaited here */ + } public ObservableCollection ProxyCollection { @@ -92,7 +103,6 @@ public TTSEngines TtsSystem private readonly OcrGeneralConfiguration _ocrConfiguration; private readonly LanguageService _languageService; private readonly ILogger _logger; - public LanguagesSettingsViewModel(LanguageService languageService, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration, TtsConfiguration ttsConfiguration, DialogService dialogService, ILogger logger) @@ -111,7 +121,6 @@ public LanguagesSettingsViewModel(LanguageService languageService, TranslationCo this.TtsSettings = ttsConfiguration; this.TtsSettings.TtsLanguage = this.Model.TranslateToLang; - this._languageService = languageService; this._dialogService = dialogService; this._ocrConfiguration = ocrConfiguration; @@ -194,6 +203,24 @@ private async Task ChangeTtsSystem(TTSEngines engine) 
OnPropertyChanged(nameof(TtsSystem)); } + private async Task ChangeCurrentVoice(string voice) + { + if (voice == null) + { + // NOTE: WPF passes null when the currently selected item is removed from the source; ignore it + return; + } + + var changeAction = () => + { + this.TtsSettings.CurrentVoice = voice; + OnPropertyChanged(nameof(CurrentVoice)); + }; + + var changeStage = StagesFactory.CreateLanguageChangeStages(_dialogService, changeAction, _logger); + await changeStage.ExecuteAsync(); + } + private async Task ReconfigureTts(Languages language, TTSEngines engine, Action changeParameter) { try @@ -254,5 +281,17 @@ public void Dispose() { LocalizationManager.ReleaseChangedValuesCallbacks(this); } + + public void UpdateVoice(IList voices) + { + // Sync the shared AvailableVoices list with the voices passed in; the list itself is mutated on the UI dispatcher + var previousVoices = AvailableVoices.ToArray(); + + Application.Current.Dispatcher.InvokeAsync(() => + { + voices.Except(previousVoices).ForEach(x => AvailableVoices.Add(x)); + previousVoices.Except(voices).ForEach(x => AvailableVoices.Remove(x)); + }); + } } -} +} \ No newline at end of file diff --git a/src/Translumo/MVVM/ViewModels/WaitingDialogViewModel.cs b/src/Translumo/MVVM/ViewModels/WaitingDialogViewModel.cs index 75f983e..e0f41d1 100644 --- a/src/Translumo/MVVM/ViewModels/WaitingDialogViewModel.cs +++ b/src/Translumo/MVVM/ViewModels/WaitingDialogViewModel.cs @@ -17,12 +17,11 @@ public string TextContent public event EventHandler DialogIsClosed; private string _textContent; - + public WaitingDialogViewModel(Task innerTask, string taskText) { this.TextContent = taskText; - - innerTask.ContinueWith(t => Close()); + innerTask.ContinueWith(_ => Close(), TaskContinuationOptions.ExecuteSynchronously); } private void Close() diff --git a/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml b/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml index 82bfc8c..9690dde 100644 --- a/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml +++ 
b/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml @@ -7,7 +7,7 @@ xmlns:controls="clr-namespace:Translumo.Controls" xmlns:translation="clr-namespace:Translumo.Translation;assembly=Translumo.Translation" xmlns:tts="clr-namespace:Translumo.TTS;assembly=Translumo.TTS" - mc:Ignorable="d" + mc:Ignorable="d" d:DesignHeight="450" d:DesignWidth="800">