Skip to content

Commit 9637894

Browse files
committed
Merge pull request godotengine#104873 from bruvzg/tts_on_demand
Enable TTS on demand, instead of fully disabling it when project setting is not set.
2 parents b37e368 + 4638ade commit 9637894

16 files changed

+238
-82
lines changed

doc/classes/DisplayServer.xml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,7 +1325,6 @@
13251325
- [code]language[/code] is the language code in [code]lang_Variant[/code] format. The [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. The [code skip-lint]Variant[/code] part is an engine-dependent string describing country, region, and/or dialect.
13261326
Note that Godot depends on system libraries for text-to-speech functionality. These libraries are installed by default on Windows and macOS, but not on all Linux distributions. If they are not present, this method will return an empty list. This applies to both Godot users on Linux, as well as end-users on Linux running Godot games that use text-to-speech.
13271327
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1328-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13291328
</description>
13301329
</method>
13311330
<method name="tts_get_voices_for_language" qualifiers="const">
@@ -1334,39 +1333,34 @@
13341333
<description>
13351334
Returns an [PackedStringArray] of voice identifiers for the [param language].
13361335
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1337-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13381336
</description>
13391337
</method>
13401338
<method name="tts_is_paused" qualifiers="const">
13411339
<return type="bool" />
13421340
<description>
13431341
Returns [code]true[/code] if the synthesizer is in a paused state.
13441342
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1345-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13461343
</description>
13471344
</method>
13481345
<method name="tts_is_speaking" qualifiers="const">
13491346
<return type="bool" />
13501347
<description>
13511348
Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue.
13521349
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1353-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13541350
</description>
13551351
</method>
13561352
<method name="tts_pause">
13571353
<return type="void" />
13581354
<description>
13591355
Puts the synthesizer into a paused state.
13601356
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1361-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13621357
</description>
13631358
</method>
13641359
<method name="tts_resume">
13651360
<return type="void" />
13661361
<description>
13671362
Resumes the synthesizer if it was paused.
13681363
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1369-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13701364
</description>
13711365
</method>
13721366
<method name="tts_set_utterance_callback">
@@ -1379,7 +1373,6 @@
13791373
- [constant TTS_UTTERANCE_BOUNDARY] callable's method should take two [int] parameters, the index of the character and the utterance ID.
13801374
[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
13811375
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1382-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
13831376
</description>
13841377
</method>
13851378
<method name="tts_speak">
@@ -1401,15 +1394,13 @@
14011394
[b]Note:[/b] On Windows and Linux (X11/Wayland), utterance [param text] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
14021395
[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
14031396
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1404-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
14051397
</description>
14061398
</method>
14071399
<method name="tts_stop">
14081400
<return type="void" />
14091401
<description>
14101402
Stops synthesis in progress and removes all utterances from the queue.
14111403
[b]Note:[/b] This method is implemented on Android, iOS, Web, Linux (X11/Wayland), macOS, and Windows.
1412-
[b]Note:[/b] [member ProjectSettings.audio/general/text_to_speech] should be [code]true[/code] to use text-to-speech.
14131404
</description>
14141405
</method>
14151406
<method name="unregister_additional_output">

doc/classes/ProjectSettings.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@
427427
Sets the [url=https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory]AVAudioSessionCategory[/url] on iOS. Use the [code]Playback[/code] category to get sound output, even if the phone is in silent mode.
428428
</member>
429429
<member name="audio/general/text_to_speech" type="bool" setter="" getter="" default="false">
430-
If [code]true[/code], text-to-speech support is enabled, see [method DisplayServer.tts_get_voices] and [method DisplayServer.tts_speak].
430+
If [code]true[/code], text-to-speech support is enabled on startup; otherwise, it is enabled the first time a TTS method is used. See [method DisplayServer.tts_get_voices] and [method DisplayServer.tts_speak].
431431
[b]Note:[/b] Enabling TTS can cause additional idle CPU usage and interfere with the sleep mode, so consider disabling it if TTS is not used.
432432
</member>
433433
<member name="audio/video/video_delay_compensation_ms" type="int" setter="" getter="" default="0">

platform/android/tts_android.cpp

Lines changed: 64 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,43 +49,57 @@ jmethodID TTS_Android::_stop_speaking = nullptr;
4949

5050
HashMap<int, Char16String> TTS_Android::ids;
5151

52+
void TTS_Android::initialize_tts() {
53+
JNIEnv *env = get_jni_env();
54+
ERR_FAIL_NULL(env);
55+
56+
if (_init) {
57+
env->CallVoidMethod(tts, _init);
58+
initialized = true;
59+
}
60+
}
61+
5262
void TTS_Android::setup(jobject p_tts) {
53-
bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
54-
if (tts_enabled) {
55-
JNIEnv *env = get_jni_env();
56-
ERR_FAIL_NULL(env);
63+
JNIEnv *env = get_jni_env();
64+
ERR_FAIL_NULL(env);
5765

58-
tts = env->NewGlobalRef(p_tts);
66+
tts = env->NewGlobalRef(p_tts);
5967

60-
jclass c = env->GetObjectClass(tts);
61-
cls = (jclass)env->NewGlobalRef(c);
68+
jclass c = env->GetObjectClass(tts);
69+
cls = (jclass)env->NewGlobalRef(c);
6270

63-
_init = env->GetMethodID(cls, "init", "()V");
64-
_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
65-
_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
66-
_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
67-
_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
68-
_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
69-
_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
70-
_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
71+
_init = env->GetMethodID(cls, "init", "()V");
72+
_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
73+
_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
74+
_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
75+
_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
76+
_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
77+
_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
78+
_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
7179

72-
if (_init) {
73-
env->CallVoidMethod(tts, _init);
74-
initialized = true;
75-
}
80+
bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
81+
if (tts_enabled) {
82+
initialize_tts();
7683
}
7784
}
7885

7986
void TTS_Android::terminate() {
8087
JNIEnv *env = get_jni_env();
8188
ERR_FAIL_NULL(env);
8289

83-
env->DeleteGlobalRef(cls);
84-
env->DeleteGlobalRef(tts);
90+
if (cls) {
91+
env->DeleteGlobalRef(cls);
92+
}
93+
if (tts) {
94+
env->DeleteGlobalRef(tts);
95+
}
8596
}
8697

8798
void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
88-
ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
99+
if (unlikely(!initialized)) {
100+
initialize_tts();
101+
}
102+
ERR_FAIL_NULL(tts);
89103
if (ids.has(p_id)) {
90104
int pos = 0;
91105
if ((DisplayServer::TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
@@ -106,7 +120,10 @@ void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
106120
}
107121

108122
bool TTS_Android::is_speaking() {
109-
ERR_FAIL_COND_V_MSG(!initialized, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
123+
if (unlikely(!initialized)) {
124+
initialize_tts();
125+
}
126+
ERR_FAIL_NULL_V(tts, false);
110127
if (_is_speaking) {
111128
JNIEnv *env = get_jni_env();
112129

@@ -118,7 +135,10 @@ bool TTS_Android::is_speaking() {
118135
}
119136

120137
bool TTS_Android::is_paused() {
121-
ERR_FAIL_COND_V_MSG(!initialized, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
138+
if (unlikely(!initialized)) {
139+
initialize_tts();
140+
}
141+
ERR_FAIL_NULL_V(tts, false);
122142
if (_is_paused) {
123143
JNIEnv *env = get_jni_env();
124144

@@ -130,7 +150,10 @@ bool TTS_Android::is_paused() {
130150
}
131151

132152
Array TTS_Android::get_voices() {
133-
ERR_FAIL_COND_V_MSG(!initialized, Array(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
153+
if (unlikely(!initialized)) {
154+
initialize_tts();
155+
}
156+
ERR_FAIL_NULL_V(tts, Array());
134157
Array list;
135158
if (_get_voices) {
136159
JNIEnv *env = get_jni_env();
@@ -158,7 +181,10 @@ Array TTS_Android::get_voices() {
158181
}
159182

160183
void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
161-
ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
184+
if (unlikely(!initialized)) {
185+
initialize_tts();
186+
}
187+
ERR_FAIL_NULL(tts);
162188
if (p_interrupt) {
163189
stop();
164190
}
@@ -183,7 +209,10 @@ void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volum
183209
}
184210

185211
void TTS_Android::pause() {
186-
ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
212+
if (unlikely(!initialized)) {
213+
initialize_tts();
214+
}
215+
ERR_FAIL_NULL(tts);
187216
if (_pause_speaking) {
188217
JNIEnv *env = get_jni_env();
189218

@@ -193,7 +222,10 @@ void TTS_Android::pause() {
193222
}
194223

195224
void TTS_Android::resume() {
196-
ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
225+
if (unlikely(!initialized)) {
226+
initialize_tts();
227+
}
228+
ERR_FAIL_NULL(tts);
197229
if (_resume_speaking) {
198230
JNIEnv *env = get_jni_env();
199231

@@ -203,7 +235,10 @@ void TTS_Android::resume() {
203235
}
204236

205237
void TTS_Android::stop() {
206-
ERR_FAIL_COND_MSG(!initialized, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
238+
if (unlikely(!initialized)) {
239+
initialize_tts();
240+
}
241+
ERR_FAIL_NULL(tts);
207242
for (const KeyValue<int, Char16String> &E : ids) {
208243
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E.key);
209244
}

platform/android/tts_android.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ class TTS_Android {
5454

5555
static HashMap<int, Char16String> ids;
5656

57+
static void initialize_tts();
58+
5759
public:
5860
static void setup(jobject p_tts);
5961
static void terminate();

platform/ios/display_server_ios.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ class DisplayServerIOS : public DisplayServer {
8383

8484
void perform_event(const Ref<InputEvent> &p_event);
8585

86+
void initialize_tts() const;
87+
8688
DisplayServerIOS(const String &p_rendering_driver, DisplayServer::WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i *p_position, const Vector2i &p_resolution, int p_screen, Context p_context, int64_t p_parent_window, Error &r_error);
8789
~DisplayServerIOS();
8890

platform/ios/display_server_ios.mm

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
// Init TTS
6262
bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
6363
if (tts_enabled) {
64-
tts = [[TTS_IOS alloc] init];
64+
initialize_tts();
6565
}
6666
native_menu = memnew(NativeMenu);
6767

@@ -389,39 +389,63 @@
389389
String DisplayServerIOS::get_name() const {
390390
return "iOS";
391391
}
392+
void DisplayServerIOS::initialize_tts() const {
393+
const_cast<DisplayServerIOS *>(this)->tts = [[TTS_IOS alloc] init];
394+
}
392395

393396
bool DisplayServerIOS::tts_is_speaking() const {
394-
ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
397+
if (unlikely(!tts)) {
398+
initialize_tts();
399+
}
400+
ERR_FAIL_NULL_V(tts, false);
395401
return [tts isSpeaking];
396402
}
397403

398404
bool DisplayServerIOS::tts_is_paused() const {
399-
ERR_FAIL_NULL_V_MSG(tts, false, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
405+
if (unlikely(!tts)) {
406+
initialize_tts();
407+
}
408+
ERR_FAIL_NULL_V(tts, false);
400409
return [tts isPaused];
401410
}
402411

403412
TypedArray<Dictionary> DisplayServerIOS::tts_get_voices() const {
404-
ERR_FAIL_NULL_V_MSG(tts, TypedArray<Dictionary>(), "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
413+
if (unlikely(!tts)) {
414+
initialize_tts();
415+
}
416+
ERR_FAIL_NULL_V(tts, TypedArray<Dictionary>());
405417
return [tts getVoices];
406418
}
407419

408420
void DisplayServerIOS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
409-
ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
421+
if (unlikely(!tts)) {
422+
initialize_tts();
423+
}
424+
ERR_FAIL_NULL(tts);
410425
[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
411426
}
412427

413428
void DisplayServerIOS::tts_pause() {
414-
ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
429+
if (unlikely(!tts)) {
430+
initialize_tts();
431+
}
432+
ERR_FAIL_NULL(tts);
415433
[tts pauseSpeaking];
416434
}
417435

418436
void DisplayServerIOS::tts_resume() {
419-
ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
437+
if (unlikely(!tts)) {
438+
initialize_tts();
439+
}
440+
ERR_FAIL_NULL(tts);
420441
[tts resumeSpeaking];
421442
}
422443

423444
void DisplayServerIOS::tts_stop() {
424-
ERR_FAIL_NULL_MSG(tts, "Enable the \"audio/general/text_to_speech\" project setting to use text-to-speech.");
445+
if (unlikely(!tts)) {
446+
initialize_tts();
447+
}
448+
ERR_FAIL_NULL(tts);
425449
[tts stopSpeaking];
426450
}
427451

0 commit comments

Comments
 (0)