Skip to content

Commit e9a493f

Browse files
committed
Support for local TTS
1 parent a719138 commit e9a493f

File tree

1 file changed

+201
-0
lines changed
  • examples/Support for Local TTS function

1 file changed

+201
-0
lines changed
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
While many TTS servers require a subscription or limit the number of words, it is also possible to run your own TTS server on your home network.
2+
3+
[Nikhil-punia](https://github.com/Nikhil-punia) has written a function that can simply be added to this library. It sends the text in the body of a POST request, which allows much larger texts than fit in the URL of a GET request.
4+
The advantages:
5+
- Sending larger amounts of data for TTS
6+
- Fast TTS
7+
- Local TTS support
8+
9+
```c++
10+
bool Audio::connect_local_tts(const String& host, int port, const String& path,
11+
const String& text, const String& voice_id,
12+
const String& lang, const String& endpoint) {
13+
14+
if (text.isEmpty()) {
15+
AUDIO_INFO("TTS text is empty");
16+
stopSong();
17+
return false;
18+
}
19+
20+
xSemaphoreTakeRecursive(mutex_playAudioData, 0.3 * configTICK_RATE_HZ);
21+
setDefaults();
22+
m_f_ssl = false;
23+
m_f_tts = true;
24+
m_speechtxt.assign(text.c_str());
25+
26+
String body = "{";
27+
body += "\"text\":\"" + text + "\"";
28+
if (!voice_id.isEmpty()) body += ",\"voice\":\"" + voice_id + "\"";
29+
if (!lang.isEmpty()) body += ",\"lang\":\"" + lang + "\"";
30+
body += "}";
31+
32+
String req = "POST " + path + " HTTP/1.1\r\n";
33+
req += "Host: " + host + "\r\n";
34+
req += "User-Agent: ESP32-AudioI2S\r\n";
35+
req += "Accept: */*\r\n";
36+
req += "Content-Type: application/json\r\n";
37+
req += "Content-Length: " + String(body.length()) + "\r\n";
38+
req += "Connection: close\r\n\r\n";
39+
req += body;
40+
41+
_client = static_cast<WiFiClient*>(&client);
42+
AUDIO_INFO("Connecting to local TTS server %s:%d", host.c_str(), port);
43+
44+
if (!_client->connect(host.c_str(), port)) {
45+
AUDIO_ERROR("Connection failed");
46+
xSemaphoreGiveRecursive(mutex_playAudioData);
47+
return false;
48+
}
49+
50+
_client->print(req);
51+
52+
m_f_running = true;
53+
m_dataMode = HTTP_RESPONSE_HEADER;
54+
m_lastHost.assign(host.c_str());
55+
56+
if (endpoint == "gtts" || endpoint == "edge_tts") {
57+
m_expectedCodec = CODEC_MP3;
58+
} else if (endpoint == "tts") {
59+
m_expectedCodec = CODEC_WAV;
60+
}
61+
62+
xSemaphoreGiveRecursive(mutex_playAudioData);
63+
return true;
64+
}
65+
66+
```
67+
68+
This function can be used together with a simple Python server running locally:
69+
```py
70+
from flask import Flask, request, send_file, jsonify, after_this_request
71+
import pyttsx3
72+
import uuid
73+
import os
74+
from gtts import gTTS
75+
import asyncio
76+
import edge_tts
77+
78+
# Flask application that serves the TTS endpoints defined below
app = Flask(__name__)

# Ask pyttsx3 for the installed system voices once at start-up and keep
# them as a {voice id: voice name} mapping
engine = pyttsx3.init()
available_voices = dict(
    (voice.id, voice.name) for voice in engine.getProperty('voices')
)
83+
84+
85+
@app.route('/voices', methods=['GET'])
def list_voices():
    """Return the cached pyttsx3 system voices as a JSON object."""
    voices = available_voices
    return jsonify(voices)
89+
90+
91+
@app.route('/tts', methods=['POST'])
def tts():
    """Offline TTS using pyttsx3.

    Expects a JSON object with:
      - ``text``  (required): the text to speak
      - ``voice`` (optional): a pyttsx3 voice id
      - ``rate``  (optional): value for the pyttsx3 ``rate`` property

    Returns the synthesised speech as ``audio/wav``, a 400 JSON error when the
    body is not a JSON object or ``text`` is missing, or a 500 JSON error when
    synthesis fails.
    """
    import tempfile  # local import: only needed for the portable temp directory

    data = request.get_json(force=True)
    if not isinstance(data, dict):
        # e.g. a JSON list or string: .get() below would raise and produce a 500
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text')
    voice_id = data.get('voice')
    rate = data.get('rate')

    if not text:
        return jsonify({"error": "Missing 'text' parameter"}), 400

    # pyttsx3 can only write to a file, so synthesise into a uniquely named temp file
    filename = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.wav")

    try:
        engine = pyttsx3.init()
        if voice_id:
            engine.setProperty('voice', voice_id)
        if rate:
            engine.setProperty('rate', rate)
        engine.save_to_file(text, filename)
        engine.runAndWait()
        # Read the audio into memory so the temp file can be deleted right away
        with open(filename, 'rb') as audio_file:
            audio = audio_file.read()
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Runs on success and on failure, so a partially written file is not left behind
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass
        except OSError as e:
            app.logger.warning(f"Failed to delete file {filename}: {e}")

    return app.response_class(audio, mimetype='audio/wav')
124+
125+
126+
@app.route('/gtts', methods=['POST'])
def gtts_endpoint():
    """Online TTS using Google gTTS.

    Expects a JSON object with:
      - ``text`` (required): the text to speak
      - ``lang`` (optional): language code, defaults to ``'en'``

    Returns the synthesised speech as ``audio/mpeg``, a 400 JSON error when the
    body is not a JSON object or ``text`` is missing, or a 500 JSON error when
    gTTS fails.
    """
    import io  # local import: in-memory buffer for the MP3 data

    data = request.get_json(force=True)
    if not isinstance(data, dict):
        # e.g. a JSON list or string: .get() below would raise and produce a 500
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text')
    lang = data.get('lang', 'en')

    if not text:
        return jsonify({"error": "Missing 'text' parameter"}), 400

    # Write the MP3 straight into memory: no temp file to create, clean up or leak
    audio = io.BytesIO()
    try:
        gTTS(text=text, lang=lang).write_to_fp(audio)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    return app.response_class(audio.getvalue(), mimetype='audio/mpeg')
153+
154+
155+
@app.route('/edge_tts', methods=['POST'])
def edge_tts_route():
    """High-quality TTS using Microsoft Edge Neural voices (safe pitch/rate support).

    Expects a JSON object with:
      - ``text``  (required): the text to speak
      - ``voice`` (optional): voice name, defaults to ``'en-US-AriaNeural'``
      - ``rate``  (optional): speaking-rate adjustment passed to edge_tts
      - ``pitch`` (optional): pitch adjustment passed to edge_tts

    If edge_tts rejects the rate or pitch value, synthesis is retried without them.
    Returns the speech as ``audio/mpeg``, a 400 JSON error when the body is not a
    JSON object or ``text`` is missing, or a 500 JSON error when synthesis fails.
    """
    import tempfile  # local import: only needed for the portable temp directory

    data = request.get_json(force=True)
    if not isinstance(data, dict):
        # e.g. a JSON list or string: .get() below would raise and produce a 500
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text')
    voice = data.get('voice', 'en-US-AriaNeural')
    # A JSON null is treated like a missing value instead of becoming the string "None"
    rate = str(data['rate']) if data.get('rate') is not None else None
    pitch = str(data['pitch']) if data.get('pitch') is not None else None

    if not text:
        return jsonify({"error": "Missing 'text' parameter"}), 400

    filename = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4().hex}.mp3")

    async def generate_tts():
        options = {
            "text": text,
            "voice": voice
        }

        # Neutral values are left out so edge_tts uses its own defaults
        if rate and rate not in ["0%", "+0%", "-0%", "0", ""]:
            options["rate"] = rate
        if pitch and pitch not in ["0%", "+0%", "-0%", "0", ""]:
            options["pitch"] = pitch

        try:
            communicate = edge_tts.Communicate(**options)
            await communicate.save(filename)
        except Exception as e:
            # A malformed rate/pitch should not fail the request: retry with defaults
            if "pitch" in str(e).lower() or "rate" in str(e).lower():
                communicate = edge_tts.Communicate(text=text, voice=voice)
                await communicate.save(filename)
            else:
                raise

    try:
        asyncio.run(generate_tts())
        # Read the audio into memory so the temp file can be deleted right away
        with open(filename, 'rb') as audio_file:
            audio = audio_file.read()
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # The original never removed this file; clean up on success and on failure
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass
        except OSError as e:
            app.logger.warning(f"Failed to delete file {filename}: {e}")

    return app.response_class(audio, mimetype='audio/mpeg')
196+
197+
198+
if __name__ == '__main__':
    # Listen on all interfaces, port 5000, when the script is started directly
    app.run(port=5000, host='0.0.0.0')
200+
201+
```

0 commit comments

Comments
 (0)