Skip to content
This repository was archived by the owner on Feb 5, 2024. It is now read-only.

Commit e04d7b4

Browse files
authored
Feat/voice selection (#6)
* add nancy voice * add ljspeech voice * add voice support authored-by: jarbasai <jarbasai@mailfence.com>
1 parent aaab592 commit e04d7b4

File tree

3 files changed

+67
-15
lines changed

3 files changed

+67
-15
lines changed

ovos_tts_plugin_mimic2/__init__.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#
1313
import base64
1414
import math
15+
import random
1516
import re
1617

1718
import requests
@@ -21,7 +22,7 @@
2122

2223

2324
class Mimic2TTSPlugin(TTS):
24-
"""Interface to Catotron TTS."""
25+
"""Interface to Mimic2 TTS."""
2526
# Heuristic value, caps character length of a chunk of text
2627
# to be spoken as a work around for current Tacotron implementation limits.
2728
max_sentence_size = 170
@@ -30,7 +31,25 @@ def __init__(self, lang="en-us", config=None):
3031
config = config or {}
3132
super(Mimic2TTSPlugin, self).__init__(lang, config,
3233
Mimic2TTSValidator(self), 'wav')
33-
self.url = config.get("url", "https://mimic-api.mycroft.ai/synthesize")
34+
self.voice = self.voice.lower()
35+
self._visemes = False
36+
self.cache.persist = True # save synths to avoid repeat queries
37+
if self.config.get("url"): # self hosted
38+
self.url = self.config["url"]
39+
# TODO disable cache to avoid filename conflicts with other voices
40+
if not self.voice or self.voice == "default":
41+
self.voice = f"selfhosted{random.randint(0, 9999999)}"
42+
self.cache.persist = False
43+
elif self.voice == "kusal" or self.voice == "default":
44+
self.url = "https://mimic-api.mycroft.ai/synthesize"
45+
self._visemes = True
46+
elif self.voice == "nancy":
47+
self.url = "https://nancy.2022.us/synthesize"
48+
elif self.voice == "ljspeech":
49+
self.url = "https://ljspeech.2022.us/synthesize"
50+
else:
51+
self.voice = "kusal"
52+
self.url = "https://mimic-api.mycroft.ai/synthesize"
3453

3554
def get_tts(self, sentence, wav_file, lang=None):
3655
"""Fetch tts audio using tacotron endpoint.
@@ -41,13 +60,17 @@ def get_tts(self, sentence, wav_file, lang=None):
4160
Returns:
4261
Tuple ((str) written file, None)
4362
"""
44-
params = {"text": sentence, "visimes": True}
63+
params = {"text": sentence, "visimes": self._visemes}
4564
r = requests.get(self.url, params=params)
4665
if not r.ok:
4766
raise RemoteTTSException(f"Mimic2 server error: {r.reason}")
48-
results = r.json()
49-
audio_data = base64.b64decode(results['audio_base64'])
50-
phonemes = results['visimes']
67+
if not self._visemes:
68+
audio_data = r.content
69+
phonemes = None
70+
else:
71+
results = r.json()
72+
audio_data = base64.b64decode(results['audio_base64'])
73+
phonemes = results['visimes']
5174
with open(wav_file, "wb") as f:
5275
f.write(audio_data)
5376
return (wav_file, phonemes) # No phonemes

readme.md

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,20 @@ OVOS TTS plugin for [Mimic2](https://github.com/MycroftAI/mimic2)
1212
"tts": {
1313
"module": "ovos-tts-plugin-mimic2",
1414
"ovos-tts-plugin-mimic2": {
15-
"url": "https://mimic-api.mycroft.ai/synthesize"
15+
"voice": "kusal"
1616
}
1717
}
1818

1919
```
2020

2121
### Voices
2222

23-
You can self host models trained on [NancyCorpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray) and [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron)
23+
Available Voices:
24+
- Kusal - Mycroft AI official voice, hosted by Mycroft
25+
- Nancy - trained on [Nancy Corpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray), hosted by Neon
26+
- ljspeech - trained on [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron), hosted by Neon
27+
28+
### Self Hosting
2429

2530
The Kusal voice model is not provided by MycroftAI and cannot be self-hosted.
2631

@@ -36,6 +41,19 @@ docker build -f nancy.Dockerfile -t mimic2-nancy
3641
docker build -f ljspeech.Dockerfile -t mimic2-ljspeech
3742
```
3843

39-
run the container and set url in config `http://0.0.0.0:9000/synthesize`
44+
run the container
4045

4146
`docker run --rm -p 9000:9000 mimic2-nancy`
47+
48+
Set the url and voice in the config; the voice is used for local caching of files by OVOS plugins.
49+
50+
```json
51+
"tts": {
52+
"module": "ovos-tts-plugin-mimic2",
53+
"ovos-tts-plugin-mimic2": {
54+
"url": "http://0.0.0.0:9000/synthesize",
55+
"voice": "nancy"
56+
}
57+
}
58+
59+
```

test/unittests/test_something.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,10 @@
55

66

77
class TestTTS(unittest.TestCase):
8-
@classmethod
9-
def setUpClass(self):
10-
self.mimic = Mimic2TTSPlugin()
11-
12-
def test_something(self):
8+
def test_kusal(self):
139
path = "/tmp/hello_kusal.wav"
14-
audio, phonemes = self.mimic.get_tts("hello world", path)
10+
mimic = Mimic2TTSPlugin()
11+
audio, phonemes = mimic.get_tts("hello world", path)
1512
self.assertEqual(audio, path)
1613
self.assertEqual(phonemes,
1714
[['HH', '0.0775'],
@@ -22,3 +19,17 @@ def test_something(self):
2219
['ER', '0.5580'],
2320
['L', '0.6820'],
2421
['D', '0.8060']])
22+
23+
def test_nancy(self):
24+
path = "/tmp/hello_nancy.wav"
25+
mimic = Mimic2TTSPlugin(config={"voice": "nancy"})
26+
audio, phonemes = mimic.get_tts("hello world", path)
27+
self.assertEqual(audio, path)
28+
self.assertEqual(phonemes, None)
29+
30+
def test_ljspeech(self):
31+
path = "/tmp/hello_ljspeech.wav"
32+
mimic = Mimic2TTSPlugin(config={"voice": "ljspeech"})
33+
audio, phonemes = mimic.get_tts("hello world", path)
34+
self.assertEqual(audio, path)
35+
self.assertEqual(phonemes, None)

0 commit comments

Comments
 (0)