Skip to content

Commit 0d05c18

Browse files
author
hertz-pj
committed
fix bug of jsut dataset, add pyopenjtalk to setup.py
1 parent e771444 commit 0d05c18

File tree

6 files changed

+103
-6
lines changed

6 files changed

+103
-6
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
"click",
4545
"g2p_en",
4646
"dataclasses",
47+
"pyopenjtalk",
4748
],
4849
"setup": ["numpy", "pytest-runner",],
4950
"test": [

tensorflow_tts/configs/fastspeech.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from tensorflow_tts.processor.kss import KSS_SYMBOLS as kss_symbols
2222
from tensorflow_tts.processor.baker import BAKER_SYMBOLS as bk_symbols
2323
from tensorflow_tts.processor.libritts import LIBRITTS_SYMBOLS as lbri_symbols
24+
from tensorflow_tts.processor.jsut import JSUT_SYMBOLS as jsut_symbols
2425

2526

2627
SelfAttentionParams = collections.namedtuple(
@@ -95,6 +96,8 @@ def __init__(
9596
self.vocab_size = len(bk_symbols)
9697
elif dataset == "libritts":
9798
self.vocab_size = len(lbri_symbols)
99+
elif dataset == "jsut_symbols":
100+
self.vocab_size = len(jsut_symbols)
98101
else:
99102
raise ValueError("No such dataset: {}".format(dataset))
100103
self.initializer_range = initializer_range

tensorflow_tts/configs/tacotron2.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616

1717

1818
from tensorflow_tts.configs import BaseConfig
19+
from tensorflow_tts.processor.jsut import JSUT_SYMBOLS
1920
from tensorflow_tts.processor.ljspeech import LJSPEECH_SYMBOLS as lj_symbols
2021
from tensorflow_tts.processor.kss import KSS_SYMBOLS as kss_symbols
2122
from tensorflow_tts.processor.baker import BAKER_SYMBOLS as bk_symbols
2223
from tensorflow_tts.processor.libritts import LIBRITTS_SYMBOLS as lbri_symbols
2324
from tensorflow_tts.processor.ljspeechu import LJSPEECH_U_SYMBOLS as lju_symbols
2425
from tensorflow_tts.processor.synpaflex import SYNPAFLEX_SYMBOLS as synpaflex_symbols
26+
from tensorflow_tts.processor.jsut import JSUT_SYMBOLS as jsut_symbols
2527

2628

2729
class Tacotron2Config(BaseConfig):
@@ -72,6 +74,8 @@ def __init__(
7274
self.vocab_size = len(lju_symbols)
7375
elif dataset == "synpaflex":
7476
self.vocab_size = len(synpaflex_symbols)
77+
elif dataset == "jsut":
78+
self.vocab_size = len(jsut_symbols)
7579
else:
7680
raise ValueError("No such dataset: {}".format(dataset))
7781
self.embedding_hidden_size = embedding_hidden_size

tensorflow_tts/inference/auto_processor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
ThorstenProcessor,
2828
LJSpeechUltimateProcessor,
2929
SynpaflexProcessor,
30+
JSUTProcessor,
3031
)
3132

3233
from tensorflow_tts.utils import CACHE_DIRECTORY, PROCESSOR_FILE_NAME, LIBRARY_NAME
@@ -42,6 +43,7 @@
4243
("ThorstenProcessor", ThorstenProcessor),
4344
("LJSpeechUltimateProcessor", LJSpeechUltimateProcessor),
4445
("SynpaflexProcessor", SynpaflexProcessor),
46+
("JSUTProcessor", JSUTProcessor),
4547
]
4648
)
4749

tensorflow_tts/processor/jsut.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ class JSUTProcessor(BaseProcessor):
9191
"""JSUT processor."""
9292
cleaner_names: str = None
9393
speaker_name: str = "jsut"
94-
target_rate: int = 24000
9594
train_f_name: str = "text_kana/basic5000.yaml"
9695

9796
def create_items(self):
@@ -105,8 +104,6 @@ def create_items(self):
105104
for k, v in data_json.items():
106105
utt_id = k
107106
phones = v['phone_level3']
108-
# phones = phones.replace("I", "i")
109-
# phones = phones.replace("U", "u")
110107
phones = phones.split("-")
111108
phones = [_sil] + phones + [_sil]
112109
wav_path = os.path.join(self.data_dir, "wav", f"{utt_id}.wav")
@@ -129,9 +126,9 @@ def get_one_sample(self, item):
129126
audio, rate = sf.read(wav_path)
130127
audio = audio.astype(np.float32)
131128

132-
if rate != self.target_rate:
133-
assert rate > self.target_rate
134-
audio = librosa.resample(audio, rate, self.target_rate)
129+
# if rate != self.target_rate:
130+
# assert rate > self.target_rate
131+
# audio = librosa.resample(audio, rate, self.target_rate)
135132

136133
# convert text to ids
137134
text_ids = np.asarray(self.text_to_sequence(text), np.int32)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"symbol_to_id": {
3+
"pad": 0,
4+
"sil": 1,
5+
"N": 2,
6+
"a": 3,
7+
"b": 4,
8+
"by": 5,
9+
"ch": 6,
10+
"cl": 7,
11+
"d": 8,
12+
"dy": 9,
13+
"e": 10,
14+
"f": 11,
15+
"g": 12,
16+
"gy": 13,
17+
"h": 14,
18+
"hy": 15,
19+
"i": 16,
20+
"j": 17,
21+
"k": 18,
22+
"ky": 19,
23+
"m": 20,
24+
"my": 21,
25+
"n": 22,
26+
"ny": 23,
27+
"o": 24,
28+
"p": 25,
29+
"pau": 26,
30+
"py": 27,
31+
"r": 28,
32+
"ry": 29,
33+
"s": 30,
34+
"sh": 31,
35+
"t": 32,
36+
"ts": 33,
37+
"u": 34,
38+
"v": 35,
39+
"w": 36,
40+
"y": 37,
41+
"z": 38,
42+
"eos": 39
43+
},
44+
"id_to_symbol": {
45+
"0": "pad",
46+
"1": "sil",
47+
"2": "N",
48+
"3": "a",
49+
"4": "b",
50+
"5": "by",
51+
"6": "ch",
52+
"7": "cl",
53+
"8": "d",
54+
"9": "dy",
55+
"10": "e",
56+
"11": "f",
57+
"12": "g",
58+
"13": "gy",
59+
"14": "h",
60+
"15": "hy",
61+
"16": "i",
62+
"17": "j",
63+
"18": "k",
64+
"19": "ky",
65+
"20": "m",
66+
"21": "my",
67+
"22": "n",
68+
"23": "ny",
69+
"24": "o",
70+
"25": "p",
71+
"26": "pau",
72+
"27": "py",
73+
"28": "r",
74+
"29": "ry",
75+
"30": "s",
76+
"31": "sh",
77+
"32": "t",
78+
"33": "ts",
79+
"34": "u",
80+
"35": "v",
81+
"36": "w",
82+
"37": "y",
83+
"38": "z",
84+
"39": "eos"
85+
},
86+
"speakers_map": {
87+
"jsut": 0
88+
},
89+
"processor_name": "JSUTProcessor"
90+
}

0 commit comments

Comments
 (0)