
Commit d1789e3

Merge pull request #367 from OscarVanL/speaker_ids_config
Load Speaker IDs mapping from Processor Mapper
2 parents 3a37400 + bd8df36 commit d1789e3

2 files changed: +24 / -12 lines


examples/fastspeech2_libritts/fastspeech2_dataset.py

Lines changed: 7 additions & 12 deletions
@@ -59,6 +59,7 @@ def __init__(
         f0_load_fn=np.load,
         energy_load_fn=np.load,
         mel_length_threshold=0,
+        speakers_map=None
     ):
         """Initialize dataset.

@@ -77,6 +78,7 @@ def __init__(
             f0_load_fn (func): Function to load f0 file.
             energy_load_fn (func): Function to load energy file.
             mel_length_threshold (int): Threshold to remove short feature files.
+            speakers_map (dict): Speakers map generated in dataset preprocessing

         """
         # find all of charactor and mel files.
@@ -96,6 +98,8 @@ def __init__(
             == len(energy_files)
         ), f"Number of charactor, mel, duration, f0 and energy files are different"

+        assert speakers_map != None, f"No speakers map found. Did you set --dataset_mapping?"
+
         if ".npy" in charactor_query:
             suffix = charactor_query[1:]
             utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
@@ -113,18 +117,9 @@ def __init__(
         self.f0_load_fn = f0_load_fn
         self.energy_load_fn = energy_load_fn
         self.mel_length_threshold = mel_length_threshold
-
-        self.speakers_map = {} # TODO
-        sp_id = 0
-        for i in self.utt_ids:
-            sp_name = i.split("_")[0]
-            if sp_name not in self.speakers_map:
-                self.speakers_map[sp_name] = sp_id
-                sp_id += 1
-        self.speakers = [
-            self.speakers_map[i.split("_")[0]] for i in self.utt_ids
-        ] # TODO change but at the moment mfa folder name = speaker name
-
+        self.speakers_map = speakers_map
+        self.speakers = [self.speakers_map[i.split("_")[0]] for i in self.utt_ids]
+        print("Speaker: utt_id", list(zip(self.speakers, self.utt_ids)))
         self.f0_stat = np.load(f0_stat)
         self.energy_stat = np.load(energy_stat)

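For context, the constructor now consumes a prebuilt speakers_map instead of rebuilding speaker IDs from folder names inside the dataset. A minimal sketch of the lookup it performs, using a hypothetical two-speaker map and illustrative utterance IDs (in training, the real map comes from the processor's dataset mapping file):

    # Hypothetical example data; in training the map is loaded from the
    # processor mapping file and passed in as speakers_map.
    speakers_map = {"200": 0, "1088": 1}  # speaker name -> integer speaker id
    utt_ids = ["200_124139_000001_000000", "1088_134315_000002_000001"]

    # Same expression as the updated constructor: the speaker name is the
    # prefix of the utterance id before the first underscore.
    speakers = [speakers_map[u.split("_")[0]] for u in utt_ids]
    print(list(zip(speakers, utt_ids)))
    # -> [(0, '200_124139_000001_000000'), (1, '1088_134315_000002_000001')]
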
examples/fastspeech2_libritts/train_fastspeech2.py

Lines changed: 17 additions & 0 deletions
@@ -30,6 +30,7 @@

 import numpy as np
 import yaml
+import json

 import tensorflow_tts
 from examples.fastspeech2_libritts.fastspeech2_dataset import \
@@ -274,6 +275,11 @@ def main():
         default="dump/stats.npy",
         type=str,
     )
+    parser.add_argument(
+        "--dataset_mapping",
+        default="dump/libritts_mapper.npy",
+        type=str,
+    )
     parser.add_argument(
         "--pretrained",
         default="",
@@ -349,6 +355,15 @@ def main():
     else:
         raise ValueError("Only npy are supported.")

+    # load speakers map from dataset map
+    with open(args.dataset_mapping) as f:
+        dataset_mapping = json.load(f)
+        speakers_map = dataset_mapping["speakers_map"]
+
+    # Check n_speakers matches number of speakers in speakers_map
+    n_speakers = config["fastspeech2_params"]["n_speakers"]
+    assert n_speakers == len(speakers_map), f"Number of speakers in dataset does not match n_speakers in config"
+
     # define train/valid dataset
     train_dataset = CharactorDurationF0EnergyMelDataset(
         root_dir=args.train_dir,
@@ -360,6 +375,7 @@ def main():
         f0_stat=args.f0_stat,
         energy_stat=args.energy_stat,
         mel_length_threshold=mel_length_threshold,
+        speakers_map=speakers_map
     ).create(
         is_shuffle=config["is_shuffle"],
         allow_cache=config["allow_cache"],
@@ -376,6 +392,7 @@ def main():
         f0_stat=args.f0_stat,
         energy_stat=args.energy_stat,
         mel_length_threshold=mel_length_threshold,
+        speakers_map=speakers_map
     ).create(
         is_shuffle=config["is_shuffle"],
         allow_cache=config["allow_cache"],

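Note that although the default --dataset_mapping path ends in .npy, the file is read with json.load, so it is expected to be a JSON document carrying a "speakers_map" entry. A minimal sketch of that loading step and the n_speakers consistency check, using a hypothetical mapper file written inline (the file contents and the n_speakers value are illustrative, not taken from the commit):

    import json
    import tempfile

    # Write a tiny stand-in for the processor's mapper file (illustrative only;
    # the real dump/libritts_mapper.npy is produced during dataset preprocessing).
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
        json.dump({"speakers_map": {"200": 0, "1088": 1}}, tmp)
        mapper_path = tmp.name

    # Same pattern as the new code in main(): load the mapping, pull out
    # speakers_map, and make sure it agrees with the configured n_speakers.
    with open(mapper_path) as f:
        speakers_map = json.load(f)["speakers_map"]

    n_speakers = 2  # stands in for config["fastspeech2_params"]["n_speakers"]
    assert n_speakers == len(speakers_map), "speaker count mismatch with config"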