Skip to content

Commit a615bf9

Browse files
committed
📝 Update new processor for Baker (Chinese) dataset.
1 parent ebd5209 commit a615bf9

File tree

2 files changed

+563
-474
lines changed

2 files changed

+563
-474
lines changed

tensorflow_tts/bin/preprocess.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from tensorflow_tts.processor import LibriTTSProcessor
3737

3838
from tensorflow_tts.processor.ljspeech import LJSPEECH_SYMBOLS
39+
from tensorflow_tts.processor.baker import BAKER_SYMBOLS
3940
from tensorflow_tts.processor.kss import KSS_SYMBOLS
4041
from tensorflow_tts.processor.libritts import LIBRITTS_SYMBOLS
4142

@@ -68,7 +69,7 @@ def parse_and_config():
6869
"--dataset",
6970
type=str,
7071
default="ljspeech",
71-
choices=["ljspeech", "kss", "libritts"],
72+
choices=["ljspeech", "kss", "libritts", "baker"],
7273
help="Dataset to preprocess.",
7374
)
7475
parser.add_argument(
@@ -341,18 +342,21 @@ def preprocess():
341342
"ljspeech": LJSpeechProcessor,
342343
"kss": KSSProcessor,
343344
"libritts": LibriTTSProcessor,
345+
"baker": BakerProcessor,
344346
}
345347

346348
dataset_symbol = {
347349
"ljspeech": LJSPEECH_SYMBOLS,
348350
"kss": KSS_SYMBOLS,
349351
"libritts": LIBRITTS_SYMBOLS,
352+
"baker": BAKER_SYMBOLS,
350353
}
351354

352355
dataset_cleaner = {
353356
"ljspeech": "english_cleaners",
354357
"kss": "korean_cleaners",
355358
"libritts": None,
359+
"baker": None,
356360
}
357361

358362
logging.info(f"Selected '{config['dataset']}' processor.")
@@ -372,7 +376,10 @@ def preprocess():
372376

373377
# save pretrained-processor to feature dir
374378
processor._save_mapper(
375-
os.path.join(config["outdir"], f"{config['dataset']}_mapper.json")
379+
os.path.join(config["outdir"], f"{config['dataset']}_mapper.json"),
380+
extra_attrs_to_save={"pinyin_dict": processor.pinyin_dict}
381+
if config["dataset"] == "baker"
382+
else {},
376383
)
377384

378385
# build train test split
@@ -553,4 +560,3 @@ def compute_statistics():
553560
logging.info("Saving computed statistics.")
554561
scaler_list = [(scaler_mel, ""), (scaler_energy, "_energy"), (scaler_f0, "_f0")]
555562
save_statistics_to_file(scaler_list, config)
556-

0 commit comments

Comments
 (0)