3636from tensorflow_tts .processor import LibriTTSProcessor
3737
3838from tensorflow_tts .processor .ljspeech import LJSPEECH_SYMBOLS
39+ from tensorflow_tts .processor .baker import BAKER_SYMBOLS
3940from tensorflow_tts .processor .kss import KSS_SYMBOLS
4041from tensorflow_tts .processor .libritts import LIBRITTS_SYMBOLS
4142
@@ -68,7 +69,7 @@ def parse_and_config():
6869 "--dataset" ,
6970 type = str ,
7071 default = "ljspeech" ,
71- choices = ["ljspeech" , "kss" , "libritts" ],
72+ choices = ["ljspeech" , "kss" , "libritts" , "baker" ],
7273 help = "Dataset to preprocess." ,
7374 )
7475 parser .add_argument (
@@ -341,18 +342,21 @@ def preprocess():
341342 "ljspeech" : LJSpeechProcessor ,
342343 "kss" : KSSProcessor ,
343344 "libritts" : LibriTTSProcessor ,
345+ "baker" : BakerProcessor ,
344346 }
345347
346348 dataset_symbol = {
347349 "ljspeech" : LJSPEECH_SYMBOLS ,
348350 "kss" : KSS_SYMBOLS ,
349351 "libritts" : LIBRITTS_SYMBOLS ,
352+ "baker" : BAKER_SYMBOLS ,
350353 }
351354
352355 dataset_cleaner = {
353356 "ljspeech" : "english_cleaners" ,
354357 "kss" : "korean_cleaners" ,
355358 "libritts" : None ,
359+ "baker" : None ,
356360 }
357361
358362 logging .info (f"Selected '{ config ['dataset' ]} ' processor." )
@@ -372,7 +376,10 @@ def preprocess():
372376
373377 # save pretrained-processor to feature dir
374378 processor ._save_mapper (
375- os .path .join (config ["outdir" ], f"{ config ['dataset' ]} _mapper.json" )
379+ os .path .join (config ["outdir" ], f"{ config ['dataset' ]} _mapper.json" ),
380+ extra_attrs_to_save = {"pinyin_dict" : processor .pinyin_dict }
381+ if config ["dataset" ] == "baker"
382+ else {},
376383 )
377384
378385 # build train test split
@@ -553,4 +560,3 @@ def compute_statistics():
553560 logging .info ("Saving computed statistics." )
554561 scaler_list = [(scaler_mel , "" ), (scaler_energy , "_energy" ), (scaler_f0 , "_f0" )]
555562 save_statistics_to_file (scaler_list , config )
556-
0 commit comments