|
| 1 | +{ |
| 2 | + "model_name_or_path": "./parler-tts-untrained-large/parler-tts-untrained-large", |
| 3 | + "save_to_disk": "./tmp_dataset_audio/", |
| 4 | + "temporary_save_to_disk": "./audio_code_tmp/", |
| 5 | + "wandb_project": "parler-tts-50k-hours", |
| 6 | + "wandb_run_name": "Large", |
| 7 | + |
| 8 | + "feature_extractor_name":"ylacombe/dac_44khZ_8kbps", |
| 9 | + "description_tokenizer_name":"google/flan-t5-large", |
| 10 | + "prompt_tokenizer_name":"google/flan-t5-large", |
| 11 | + |
| 12 | + "report_to": ["wandb"], |
| 13 | + "overwrite_output_dir": true, |
| 14 | + "output_dir": "./output_dir_training", |
| 15 | + |
| 16 | + "train_dataset_name": "ylacombe/libritts_r_filtered+ylacombe/libritts_r_filtered+ylacombe/libritts_r_filtered+parler-tts/mls_eng", |
| 17 | + "train_metadata_dataset_name": "ylacombe/libritts-r-filtered-descriptions-10k-v5-without-accents+ylacombe/libritts-r-filtered-descriptions-10k-v5-without-accents+ylacombe/libritts-r-filtered-descriptions-10k-v5-without-accents+ylacombe/mls-eng-descriptions-v4", |
| 18 | + "train_dataset_config_name": "clean+clean+other+default", |
| 19 | + "train_split_name": "train.clean.360+train.clean.100+train.other.500+train", |
| 20 | + |
| 21 | + "eval_dataset_name": "ylacombe/libritts_r_filtered+parler-tts/mls_eng", |
| 22 | + "eval_metadata_dataset_name": "ylacombe/libritts-r-filtered-descriptions-10k-v5-without-accents+ylacombe/mls-eng-descriptions-v4", |
| 23 | + "eval_dataset_config_name": "other+default", |
| 24 | + "eval_split_name": "test.other+test", |
| 25 | + |
| 26 | + "target_audio_column_name": "audio", |
| 27 | + "description_column_name": "text_description", |
| 28 | + "prompt_column_name": "text", |
| 29 | + |
| 30 | + "max_eval_samples": 96, |
| 31 | + |
| 32 | + "max_duration_in_seconds": 30, |
| 33 | + "min_duration_in_seconds": 2.0, |
| 34 | + "max_text_length": 600, |
| 35 | + |
| 36 | + "group_by_length": true, |
| 37 | + |
| 38 | + "add_audio_samples_to_wandb": true, |
| 39 | + "id_column_name": "id", |
| 40 | + |
| 41 | + "preprocessing_num_workers": 8, |
| 42 | + |
| 43 | + "do_train": true, |
| 44 | + "num_train_epochs": 4, |
| 45 | + "gradient_accumulation_steps": 4, |
| 46 | + "gradient_checkpointing": false, |
| 47 | + "per_device_train_batch_size": 3, |
| 48 | + "learning_rate": 0.0015, |
| 49 | + "adam_beta1": 0.9, |
| 50 | + "adam_beta2": 0.99, |
| 51 | + "weight_decay": 0.01, |
| 52 | + |
| 53 | + "lr_scheduler_type": "constant_with_warmup", |
| 54 | + "warmup_steps": 10000, |
| 55 | + |
| 56 | + |
| 57 | + "logging_steps": 1000, |
| 58 | + "freeze_text_encoder": true, |
| 59 | + |
| 60 | + |
| 61 | + "do_eval": true, |
| 62 | + "predict_with_generate": true, |
| 63 | + "include_inputs_for_metrics": true, |
| 64 | + "evaluation_strategy": "steps", |
| 65 | + "eval_steps": 10000, |
| 66 | + "save_steps": 10000, |
| 67 | + "save_total_limit": 10, |
| 68 | + |
| 69 | + "per_device_eval_batch_size": 6, |
| 70 | + |
| 71 | + "audio_encoder_per_device_batch_size":24, |
| 72 | + "dtype": "bfloat16", |
| 73 | + "seed": 738, |
| 74 | + |
| 75 | + "dataloader_num_workers":8, |
| 76 | + "attn_implementation": "sdpa" |
| 77 | +} |
0 commit comments