Commit 36cf158

update readme/requirements & clean up some unnecessary code.
Parent: ce7abdc · Commit: 36cf158

File tree

README.md
configs/config_base.yaml
requirements_3090.txt
tasks/tts/fs2.py
usr/diffsinger_task.py
usr/diffspeech_task.py

6 files changed: +14, -32 lines

README.md

Lines changed: 6 additions & 5 deletions
@@ -1,5 +1,6 @@
 # DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism
 [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/abs/2105.02446)
+[![GitHub Stars](https://img.shields.io/github/stars/MoonInTheRiver/DiffSinger?style=social)](https://github.com/MoonInTheRiver/DiffSinger)
 
 This repository is the official PyTorch implementation of our AAAI-2022 [paper](https://arxiv.org/abs/2105.02446), in which we propose DiffSinger (for Singing-Voice-Synthesis) and DiffSpeech (for Text-to-Speech).
 
@@ -46,14 +47,14 @@ CUDA_VISIBLE_DEVICES=0 python data_gen/tts/bin/binarize.py --config configs/tts/
 ### 2. Training Example
 
 ```sh
-CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name xxx --reset
+CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name lj_exp1 --reset
 ```
 
 
 ### 3. Inference Example
 
 ```sh
-CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name xxx --reset --infer
+CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name lj_exp1 --reset --infer
 ```
 
 We also provide:
@@ -78,13 +79,13 @@ Similar to DiffSpeech.
 
 ### 2. Training Example
 ```sh
-CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/popcs_ds_beta6.yaml --exp_name xxx --reset
+CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/popcs_ds_beta6.yaml --exp_name popcs_exp1 --reset
 # or
-CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/popcs_ds_beta6_offline.yaml --exp_name xxx --reset
+CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/popcs_ds_beta6_offline.yaml --exp_name popcs_exp2 --reset
 ```
 ### 3. Inference Example
 ```sh
-CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config xxx --exp_name xxx --reset --infer
+CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/popcs_ds_beta6_offline.yaml --exp_name popcs_exp2 --reset --infer
 ```
 The pre-trained model for SVS will be provided recently.
 <!--
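Usage note (not part of the commit): a minimal sketch of running the two updated DiffSpeech commands back to back, assuming `--exp_name` selects the checkpoint directory so inference reuses the name given at training time; `lj_exp1` is just the example name introduced above.

```sh
# Train DiffSpeech on LJSpeech under an example experiment name ...
CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name lj_exp1 --reset
# ... then run inference from the same experiment (same --exp_name, plus --infer)
CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config usr/configs/lj_ds_beta6.yaml --exp_name lj_exp1 --reset --infer
```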

configs/config_base.yaml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ sort_by_len: true
 #########
 load_ckpt: ''
 save_ckpt: true
-save_best: true
+save_best: false
 num_ckpt_keep: 3
 clip_grad_norm: 0
 accumulate_grad_batches: 1
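For context, a sketch of how the checkpointing block reads after this change; the inline comments are my reading of the flag names, not documentation taken from the repo.

```yaml
load_ckpt: ''        # path of a checkpoint to load; empty string means start from scratch (assumed)
save_ckpt: true      # keep writing periodic checkpoints
save_best: false     # presumably: no longer keep a separate "best-by-validation" checkpoint
num_ckpt_keep: 3     # presumably: only the most recent checkpoints are retained
```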

requirements_3090.txt

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ packaging==21.2
 pandas==1.3.4
 Pillow==8.4.0
 pooch==1.5.2
-praat-parselmouth==0.4.0
+praat-parselmouth==0.3.3
 proglog==0.1.9
 protobuf==3.19.1
 pycparser==2.20
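If an environment was already set up from an earlier revision of this file, the downgraded pin can be refreshed; a minimal sketch (the file name comes from this commit, the pip invocation is the usual one rather than anything repo-specific):

```sh
# install the downgraded pin from this commit, or re-sync the whole requirements file
pip install praat-parselmouth==0.3.3
pip install -r requirements_3090.txt
```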

tasks/tts/fs2.py

Lines changed: 1 addition & 6 deletions
@@ -283,12 +283,7 @@ def test_step(self, sample, batch_idx):
         mel2ph, uv, f0 = None, None, None
         ref_mels = None
         if hparams['profile_infer']:
-            if batch_idx % 10 == 0:
-                torch.cuda.empty_cache()
-            mel2ph, uv, f0 = sample['mel2ph'], sample['uv'], sample['f0']
-            with utils.Timer('fs', print_time=True):
-                self.model(
-                    txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed, f0=f0, uv=uv, ref_mels=ref_mels, infer=True)
+            pass
         else:
             if hparams['use_gt_dur']:
                 mel2ph = sample['mel2ph']
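The same `profile_infer` block (occasional CUDA cache clearing plus a timed forward pass) is stripped from `usr/diffsinger_task.py` and `usr/diffspeech_task.py` below. For anyone who still wants to time inference after this cleanup, here is a small self-contained sketch of an equivalent timer; it is a stand-in with the same intent, not the repo's `utils.Timer`, and the commented usage assumes the variables available inside `test_step`.

```python
import time
import torch


class InferTimer:
    """Context manager mimicking the removed profiling block:
    synchronize CUDA, then print the wall-clock time of the wrapped call."""

    def __init__(self, name: str):
        self.name = name

    def __enter__(self):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        print(f"{self.name}: {time.perf_counter() - self.start:.3f}s")


# Hypothetical use inside test_step (self.model, sample, etc. as in the tasks above):
# if batch_idx % 10 == 0:
#     torch.cuda.empty_cache()  # occasionally free cached GPU memory, as the old code did
# with InferTimer('fs'):
#     self.model(txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed,
#                f0=f0, uv=uv, ref_mels=ref_mels, infer=True)
```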

usr/diffsinger_task.py

Lines changed: 3 additions & 10 deletions
@@ -60,7 +60,7 @@ def validation_step(self, sample, batch_idx):
         outputs = utils.tensors_to_scalars(outputs)
         if batch_idx < hparams['num_valid_plots']:
             model_out = self.model(
-                txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, energy=energy, ref_mels=target, infer=True)
+                txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, energy=energy, ref_mels=None, infer=True)
             gt_f0 = denorm_f0(sample['f0'], sample['uv'], hparams)
             self.plot_wav(batch_idx, sample['mels'], model_out['mel_out'], is_mel=True, gt_f0=gt_f0, f0=model_out.get('f0_denorm'))
             self.plot_mel(batch_idx, sample['mels'], model_out['mel_out'], name=f'diffmel_{batch_idx}')
@@ -164,7 +164,7 @@ def validation_step(self, sample, batch_idx):
             fs2_mel = sample['fs2_mels']
             model_out = self.model(
                 txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, energy=energy,
-                ref_mels=[target, fs2_mel], infer=True)
+                ref_mels=[None, fs2_mel], infer=True)
             gt_f0 = denorm_f0(sample['f0'], sample['uv'], hparams)
             self.plot_wav(batch_idx, sample['mels'], model_out['mel_out'], is_mel=True, gt_f0=gt_f0, f0=model_out.get('f0_denorm'))
             self.plot_mel(batch_idx, sample['mels'], model_out['mel_out'], name=f'diffmel_{batch_idx}')
@@ -176,14 +176,7 @@ def test_step(self, sample, batch_idx):
         txt_tokens = sample['txt_tokens']
         energy = sample['energy']
         if hparams['profile_infer']:
-            print(sample['item_name'])
-            if batch_idx % 10 == 0:
-                torch.cuda.empty_cache()
-            mel2ph, uv, f0 = sample['mel2ph'], sample['uv'], sample['f0']
-            target = sample['mels'] # [B, T_s, 80]
-            with utils.Timer('diffsinger', print_time=True):
-                self.model(
-                    txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed, f0=f0, uv=uv, ref_mels=target, infer=True)
+            pass
         else:
             mel2ph, uv, f0 = None, None, None
             if hparams['use_gt_dur']:

usr/diffspeech_task.py

Lines changed: 2 additions & 9 deletions
@@ -116,14 +116,7 @@ def test_step(self, sample, batch_idx):
         mel2ph, uv, f0 = None, None, None
         energy = sample['energy']
         if hparams['profile_infer']:
-            print(sample['item_name'])
-            if batch_idx % 10 == 0:
-                torch.cuda.empty_cache()
-            mel2ph, uv, f0 = sample['mel2ph'], sample['uv'], sample['f0']
-            target = sample['mels'] # [B, T_s, 80]
-            with utils.Timer('diffspeech', print_time=True):
-                self.model(
-                    txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed, f0=f0, uv=uv, ref_mels=target, infer=True)
+            pass
         else:
             mel2ph, uv, f0 = None, None, None
             if hparams['use_gt_dur']:
@@ -134,7 +127,7 @@ def test_step(self, sample, batch_idx):
             target = sample['mels'] # [B, T_s, 80]
             # fs2_mel = sample['fs2_mels']
             outputs = self.model(
-                txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, ref_mels=target, energy=energy, infer=True)
+                txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv, ref_mels=None, energy=energy, infer=True)
             sample['outputs'] = self.model.out2mel(outputs['mel_out'])
             sample['mel2ph_pred'] = outputs['mel2ph']
