TensorSpeech
diff --git a/‎examples/parallel_wavegan/conf/parallel_wavegan.v1.yaml‎
Lines changed: 104 additions & 0 deletions b/‎examples/parallel_wavegan/conf/parallel_wavegan.v1.yaml‎
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+
+# This is the hyperparameter configuration file for ParallelWavegan.
+# Please make sure this is adjusted for the LJSpeech dataset. If you want to
+# apply it to another dataset, you might need to carefully change some parameters.
+# This configuration performs 4000k iters.
+
+# Original: https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/egs/ljspeech/voc1/conf/parallel_wavegan.v1.yaml
+
+###########################################################
+#                FEATURE EXTRACTION SETTING               #
+###########################################################
+sampling_rate: 22050
+hop_size: 256            # Hop size. Must equal the product of upsample_scales in the generator params.
+format: "npy"
+
+
+###########################################################
+#         GENERATOR NETWORK ARCHITECTURE SETTING          #
+###########################################################
+model_type: "parallel_wavegan_generator"
+
+parallel_wavegan_generator_params:
+    out_channels: 1       # Number of output channels.
+    kernel_size: 3        # Kernel size of dilated convolution.
+    n_layers: 30            # Number of residual block layers.
+    stacks: 3             # Number of stacks, i.e., dilation cycles.
+    residual_channels: 64 # Number of channels in residual conv.
+    gate_channels: 128    # Number of channels in gated conv.
+    skip_channels: 64     # Number of channels in skip conv.
+    aux_channels: 80      # Number of channels for auxiliary feature conv.
+                          # Must be the same as num_mels (num_mels is not defined in this file).
+    aux_context_window: 2 # Context window size for auxiliary feature.
+                          # If set to 2, the previous 2 and future 2 frames will be considered.
+    dropout: 0.0          # Dropout rate. 0.0 means no dropout is applied.
+    upsample_params:                      # Upsampling network parameters.
+        upsample_scales: [4, 4, 4, 4]     # Upsampling scales. Product of these must equal hop_size (4*4*4*4 = 256).
+
+###########################################################
+#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
+###########################################################
+parallel_wavegan_discriminator_params:
+    out_channels: 1       # Number of output channels.
+    kernel_size: 3        # Kernel size of conv layers.
+    n_layers: 10            # Number of conv layers.
+    conv_channels: 64     # Number of channels in conv layers.
+    use_bias: true            # Whether to use bias parameter in conv.
+    nonlinear_activation: "LeakyReLU" # Nonlinear function after each conv.
+    nonlinear_activation_params:      # Nonlinear function parameters.
+        alpha: 0.2           # Alpha in LeakyReLU.
+
+###########################################################
+#                   STFT LOSS SETTING                     #
+###########################################################
+stft_loss_params:
+    fft_lengths: [1024, 2048, 512]  # List of FFT sizes for the STFT-based loss.
+    frame_steps: [120, 240, 50]     # List of hop sizes for the STFT-based loss.
+    frame_lengths: [600, 1200, 240] # List of window lengths for the STFT-based loss.
+
+
+###########################################################
+#               ADVERSARIAL LOSS SETTING                  #
+###########################################################
+lambda_adv: 4.0  # Loss balancing coefficient for the adversarial loss term.
+
+###########################################################
+#                  DATA LOADER SETTING                    #
+###########################################################
+batch_size: 8                 # Batch size.
+batch_max_steps: 16384          # Length of each audio clip in a training batch. Must be divisible by hop_size (16384 = 64 * 256).
+batch_max_steps_valid: 81920   # Length of each audio clip for validation. Must be divisible by hop_size (81920 = 320 * 256).
+remove_short_samples: true     # Whether to remove samples shorter than batch_max_steps.
+allow_cache: true              # Whether to allow caching in the dataset. If true, it requires CPU memory.
+is_shuffle: true               # Whether to shuffle the dataset after each epoch.
+
+###########################################################
+#             OPTIMIZER & SCHEDULER SETTING               #
+###########################################################
+generator_optimizer_params:
+    lr_fn: "PiecewiseConstantDecay"
+    lr_params: # Arguments for the lr_fn schedule (presumably passed through as keyword arguments - TODO confirm).
+        boundaries: [100000]       # = discriminator_train_start_steps. NOTE(review): that key is set to 0 in this file; confirm which value is intended.
+        values: [0.0005, 0.0001]    # Learning rate for each interval: 0.0005 up to the boundary, 0.0001 afterwards.
+
+    
+discriminator_optimizer_params:
+    lr_fn: "PiecewiseConstantDecay"
+    lr_params: # Arguments for the lr_fn schedule (presumably passed through as keyword arguments - TODO confirm).
+        boundaries: [0]             # After resuming to start discriminator training, the global step is 100k but the discriminator's local step is 0.
+        values: [0.0001, 0.0001]    # Learning rate for each interval; both entries are equal, so the rate is constant.
+
+
+###########################################################
+#                    INTERVAL SETTING                     #
+###########################################################
+discriminator_train_start_steps: 0  # Step at which discriminator training begins. NOTE(review): the generator lr boundary comment equates this with 100000; confirm.
+train_max_steps: 4000000                 # Number of training steps (4000k).
+save_interval_steps: 20000               # Interval in steps between checkpoint saves.
+eval_interval_steps: 5000                # Interval in steps between evaluations of the network.
+log_interval_steps: 200                  # Interval in steps between training-log records.
+
+###########################################################
+#                     OTHER SETTING                       #
+###########################################################
+num_save_intermediate_results: 1  # Number of batches to be saved as intermediate results.