
Commit bff69b6

🚀 Add conf, training parallelwavegan code.
1 parent 192f871 commit bff69b6

File tree

4 files changed (+606 −27 lines)
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@

# This is the hyperparameter configuration file for ParallelWaveGAN.
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
# apply it to another dataset, you might need to carefully change some parameters.
# This configuration performs 4000k iters.

# Original: https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/egs/ljspeech/voc1/conf/parallel_wavegan.v1.yaml

###########################################################
#                FEATURE EXTRACTION SETTING                #
###########################################################
sampling_rate: 22050
hop_size: 256  # Hop size.
format: "npy"

###########################################################
#          GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
model_type: "parallel_wavegan_generator"

parallel_wavegan_generator_params:
    out_channels: 1        # Number of output channels.
    kernel_size: 3         # Kernel size of dilated convolution.
    n_layers: 30           # Number of residual block layers.
    stacks: 3              # Number of stacks, i.e., dilation cycles.
    residual_channels: 64  # Number of channels in residual conv.
    gate_channels: 128     # Number of channels in gated conv.
    skip_channels: 64      # Number of channels in skip conv.
    aux_channels: 80       # Number of channels for auxiliary feature conv.
                           # Must be the same as num_mels.
    aux_context_window: 2  # Context window size for auxiliary feature.
                           # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0           # Dropout rate. 0.0 means no dropout applied.
    upsample_params:       # Upsampling network parameters.
        upsample_scales: [4, 4, 4, 4]  # Upsampling scales. Product of these must be the same as hop size.
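                                       # Here 4 * 4 * 4 * 4 = 256, which equals hop_size above.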

###########################################################
#        DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
parallel_wavegan_discriminator_params:
    out_channels: 1                    # Number of output channels.
    kernel_size: 3                     # Kernel size of dilated convolution.
    n_layers: 10                       # Number of conv layers.
    conv_channels: 64                  # Number of channels in conv layers.
    use_bias: true                     # Whether to use bias parameter in conv.
    nonlinear_activation: "LeakyReLU"  # Nonlinear function after each conv.
    nonlinear_activation_params:       # Nonlinear function parameters.
        alpha: 0.2                     # Alpha in LeakyReLU.

###########################################################
#                     STFT LOSS SETTING                    #
###########################################################
stft_loss_params:
    fft_lengths: [1024, 2048, 512]   # List of FFT sizes for STFT-based loss.
    frame_steps: [120, 240, 50]      # List of hop sizes for STFT-based loss.
    frame_lengths: [600, 1200, 240]  # List of window lengths for STFT-based loss.
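    # Each triple (fft_lengths[i], frame_steps[i], frame_lengths[i]) defines one STFT
    # resolution; the loss combines the spectral terms computed at all three resolutions.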


###########################################################
#                 ADVERSARIAL LOSS SETTING                 #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                   DATA LOADER SETTING                    #
###########################################################
batch_size: 8           # Batch size.
batch_max_steps: 16384  # Length of each audio clip in a training batch. Make sure it is divisible by hop_size.
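                        # 16384 samples / 256 (hop_size) = 64 mel frames per training clip.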
batch_max_steps_valid: 81920  # Length of each audio clip for validation. Make sure it is divisible by hop_size.
remove_short_samples: true    # Whether to remove samples shorter than batch_max_steps.
allow_cache: true             # Whether to allow caching in the dataset. If true, it requires extra CPU memory.
is_shuffle: true              # Whether to shuffle the dataset after each epoch.

###########################################################
#              OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    lr_fn: "PiecewiseConstantDecay"
    lr_params:
        boundaries: [100000]      # = discriminator_train_start_steps.
        values: [0.0005, 0.0001]  # Learning rate for each interval.


discriminator_optimizer_params:
    lr_fn: "PiecewiseConstantDecay"
    lr_params:
        boundaries: [0]           # After resuming to start discriminator training, the global step is 100k but the local discriminator step is 0.
        values: [0.0001, 0.0001]  # Learning rate for each interval.
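                                  # With boundaries [0] and identical values, this is effectively a constant 1e-4 learning rate.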


###########################################################
#                     INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 0  # Step at which to begin training the discriminator.
train_max_steps: 4000000            # Number of training steps.
save_interval_steps: 20000          # Interval steps to save checkpoints.
eval_interval_steps: 5000           # Interval steps to evaluate the network.
log_interval_steps: 200             # Interval steps to record the training log.

###########################################################
#                      OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 1  # Number of batches to be saved as intermediate results.
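
For context on how these values are typically consumed, below is a minimal TensorFlow 2.x sketch of the multi-resolution STFT loss, the lambda_adv weighting, and the PiecewiseConstantDecay schedule. Function names such as multi_resolution_stft_loss and generator_loss are illustrative only, not this repo's actual API, and the adversarial term may differ in detail from the training code added in this commit.

import tensorflow as tf

def multi_resolution_stft_loss(y, y_hat,
                               fft_lengths=(1024, 2048, 512),
                               frame_steps=(120, 240, 50),
                               frame_lengths=(600, 1200, 240)):
    """Average spectral-convergence + log-magnitude loss over several STFT resolutions."""
    loss = 0.0
    for fft_length, frame_step, frame_length in zip(fft_lengths, frame_steps, frame_lengths):
        mag = tf.abs(tf.signal.stft(y, frame_length, frame_step, fft_length))
        mag_hat = tf.abs(tf.signal.stft(y_hat, frame_length, frame_step, fft_length))
        sc_loss = tf.norm(mag - mag_hat) / (tf.norm(mag) + 1e-6)  # spectral convergence
        log_mag_loss = tf.reduce_mean(
            tf.abs(tf.math.log(mag + 1e-6) - tf.math.log(mag_hat + 1e-6)))  # log STFT magnitude
        loss += sc_loss + log_mag_loss
    return loss / len(fft_lengths)

def generator_loss(y, y_hat, disc_out, lambda_adv=4.0, adversarial=True):
    """STFT loss, plus lambda_adv * adversarial loss once the discriminator is training."""
    loss = multi_resolution_stft_loss(y, y_hat)
    if adversarial:  # enabled after discriminator_train_start_steps
        adv_loss = tf.reduce_mean(tf.square(disc_out - 1.0))  # least-squares GAN term
        loss += lambda_adv * adv_loss
    return loss

# PiecewiseConstantDecay: the generator learning rate is 5e-4 until step 100k, then 1e-4,
# matching generator_optimizer_params above.
gen_lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[100000], values=[0.0005, 0.0001])
gen_optimizer = tf.keras.optimizers.Adam(learning_rate=gen_lr)

The sketch only maps the config keys onto the places where they would plug in; the actual loss and optimizer construction live in the training code added alongside this config.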

0 commit comments
