1+ # This configuration is for Paddle to train Tacotron 2. Compared to the
2+ # original paper, this configuration additionally use the guided attention
3+ # loss to accelerate the learning of the diagonal attention. It requires
# only a single GPU with 12 GB memory and it takes ~1 day to finish the
5+ # training on Titan V.
6+
# ##########################################################
#                FEATURE EXTRACTION SETTING                #
# ##########################################################

fs : 24000          # Sampling rate (Hz).
n_fft : 2048        # FFT size (samples).
n_shift : 300       # Hop size (samples). 12.5 ms at 24 kHz.
win_length : 1200   # Window length (samples). 50 ms at 24 kHz.
                    # If set to null, it will be the same as fft_size.
window : "hann"     # Window function. (Fixed: was " hann" with a leading
                    # space inside the quotes, which is not a valid
                    # window name for the STFT window lookup.)

# Only used for feats_type != raw

fmin : 80           # Minimum frequency of Mel basis.
fmax : 7600         # Maximum frequency of Mel basis.
n_mels : 80         # The number of mel basis.
23+
# ##########################################################
#                       DATA SETTING                       #
# ##########################################################
batch_size : 64     # Number of utterances per training batch.
num_workers : 2     # Number of dataloader worker processes.
29+
# ##########################################################
#                       MODEL SETTING                      #
# ##########################################################
model :                             # keyword arguments for the selected model
    embed_dim : 512                 # char or phn embedding dimension
    elayers : 1                     # number of blstm layers in encoder
    eunits : 512                    # number of blstm units
    econv_layers : 3                # number of convolutional layers in encoder
    econv_chans : 512               # number of channels in convolutional layer
    econv_filts : 5                 # filter size of convolutional layer
    atype : location                # attention function type
    adim : 512                      # attention dimension
    aconv_chans : 32                # number of channels in convolutional layer of attention
    aconv_filts : 15                # filter size of convolutional layer of attention
    cumulate_att_w : true           # whether to cumulate attention weight
    dlayers : 2                     # number of lstm layers in decoder
    dunits : 1024                   # number of lstm units in decoder
    prenet_layers : 2               # number of layers in prenet
    prenet_units : 256              # number of units in prenet
    postnet_layers : 5              # number of layers in postnet
    postnet_chans : 512             # number of channels in postnet
    postnet_filts : 5               # filter size of postnet layer
    output_activation : null        # activation function for the final output
    use_batch_norm : true           # whether to use batch normalization in encoder
    use_concate : true              # whether to concatenate encoder embedding with decoder outputs
    use_residual : false            # whether to use residual connection in encoder
    dropout_rate : 0.5              # dropout rate
    zoneout_rate : 0.1              # zoneout rate
    reduction_factor : 1            # reduction factor
    spk_embed_dim : null            # speaker embedding dimension
60+
61+
# ##########################################################
#                      UPDATER SETTING                     #
# ##########################################################
updater :
    use_masking : true              # whether to apply masking for padded part in loss calculation
    bce_pos_weight : 5.0            # weight of positive sample in binary cross entropy calculation
    use_guided_attn_loss : true     # whether to use guided attention loss
    guided_attn_loss_sigma : 0.4    # sigma of guided attention loss
    guided_attn_loss_lambda : 1.0   # strength (weight) of guided attention loss
71+

# ##########################################################
#                     OPTIMIZER SETTING                    #
# ##########################################################
optimizer :
    optim : adam                    # optimizer type
    learning_rate : 1.0e-03         # learning rate
    epsilon : 1.0e-06               # epsilon added to denominators for numerical stability
    weight_decay : 0.0              # weight decay coefficient (0.0 disables L2 regularization)
81+
# ##########################################################
#                     TRAINING SETTING                     #
# ##########################################################
max_epoch : 200     # Maximum number of training epochs.
num_snapshots : 5   # Number of most recent checkpoints to keep.

# ##########################################################
#                       OTHER SETTING                      #
# ##########################################################
seed : 42           # Random seed for reproducibility.