Commit 1ca581f

🚲 Added gradient_accumulation_steps to all configs, and a clearer note for the batch_size parameter.
1 parent 3e1788b

18 files changed: +48 −32 lines


examples/fastspeech/conf/fastspeech.v1.yaml
Lines changed: 3 additions & 2 deletions

@@ -46,7 +46,7 @@ fastspeech_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with asuming that gradient_accumulation_steps is 1
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -60,7 +60,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable
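Every remaining config below receives the same two edits, so it is worth spelling out what the new knob does: the trainer runs gradient_accumulation_steps micro-batches of batch_size samples each, sums their gradients, and only then applies a single optimizer update, so the effective batch per GPU is batch_size × gradient_accumulation_steps. The following is a minimal sketch of that technique, assuming TensorFlow 2.x; it is not the repository's Trainer code, and model, optimizer, loss_fn, and dataset_iter are hypothetical placeholders.

    import tensorflow as tf

    def accumulated_train_step(model, optimizer, loss_fn, dataset_iter,
                               accum_steps=1):
        # One zero-initialized accumulator per trainable variable.
        accum_grads = [tf.zeros_like(v) for v in model.trainable_variables]
        total_loss = 0.0
        for _ in range(accum_steps):
            x, y = next(dataset_iter)  # one micro-batch of `batch_size` samples
            with tf.GradientTape() as tape:
                # Dividing by accum_steps makes the accumulated (summed)
                # gradient equal the mean gradient over the effective batch.
                loss = loss_fn(y, model(x, training=True)) / accum_steps
            grads = tape.gradient(loss, model.trainable_variables)
            accum_grads = [a if g is None else a + g
                           for a, g in zip(accum_grads, grads)]
            total_loss += loss
        # A single optimizer update per accum_steps micro-batches.
        optimizer.apply_gradients(zip(accum_grads, model.trainable_variables))
        return total_loss

With the default gradient_accumulation_steps: 1 added by this commit, the loop runs once and training behaves exactly as before.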

examples/fastspeech/conf/fastspeech.v3.yaml
Lines changed: 3 additions & 2 deletions

@@ -46,7 +46,7 @@ fastspeech_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -60,7 +60,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2/conf/fastspeech2.baker.v2.yaml
Lines changed: 3 additions & 2 deletions

@@ -48,7 +48,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -62,7 +62,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2/conf/fastspeech2.kss.v1.yaml
Lines changed: 3 additions & 2 deletions

@@ -47,7 +47,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -61,7 +61,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2/conf/fastspeech2.kss.v2.yaml
Lines changed: 3 additions & 2 deletions

@@ -48,7 +48,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -62,7 +62,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2/conf/fastspeech2.v1.yaml
Lines changed: 3 additions & 2 deletions

@@ -46,7 +46,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -60,7 +60,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2/conf/fastspeech2.v2.yaml
Lines changed: 3 additions & 2 deletions

@@ -47,7 +47,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16              # Batch size.
+batch_size: 16              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 32    # remove all targets has mel_length <= 32
@@ -61,7 +61,8 @@ optimizer_params:
     decay_steps: 150000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/fastspeech2_libritts/conf/fastspeech2libritts.yaml
Lines changed: 3 additions & 2 deletions

@@ -46,7 +46,7 @@ fastspeech2_params:
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 32              # Batch size.
+batch_size: 32              # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 remove_short_samples: true  # Whether to remove samples the length of which are less than batch_max_steps.
 allow_cache: true           # Whether to allow cache in dataset. If true, it requires cpu memory.
 mel_length_threshold: 48    # remove all targets has mel_length <= 32
@@ -60,7 +60,8 @@ optimizer_params:
     decay_steps: 120000     # < train_max_steps is recommend.
     warmup_proportion: 0.02
     weight_decay: 0.001
-
+
+gradient_accumulation_steps: 1
 var_train_expr: null        # trainable variable expr (eg. 'embeddings|encoder|decoder' )
                             # must separate by |. if var_train_expr is null then we
                             # training all variable

examples/melgan.stft/conf/melgan.stft.v1.yaml
Lines changed: 2 additions & 2 deletions

@@ -63,7 +63,7 @@ lambda_adv: 4.0
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16                # Batch size.
+batch_size: 16                # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 batch_max_steps: 8192         # Length of each audio in batch for training. Make sure dividable by hop_size.
 batch_max_steps_valid: 81920  # Length of each audio for validation. Make sure dividable by hope_size.
 remove_short_samples: true    # Whether to remove samples the length of which are less than batch_max_steps.
@@ -86,7 +86,7 @@ discriminator_optimizer_params:
     boundaries: [0]           # after resume and start training discriminator, global steps is 100k, but local discriminator step is 0
     values: [0.0001, 0.0001]  # learning rate each interval.
 
-
+gradient_accumulation_steps: 1
 ###########################################################
 #                    INTERVAL SETTING                     #
 ###########################################################

examples/melgan/conf/melgan.v1.yaml
Lines changed: 2 additions & 1 deletion

@@ -53,7 +53,7 @@ lambda_feat_match: 10.0
 ###########################################################
 #                   DATA LOADER SETTING                   #
 ###########################################################
-batch_size: 16                # Batch size.
+batch_size: 16                # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
 batch_max_steps: 8192         # Length of each audio in batch for training. Make sure dividable by hop_size.
 batch_max_steps_valid: 81920  # Length of each audio for validation. Make sure dividable by hope_size.
 remove_short_samples: true    # Whether to remove samples the length of which are less than batch_max_steps.
@@ -73,6 +73,7 @@ discriminator_optimizer_params:
     beta_1: 0.5
     beta_2: 0.9
 
+gradient_accumulation_steps: 1
 ###########################################################
 #                    INTERVAL SETTING                     #
 ###########################################################
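As a closing sanity check, the two knobs compose multiplicatively with the number of replicas. The arithmetic below is a hypothetical back-of-the-envelope example; only the batch_size default comes from the configs above, the other numbers are made up for illustration.

    batch_size = 16                  # per-GPU micro-batch (from the configs)
    gradient_accumulation_steps = 2  # raise this, not batch_size, when GPU
                                     # memory is the bottleneck
    num_gpus = 2                     # e.g. replicas under a mirrored strategy
    effective_batch = batch_size * gradient_accumulation_steps * num_gpus
    print(effective_batch)           # -> 64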
