@@ -106,7 +106,7 @@ def configure_optimizer(self, options):
106
106
step_sizes = oo .step_sizes )
107
107
elif options .optimizer == "SGD" :
108
108
optimizer = optim .SGD (self .parameters (), lr = initial_rate , weight_decay = oo .weight_decay ,
109
- momentum = oo .momentum , dampening = oo .dampening )
109
+ momentum = oo .momentum , dampening = oo .dampening , nesterov = oo . nesterov )
110
110
return optimizer
111
111
112
112
def configure_lr (self , options , optimizer , ticks , total_iterations ):
@@ -478,7 +478,8 @@ def train(config, evaluate_only=False, outdir=".", detail=False, azureml=False):
478
478
model .cuda () # move the processing to GPU
479
479
480
480
start = time .time ()
481
- log = model .fit (training_data , validation_data , config .training , config .model .sparsify , device , detail , run )
481
+ log = model .fit (training_data , validation_data , config .training ,
482
+ config .model .sparsify , device , detail , run )
482
483
end = time .time ()
483
484
484
485
passed , total , rate = model .evaluate (training_data , batch_size , device )
0 commit comments