Skip to content

Commit 053eee5

Browse files
committed
Add check_overflow even when loss scaling is not enabled
1 parent 88601be commit 053eee5

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

bmtrain/optim/optim_manager.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,12 @@ def step(self):
136136
self.zero_grad()
137137
return
138138
for optimizer, lr_scheduler in zip(self.optimizers, self.lr_schedulers):
139+
try:
140+
check_overflow(optimizer.param_groups)
141+
except OverflowError:
142+
has_overflow = True
143+
print_rank("Gradient overflow, change scale from %lf to %lf" % (self.loss_scale, self.loss_scale / self.loss_scale_factor))
144+
break
139145
if hasattr(optimizer, "_bmtrain_optimizer") and optimizer._bmtrain_optimizer:
140146
optimizer.step(scale=self.loss_scale)
141147
else:

0 commit comments

Comments
 (0)