We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent: ffeb153 · commit: 37524ed (copy full SHA for 37524ed)
pytorch_optimizer/optimizer/madgrad.py
@@ -116,11 +116,9 @@ def step(self, closure: CLOSURE = None) -> LOSS:
116
grad_sum_sq_masked = grad_sum_sq.sparse_mask(grad)
117
s_masked = s.sparse_mask(grad)
118
119
- # Compute x_0 from other known quantities
120
rms_masked_values = grad_sum_sq_masked._values().pow(1 / 3).add_(eps)
121
x0_masked_values = p_masked._values().addcdiv(s_masked._values(), rms_masked_values, value=1)
122
123
- # Dense + sparse op
124
grad_sq = grad * grad
125
grad_sum_sq.add_(grad_sq, alpha=_lambda)
126
grad_sum_sq_masked.add_(grad_sq, alpha=_lambda)
0 commit comments