Commit 3a8c2b6

[skip ci] docs: docstring
1 parent 2efc5e3 commit 3a8c2b6

File tree

4 files changed: +12 -8 lines changed


pytorch_optimizer/adabound.py

Lines changed: 2 additions & 1 deletion
@@ -53,7 +53,8 @@ def __init__(
         :param weight_decouple: bool. the optimizer uses decoupled weight decay
             as in AdamW
         :param fixed_decay: bool.
-        :param eps: float. term added to the denominator to improve numerical stability
+        :param eps: float. term added to the denominator
+            to improve numerical stability
         :param weight_decay: float. weight decay (L2 penalty)
         :param amsbound: bool. whether to use the AMSBound variant
         """

pytorch_optimizer/adahessian.py

Lines changed: 2 additions & 1 deletion
@@ -47,7 +47,8 @@ def __init__(
         :param lr: float. learning rate.
         :param betas: BETAS. coefficients used for computing running averages
             of gradient and the squared hessian trace
-        :param eps: float. term added to the denominator to improve numerical stability
+        :param eps: float. term added to the denominator
+            to improve numerical stability
         :param weight_decay: float. weight decay (L2 penalty)
         :param hessian_power: float. exponent of the hessian trace
         :param update_each: int. compute the hessian trace approximation
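
The hessian_power and update_each parameters above refer to AdaHessian's periodically recomputed Hessian trace (diagonal) estimate. A self-contained sketch of a Hutchinson-style estimate, the standard way to approximate that quantity (toy loss and variable names are assumptions, not the library's implementation):

import torch

# Hutchinson-style estimate of the Hessian diagonal for a toy loss.
# For loss = 0.5 * sum(w^2) the true Hessian is the identity, so
# z * (H @ z) comes out as a vector of ones.
w = torch.randn(4, requires_grad=True)
loss = 0.5 * (w ** 2).sum()
grad = torch.autograd.grad(loss, w, create_graph=True)[0]

z = torch.randint(0, 2, w.shape, dtype=w.dtype) * 2 - 1   # Rademacher +/-1 probe
hz = torch.autograd.grad(grad, w, grad_outputs=z)[0]      # Hessian-vector product
hessian_diag_estimate = z * hz                            # E[z * Hz] approximates diag(H)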

pytorch_optimizer/adamp.py

Lines changed: 2 additions & 1 deletion
@@ -47,7 +47,8 @@ def __init__(
         :param lr: float. learning rate.
         :param betas: BETAS. coefficients used for computing running averages
             of gradient and the squared hessian trace
-        :param eps: float. term added to the denominator to improve numerical stability
+        :param eps: float. term added to the denominator
+            to improve numerical stability
         :param weight_decay: float. weight decay (L2 penalty)
         :param delta: float. threshold that determines
             whether a set of parameters is scale invariant or not

pytorch_optimizer/diffgrad.py

Lines changed: 6 additions & 5 deletions
@@ -43,7 +43,8 @@ def __init__(
         :param lr: float. learning rate.
         :param betas: BETAS. coefficients used for computing running averages
             of gradient and the squared hessian trace
-        :param eps: float. term added to the denominator to improve numerical stability
+        :param eps: float. term added to the denominator
+            to improve numerical stability
         :param weight_decay: float. weight decay (L2 penalty)
         """

@@ -60,11 +61,11 @@ def __init__(
         super().__init__(params, defaults)
 
     def check_valid_parameters(self):
-        if 0.0 > self.lr:
+        if self.lr < 0.0:
             raise ValueError(f'Invalid learning rate : {self.lr}')
-        if 0.0 > self.eps:
+        if self.eps < 0.0:
             raise ValueError(f'Invalid eps : {self.eps}')
-        if 0.0 > self.weight_decay:
+        if self.weight_decay < 0.0:
             raise ValueError(f'Invalid weight_decay : {self.weight_decay}')
         if not 0.0 <= self.betas[0] < 1.0:
             raise ValueError(f'Invalid beta_0 : {self.betas[0]}')
@@ -87,7 +88,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:
                 grad = p.grad.data
                 if grad.is_sparse:
                     raise RuntimeError(
-                        'diffGrad does not support sparse gradients, please consider SparseAdam instead'
+                        'diffGrad does not support sparse gradients'
                     )
 
                 state = self.state[p]
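
The rewritten checks above only flip the comparisons so the attribute reads first; the accepted ranges are unchanged. The same validation pattern as a standalone sketch (a hypothetical helper, not the class in this repository):

def check_valid_parameters(lr: float, eps: float, weight_decay: float, betas: tuple):
    # Non-negative hyper-parameters, and beta_0 restricted to [0, 1).
    if lr < 0.0:
        raise ValueError(f'Invalid learning rate : {lr}')
    if eps < 0.0:
        raise ValueError(f'Invalid eps : {eps}')
    if weight_decay < 0.0:
        raise ValueError(f'Invalid weight_decay : {weight_decay}')
    if not 0.0 <= betas[0] < 1.0:
        raise ValueError(f'Invalid beta_0 : {betas[0]}')

check_valid_parameters(lr=1e-3, eps=1e-8, weight_decay=0.0, betas=(0.9, 0.999))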
