Skip to content

Commit 8dbf0f0

Browse files
committed
docs: docstring
1 parent 4da8245 commit 8dbf0f0

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

pytorch_optimizer/optimizer/muon.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,12 @@ class Muon(BaseOptimizer):
3535
:param ns_steps: int. the number of Newton-Schulz iterations to run. (5 is probably always enough)
3636
:param use_adjusted_lr: bool. whether to use adjusted learning rate, which is from the Moonlight.
3737
reference: https://github.com/MoonshotAI/Moonlight/blob/master/examples/toy_train.py
38-
:param adamw_params: The parameters to be optimized by AdamW. Any parameters in `muon_params` which are {0, 1}-D or
39-
are detected as being the embed or lm_head will be optimized by AdamW as well.
40-
:param adamw_lr: The learning rate for the internal AdamW.
41-
:param adamw_wd: The weight decay for the internal AdamW.
42-
:param adamw_eps: The epsilon for the internal AdamW.
38+
:param adamw_params: Optional[PARAMETERS]. The parameters to be optimized by AdamW. Any parameters in `muon_params`
39+
which are {0, 1}-D or are detected as being the embed or lm_head will be optimized by AdamW as well. It'd be
40+
better to create a separate AdamW optimizer instead of passing these parameters here.
41+
:param adamw_lr: float. The learning rate for the internal AdamW.
42+
:param adamw_wd: float. The weight decay for the internal AdamW.
43+
:param adamw_eps: float. The epsilon for the internal AdamW.
4344
"""
4445

4546
def __init__(
@@ -55,7 +56,7 @@ def __init__(
5556
use_adjusted_lr: bool = False,
5657
adamw_params: Optional[PARAMETERS] = None,
5758
adamw_lr: float = 3e-4,
58-
adamw_wd: float = 0,
59+
adamw_wd: float = 0.0,
5960
adamw_eps: float = 1e-8,
6061
**kwargs,
6162
):

0 commit comments

Comments
 (0)