
Commit b0146c2

[Feature] Implement AdaMuon optimizer (#395)
* feature: AdaMuon optimizer
* update: test cases
* build(deps): update dev-deps
* docs: AdaMuon optimizer
* docs: AdaMuon optimizer
* docs: v3.6.2 changelog
* fix: test_get_supported_optimizers
* fix: recipe
* fix: test_get_supported_optimizers
1 parent: 77098e9 · commit: b0146c2

File tree: 11 files changed (+316, -42 lines)

README.md

Lines changed: 3 additions & 2 deletions
@@ -10,7 +10,7 @@
 
 ## The reasons why you use `pytorch-optimizer`.
 
-* Wide range of supported optimizers. Currently, **108 optimizers (+ `bitsandbytes`, `qgalore`, `torchao`)**, **16 lr schedulers**, and **13 loss functions** are supported!
+* Wide range of supported optimizers. Currently, **109 optimizers (+ `bitsandbytes`, `qgalore`, `torchao`)**, **16 lr schedulers**, and **13 loss functions** are supported!
 * Including many variants such as `ADOPT`, `Cautious`, `AdamD`, `StableAdamW`, and `Gradient Centrailiaztion`
 * Easy to use, clean, and tested codes
 * Active maintenance

@@ -215,7 +215,8 @@ get_supported_optimizers(['adam*', 'ranger*'])
 | RACS & Alice | *Towards Efficient Optimizer Design for LLM via Structured Fisher Approximation with a Low-Rank Extension* | | <https://arxiv.org/pdf/2502.07752> | [cite](https://ui.adsabs.harvard.edu/abs/2025arXiv250207752G/exportcitation) |
 | VSGD | *Variational Stochastic Gradient Descent for Deep Neural Networks* | [github](https://github.com/generativeai-tue/vsgd) | <https://openreview.net/forum?id=xu4ATNjcdy> | [cite](https://github.com/generativeai-tue/vsgd/tree/main?tab=readme-ov-file#cite) |
 | SNSM | *Subset-Norm and Subspace-Momentum: Faster Memory-Efficient Adaptive Optimization with Convergence Guarantees* | [github](https://github.com/timmytonga/sn-sm) | <https://arxiv.org/abs/2411.07120> | [cite](https://ui.adsabs.harvard.edu/abs/2024arXiv241107120N/exportcitation) |
-| AdamC | Why Gradients Rapidly Increase Near the End of Training* | | <https://arxiv.org/abs/2506.02285> | [cite](https://ui.adsabs.harvard.edu/abs/2025arXiv250602285D/exportcitation) |
+| AdamC | *Why Gradients Rapidly Increase Near the End of Training* | | <https://arxiv.org/abs/2506.02285> | [cite](https://ui.adsabs.harvard.edu/abs/2025arXiv250602285D/exportcitation) |
+| AdaMuon | *Adaptive Muon Optimizer* | | <https://arxiv.org/abs/2507.11005v1> | [cite](https://ui.adsabs.harvard.edu/abs/2025arXiv250711005S/exportcitation) |

 ## Supported LR Scheduler
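As a quick illustration of the README change above, the new entry can be looked up with the same `get_supported_optimizers` helper shown in the hunk header. This is only a sketch: whether the helper returns class objects or plain names can differ between versions, so the membership check below is an assumption rather than the documented contract.

```python
from pytorch_optimizer import get_supported_optimizers

# Wildcard filters work as in the README example, e.g. ['adam*', 'ranger*'].
matches = get_supported_optimizers(['adamuon'])

# Depending on the version, entries may be classes or lowercase names;
# normalize before checking (assumption, not the documented contract).
names = [m.lower() if isinstance(m, str) else m.__name__.lower() for m in matches]
assert 'adamuon' in names
```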

docs/changelogs/v3.6.2.md

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+## Change Log
+
+### Feature
+
+* Implement `AdaMuon` optimizer. (#394, #395)
+    * [Adaptive Muon Optimizer](https://arxiv.org/abs/2507.11005v1)

docs/optimizer.md

Lines changed: 4 additions & 0 deletions
@@ -284,6 +284,10 @@
     :docstring:
     :members:

+::: pytorch_optimizer.AdaMuon
+    :docstring:
+    :members:
+
 ::: pytorch_optimizer.Nero
     :docstring:
     :members:

poetry.lock

Lines changed: 24 additions & 24 deletions
Generated file; diff not rendered.

pyproject.toml

Lines changed: 8 additions & 8 deletions
@@ -11,14 +11,14 @@ repository = "https://github.com/kozistr/pytorch_optimizer"
 documentation = "https://pytorch-optimizers.readthedocs.io/en/latest"
 keywords = [
     "pytorch", "deep-learning", "optimizer", "lr scheduler", "A2Grad", "Alice", "ASGD", "AccSGD", "AdaBelief",
-    "AdaBound", "AdaDelta", "AdaFactor", "AdaGC", "AdaMax", "AdamG", "AdaMod", "AdaNorm", "AdaPNM", "AdaSmooth",
-    "AdEMAMix", "Simplified-AdEMAMix", "ADOPT", "AdaHessian", "Adai", "Adalite", "AdaLomo", "AdamMini", "AdamP",
-    "AdamS", "Adan", "AggMo", "Aida", "AliG", "Amos", "Apollo", "APOLLO", "AvaGrad", "bSAM", "CAME", "DAdaptAdaGrad",
-    "DAdaptAdam", "DAdaptAdan", "DAdaptSGD", "DAdaptLion", "DeMo", "DiffGrad", "EXAdam", "FAdam", "Fira", "FOCUS",
-    "Fromage", "FTRL", "GaLore", "Grams", "Gravity", "GrokFast", "GSAM", "Kate", "Lamb", "LaProp", "LARS", "Lion",
-    "LOMO", "Lookahead", "MADGRAD", "MARS", "MSVAG", "Muno", "Nero", "NovoGrad", "OrthoGrad", "PAdam", "PCGrad", "PID",
-    "PNM", "Prodigy", "PSGD", "QHAdam", "QHM", "RACS", "RAdam", "Ranger", "Ranger21", "RotoGrad", "SAM", "GCSAM",
-    "LookSAM", "ScheduleFreeSGD", "ScheduleFreeAdamW", "ScheduleFreeRAdam", "SCION", "SGDP", "Shampoo",
+    "AdaBound", "AdaDelta", "AdaFactor", "AdaGC", "AdaMax", "AdaMuon", "AdamG", "AdaMod", "AdaNorm", "AdaPNM",
+    "AdaSmooth", "AdEMAMix", "Simplified-AdEMAMix", "ADOPT", "AdaHessian", "Adai", "Adalite", "AdaLomo", "AdamMini",
+    "AdamP", "AdamS", "Adan", "AggMo", "Aida", "AliG", "Amos", "Apollo", "APOLLO", "AvaGrad", "bSAM", "CAME",
+    "DAdaptAdaGrad", "DAdaptAdam", "DAdaptAdan", "DAdaptSGD", "DAdaptLion", "DeMo", "DiffGrad", "EXAdam", "FAdam",
+    "Fira", "FOCUS", "Fromage", "FTRL", "GaLore", "Grams", "Gravity", "GrokFast", "GSAM", "Kate", "Lamb", "LaProp",
+    "LARS", "Lion", "LOMO", "Lookahead", "MADGRAD", "MARS", "MSVAG", "Muno", "Nero", "NovoGrad", "OrthoGrad", "PAdam",
+    "PCGrad", "PID", "PNM", "Prodigy", "PSGD", "QHAdam", "QHM", "RACS", "RAdam", "Ranger", "Ranger21", "RotoGrad",
+    "SAM", "GCSAM", "LookSAM", "ScheduleFreeSGD", "ScheduleFreeAdamW", "ScheduleFreeRAdam", "SCION", "SGDP", "Shampoo",
     "ScalableShampoo", "SGDW", "SignSGD", "SM3", "SOAP", "SopihaH", "SPAM", "StableSPAM", "SRMM", "StableAdamW",
     "SWATS", "TAM", "Tiger", "TRAC", "VSGD", "WSAM", "Yogi", "BCE", "BCEFocal", "Focal", "FocalCosine", "SoftF1",
     "Dice", "LDAM", "Jaccard", "Bi-Tempered", "Tversky", "FocalTversky", "LovaszHinge", "bitsandbytes", "WSD",

pytorch_optimizer/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -89,6 +89,7 @@
     AdaMod,
     AdamP,
     AdamS,
+    AdaMuon,
     AdamW,
     AdamWSN,
     Adan,
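With the re-export above, `AdaMuon` is importable straight from the package root. Below is a minimal usage sketch; the `lr` value and the plain `model.parameters()` call are assumptions following the library's usual constructor pattern, and the authoritative argument list is the `pytorch_optimizer.AdaMuon` docstring registered in docs/optimizer.md.

```python
import torch

from pytorch_optimizer import AdaMuon  # re-exported at the package root by this commit

model = torch.nn.Sequential(torch.nn.Linear(64, 128), torch.nn.ReLU(), torch.nn.Linear(128, 10))

# Hyperparameters here are illustrative assumptions; see the AdaMuon docstring for the real ones.
optimizer = AdaMuon(model.parameters(), lr=1e-3)

# Standard PyTorch training step.
loss = model(torch.randn(8, 64)).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()
```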

pytorch_optimizer/optimizer/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -66,7 +66,7 @@
 from pytorch_optimizer.optimizer.madgrad import MADGRAD
 from pytorch_optimizer.optimizer.mars import MARS
 from pytorch_optimizer.optimizer.msvag import MSVAG
-from pytorch_optimizer.optimizer.muon import Muon
+from pytorch_optimizer.optimizer.muon import AdaMuon, Muon
 from pytorch_optimizer.optimizer.nero import Nero
 from pytorch_optimizer.optimizer.novograd import NovoGrad
 from pytorch_optimizer.optimizer.orthograd import OrthoGrad

@@ -322,6 +322,7 @@ def load_optimizer(optimizer: str) -> OPTIMIZER:
     RACS,
     Alice,
     VSGD,
+    AdaMuon,
 ]
 OPTIMIZERS: Dict[str, OPTIMIZER] = {str(optimizer.__name__).lower(): optimizer for optimizer in OPTIMIZER_LIST}
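Because `OPTIMIZERS` keys each class by its lowercased `__name__`, the new optimizer is also reachable by string, e.g. `'adamuon'`. A small sketch, assuming `load_optimizer` returns the optimizer class as its `-> OPTIMIZER` annotation suggests; the `lr` value is an illustrative assumption.

```python
import torch

from pytorch_optimizer import load_optimizer

# 'adamuon' == str(AdaMuon.__name__).lower(), matching the OPTIMIZERS dict built above.
optimizer_class = load_optimizer('adamuon')

model = torch.nn.Linear(32, 32)
optimizer = optimizer_class(model.parameters(), lr=1e-3)  # lr is an illustrative value
```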
