Merge pull request #260 from kozistr/refactor/code

kozistr · web-flow · commit 474510fed66d · 2024-07-21T15:01:45.000+09:00
[Update] Improve the performance
diff --git a/docs/changelogs/v3.1.0.md b/docs/changelogs/v3.1.0.md
@@ -9,12 +9,15 @@
     * you can use by `optimizer = load_optimizer('q_galore_adamw8bit')`
 * Support more bnb optimizers. (#258)
     * `bnb_paged_adam8bit`, `bnb_paged_adamw8bit`, `bnb_*_*32bit`.
+* Speed up `power_iteration()` by up to 40%. (#259)
+* Speed up `reg_noise()` (E-MCMC) by up to 120%. (#260)
 
 ### Refactor
 
-* Refactor `AdamMini`. (#258)
+* Refactor `AdamMini` optimizer. (#258)
 * Deprecate optional dependency, `bitsandbytes`. (#258)
 * Move `get_rms`, `approximate_sq_grad` functions to `BaseOptimizer` for reusability. (#258)
+* Refactor `shampoo_utils.py`. (#259)
 
 ### Bug
 
diff --git a/poetry.lock b/poetry.lock
diff --git a/pytorch_optimizer/optimizer/utils.py b/pytorch_optimizer/optimizer/utils.py
@@ -7,7 +7,7 @@
 import torch
 from torch import nn
 from torch.distributed import all_reduce
-from torch.nn import functional as f
+from torch.nn.functional import cosine_similarity
 from torch.nn.modules.batchnorm import _BatchNorm
 from torch.nn.utils import clip_grad_norm_
 
@@ -62,7 +62,7 @@ def to_real(x: torch.Tensor) -> torch.Tensor:
     return x.real if torch.is_complex(x) else x
 
 
-def normalize_gradient(x: torch.Tensor, use_channels: bool = False, epsilon: float = 1e-8):
+def normalize_gradient(x: torch.Tensor, use_channels: bool = False, epsilon: float = 1e-8) -> None:
     r"""Normalize gradient with stddev.
 
     :param x: torch.Tensor. gradient.
@@ -119,7 +119,7 @@ def cosine_similarity_by_view(
     """
     x = view_func(x)
     y = view_func(y)
-    return f.cosine_similarity(x, y, dim=1, eps=eps).abs_()
+    return cosine_similarity(x, y, dim=1, eps=eps).abs_()
 
 
 def clip_grad_norm(
@@ -315,6 +315,7 @@ def reduce_max_except_dim(x: torch.Tensor, dim: int) -> torch.Tensor:
     return x
 
 
+@torch.no_grad()
 def reg_noise(
     network1: nn.Module, network2: nn.Module, num_data: int, lr: float, eta: float = 8e-3, temperature: float = 1e-4
 ) -> Union[torch.Tensor, float]:
@@ -332,11 +333,14 @@ def reg_noise(
     reg_coef: float = 0.5 / (eta * num_data)
     noise_coef: float = math.sqrt(2.0 / lr / num_data * temperature)
 
-    loss = 0
-    for param1, param2 in zip(network1.parameters(), network2.parameters(), strict=True):
-        reg = torch.sub(param1, param2).pow_(2) * reg_coef
-        noise1 = param1 * torch.randn_like(param1) * noise_coef
-        noise2 = param2 * torch.randn_like(param2) * noise_coef
-        loss += torch.sum(reg - noise1 - noise2)
+    loss = torch.tensor(0.0, device=next(network1.parameters()).device)
+
+    for param1, param2 in zip(network1.parameters(), network2.parameters()):
+        reg = (param1 - param2).pow_(2).mul_(reg_coef).sum()
+
+        noise = param1 * torch.randn_like(param1)
+        noise.add_(param2 * torch.randn_like(param2))
+
+        loss.add_(reg - noise.mul_(noise_coef).sum())
 
     return loss
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -22,9 +22,9 @@ pathspec==0.12.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
 platformdirs==4.2.2 ; python_version >= "3.8" and python_full_version < "4.0.0"
 pluggy==1.5.0 ; python_version >= "3.8" and python_full_version < "4.0.0"
 pytest-cov==5.0.0 ; python_version >= "3.8" and python_full_version < "4.0.0"
-pytest==8.2.2 ; python_version >= "3.8" and python_full_version < "4.0.0"
-ruff==0.5.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
-sympy==1.13.0 ; python_version >= "3.8" and python_full_version < "4.0.0"
+pytest==8.3.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
+ruff==0.5.4 ; python_version >= "3.8" and python_full_version < "4.0.0"
+sympy==1.13.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
 tbb==2021.13.0 ; python_version >= "3.8" and python_full_version < "4.0.0" and platform_system == "Windows"
 tomli==2.0.1 ; python_version >= "3.8" and python_full_version <= "3.11.0a6"
 torch==2.3.1+cpu ; python_version >= "3.8" and python_full_version < "4.0.0"
diff --git a/requirements.txt b/requirements.txt
@@ -9,7 +9,7 @@ mkl==2021.4.0 ; python_version >= "3.8" and python_full_version < "4.0.0" and pl
 mpmath==1.3.0 ; python_version >= "3.8" and python_full_version < "4.0.0"
 networkx==3.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
 numpy==1.24.4 ; python_version >= "3.8" and python_full_version < "4.0.0"
-sympy==1.13.0 ; python_version >= "3.8" and python_full_version < "4.0.0"
+sympy==1.13.1 ; python_version >= "3.8" and python_full_version < "4.0.0"
 tbb==2021.13.0 ; python_version >= "3.8" and python_full_version < "4.0.0" and platform_system == "Windows"
 torch==2.3.1+cpu ; python_version >= "3.8" and python_full_version < "4.0.0"
 typing-extensions==4.12.2 ; python_version >= "3.8" and python_full_version < "4.0.0"