Commit ebdae45

revise Lipschitz constant computation
1 parent 16f7445 commit ebdae45

3 files changed: +51 / -45 lines

torch_staintools/functional/optimization/solver.py

Lines changed: 6 additions & 36 deletions
@@ -7,7 +7,7 @@
 from ..eps import get_eps
 import torch.nn.functional as F
 
-from ..utility import as_scalar
+from .sparse_util import as_scalar
 
 
 def coord_descent(x: torch.Tensor, z0: torch.Tensor, weight: torch.Tensor,
@@ -57,25 +57,6 @@ def cd_update(z, b):
         z = F.softshrink(b, alpha)
         return z
 
-def _lipschitz_constant(w: torch.Tensor):
-    """find the Lipscitz constant to compute the learning rate in ISTA
-
-    Args:
-        w: weights w in f(z) = ||Wz - x||^2
-
-    Returns:
-
-    """
-    # L = torch.linalg.norm(W, ord=2) ** 2
-    # W has nan
-    WtW = torch.matmul(w.t(), w)
-    WtW += torch.eye(WtW.size(0)).to(w.device) * get_eps(WtW)
-    L = torch.linalg.eigvalsh(WtW)[-1].squeeze()
-    L_is_finite = torch.isfinite(L).all()
-    L = torch.where(L_is_finite, L, torch.linalg.norm(w, ord=2) ** 2)
-    L = L.abs()
-    return L + torch.finfo(L.dtype).eps
-
 def rss_grad(z_k: torch.Tensor, x: torch.Tensor, weight: torch.Tensor):
     resid = torch.matmul(z_k, weight.T) - x
     return torch.matmul(resid, weight)
@@ -208,19 +189,6 @@ def fista_loop(
 
     return z
 
-def __collate_params(z0: torch.Tensor,
-                     x: torch.Tensor,
-                     lr: str| float,
-                     weight: torch.Tensor,
-                     alpha: float | torch.Tensor,
-                     tol: float) -> Tuple[torch.Tensor, torch.Tensor, float]:
-    if lr == 'auto':
-        L = _lipschitz_constant(weight)
-        lr = 1 / L
-    tol = z0.numel() * tol
-    alpha = as_scalar(alpha, x)
-    lr = as_scalar(lr, x)
-    return lr, alpha, tol
 
 def ista(x, z0, weight, alpha=0.01, lr: str | float = 'auto',
          maxiter: int = 50,
@@ -240,15 +208,17 @@ def ista(x, z0, weight, alpha=0.01, lr: str | float = 'auto',
     Returns:
 
     """
-    lr, alpha, tol = __collate_params(z0, x, lr, weight, alpha, tol)
+    # lr, alpha, tol = collate_params(z0, x, lr, weight, alpha, tol)
     z0 = z0.contiguous()
     x = x.contiguous()
     weight = weight.contiguous()
 
     return ista_loop(z0, x, weight, alpha, lr, tol, maxiter, positive_code)
 
 
-def fista(x, z0, weight, alpha=0.01, lr: str | float = 'auto',
+def fista(x: torch.Tensor, z0: torch.Tensor,
+          weight: torch.Tensor,
+          alpha: torch.Tensor, lr: str | float = 'auto',
          maxiter: int = 50,
          tol: float = 1e-5, positive_code: bool = False):
     """Fast ISTA solver
@@ -266,7 +236,7 @@ def fista(x, z0, weight, alpha=0.01, lr: str | float = 'auto',
     Returns:
 
    """
-    lr, alpha, tol = __collate_params(z0, x, lr, weight, alpha, tol)
+    # lr, alpha, tol = collate_params(z0, x, lr, weight, alpha, tol)
     z0 = z0.contiguous()
    x = x.contiguous()
     weight = weight.contiguous()
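Note on the change above: for f(z) = ||Wz - x||^2 the gradient is Lipschitz with constant L = lambda_max(W^T W) = ||W||_2^2, so the deleted eigvalsh-based estimate and the spectral-norm estimate that replaces it (moved to sparse_util.py, below) compute the same quantity. A minimal sketch, assuming only torch and not taken from the repo, that checks the equivalence:

    import torch

    # sketch, not repo code: the top eigenvalue of W^T W equals the squared
    # spectral norm (largest singular value) of W, so both estimates of the
    # Lipschitz constant of grad f(z) = ||Wz - x||^2 coincide
    w = torch.randn(64, 16, dtype=torch.float64)
    L_eig = torch.linalg.eigvalsh(w.T @ w)[-1]   # eigenvalues come in ascending order
    L_norm = torch.linalg.norm(w, ord=2) ** 2    # squared spectral norm
    assert torch.allclose(L_eig, L_norm)

With lr = 1 / L this satisfies the standard ISTA step-size condition lr <= 1/L, which is what makes the 'auto' learning rate safe.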

torch_staintools/functional/optimization/sparse_util.py

Lines changed: 45 additions & 2 deletions
@@ -1,8 +1,8 @@
-from typing import Optional, Literal, get_args
+from typing import Optional, Literal, get_args, Tuple
 import torch
 from torch.nn import functional as F
 from torch_staintools.constants import CONST
-
+from torch_staintools.functional.eps import get_eps
 
 METHOD_ISTA = Literal['ista']
 METHOD_FISTA = Literal['fista']
@@ -94,3 +94,46 @@ def validate_code(algorithm: METHOD_SPARSE,
     z0 = initialize_code(x, weight, mode=init, rng=rng)
     assert z0.shape == (n_samples, n_components)
     return z0
+
+
+def lipschitz_constant(w: torch.Tensor):
+    """find the Lipschitz constant to compute the learning rate in ISTA
+
+    Args:
+        w: weights w in f(z) = ||Wz - x||^2
+
+    Returns:
+
+    """
+    # L = torch.linalg.norm(W, ord=2) ** 2
+    # W has nan
+    # WtW = torch.matmul(w.t(), w)
+    # WtW += torch.eye(WtW.size(0)).to(w.device) * get_eps(WtW)
+    # L = torch.linalg.eigvalsh(WtW)[-1].squeeze()
+    # L_is_finite = torch.isfinite(L).all()
+    # L = torch.where(L_is_finite, L, torch.linalg.norm(w, ord=2) ** 2)
+    # L = L.abs()
+    L = torch.linalg.norm(w, ord=2) ** 2
+    return L + torch.finfo(L.dtype).eps
+
+
+def collate_params(z0: torch.Tensor,
+                   x: torch.Tensor,
+                   lr: str| float,
+                   weight: torch.Tensor,
+                   alpha: float | torch.Tensor,
+                   tol: float) -> Tuple[torch.Tensor, torch.Tensor, float]:
+    if lr == 'auto':
+        L = lipschitz_constant(weight)
+        lr = 1 / L
+    tol = z0.numel() * tol
+    alpha = as_scalar(alpha, x)
+    lr = as_scalar(lr, x)
+    return lr, alpha, tol
+
+
+def as_scalar(v: float | torch.Tensor, like: torch.Tensor) -> torch.Tensor:
+    if isinstance(v, torch.Tensor):
+        # will except on non-scalar
+        return v.to(device=like.device, dtype=like.dtype).reshape(())
+    return torch.tensor(v, device=like.device, dtype=like.dtype)
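A hypothetical usage sketch of the relocated helpers; the function names and module path come from this commit, while the shapes and values are purely illustrative:

    import torch
    from torch_staintools.functional.optimization.sparse_util import collate_params

    # illustrative shapes: 4 samples, 32 features, 8 dictionary atoms
    weight = torch.randn(32, 8)
    x = torch.randn(4, 32)
    z0 = torch.zeros(4, 8)

    # lr='auto' resolves to 1 / (||weight||_2^2 + eps); tol is scaled by z0.numel()
    lr, alpha, tol = collate_params(z0, x, 'auto', weight, alpha=0.01, tol=1e-5)
    assert lr.shape == () and alpha.shape == ()   # as_scalar yields 0-dim tensors

Grouping collate_params with lipschitz_constant and as_scalar keeps the solver module free of parameter plumbing; note that as of this commit the collate_params call in ista and fista is commented out, so an lr of 'auto' would need to be resolved by the caller until it is re-enabled.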

torch_staintools/functional/utility/implementation.py

Lines changed: 0 additions & 7 deletions
@@ -92,10 +92,3 @@ def nanstd(data: torch.Tensor, dim: Optional[int | tuple] = None,
     sum_dev2 = ((data - mean) ** 2).nansum(dim=dim, keepdim=True)
     # sqrt and normalize by corrected degrees of freedom
     return torch.sqrt(sum_dev2 / (non_nan_count - correction))
-
-
-def as_scalar(v: float | torch.Tensor, like: torch.Tensor) -> torch.Tensor:
-    if isinstance(v, torch.Tensor):
-        # will except on non-scalar
-        return v.to(device=like.device, dtype=like.dtype).reshape(())
-    return torch.tensor(v, device=like.device, dtype=like.dtype)
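For reference, the surviving context above is the tail of nanstd, which computes a NaN-aware standard deviation. The sketch below re-implements the visible lines for illustration and is not the repo function:

    import torch

    # NaN-aware std: count non-NaN entries, take a NaN-skipping mean, then
    # normalize the squared deviations by the corrected degrees of freedom
    data = torch.tensor([[1.0, 2.0, float('nan')],
                         [3.0, 4.0, 5.0],
                         [5.0, float('nan'), 7.0]])
    dim, correction = 0, 1
    non_nan_count = (~torch.isnan(data)).sum(dim=dim, keepdim=True)
    mean = data.nansum(dim=dim, keepdim=True) / non_nan_count
    sum_dev2 = ((data - mean) ** 2).nansum(dim=dim, keepdim=True)   # nansum skips NaNs
    std = torch.sqrt(sum_dev2 / (non_nan_count - correction))       # per-column std

The deletion here moves as_scalar out of the utility module and into sparse_util.py, matching the import change in solver.py above.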
