11"""Utilities module."""
22
33import warnings
4-
4+ from numbers import Number
5+ from typing import NamedTuple
6+ from collections import namedtuple
57import numpy as np
68from numpy .lib .stride_tricks import sliding_window_view
79
+# Named tuples returned by the functions below:
+# compute_svd returns an "SVD" tuple,
+# compute_tlsq returns a "TLSQ" tuple.
+SVD = namedtuple("SVD", ["U", "s", "V"])
+TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"])
+
+
+def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int:
+    """
+    Singular Value Hard Threshold.
+
+    :param sigma_svd: Singular values computed by the SVD.
+    :type sigma_svd: np.ndarray
+    :param rows: Number of rows of the original data matrix.
+    :type rows: int
+    :param cols: Number of columns of the original data matrix.
+    :type cols: int
+    :return: Computed rank.
+    :rtype: int

-def compute_rank(X, svd_rank=0):
+    References:
+    Gavish, Matan, and David L. Donoho, The optimal hard threshold for
+    singular values is 4/sqrt(3), IEEE Transactions on Information Theory
+    60.8 (2014): 5040-5053.
+    https://ieeexplore.ieee.org/document/6846297
+    """
+    beta = np.divide(*sorted((rows, cols)))
+    omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43
+    tau = np.median(sigma_svd) * omega
+    rank = np.sum(sigma_svd > tau)
+
+    if rank == 0:
+        warnings.warn(
+            "SVD optimal rank is 0. The largest singular values are "
+            "indistinguishable from noise. Setting rank truncation to 1.",
+            RuntimeWarning,
+        )
+        rank = 1
+
+    return rank
+
+
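To make the threshold concrete, here is a minimal, self-contained sketch of the rule `_svht` implements; the matrix size, rank, and noise level are invented for the example.

import numpy as np

# Assumed test case: a rank-5 matrix plus small Gaussian noise.
rng = np.random.default_rng(0)
rows, cols = 200, 100
X = rng.standard_normal((rows, 5)) @ rng.standard_normal((5, cols))
X = X + 1e-3 * rng.standard_normal((rows, cols))
s = np.linalg.svd(X, compute_uv=False)

# Same rule as above: tau = median(s) * omega(beta), with beta the min/max shape ratio.
beta = min(rows, cols) / max(rows, cols)
omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43
tau = np.median(s) * omega
print(int(np.sum(s > tau)))  # expected to land at (or near) the true rank, 5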
+def _compute_rank(
+    sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Number
+) -> int:
     """
     Rank computation for the truncated Singular Value Decomposition.
-    :param numpy.ndarray X: the matrix to decompose.
+
+    :param sigma_svd: 1D array of singular values computed by the SVD.
+    :type sigma_svd: np.ndarray
+    :param rows: Number of rows of the original matrix.
+    :type rows: int
+    :param cols: Number of columns of the original matrix.
+    :type cols: int
     :param svd_rank: the rank for the truncation; if 0, the method computes
         the optimal rank and uses it for truncation; if positive integer,
         the method uses the argument for the truncation; if float between 0
@@ -19,50 +70,70 @@ def compute_rank(X, svd_rank=0):
     :type svd_rank: int or float
     :return: the computed rank truncation.
     :rtype: int
+
     References:
     Gavish, Matan, and David L. Donoho, The optimal hard threshold for
     singular values is 4/sqrt(3), IEEE Transactions on Information Theory
     60.8 (2014): 5040-5053.
     """
-    U, s, _ = np.linalg.svd(X, full_matrices=False)
-
-    def omega(x):
-        return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43
-
     if svd_rank == 0:
-        beta = np.divide(*sorted(X.shape))
-        tau = np.median(s) * omega(beta)
-        rank = np.sum(s > tau)
-        if rank == 0:
-            warnings.warn(
-                "SVD optimal rank is 0. The largest singular values are "
-                "indistinguishable from noise. Setting rank truncation to 1.",
-                RuntimeWarning,
-            )
-            rank = 1
+        rank = _svht(sigma_svd, rows, cols)
     elif 0 < svd_rank < 1:
-        cumulative_energy = np.cumsum(s**2 / (s**2).sum())
+        cumulative_energy = np.cumsum(sigma_svd**2 / (sigma_svd**2).sum())
         rank = np.searchsorted(cumulative_energy, svd_rank) + 1
     elif svd_rank >= 1 and isinstance(svd_rank, int):
-        rank = min(svd_rank, U.shape[1])
+        rank = min(svd_rank, sigma_svd.size)
     else:
-        rank = min(X.shape)
+        rank = min(rows, cols)

     return rank


-def compute_tlsq(X, Y, tlsq_rank):
+def compute_rank(X: np.ndarray, svd_rank: Number = 0) -> int:
+    """
+    Rank computation for the truncated Singular Value Decomposition.
+
+    :param X: the matrix to decompose.
+    :type X: np.ndarray
+    :param svd_rank: the rank for the truncation; if 0, the method computes
+        the optimal rank and uses it for truncation; if positive integer,
+        the method uses the argument for the truncation; if float between 0
+        and 1, the rank is the number of the biggest singular values that
+        are needed to reach the 'energy' specified by `svd_rank`; if -1,
+        the method does not compute truncation. Default is 0.
+    :type svd_rank: int or float
+    :return: the computed rank truncation.
+    :rtype: int
+
+    References:
+    Gavish, Matan, and David L. Donoho, The optimal hard threshold for
+    singular values is 4/sqrt(3), IEEE Transactions on Information Theory
+    60.8 (2014): 5040-5053.
+    """
+    _, s, _ = np.linalg.svd(X, full_matrices=False)
+    return _compute_rank(s, X.shape[0], X.shape[1], svd_rank)
+
+
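Below is a hedged usage sketch of the new `compute_rank` signature and the `svd_rank` modes it accepts; the module name `utils` and the random test matrix are assumptions made for the example.

import numpy as np

from utils import compute_rank  # assumed import path for this sketch

rng = np.random.default_rng(1)
X = rng.standard_normal((50, 4)) @ rng.standard_normal((4, 30))  # rank-4 data

print(compute_rank(X, svd_rank=0))    # optimal rank via the hard threshold
print(compute_rank(X, svd_rank=0.9))  # smallest rank capturing 90% of the energy
print(compute_rank(X, svd_rank=10))   # integer truncation, capped at the number of singular values
print(compute_rank(X, svd_rank=-1))   # no truncation: min(X.shape)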
+def compute_tlsq(
+    X: np.ndarray, Y: np.ndarray, tlsq_rank: int
+) -> NamedTuple(
+    "TLSQ", [("X_denoised", np.ndarray), ("Y_denoised", np.ndarray)]
+):
     """
     Compute Total Least Squares.

-    :param numpy.ndarray X: the first matrix;
-    :param numpy.ndarray Y: the second matrix;
-    :param int tlsq_rank: the rank for the truncation; If 0, the method
+    :param X: the first matrix;
+    :type X: np.ndarray
+    :param Y: the second matrix;
+    :type Y: np.ndarray
+    :param tlsq_rank: the rank for the truncation; if 0, the method
         does not compute any noise reduction; if positive number, the
         method uses the argument for the SVD truncation used in the TLSQ
         method.
+    :type tlsq_rank: int
     :return: the denoised matrix X, the denoised matrix Y
-    :rtype: numpy.ndarray, numpy.ndarray
+    :rtype: NamedTuple("TLSQ", [('X_denoised', np.ndarray),
+        ('Y_denoised', np.ndarray)])

     References:
     https://arxiv.org/pdf/1703.11004.pdf
@@ -76,14 +147,19 @@ def compute_tlsq(X, Y, tlsq_rank):
     rank = min(tlsq_rank, V.shape[0])
     VV = V[:rank, :].conj().T.dot(V[:rank, :])

-    return X.dot(VV), Y.dot(VV)
+    return TLSQ(X.dot(VV), Y.dot(VV))


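The denoised matrices now come back as a `TLSQ` named tuple, which still unpacks like the old two-element tuple; a hedged sketch follows, where the module name `utils` and the random data are assumptions.

import numpy as np

from utils import compute_tlsq  # assumed import path for this sketch

rng = np.random.default_rng(2)
snapshots = rng.standard_normal((16, 41))
X, Y = snapshots[:, :-1], snapshots[:, 1:]  # time-shifted snapshot pairs

result = compute_tlsq(X, Y, tlsq_rank=5)
X_denoised, Y_denoised = result                     # unpacks positionally, as before
print(result.X_denoised.shape)                      # or access the fields by name
print(np.allclose(X_denoised, result.X_denoised))   # True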
-def compute_svd(X, svd_rank=0):
+def compute_svd(
+    X: np.ndarray, svd_rank: Number = 0
+) -> NamedTuple(
+    "SVD", [("U", np.ndarray), ("s", np.ndarray), ("V", np.ndarray)]
+):
     """
     Truncated Singular Value Decomposition.

-    :param numpy.ndarray X: the matrix to decompose.
+    :param X: the matrix to decompose.
+    :type X: np.ndarray
     :param svd_rank: the rank for the truncation; if 0, the method computes
         the optimal rank and uses it for truncation; if positive integer,
         the method uses the argument for the truncation; if float between 0
@@ -93,25 +169,27 @@ def compute_svd(X, svd_rank=0):
     :type svd_rank: int or float
     :return: the truncated left-singular vectors matrix, the truncated
         singular values array, the truncated right-singular vectors matrix.
-    :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
+    :rtype: NamedTuple("SVD", [('U', np.ndarray),
+        ('s', np.ndarray),
+        ('V', np.ndarray)])

     References:
     Gavish, Matan, and David L. Donoho, The optimal hard threshold for
     singular values is 4/sqrt(3), IEEE Transactions on Information Theory
     60.8 (2014): 5040-5053.
     """
-    rank = compute_rank(X, svd_rank)
     U, s, V = np.linalg.svd(X, full_matrices=False)
+    rank = _compute_rank(s, X.shape[0], X.shape[1], svd_rank)
     V = V.conj().T

     U = U[:, :rank]
     V = V[:, :rank]
     s = s[:rank]

-    return U, s, V
+    return SVD(U, s, V)


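Similarly, `compute_svd` now returns an `SVD` named tuple; here is a short hedged sketch of truncation and reconstruction, where the module name `utils` and the test matrix are assumptions.

import numpy as np

from utils import compute_svd  # assumed import path for this sketch

rng = np.random.default_rng(3)
X = rng.standard_normal((64, 6)) @ rng.standard_normal((6, 100))  # exactly rank 6

U, s, V = compute_svd(X, svd_rank=6)    # unpacks in the order U, s, V
X_approx = U @ np.diag(s) @ V.conj().T  # rank-6 reconstruction
print(U.shape, s.shape, V.shape)        # (64, 6) (6,) (100, 6)
print(np.allclose(X, X_approx))         # True, since X has rank 6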
-def pseudo_hankel_matrix(X: np.ndarray, d: int):
+def pseudo_hankel_matrix(X: np.ndarray, d: int) -> np.ndarray:
     """
     Arrange the snapshots in the matrix `X` into the (pseudo) Hankel
     matrix. The attribute `d` controls the number of snapshots from `X` in