11"""Utilities module."""
22
33import warnings
4-
4+ from numbers import Number
5+ from typing import NamedTuple
6+ from collections import namedtuple
57import numpy as np
68from numpy .lib .stride_tricks import sliding_window_view
79
+# Named tuples returned by the functions below:
+# compute_svd returns an "SVD" tuple,
+# compute_tlsq returns a "TLSQ" tuple.
+SVD = namedtuple("SVD", ["U", "s", "V"])
+TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"])
+
+
+def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int:
+    """
+    Singular Value Hard Threshold.
+
+    :param sigma_svd: Singular values computed by the SVD.
+    :type sigma_svd: np.ndarray
+    :param rows: Number of rows of the original data matrix.
+    :type rows: int
+    :param cols: Number of columns of the original data matrix.
+    :type cols: int
+    :return: Computed rank.
+    :rtype: int

-def compute_rank(X, svd_rank=0):
+    References:
+    Gavish, Matan, and David L. Donoho, The optimal hard threshold for
+    singular values is 4/sqrt(3), IEEE Transactions on Information Theory
+    60.8 (2014): 5040-5053.
+    https://ieeexplore.ieee.org/document/6846297
+    """
+    beta = np.divide(*sorted((rows, cols)))
+    omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43
+    tau = np.median(sigma_svd) * omega
+    rank = np.sum(sigma_svd > tau)
+
+    if rank == 0:
+        warnings.warn(
+            "SVD optimal rank is 0. The largest singular values are "
+            "indistinguishable from noise. Setting rank truncation to 1.",
+            RuntimeWarning,
+        )
+        rank = 1
+
+    return rank
+
+
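To make the threshold concrete, here is a minimal, self-contained sketch of the rule `_svht` implements; the matrix size, rank, and noise level are invented for the example.

import numpy as np

# Assumed test case: a rank-5 matrix plus small Gaussian noise.
rng = np.random.default_rng(0)
rows, cols = 200, 100
X = rng.standard_normal((rows, 5)) @ rng.standard_normal((5, cols))
X = X + 1e-3 * rng.standard_normal((rows, cols))
s = np.linalg.svd(X, compute_uv=False)

# Same rule as above: tau = median(s) * omega(beta), with beta the min/max shape ratio.
beta = min(rows, cols) / max(rows, cols)
omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43
tau = np.median(s) * omega
print(int(np.sum(s > tau)))  # expected to land at (or near) the true rank, 5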
+def _compute_rank(
+    sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Number
+) -> int:
     """
     Rank computation for the truncated Singular Value Decomposition.
-    :param numpy.ndarray X: the matrix to decompose.
+
+    :param sigma_svd: 1D array of singular values computed by the SVD.
+    :type sigma_svd: np.ndarray
+    :param rows: Number of rows of the original matrix.
+    :type rows: int
+    :param cols: Number of columns of the original matrix.
+    :type cols: int
     :param svd_rank: the rank for the truncation; if 0, the method computes
         the optimal rank and uses it for truncation; if positive integer,
         the method uses the argument for the truncation; if float between 0
@@ -19,50 +70,70 @@ def compute_rank(X, svd_rank=0):
     :type svd_rank: int or float
     :return: the computed rank truncation.
     :rtype: int
+
     References:
     Gavish, Matan, and David L. Donoho, The optimal hard threshold for
     singular values is 4/sqrt(3), IEEE Transactions on Information Theory
     60.8 (2014): 5040-5053.
     """
-    U, s, _ = np.linalg.svd(X, full_matrices=False)
-
-    def omega(x):
-        return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43
-
     if svd_rank == 0:
-        beta = np.divide(*sorted(X.shape))
-        tau = np.median(s) * omega(beta)
-        rank = np.sum(s > tau)
-        if rank == 0:
-            warnings.warn(
-                "SVD optimal rank is 0. The largest singular values are "
-                "indistinguishable from noise. Setting rank truncation to 1.",
-                RuntimeWarning,
-            )
-            rank = 1
+        rank = _svht(sigma_svd, rows, cols)
     elif 0 < svd_rank < 1:
-        cumulative_energy = np.cumsum(s**2 / (s**2).sum())
+        cumulative_energy = np.cumsum(sigma_svd**2 / (sigma_svd**2).sum())
         rank = np.searchsorted(cumulative_energy, svd_rank) + 1
     elif svd_rank >= 1 and isinstance(svd_rank, int):
-        rank = min(svd_rank, U.shape[1])
+        rank = min(svd_rank, sigma_svd.size)
     else:
-        rank = min(X.shape)
+        rank = min(rows, cols)

     return rank


-def compute_tlsq(X, Y, tlsq_rank):
+def compute_rank(X: np.ndarray, svd_rank: Number = 0) -> int:
+    """
+    Rank computation for the truncated Singular Value Decomposition.
+
+    :param X: the matrix to decompose.
+    :type X: np.ndarray
+    :param svd_rank: the rank for the truncation; if 0, the method computes
+        the optimal rank and uses it for truncation; if positive integer,
+        the method uses the argument for the truncation; if float between 0
+        and 1, the rank is the number of the biggest singular values that
+        are needed to reach the 'energy' specified by `svd_rank`; if -1,
+        the method does not compute truncation. Default is 0.
+    :type svd_rank: int or float
+    :return: the computed rank truncation.
+    :rtype: int
+
+    References:
+    Gavish, Matan, and David L. Donoho, The optimal hard threshold for
+    singular values is 4/sqrt(3), IEEE Transactions on Information Theory
+    60.8 (2014): 5040-5053.
+    """
+    _, s, _ = np.linalg.svd(X, full_matrices=False)
+    return _compute_rank(s, X.shape[0], X.shape[1], svd_rank)
+
+
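Below is a hedged usage sketch of the new `compute_rank` signature and the `svd_rank` modes it accepts; the module name `utils` and the random test matrix are assumptions made for the example.

import numpy as np

from utils import compute_rank  # assumed import path for this sketch

rng = np.random.default_rng(1)
X = rng.standard_normal((50, 4)) @ rng.standard_normal((4, 30))  # rank-4 data

print(compute_rank(X, svd_rank=0))    # optimal rank via the hard threshold
print(compute_rank(X, svd_rank=0.9))  # smallest rank capturing 90% of the energy
print(compute_rank(X, svd_rank=10))   # integer truncation, capped at the number of singular values
print(compute_rank(X, svd_rank=-1))   # no truncation: min(X.shape)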
+def compute_tlsq(
+    X: np.ndarray, Y: np.ndarray, tlsq_rank: int
+) -> NamedTuple(
+    "TLSQ", [("X_denoised", np.ndarray), ("Y_denoised", np.ndarray)]
+):
     """
     Compute Total Least Squares.

-    :param numpy.ndarray X: the first matrix;
-    :param numpy.ndarray Y: the second matrix;
-    :param int tlsq_rank: the rank for the truncation; If 0, the method
+    :param X: the first matrix;
+    :type X: np.ndarray
+    :param Y: the second matrix;
+    :type Y: np.ndarray
+    :param tlsq_rank: the rank for the truncation; if 0, the method
         does not compute any noise reduction; if positive number, the
         method uses the argument for the SVD truncation used in the TLSQ
         method.
+    :type tlsq_rank: int
     :return: the denoised matrix X, the denoised matrix Y
-    :rtype: numpy.ndarray, numpy.ndarray
+    :rtype: NamedTuple("TLSQ", [('X_denoised', np.ndarray),
+        ('Y_denoised', np.ndarray)])

     References:
     https://arxiv.org/pdf/1703.11004.pdf
@@ -76,14 +147,19 @@ def compute_tlsq(X, Y, tlsq_rank):
     rank = min(tlsq_rank, V.shape[0])
     VV = V[:rank, :].conj().T.dot(V[:rank, :])

-    return X.dot(VV), Y.dot(VV)
+    return TLSQ(X.dot(VV), Y.dot(VV))


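The denoised matrices now come back as a `TLSQ` named tuple, which still unpacks like the old two-element tuple; a hedged sketch follows, where the module name `utils` and the random data are assumptions.

import numpy as np

from utils import compute_tlsq  # assumed import path for this sketch

rng = np.random.default_rng(2)
snapshots = rng.standard_normal((16, 41))
X, Y = snapshots[:, :-1], snapshots[:, 1:]  # time-shifted snapshot pairs

result = compute_tlsq(X, Y, tlsq_rank=5)
X_denoised, Y_denoised = result                     # unpacks positionally, as before
print(result.X_denoised.shape)                      # or access the fields by name
print(np.allclose(X_denoised, result.X_denoised))   # True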
-def compute_svd(X, svd_rank=0):
+def compute_svd(
+    X: np.ndarray, svd_rank: Number = 0
+) -> NamedTuple(
+    "SVD", [("U", np.ndarray), ("s", np.ndarray), ("V", np.ndarray)]
+):
     """
     Truncated Singular Value Decomposition.

-    :param numpy.ndarray X: the matrix to decompose.
+    :param X: the matrix to decompose.
+    :type X: np.ndarray
     :param svd_rank: the rank for the truncation; if 0, the method computes
         the optimal rank and uses it for truncation; if positive integer,
         the method uses the argument for the truncation; if float between 0
@@ -93,25 +169,27 @@ def compute_svd(X, svd_rank=0):
     :type svd_rank: int or float
     :return: the truncated left-singular vectors matrix, the truncated
         singular values array, the truncated right-singular vectors matrix.
-    :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
+    :rtype: NamedTuple("SVD", [('U', np.ndarray),
+        ('s', np.ndarray),
+        ('V', np.ndarray)])

     References:
     Gavish, Matan, and David L. Donoho, The optimal hard threshold for
     singular values is 4/sqrt(3), IEEE Transactions on Information Theory
     60.8 (2014): 5040-5053.
     """
-    rank = compute_rank(X, svd_rank)
     U, s, V = np.linalg.svd(X, full_matrices=False)
+    rank = _compute_rank(s, X.shape[0], X.shape[1], svd_rank)
     V = V.conj().T

     U = U[:, :rank]
     V = V[:, :rank]
     s = s[:rank]

-    return U, s, V
+    return SVD(U, s, V)


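Similarly, `compute_svd` now returns an `SVD` named tuple; here is a short hedged sketch of truncation and reconstruction, where the module name `utils` and the test matrix are assumptions.

import numpy as np

from utils import compute_svd  # assumed import path for this sketch

rng = np.random.default_rng(3)
X = rng.standard_normal((64, 6)) @ rng.standard_normal((6, 100))  # exactly rank 6

U, s, V = compute_svd(X, svd_rank=6)    # unpacks in the order U, s, V
X_approx = U @ np.diag(s) @ V.conj().T  # rank-6 reconstruction
print(U.shape, s.shape, V.shape)        # (64, 6) (6,) (100, 6)
print(np.allclose(X, X_approx))         # True, since X has rank 6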
-def pseudo_hankel_matrix(X: np.ndarray, d: int):
+def pseudo_hankel_matrix(X: np.ndarray, d: int) -> np.ndarray:
     """
     Arrange the snapshots in the matrix `X` into the (pseudo) Hankel
     matrix. The attribute `d` controls the number of snapshots from `X` in