From 6dfa051acdb81395cb14a5fb694b8675fc7ff225 Mon Sep 17 00:00:00 2001
From: Julien Roussel <3178729-JulienRoussel77@users.noreply.gitlab.com>
Date: Fri, 29 Aug 2025 16:11:15 +0200
Subject: [PATCH 1/4] long EM/RPCA operations wrapped with tqdm

---
 pyproject.toml                        | 1 +
 qolmat/imputations/em_sampler.py      | 5 ++++-
 qolmat/imputations/rpca/rpca_noisy.py | 5 ++++-
 qolmat/imputations/rpca/rpca_pcp.py   | 5 ++++-
 qolmat/imputations/softimpute.py      | 5 ++++-
 5 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ca6327cb..f911e295 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
 typed-ast = { version = "*", optional = true }
 category-encoders = "^2.6.3"
 dcor = ">= 0.6"
+tqdm = "^0.0.1"
 
 [tool.poetry.group.torch.dependencies]
 torch = "< 2.5"
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
index 449b4f68..9c1796c2 100644
--- a/qolmat/imputations/em_sampler.py
+++ b/qolmat/imputations/em_sampler.py
@@ -11,6 +11,7 @@
 from scipy import optimize as spo
 from sklearn import utils as sku
 from sklearn.base import BaseEstimator, TransformerMixin
+from tqdm import tqdm
 
 from qolmat.utils import utils
 from qolmat.utils.utils import RandomSetting
@@ -433,7 +434,9 @@ def fit_X(self, X: NDArray) -> None:
 
         X = self._maximize_likelihood(X_imp, mask_na)
 
-        for iter_em in range(self.max_iter_em):
+        for iter_em in tqdm(
+            range(self.max_iter_em), desc="EM parameters estimation"
+        ):
             X = self._sample_ou(X, mask_na)
 
             self.combine_parameters()
diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py
index 59164a3b..408e4cdc 100644
--- a/qolmat/imputations/rpca/rpca_noisy.py
+++ b/qolmat/imputations/rpca/rpca_noisy.py
@@ -11,6 +11,7 @@
 from scipy.sparse import dok_matrix, identity
 from scipy.sparse.linalg import spsolve
 from sklearn import utils as sku
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
@@ -311,7 +312,9 @@ def minimise_loss(
         Ir = np.eye(rank)
         In = identity(n_rows)
 
-        for _ in range(max_iterations):
+        for _ in tqdm(
+            range(max_iterations), desc="Noisy RPCA loss minimization"
+        ):
             M_temp = M.copy()
             A_temp = A.copy()
             L_temp = L.copy()
diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
index afb6dea3..0c9576c6 100644
--- a/qolmat/imputations/rpca/rpca_pcp.py
+++ b/qolmat/imputations/rpca/rpca_pcp.py
@@ -8,6 +8,7 @@
 import numpy as np
 from numpy.typing import NDArray
 from sklearn import utils as sku
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
@@ -125,7 +126,9 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)
 
         M: NDArray = D - A
-        for iteration in range(self.max_iterations):
+        for iteration in tqdm(
+            range(self.max_iterations), desc="RPCA PCP decomposition"
+        ):
             M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
             A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
             A[~Omega] = (D - M)[~Omega]
diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py
index 912a9294..0b8f0b8b 100644
--- a/qolmat/imputations/softimpute.py
+++ b/qolmat/imputations/softimpute.py
@@ -10,6 +10,7 @@
 from numpy.typing import NDArray
 from sklearn import utils as sku
 from sklearn.base import BaseEstimator, TransformerMixin
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.utils import utils
@@ -146,7 +147,9 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         B = V * D
         M = A @ B.T
         cost_start = SoftImpute.cost_function(X, M, A, Omega, tau)
-        for iter_ in range(self.max_iterations):
+        for iter_ in tqdm(
+            range(self.max_iterations), desc="Soft Impute decomposition"
+        ):
             U_old = U
             V_old = V
             D_old = D

From cd9865e54de0e93bfa8f3eb9846f508fc56838db Mon Sep 17 00:00:00 2001
From: Julien Roussel <3178729-JulienRoussel77@users.noreply.gitlab.com>
Date: Fri, 29 Aug 2025 16:20:26 +0200
Subject: [PATCH 2/4] history updated

---
 HISTORY.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 4b499475..321864b2 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,7 +2,11 @@ History
 =======
 
-0.1.8 (2024-08-29)
+0.1.10 (2024-??-??)
+------------------
+* Long EM and RPCA operations wrapped with tqdm progress bars
+
+0.1.9 (2024-08-29)
 ------------------
 * Tutorials reproducibility improved with random_state parameters
 * RPCA now accepts random_state parameters

From 1d52ab269f84d38c90586c9681c5e8f545fee7e6 Mon Sep 17 00:00:00 2001
From: Julien Roussel <3178729-JulienRoussel77@users.noreply.gitlab.com>
Date: Fri, 29 Aug 2025 16:22:42 +0200
Subject: [PATCH 3/4] tqdm version updated

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f911e295..7f3d6c37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ statsmodels = ">= 0.14.0"
 typed-ast = { version = "*", optional = true }
 category-encoders = "^2.6.3"
 dcor = ">= 0.6"
-tqdm = "^0.0.1"
+tqdm = "*"
 
 [tool.poetry.group.torch.dependencies]
 torch = "< 2.5"

From 7fb0c568bf66824f91d4de65635b93395e0dc867 Mon Sep 17 00:00:00 2001
From: Julien Roussel <3178729-JulienRoussel77@users.noreply.gitlab.com>
Date: Fri, 29 Aug 2025 17:13:00 +0200
Subject: [PATCH 4/4] progress bars conditioned by verbose

---
 README.rst                            | 5 +++--
 qolmat/imputations/em_sampler.py      | 5 ++++-
 qolmat/imputations/rpca/rpca_noisy.py | 9 ++++++++-
 qolmat/imputations/rpca/rpca_pcp.py   | 4 +++-
 qolmat/imputations/softimpute.py      | 4 +++-
 5 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/README.rst b/README.rst
index 1f292ebf..83af908f 100644
--- a/README.rst
+++ b/README.rst
@@ -70,17 +70,18 @@ With just these few lines of code, you can see how easy it is to
     from qolmat.utils import data
 
     # load and prepare csv data
+
     df_data = data.get_data("Beijing")
     columns = ["TEMP", "PRES", "WSPM"]
     df_data = df_data[columns]
     df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)
 
     # impute and compare
-    imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
+    imputer_median = imputers.ImputerSimple(groups=("station",))
     imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
     imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
     dict_imputers = {
-        "mean": imputer_mean,
+        "median": imputer_median,
         "interpolation": imputer_interpol,
         "VAR(1) process": imputer_var1
     }
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
index 9c1796c2..d6845079 100644
--- a/qolmat/imputations/em_sampler.py
+++ b/qolmat/imputations/em_sampler.py
@@ -435,7 +435,9 @@ def fit_X(self, X: NDArray) -> None:
         X = self._maximize_likelihood(X_imp, mask_na)
 
         for iter_em in tqdm(
-            range(self.max_iter_em), desc="EM parameters estimation"
+            range(self.max_iter_em),
+            desc="EM parameters estimation",
+            disable=not self.verbose,
         ):
             X = self._sample_ou(X, mask_na)
 
@@ -477,6 +479,7 @@ def fit(self, X: NDArray) -> "EM":
         if hasattr(self, "p_to_fit") and self.p_to_fit:
             aics: List[float] = []
             for p in range(self.max_lagp + 1):
+                print("p=", p)
                 self.p = p
                 self.fit_X(X)
                 n1, n2 = self.X.shape
diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py
index 408e4cdc..62bd6f08 100644
--- a/qolmat/imputations/rpca/rpca_noisy.py
+++ b/qolmat/imputations/rpca/rpca_noisy.py
@@ -201,6 +201,7 @@ def decompose_with_basis(
             max_iterations=self.max_iterations,
             tolerance=self.tolerance,
             norm=self.norm,
+            verbose=self.verbose,
         )
 
         self._check_cost_function_minimized(D, M, A, Omega, tau, lam)
@@ -220,6 +221,7 @@ def minimise_loss(
         max_iterations: int = 10000,
         tolerance: float = 1e-6,
         norm: str = "L2",
+        verbose: bool = False,
     ) -> Tuple:
         """Compute the noisy RPCA with a L2 time penalisation.
 
@@ -256,6 +258,9 @@ def minimise_loss(
             consecutive iterations. Defaults to 1e-6.
         norm : str, optional
             Error norm, can be "L1" or "L2". Defaults to "L2".
+        verbose : bool, optional
+            Verbosity level, if False the warnings are silenced. Defaults to
+            False.
 
         Returns
         -------
@@ -313,7 +318,9 @@ def minimise_loss(
         In = identity(n_rows)
 
         for _ in tqdm(
-            range(max_iterations), desc="Noisy RPCA loss minimization"
+            range(max_iterations),
+            desc="Noisy RPCA loss minimization",
+            disable=not verbose,
         ):
             M_temp = M.copy()
             A_temp = A.copy()
diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
index 0c9576c6..e018dbf9 100644
--- a/qolmat/imputations/rpca/rpca_pcp.py
+++ b/qolmat/imputations/rpca/rpca_pcp.py
@@ -127,7 +127,9 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         M: NDArray = D - A
         for iteration in tqdm(
-            range(self.max_iterations), desc="RPCA PCP decomposition"
+            range(self.max_iterations),
+            desc="RPCA PCP decomposition",
+            disable=not self.verbose,
         ):
             M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
             A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py
index 0b8f0b8b..63688812 100644
--- a/qolmat/imputations/softimpute.py
+++ b/qolmat/imputations/softimpute.py
@@ -148,7 +148,9 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         M = A @ B.T
         cost_start = SoftImpute.cost_function(X, M, A, Omega, tau)
         for iter_ in tqdm(
-            range(self.max_iterations), desc="Soft Impute decomposition"
+            range(self.max_iterations),
+            desc="Soft Impute decomposition",
+            disable=not self.verbose,
         ):
             U_old = U
             V_old = V