Commit fbdf663

Author: vm-aifluence-jro
Commit message: formatted
1 parent 4ca6f59 commit fbdf663

File tree: 17 files changed, +297 -250 lines changed
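
Note: the commit message is simply "formatted", and the pattern of the changes below (single quotes rewritten to double quotes, trailing commas added, `x ** 2` tightened to `x**2`, long lines wrapped in parentheses, two blank lines enforced between top-level definitions, final newlines added) is consistent with a run of the black autoformatter, e.g. a plain `black .`; the tool is not named anywhere in the commit, so this is an inference. Several hunks that look like no-ops remove trailing whitespace or add a missing newline at end of file.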

docs/conf.py (13 additions, 10 deletions)

@@ -12,16 +12,17 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))

 # -- Project information -----------------------------------------------------

-project = 'robust-pca'
-copyright = '2022, Quantmetry'
-author = 'Quantmetry'
+project = "robust-pca"
+copyright = "2022, Quantmetry"
+author = "Quantmetry"

 # The full version, including alpha/beta/rc tags
-release = '0.1'
+release = "0.1"


 # -- General configuration ---------------------------------------------------
@@ -38,15 +39,17 @@
     "sphinx.ext.mathjax",
     "numpydoc",
 ]
-mathjax_path = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+mathjax_path = (
+    "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+)

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

 # generate autosummary even if no references
 autosummary_generate = True
@@ -56,9 +59,9 @@
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

robust_pca/__init__.py (1 addition, 1 deletion)

@@ -3,4 +3,4 @@
 from . import classes
 from . import utils

-__all__ = ["classes", "utils", "__version__"]
\ No newline at end of file
+__all__ = ["classes", "utils", "__version__"]

robust_pca/_version.py (1 addition, 1 deletion)

@@ -1 +1 @@
-__version__ = "0.1"
\ No newline at end of file
+__version__ = "0.1"

robust_pca/benchmark/comparator.py (12 additions, 17 deletions)

@@ -1,7 +1,7 @@
 import pandas as pd
 import numpy as np
 from robust_pca.benchmark import cross_validation
-from robust_pca.benchmark import utils
+from robust_pca.benchmark import utils
 from sklearn.metrics import (
     mean_squared_error,
     mean_absolute_error,
@@ -14,39 +14,32 @@
 import matplotlib.pyplot as plt


-
 class Comparator:
     def __init__(
         self,
         data,
         ratio_missing,
         models_to_test,
         cols_to_impute,
+        n_samples=1,
         search_params={},
         corruption="missing",
-        filter_value_nan=-1e10
+        filter_value_nan=-1e10,
     ):

         self.df = data[cols_to_impute]
         self.ratio_missing = ratio_missing
         self.cols_to_impute = cols_to_impute
+        self.n_samples = n_samples
         self.filter_value_nan = filter_value_nan
         self.models_to_test = models_to_test
         self.search_params = search_params
         self.corruption = corruption

-    def create_corruptions(
-        self,
-        df: pd.DataFrame,
-        random_state: Optional[int] = 29
-    ):
+    def create_corruptions(self, df: pd.DataFrame, random_state: Optional[int] = 29):

         self.df_is_altered = utils.choice_with_mask(
-            df,
-            df.notna(),
-            self.ratio_missing,
-            self.filter_value_nan,
-            random_state
+            df, df.notna(), self.ratio_missing, self.filter_value_nan, random_state
         )

         self.corrupted_df = df.copy()
@@ -58,7 +51,9 @@ def create_corruptions(
         )

     def get_errors(
-        self, signal_ref: pd.DataFrame, signal_imputed: pd.DataFrame,
+        self,
+        signal_ref: pd.DataFrame,
+        signal_imputed: pd.DataFrame,
     ) -> float:

         rmse = utils.mean_squared_error(
@@ -86,7 +81,7 @@ def compare(self):

         df = self.df[self.cols_to_impute]
         errors = defaultdict(list)
-        for _ in range(1):
+        for _ in range(self.n_samples):
             random_state = np.random.randint(0, 10 * 9)
             self.create_corruptions(df, random_state=random_state)
             cv = cross_validation.CrossValidation(
@@ -96,9 +91,9 @@ def compare(self):
                 ratio_missing=self.ratio_missing,
                 corruption=self.corruption,
             )
-            #print("# nan before imputation:", df.isna().sum().sum())
+            # print("# nan before imputation:", df.isna().sum().sum())
             imputed_df = cv.fit_transform(self.corrupted_df)
-            #print("# nan after imputation...:", imputed_df.isna().sum().sum())
+            # print("# nan after imputation...:", imputed_df.isna().sum().sum())
             for k, v in self.get_errors(df, imputed_df).items():
                 errors[k].append(v)


robust_pca/benchmark/utils.py (8 additions, 6 deletions)

@@ -11,6 +11,7 @@

 BOUNDS = Bounds(1, np.inf, keep_feasible=True)

+
 def get_search_space(tested_model, search_params):
     search_space = None
     search_name = None
@@ -54,9 +55,9 @@ def custom_groupby(df, groups):
 def choice_with_mask(df, mask, ratio, filter_value=None, random_state=None):
     mask = mask.to_numpy().flatten()
     if filter_value:
-        mask_filter = (df.values>filter_value).flatten()
+        mask_filter = (df.values > filter_value).flatten()
         mask += mask_filter
-
+
     indices = np.argwhere(mask)
     indices = resample(
         indices,
@@ -126,6 +127,7 @@ def aggregate_time_data(df, target, agg_time):
     )
     return df_aggregated

+
 def cross_entropy(t, t_hyp):
     loss = np.sum(t * np.log(t / t_hyp))
     jac = np.log(t / t_hyp) - 1
@@ -184,7 +186,7 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):
     df_day["n_train"] = df_day.groupby("datetime_round")[target].transform(
         lambda x: x.shape[0]
     )
-
+
     df_day["hyp_values"] = (
         df_day[["datetime_round"]]
         .merge(ts_agg, left_on="datetime_round", right_on="agg_time", how="left")[
@@ -204,15 +206,15 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):

     df_day["impute"] = np.nan
     df_day.loc[is_in_zero_slot, "impute"] = 0
-
+
     non_zero_impute = impute_by_max_entropy(
         df_dt=df_day.loc[~is_in_zero_slot, "datetime"].values,
         df_dt_agg=ts_agg.loc[ts_agg[col_name] > zero_soil, "agg_time"].values,
         df_values_agg=ts_agg.loc[ts_agg[col_name] > zero_soil, col_name].values,
         freq=agg_time,
         df_values_hyp=df_day.loc[~is_in_zero_slot, "hyp_values"].values,
     )
-
+
     df_day.loc[~is_in_zero_slot, "impute"] = (
         df_day.loc[~is_in_zero_slot, ["datetime"]]
         .merge(non_zero_impute, on="datetime", how="left")["impute"]
@@ -221,4 +223,4 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):

     df_res = df.merge(df_day[["datetime", "impute"]], on="datetime", how="left")

-    return df_res
\ No newline at end of file
+    return df_res
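
For orientation, choice_with_mask is the helper Comparator.create_corruptions calls to decide which entries get corrupted: it keeps entries flagged by the mask (and, if filter_value is set, above that value), then resamples the requested ratio of their indices. Based only on the signature visible here, a call might look like this; the toy frame is hypothetical:

import numpy as np
import pandas as pd
from robust_pca.benchmark import utils

df = pd.DataFrame({"x": [0.5, 1.5, np.nan, 3.0, 4.5]})

# Reproducibly mark ~40% of the valid (non-missing, above-threshold) entries
# as artificially altered for the benchmark.
is_altered = utils.choice_with_mask(
    df,
    df.notna(),
    ratio=0.4,
    filter_value=-1e10,
    random_state=29,
)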

robust_pca/classes/__init__.py (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
 from . import rpca
 from . import improved_rpca
 from . import noisy_rpca
-from . import graph_rpca
\ No newline at end of file
+from . import graph_rpca

robust_pca/classes/graph_rpca.py (27 additions, 24 deletions)

@@ -12,7 +12,7 @@ class GraphRPCA:

     References
     ----------
-    Shahid, Nauman, et al. "Fast robust PCA on graphs."
+    Shahid, Nauman, et al. "Fast robust PCA on graphs."
     IEEE Journal of Selected Topics in Signal Processing 10.4 (2016): 740-756.

     Parameters
@@ -28,7 +28,7 @@ class GraphRPCA:
     gamma1 : int
         regularizing parameter for the graph G1, constructed from the columns of D
     gamma2 : int
-        regularizing parameter for the graph G1, constructed from the rows of D
+        regularizing parameter for the graph G1, constructed from the rows of D
     G1 : Optional[np.ndarray]
         graph G1, constructed from the columns of D
     G2 : Optional[np.ndarray]
@@ -58,7 +58,7 @@ def __init__(
         nbg2: Optional[int] = 10,
         maxIter: Optional[int] = int(1e4),
         tol: Optional[float] = 1e-6,
-        cv: Optional[int] = 5,
+        cv: Optional[int] = 5,
         verbose: Optional[bool] = False,
     ) -> None:

@@ -80,16 +80,16 @@ def __init__(
         self.maxIter = maxIter
         self.tol = tol
         self.verbose = verbose
-
+
         self._prepare_data()

     def _prepare_data(self) -> None:
         """Prepare data fot RPCA computation:
-        Transform signal to matrix if needed
-        Get the omega matrix
-        Impute the nan values if needed
+        Transform signal to matrix if needed
+        Get the omega matrix
+        Impute the nan values if needed
         """
-
+
         self.ret = 0
         if (self.D is None) and (self.period is None):
             self.period = utils.get_period(self.signal)
@@ -98,7 +98,7 @@ def _prepare_data(self) -> None:

         self.initial_D = self.D.copy()
         self.initial_D_proj = utils.impute_nans(self.initial_D, method="median")
-
+
         self.omega = 1 - (self.D != self.D)
         if np.isnan(np.sum(self.D)):
             self.proj_D = utils.impute_nans(self.D, method="median")
@@ -113,15 +113,15 @@ def compute_graph_rpca(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         Tuple[np.ndarray, np.ndarray, np.ndarray]
             observations, low-rank and sparse matrices
         """
-
+
         self.omega = 1 - (self.D != self.D)
         if np.isnan(np.sum(self.D)):
             self.proj_D = utils.impute_nans(self.D, method="median")
         else:
             self.proj_D = self.D
         if self.rank is None:
             self.rank = utils.approx_rank(self.proj_D)
-
+
         if self.G1 is None:
             self.G1 = utils.construct_graph((self.D).T, n_neighbors=self.nbg1)
         if self.G2 is None:
@@ -148,10 +148,12 @@ def compute_graph_rpca(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
             grad_g = 2 * (self.gamma1 * Y @ laplacian1 + self.gamma2 * laplacian2 @ Y)

             X = utils.proximal_operator(Y_past - lam * grad_g, self.D, lam)
-            t = (1 + (1 + 4 * t_past ** 2) ** 0.5) / 2
+            t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2
             Y = X + (t_past - 1) / t * (X - X_past)

-            errors.append(np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro"))
+            errors.append(
+                np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro")
+            )
             if errors[-1] < self.tol:
                 if self.verbose:
                     print(
@@ -177,12 +179,12 @@ class GraphRPCAHyperparams(GraphRPCA):
     GraphRPCA : Type[GraphRPCA]
         [description]
     """
-
+
     def add_hyperparams(
         self,
         hyperparams_gamma1: Optional[List[float]] = [],
         hyperparams_gamma2: Optional[List[float]] = [],
-        cv: Optional[int] = 5,
+        cv: Optional[int] = 5,
     ) -> None:
         """Define the search space associated to each hyperparameter

@@ -224,7 +226,7 @@ def objective(self, args):
         float
             criterion to minimise
         """
-
+
         self.gamma1 = args[0]
         self.gamma2 = args[1]

@@ -244,8 +246,7 @@ def objective(self, args):

             error = (
                 np.linalg.norm(
-                    self.initial_D[indices_x, indices_y]
-                    - W[indices_x, indices_y],
+                    self.initial_D[indices_x, indices_y] - W[indices_x, indices_y],
                     1,
                 )
                 / nb_missing
@@ -255,20 +256,22 @@ def objective(self, args):

         if len(errors) == 0:
             print("Warning: not converged - return default 10^10")
-            return 10 ** 10
+            return 10**10

         return np.mean(errors)
-
-    def compute_graph_rpca_hyperparams(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+
+    def compute_graph_rpca_hyperparams(
+        self,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Decompose a matrix into a low rank part and a sparse part
-        Hyperparams are set by Bayesian optimisation and cross-validation
+        Hyperparams are set by Bayesian optimisation and cross-validation

         Returns
         -------
         Tuple[np.ndarray, np.ndarray]
             the low rank matrix and the sparse matrix
         """
-
+
         res = skopt.gp_minimize(
             self.objective,
             self.search_space,
@@ -285,4 +288,4 @@ def compute_graph_rpca_hyperparams(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         self.gamma2 = res.x[1]
         D, X, A = self.compute_graph_rpca()

-        return D, X, A
\ No newline at end of file
+        return D, X, A
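
Although every hunk above is cosmetic, two of them sit inside the core solver loop of compute_graph_rpca, which is an accelerated proximal-gradient (FISTA-style) iteration: t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2 is the classic momentum-coefficient recurrence, and the wrapped errors.append tracks the relative change used as the stopping criterion. A self-contained sketch of that pattern, with grad_g and prox as assumed stand-ins for the solver's graph-Laplacian gradient and utils.proximal_operator:

import numpy as np

def fista_iterate(grad_g, prox, Y0, lam, max_iter=10_000, tol=1e-6):
    # grad_g and prox are assumed callables, not the library's actual API;
    # only the update pattern mirrors the loop in compute_graph_rpca.
    X_past = Y0.copy()
    Y = Y0.copy()
    t_past = 1.0
    for _ in range(max_iter):
        Y_past = Y.copy()
        X = prox(Y_past - lam * grad_g(Y_past), lam)  # proximal gradient step
        t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2      # momentum coefficient
        Y = X + (t_past - 1) / t * (X - X_past)       # extrapolation step
        error = np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro")
        X_past, t_past = X, t
        if error < tol:                               # relative-change stopping rule
            break
    return X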
