Skip to content

Commit ad01af4

Browse files
authored
Change CMTF to SVD imputation with a set starting seed (#32)
* Try this out * Fixed * Make rank required * Bump version number
1 parent f2563e7 commit ad01af4

File tree

3 files changed

+93
-2
lines changed

3 files changed

+93
-2
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "tensorpack"
3-
version = "0.0.6"
3+
version = "0.0.7"
44
description = "A collection of tensor methods from the Meyer lab."
55
authors = ["Your Name <[email protected]>"]
66
license = "MIT"

tensorpack/SVD_impute.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
from sklearn.decomposition import TruncatedSVD
14+
from sklearn.utils import check_array
15+
import numpy as np
16+
17+
from .soft_impute import Solver
18+
19+
# Machine epsilon of float32. IterativeSVD._converged uses it as the
# "effectively zero" tolerance when comparing squared norms.
F32PREC = np.finfo(np.float32).eps
20+
21+
def masked_mae(X_true, X_pred, mask):
    """Return the mean absolute error over the entries selected by ``mask``.

    ``mask`` is used to index both ``X_true`` and ``X_pred``; only the
    selected entries contribute to the average.
    """
    abs_errors = np.abs(X_true[mask] - X_pred[mask])
    return np.mean(abs_errors)
24+
25+
26+
class IterativeSVD(Solver):
    """Impute missing matrix entries by repeatedly applying a truncated SVD.

    Every iteration fits a truncated SVD with ``rank`` components to the
    current filled matrix, reconstructs the matrix from that decomposition,
    and overwrites only the missing entries with the reconstructed values.
    Iteration stops once the relative squared change of the imputed entries
    drops below ``convergence_threshold`` or after ``max_iters`` iterations.

    Parameters
    ----------
    rank : int
        Number of components passed to ``TruncatedSVD``.
    convergence_threshold : float
        Upper bound on ``sum((old - new)**2) / sum(old**2)``, evaluated on the
        missing entries only, below which the iteration is considered done.
    max_iters : int
        Maximum number of SVD fit/reconstruct rounds.
    svd_algorithm : str
        Forwarded to ``TruncatedSVD`` as ``algorithm``.
    init_fill_method : str
        Forwarded to ``Solver.__init__`` as ``fill_method``.
    random_state : int, RandomState instance or None
        Forwarded to ``TruncatedSVD`` as ``random_state``.
    min_value, max_value : float or None
        Forwarded to ``Solver.__init__``; presumably the bounds applied by
        ``self.clip`` -- confirm against ``Solver``.
    verbose : bool
        When true, print the mean absolute error on the observed entries
        after every iteration.
    """

    def __init__(
            self,
            rank,
            convergence_threshold=0.00001,
            max_iters=200,
            svd_algorithm="arpack",
            init_fill_method="zero",
            random_state=None,
            min_value=None,
            max_value=None,
            verbose=False):
        super().__init__(
            fill_method=init_fill_method,
            min_value=min_value,
            max_value=max_value)
        self.rank = rank
        self.max_iters = max_iters
        self.svd_algorithm = svd_algorithm
        self.convergence_threshold = convergence_threshold
        self.verbose = verbose
        self.random_state = random_state

    def _converged(self, X_old, X_new, missing_mask):
        """Return True when the imputed entries have stopped changing.

        Only the entries selected by ``missing_mask`` are compared. The test
        is ``ssd / old_norm_squared < convergence_threshold`` where ``ssd`` is
        the sum of squared differences between old and new values.
        """
        old_missing_values = X_old[missing_mask]
        new_missing_values = X_new[missing_mask]
        difference = old_missing_values - new_missing_values
        ssd = np.sum(difference ** 2)
        old_norm_squared = (old_missing_values ** 2).sum()
        # Edge cases: the ratio is undefined when the old norm is exactly
        # zero, and unreliable when it is below float32 precision while the
        # change is not -- report "not converged" in both situations.
        if old_norm_squared == 0 or \
                (old_norm_squared < F32PREC and ssd > F32PREC):
            return False
        return (ssd / old_norm_squared) < self.convergence_threshold

    def solve(self, X, missing_mask):
        """Run the iterative SVD imputation and return the filled matrix.

        Parameters
        ----------
        X : array-like, 2D
            Matrix to impute. The SVD is fitted on it directly, so the
            missing positions are presumably pre-filled by the caller
            (``Solver.fit_transform``) -- confirm against ``Solver``.
        missing_mask : boolean ndarray, same shape as ``X``
            True where an entry is missing and must be imputed.

        Returns
        -------
        ndarray
            A new array equal to ``X`` on observed entries and holding the
            imputed values on missing entries. The input is not modified.
        """
        # copy=True: the loop below writes into the matrix in place, and
        # without a copy check_array may hand back the caller's own array.
        # NOTE(review): recent scikit-learn releases rename
        # `force_all_finite` to `ensure_all_finite`; confirm against the
        # scikit-learn version this package pins.
        X_filled = check_array(X, force_all_finite=False, copy=True)

        # Nothing to impute: _converged can never return True for an empty
        # selection, so the loop would otherwise burn max_iters SVD fits.
        if not np.any(missing_mask):
            return X_filled

        observed_mask = ~missing_mask
        # The estimator's configuration never changes between iterations, so
        # build it once; fit_transform refits it from scratch on every call.
        tsvd = TruncatedSVD(
            self.rank,
            algorithm=self.svd_algorithm,
            random_state=self.random_state)

        for i in range(self.max_iters):
            X_reduced = tsvd.fit_transform(X_filled)
            X_reconstructed = tsvd.inverse_transform(X_reduced)
            X_reconstructed = self.clip(X_reconstructed)
            if self.verbose:
                # Observed entries of X_filled are never overwritten, so they
                # still hold the original input values.
                mae = masked_mae(
                    X_true=X_filled,
                    X_pred=X_reconstructed,
                    mask=observed_mask)
                print(
                    "[IterativeSVD] Iter %d: observed MAE=%0.6f" % (
                        i + 1, mae))
            # Evaluate convergence before the update so that X_filled still
            # holds the previous iteration's imputed values.
            converged = self._converged(
                X_old=X_filled,
                X_new=X_reconstructed,
                missing_mask=missing_mask)
            X_filled[missing_mask] = X_reconstructed[missing_mask]
            if converged:
                break
        return X_filled

tensorpack/cmtf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from tensorly.tenalg import khatri_rao
99
from copy import deepcopy
1010
from tensorly.decomposition._cp import initialize_cp, parafac
11+
from .SVD_impute import IterativeSVD
1112
from .soft_impute import SoftImpute
1213

1314

@@ -173,7 +174,7 @@ def initialize_cmtf(tensor: np.ndarray, matrix: np.ndarray, rank: int):
173174
unfold = np.hstack((unfold, matrix))
174175

175176
if np.sum(~np.isfinite(unfold)) > 0:
176-
si = SoftImpute(max_rank=rank)
177+
si = IterativeSVD(rank=rank, random_state=1)
177178
unfold = si.fit_transform(unfold)
178179

179180
factors[0] = np.linalg.svd(unfold)[0][:, :rank]

0 commit comments

Comments
 (0)