Skip to content

Commit a54cd4b

Browse files
committed
deprecate adaptive_k and matrix theta, fix tests
1 parent c2ad629 commit a54cd4b

File tree

3 files changed

+79
-144
lines changed

3 files changed

+79
-144
lines changed

graphtools/base.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def _get_param_names(cls):
6565
return parameters
6666

6767
def set_params(self, **kwargs):
68+
# for k in kwargs:
69+
# raise TypeError("set_params() got an unexpected "
70+
# "keyword argument '{}'".format(k))
6871
return self
6972

7073

@@ -866,3 +869,28 @@ def interpolate(self, transform, transitions=None, Y=None):
866869
transitions = self.extend_to_data(Y)
867870
Y_transform = transitions.dot(transform)
868871
return Y_transform
872+
873+
def set_params(self, **params):
874+
"""Set parameters on this object
875+
876+
Safe setter method - attributes should not be modified directly as some
877+
changes are not valid.
878+
Valid parameters:
879+
- n_jobs
880+
- verbose
881+
882+
Parameters
883+
----------
884+
params : key-value pairs of parameter name and new values
885+
886+
Returns
887+
-------
888+
self
889+
"""
890+
if 'n_jobs' in params:
891+
self.n_jobs = params['n_jobs']
892+
if 'verbose' in params:
893+
self.verbose = params['verbose']
894+
tasklogger.set_level(self.verbose)
895+
super().set_params(**params)
896+
return self

graphtools/graphs.py

Lines changed: 12 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
from builtins import super
33
import numpy as np
44
from sklearn.neighbors import NearestNeighbors
5-
from scipy.spatial.distance import pdist, cdist
6-
from scipy.spatial.distance import squareform
75
from sklearn.utils.extmath import randomized_svd
86
from sklearn.preprocessing import normalize
97
from sklearn.cluster import MiniBatchKMeans
108
from sklearn.utils.graph import graph_shortest_path
9+
from scipy.spatial.distance import pdist, cdist
10+
from scipy.spatial.distance import squareform
1111
from scipy import sparse
1212
import numbers
1313
import warnings
@@ -1038,8 +1038,8 @@ class MNNGraph(DataGraph):
10381038

10391039
def __init__(self, data, sample_idx,
10401040
knn=5, beta=1, n_pca=None,
1041-
adaptive_k=None,
10421041
decay=None,
1042+
adaptive_k=None,
10431043
bandwidth=None,
10441044
distance='euclidean',
10451045
thresh=1e-4,
@@ -1049,14 +1049,12 @@ def __init__(self, data, sample_idx,
10491049
self.sample_idx = sample_idx
10501050
self.samples, self.n_cells = np.unique(
10511051
self.sample_idx, return_counts=True)
1052-
self.adaptive_k = adaptive_k
10531052
self.knn = knn
10541053
self.decay = decay
10551054
self.distance = distance
10561055
self.bandwidth = bandwidth
10571056
self.thresh = thresh
10581057
self.n_jobs = n_jobs
1059-
self.weighted_knn = self._weight_knn()
10601058

10611059
if sample_idx is None:
10621060
raise ValueError("sample_idx must be given. For a graph without"
@@ -1068,78 +1066,25 @@ def __init__(self, data, sample_idx,
10681066
elif len(self.samples) == 1:
10691067
raise ValueError(
10701068
"sample_idx must contain more than one unique value")
1069+
if adaptive_k is not None:
1070+
warnings.warn("`adaptive_k` has been deprecated. Using fixed knn.",
1071+
DeprecationWarning)
10711072

10721073
super().__init__(data, n_pca=n_pca, **kwargs)
10731074

10741075
def _check_symmetrization(self, kernel_symm, theta):
10751076
if kernel_symm == 'theta' and theta is not None and \
10761077
not isinstance(theta, numbers.Number):
1077-
# matrix theta
1078-
try:
1079-
theta.shape
1080-
except AttributeError:
1081-
raise ValueError("theta {} not recognized. "
1082-
"Expected a float between 0 and 1 "
1083-
"or a [n_batch,n_batch] matrix of "
1084-
"floats between 0 and 1".format(theta))
1085-
if not np.shape(theta) == (len(self.samples),
1086-
len(self.samples)):
1087-
raise ValueError(
1088-
"Matrix theta must be of shape "
1089-
"({}), got ({})".format(
1090-
(len(self.samples),
1091-
len(self.samples)), theta.shape))
1092-
elif np.max(theta) > 1 or np.min(theta) < 0:
1093-
raise ValueError(
1094-
"Values in matrix theta must be between"
1095-
" 0 and 1, got values between {} and {}".format(
1096-
np.max(theta), np.min(theta)))
1097-
elif np.any(theta != theta.T):
1098-
raise ValueError("theta must be a symmetric matrix")
1078+
raise TypeError("Expected `theta` as a float. "
1079+
"Got {}.".format(type(theta)))
10991080
else:
11001081
super()._check_symmetrization(kernel_symm, theta)
11011082

1102-
def _weight_knn(self, sample_size=None):
1103-
"""Select adaptive values of knn
1104-
1105-
Parameters
1106-
----------
1107-
1108-
sample_size : `int` or `None`
1109-
Number of cells in the sample in question. Used only for
1110-
out-of-sample extension. If `None`, calculates within-sample
1111-
knn values.
1112-
1113-
Returns
1114-
-------
1115-
1116-
knn : array-like or `int`, weighted knn values
1117-
"""
1118-
if sample_size is None:
1119-
# calculate within sample knn values
1120-
sample_size = self.n_cells
1121-
if self.adaptive_k == 'min':
1122-
# the smallest sample has k
1123-
knn_weight = self.n_cells / np.min(self.n_cells)
1124-
elif self.adaptive_k == 'mean':
1125-
# the average sample has k
1126-
knn_weight = self.n_cells / np.mean(self.n_cells)
1127-
elif self.adaptive_k == 'sqrt':
1128-
# the samples are sqrt'd first, then smallest has k
1129-
knn_weight = np.sqrt(self.n_cells / np.min(self.n_cells))
1130-
elif self.adaptive_k is None:
1131-
knn_weight = np.repeat(1, len(self.n_cells))
1132-
weighted_knn = np.round(self.knn * knn_weight).astype(np.int32)
1133-
if len(weighted_knn) == 1:
1134-
weighted_knn = weighted_knn[0]
1135-
return weighted_knn
1136-
11371083
def get_params(self):
11381084
"""Get parameters from this object
11391085
"""
11401086
params = super().get_params()
11411087
params.update({'beta': self.beta,
1142-
'adaptive_k': self.adaptive_k,
11431088
'knn': self.knn,
11441089
'decay': self.decay,
11451090
'bandwidth': self.bandwidth,
@@ -1176,9 +1121,6 @@ def set_params(self, **params):
11761121
# mnn specific arguments
11771122
if 'beta' in params and params['beta'] != self.beta:
11781123
raise ValueError("Cannot update beta. Please create a new graph")
1179-
if 'adaptive_k' in params and params['adaptive_k'] != self.adaptive_k:
1180-
raise ValueError(
1181-
"Cannot update adaptive_k. Please create a new graph")
11821124

11831125
# knn arguments
11841126
knn_kernel_args = ['knn', 'decay', 'distance', 'thresh', 'bandwidth']
@@ -1216,19 +1158,20 @@ def build_kernel(self):
12161158
tasklogger.log_debug("subgraph {}: sample {}, "
12171159
"n = {}, knn = {}".format(
12181160
i, idx, np.sum(self.sample_idx == idx),
1219-
self.weighted_knn[i]))
1161+
self.knn))
12201162
# select data for sample
12211163
data = self.data_nu[self.sample_idx == idx]
12221164
# build a kNN graph for cells within sample
12231165
graph = Graph(data, n_pca=None,
1224-
knn=self.weighted_knn[i],
1166+
knn=self.knn,
12251167
decay=self.decay,
12261168
bandwidth=self.bandwidth,
12271169
distance=self.distance,
12281170
thresh=self.thresh,
12291171
verbose=self.verbose,
12301172
random_state=self.random_state,
12311173
n_jobs=self.n_jobs,
1174+
kernel_symm='+',
12321175
initialize=True)
12331176
self.subgraphs.append(graph) # append to list of subgraphs
12341177
tasklogger.log_complete("subgraphs")
@@ -1251,7 +1194,7 @@ def build_kernel(self):
12511194
self.samples[j]))
12521195
Kij = Y.build_kernel_to_data(
12531196
X.data_nu,
1254-
knn=self.weighted_knn[i])
1197+
knn=self.knn)
12551198
between_batch_norm = np.array(np.sum(Kij, 1)).flatten()
12561199
scale = np.minimum(1, within_batch_norm /
12571200
between_batch_norm) * self.beta
@@ -1267,37 +1210,6 @@ def build_kernel(self):
12671210
tasklogger.log_complete("MNN kernel")
12681211
return K
12691212

1270-
def symmetrize_kernel(self, K):
1271-
if self.kernel_symm == 'theta' and self.theta is not None and \
1272-
not isinstance(self.theta, numbers.Number):
1273-
# matrix theta
1274-
# Theta can be a matrix with specific values transitions for
1275-
# each batch. This allows for technical replicates and
1276-
# experimental samples to be corrected simultaneously
1277-
tasklogger.log_debug("Using theta symmetrization. "
1278-
"Theta:\n{}".format(self.theta))
1279-
for i, sample_i in enumerate(self.samples):
1280-
for j, sample_j in enumerate(self.samples):
1281-
if j < i:
1282-
continue
1283-
Kij = K[np.ix_(self.sample_idx == sample_i,
1284-
self.sample_idx == sample_j)]
1285-
Kji = K[np.ix_(self.sample_idx == sample_j,
1286-
self.sample_idx == sample_i)]
1287-
Kij_symm = self.theta[i, j] * \
1288-
elementwise_minimum(Kij, Kji.T) + \
1289-
(1 - self.theta[i, j]) * \
1290-
elementwise_maximum(Kij, Kji.T)
1291-
K = set_submatrix(K, self.sample_idx == sample_i,
1292-
self.sample_idx == sample_j, Kij_symm)
1293-
if not i == j:
1294-
K = set_submatrix(K, self.sample_idx == sample_j,
1295-
self.sample_idx == sample_i,
1296-
Kij_symm.T)
1297-
else:
1298-
K = super().symmetrize_kernel(K)
1299-
return K
1300-
13011213
def build_kernel_to_data(self, Y, theta=None):
13021214
"""Build transition matrix from new data to the graph
13031215

0 commit comments

Comments
 (0)