 from builtins import super
 import numpy as np
 from sklearn.neighbors import NearestNeighbors
-from scipy.spatial.distance import pdist, cdist
-from scipy.spatial.distance import squareform
 from sklearn.utils.extmath import randomized_svd
 from sklearn.preprocessing import normalize
 from sklearn.cluster import MiniBatchKMeans
 from sklearn.utils.graph import graph_shortest_path
+from scipy.spatial.distance import pdist, cdist
+from scipy.spatial.distance import squareform
 from scipy import sparse
 import numbers
 import warnings
@@ -1038,8 +1038,8 @@ class MNNGraph(DataGraph):
 
     def __init__(self, data, sample_idx,
                  knn=5, beta=1, n_pca=None,
-                 adaptive_k=None,
                  decay=None,
+                 adaptive_k=None,
                  bandwidth=None,
                  distance='euclidean',
                  thresh=1e-4,
@@ -1049,14 +1049,12 @@ def __init__(self, data, sample_idx,
         self.sample_idx = sample_idx
         self.samples, self.n_cells = np.unique(
             self.sample_idx, return_counts=True)
-        self.adaptive_k = adaptive_k
         self.knn = knn
         self.decay = decay
         self.distance = distance
         self.bandwidth = bandwidth
         self.thresh = thresh
         self.n_jobs = n_jobs
-        self.weighted_knn = self._weight_knn()
 
         if sample_idx is None:
             raise ValueError("sample_idx must be given. For a graph without"
@@ -1068,78 +1066,25 @@ def __init__(self, data, sample_idx,
         elif len(self.samples) == 1:
             raise ValueError(
                 "sample_idx must contain more than one unique value")
+        if adaptive_k is not None:
+            warnings.warn("`adaptive_k` has been deprecated. Using fixed knn.",
+                          DeprecationWarning)
 
         super().__init__(data, n_pca=n_pca, **kwargs)
 
     def _check_symmetrization(self, kernel_symm, theta):
         if kernel_symm == 'theta' and theta is not None and \
                 not isinstance(theta, numbers.Number):
-            # matrix theta
-            try:
-                theta.shape
-            except AttributeError:
-                raise ValueError("theta {} not recognized. "
-                                 "Expected a float between 0 and 1 "
-                                 "or a [n_batch,n_batch] matrix of "
-                                 "floats between 0 and 1".format(theta))
-            if not np.shape(theta) == (len(self.samples),
-                                       len(self.samples)):
-                raise ValueError(
-                    "Matrix theta must be of shape "
-                    "({}), got ({})".format(
-                        (len(self.samples),
-                         len(self.samples)), theta.shape))
-            elif np.max(theta) > 1 or np.min(theta) < 0:
-                raise ValueError(
-                    "Values in matrix theta must be between"
-                    " 0 and 1, got values between {} and {}".format(
-                        np.max(theta), np.min(theta)))
-            elif np.any(theta != theta.T):
-                raise ValueError("theta must be a symmetric matrix")
+            raise TypeError("Expected `theta` as a float. "
+                            "Got {}.".format(type(theta)))
         else:
             super()._check_symmetrization(kernel_symm, theta)
 
-    def _weight_knn(self, sample_size=None):
-        """Select adaptive values of knn
-
-        Parameters
-        ----------
-
-        sample_size : `int` or `None`
-            Number of cells in the sample in question. Used only for
-            out-of-sample extension. If `None`, calculates within-sample
-            knn values.
-
-        Returns
-        -------
-
-        knn : array-like or `int`, weighted knn values
-        """
-        if sample_size is None:
-            # calculate within sample knn values
-            sample_size = self.n_cells
-        if self.adaptive_k == 'min':
-            # the smallest sample has k
-            knn_weight = self.n_cells / np.min(self.n_cells)
-        elif self.adaptive_k == 'mean':
-            # the average sample has k
-            knn_weight = self.n_cells / np.mean(self.n_cells)
-        elif self.adaptive_k == 'sqrt':
-            # the samples are sqrt'd first, then smallest has k
-            knn_weight = np.sqrt(self.n_cells / np.min(self.n_cells))
-        elif self.adaptive_k is None:
-            knn_weight = np.repeat(1, len(self.n_cells))
-        weighted_knn = np.round(self.knn * knn_weight).astype(np.int32)
-        if len(weighted_knn) == 1:
-            weighted_knn = weighted_knn[0]
-        return weighted_knn
-
     def get_params(self):
         """Get parameters from this object
         """
         params = super().get_params()
         params.update({'beta': self.beta,
-                       'adaptive_k': self.adaptive_k,
                        'knn': self.knn,
                        'decay': self.decay,
                        'bandwidth': self.bandwidth,
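# ---- Illustrative sketch (not part of the diff above) ----
# With this change, `adaptive_k` is still accepted but ignored: passing it only
# emits a DeprecationWarning and the graph falls back to the scalar `knn`.
# Assumption: the `graphtools.Graph` factory still forwards `sample_idx` and
# `adaptive_k` to MNNGraph; the toy data below is made up.
import warnings
import numpy as np
import graphtools

X = np.random.normal(size=(200, 50))
sample_idx = np.repeat(['batch1', 'batch2'], 100)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    G = graphtools.Graph(X, sample_idx=sample_idx, adaptive_k='sqrt', knn=5)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
# every batch is now handled with the same fixed knn=5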
@@ -1176,9 +1121,6 @@ def set_params(self, **params):
         # mnn specific arguments
         if 'beta' in params and params['beta'] != self.beta:
             raise ValueError("Cannot update beta. Please create a new graph")
-        if 'adaptive_k' in params and params['adaptive_k'] != self.adaptive_k:
-            raise ValueError(
-                "Cannot update adaptive_k. Please create a new graph")
 
         # knn arguments
         knn_kernel_args = ['knn', 'decay', 'distance', 'thresh', 'bandwidth']
@@ -1216,19 +1158,20 @@ def build_kernel(self):
             tasklogger.log_debug("subgraph {}: sample {}, "
                                  "n = {}, knn = {}".format(
                                      i, idx, np.sum(self.sample_idx == idx),
-                                     self.weighted_knn[i]))
+                                     self.knn))
             # select data for sample
             data = self.data_nu[self.sample_idx == idx]
             # build a kNN graph for cells within sample
             graph = Graph(data, n_pca=None,
-                          knn=self.weighted_knn[i],
+                          knn=self.knn,
                           decay=self.decay,
                           bandwidth=self.bandwidth,
                           distance=self.distance,
                           thresh=self.thresh,
                           verbose=self.verbose,
                           random_state=self.random_state,
                           n_jobs=self.n_jobs,
+                          kernel_symm='+',
                           initialize=True)
             self.subgraphs.append(graph)  # append to list of subgraphs
         tasklogger.log_complete("subgraphs")
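# ---- Illustrative sketch (not part of the diff above) ----
# Each within-sample subgraph is now built with the same scalar `self.knn`
# (no per-sample `weighted_knn[i]`), and `kernel_symm='+'` requests additive
# symmetrization of the subgraph kernel. Assuming '+' keeps its usual meaning
# in graphtools, that is simply the average of the kernel and its transpose:
import numpy as np

K_sub = np.array([[1.0, 0.2, 0.0],
                  [0.0, 1.0, 0.7],
                  [0.4, 0.0, 1.0]])
K_symm = (K_sub + K_sub.T) / 2    # additive symmetrization
assert np.allclose(K_symm, K_symm.T)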
@@ -1251,7 +1194,7 @@ def build_kernel(self):
                                                          self.samples[j]))
                 Kij = Y.build_kernel_to_data(
                     X.data_nu,
-                    knn=self.weighted_knn[i])
+                    knn=self.knn)
                 between_batch_norm = np.array(np.sum(Kij, 1)).flatten()
                 scale = np.minimum(1, within_batch_norm /
                                    between_batch_norm) * self.beta
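# ---- Illustrative sketch (not part of the diff above) ----
# The rescaling above caps each cell's total between-batch kernel weight at its
# within-batch weight, then damps it by beta. Toy numbers (made up) to show the
# arithmetic of `scale`:
import numpy as np

beta = 0.5
within_batch_norm = np.array([4.0, 2.0, 1.0])    # row sums of the within-batch kernel
Kij = np.array([[1.0, 1.0],                      # hypothetical between-batch kernel
                [3.0, 1.0],
                [0.5, 0.5]])
between_batch_norm = np.array(np.sum(Kij, 1)).flatten()     # [2.0, 4.0, 1.0]
scale = np.minimum(1, within_batch_norm / between_batch_norm) * beta
# scale == [0.5, 0.25, 0.5]: the second cell's between-batch mass (4.0) exceeds
# its within-batch mass (2.0), so it is shrunk to match before beta is applied
Kij = Kij * scale[:, None]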
@@ -1267,37 +1210,6 @@ def build_kernel(self):
         tasklogger.log_complete("MNN kernel")
         return K
 
-    def symmetrize_kernel(self, K):
-        if self.kernel_symm == 'theta' and self.theta is not None and \
-                not isinstance(self.theta, numbers.Number):
-            # matrix theta
-            # Theta can be a matrix with specific values transitions for
-            # each batch. This allows for technical replicates and
-            # experimental samples to be corrected simultaneously
-            tasklogger.log_debug("Using theta symmetrization. "
-                                 "Theta:\n{}".format(self.theta))
-            for i, sample_i in enumerate(self.samples):
-                for j, sample_j in enumerate(self.samples):
-                    if j < i:
-                        continue
-                    Kij = K[np.ix_(self.sample_idx == sample_i,
-                                   self.sample_idx == sample_j)]
-                    Kji = K[np.ix_(self.sample_idx == sample_j,
-                                   self.sample_idx == sample_i)]
-                    Kij_symm = self.theta[i, j] * \
-                        elementwise_minimum(Kij, Kji.T) + \
-                        (1 - self.theta[i, j]) * \
-                        elementwise_maximum(Kij, Kji.T)
-                    K = set_submatrix(K, self.sample_idx == sample_i,
-                                      self.sample_idx == sample_j, Kij_symm)
-                    if not i == j:
-                        K = set_submatrix(K, self.sample_idx == sample_j,
-                                          self.sample_idx == sample_i,
-                                          Kij_symm.T)
-        else:
-            K = super().symmetrize_kernel(K)
-        return K
-
     def build_kernel_to_data(self, Y, theta=None):
         """Build transition matrix from new data to the graph
 
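# ---- Illustrative sketch (not part of the diff above) ----
# With the matrix-theta override deleted, MNNGraph falls through to the parent
# class's symmetrization, which for a scalar theta in [0, 1] blends the kernel
# with its transpose elementwise. A minimal numpy sketch, assuming the base
# class keeps the min/max form used elsewhere in graphtools:
import numpy as np

theta = 0.9
K = np.array([[1.0, 0.3],
              [0.6, 1.0]])
K_symm = theta * np.minimum(K, K.T) + (1 - theta) * np.maximum(K, K.T)
# theta near 1 approaches the strict mutual-nearest-neighbour rule (keep an
# edge only if both directions agree); theta == 0 keeps the union of edges.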