@@ -87,7 +87,11 @@ def _rsl_prims_balltree(X, k=5, alpha=1.4142135623730951, metric='euclidean', **
8787
8888
8989def _rsl_boruvka_kdtree (X , k = 5 , alpha = 1.0 ,
90- metric = 'euclidean' , leaf_size = 40 , ** kwargs ):
90+ metric = 'euclidean' , leaf_size = 40 ,
91+ core_dist_n_jobs = 4 , ** kwargs ):
92+
93+ if core_dist_n_jobs < 1 :
94+ core_dist_n_jobs = max (cpu_count () + 1 + core_dist_n_jobs , 1 )
9195
9296 dim = X .shape [0 ]
9397 min_samples = min (dim - 1 , k )
@@ -104,9 +108,13 @@ def _rsl_boruvka_kdtree(X, k=5, alpha=1.0,
104108
105109
106110def _rsl_boruvka_balltree (X , k = 5 , alpha = 1.0 ,
107- metric = 'euclidean' , leaf_size = 40 , ** kwargs ):
111+ metric = 'euclidean' , leaf_size = 40 ,
112+ core_dist_n_jobs = 4 , ** kwargs ):
108113
109- dim = X .shape [0 ]
114+ if core_dist_n_jobs < 1 :
115+ core_dist_n_jobs = max (cpu_count () + 1 + core_dist_n_jobs , 1 )
116+
117+ dim = X .shape [0 ]
110118 min_samples = min (dim - 1 , k )
111119
112120 tree = BallTree (X , metric = metric , leaf_size = leaf_size , ** kwargs )
@@ -122,7 +130,8 @@ def _rsl_boruvka_balltree(X, k=5, alpha=1.0,
122130
123131def robust_single_linkage (X , cut , k = 5 , alpha = 1.4142135623730951 ,
124132 gamma = 5 , metric = 'euclidean' , algorithm = 'best' ,
125- memory = Memory (cachedir = None , verbose = 0 ), leaf_size = 40 , ** kwargs ):
133+ memory = Memory (cachedir = None , verbose = 0 ), leaf_size = 40 ,
134+ core_dist_n_jobs = 4 , ** kwargs ):
126135 """Perform robust single linkage clustering from a vector array
127136 or distance matrix.
128137
@@ -180,6 +189,12 @@ def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951,
180189 Leaf size for trees responsible for fast nearest
181190 neighbour queries. (default 40)
182191
192+ core_dist_n_jobs : int, optional
193+ Number of parallel jobs to run in core distance computations (if
194+ supported by the specific algorithm). For ``core_dist_n_jobs``
195+ below 1, max(n_cpus + 1 + core_dist_n_jobs, 1) jobs are used.
196+ (default 4)
197+
183198 Returns
184199 -------
185200 labels : array [n_samples]
@@ -232,10 +247,12 @@ def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951,
232247 memory .cache (_rsl_prims_balltree )(X , k , alpha , metric , ** kwargs )
233248 elif algorithm == 'boruvka_kdtree' :
234249 single_linkage_tree = \
235- memory .cache (_rsl_boruvka_kdtree )(X , k , alpha , metric , leaf_size , ** kwargs )
250+ memory .cache (_rsl_boruvka_kdtree )(X , k , alpha , metric , leaf_size ,
251+ core_dist_n_jobs , ** kwargs )
236252 elif algorithm == 'boruvka_balltree' :
237253 single_linkage_tree = \
238- memory .cache (_rsl_boruvka_balltree )(X , k , alpha , metric , leaf_size , ** kwargs )
254+ memory .cache (_rsl_boruvka_balltree )(X , k , alpha , metric , leaf_size ,
255+ core_dist_n_jobs , ** kwargs )
239256 else :
240257 raise TypeError ('Unknown algorithm type %s specified' % algorithm )
241258 else :
@@ -249,21 +266,26 @@ def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951,
249266 memory .cache (_rsl_prims_kdtree )(X , k , alpha , metric , ** kwargs )
250267 else :
251268 single_linkage_tree = \
252- memory .cache (_rsl_boruvka_kdtree )(X , k , alpha , metric , ** kwargs )
269+ memory .cache (_rsl_boruvka_kdtree )(X , k , alpha , metric ,
270+ leaf_size ,
271+ core_dist_n_jobs ,
272+ ** kwargs )
253273 else : # Metric is a valid BallTree metric
254274 # Need heuristic to decide when to go to boruvka; still debugging for now
255275 if X .shape [1 ] > 128 :
256276 single_linkage_tree = \
257277 memory .cache (_rsl_prims_kdtree )(X , k , alpha , metric , ** kwargs )
258278 else :
259279 single_linkage_tree = \
260- memory .cache (_rsl_boruvka_balltree )(X , k , alpha , metric , ** kwargs )
280+ memory .cache (_rsl_boruvka_balltree )(X , k , alpha , metric ,
281+ leaf_size ,
282+ core_dist_n_jobs ,
283+ ** kwargs )
261284
262285 labels = single_linkage_tree .get_clusters (cut , gamma )
263286
264287 return labels , single_linkage_tree
265288
266-
267289class RobustSingleLinkage (BaseEstimator , ClusterMixin ):
268290 """Perform robust single linkage clustering from a vector array
269291 or distance matrix.
@@ -317,6 +339,13 @@ class RobustSingleLinkage(BaseEstimator, ClusterMixin):
317339 * ``large_kdtree``
318340 * ``large_kdtree_fastcluster``
319341
342+
343+ core_dist_n_jobs : int, optional
344+ Number of parallel jobs to run in core distance computations (if
345+ supported by the specific algorithm). For ``core_dist_n_jobs``
346+ below 1, max(n_cpus + 1 + core_dist_n_jobs, 1) jobs are used.
347+ (default 4)
348+
320349 Attributes
321350 -------
322351 labels_ : array [n_samples]
@@ -339,14 +368,15 @@ class RobustSingleLinkage(BaseEstimator, ClusterMixin):
339368 """
340369
341370 def __init__ (self , cut = 0.4 , k = 5 , alpha = 1.4142135623730951 , gamma = 5 , metric = 'euclidean' ,
342- algorithm = 'best' , ** kwargs ):
371+ algorithm = 'best' , core_dist_n_jobs = 4 , ** kwargs ):
343372
344373 self .cut = cut
345374 self .k = k
346375 self .alpha = alpha
347376 self .gamma = gamma
348377 self .metric = metric
349378 self .algorithm = algorithm
379+ self .core_dist_n_jobs = core_dist_n_jobs
350380
351381 self ._metric_kwargs = kwargs
352382
0 commit comments