@@ -41,18 +41,25 @@ class HierarchicalClustering(BaseClusterMethod):
4141 Note that all of the returned clusters are more than 90 apart
4242 """
4343
44- def __init__ (self , data , distance_function , linkage = None ):
44+ def __init__ (self , data , distance_function , linkage = None , num_processes = 1 ):
4545 """
4646 Constructor
4747
4848 See BaseClusterMethod.__init__ for more details.
49+
50+ num_processes
51+ - To use multiprocessing and run genmatrix() in parallel,
52+ set num_processes to a value greater than 1. That many
53+ workers will be spun up and the work will be split
54+ evenly amongst them. Default: 1
4955 """
5056 if not linkage :
5157 linkage = 'single'
5258 logger .info ("Initializing HierarchicalClustering object with linkage "
5359 "method %s" , linkage )
5460 BaseClusterMethod .__init__ (self , data , distance_function )
5561 self .set_linkage_method (linkage )
62+ self .num_processes = num_processes
5663 self .__cluster_created = False
5764
5865 def set_linkage_method (self , method ):
@@ -193,7 +200,7 @@ def single_linkage_distance(self, x, y):
193200
194201 return mindist
195202
196- def cluster (self , matrix = None , level = None , sequence = None , num_processes = None ):
203+ def cluster (self , matrix = None , level = None , sequence = None ):
197204 """
198205 Perform hierarchical clustering. This method is automatically called
199206 by the constructor so you should not need to call it explicitly.
@@ -204,11 +211,6 @@ def cluster(self, matrix=None, level=None, sequence=None, num_processes=None):
204211 other
205212 level - The current level of clustering
206213 sequence - The sequence number of the clustering
207- num_processes
208- - If you want to use multiprocessing to split up the work
209- and run genmatrix() in parallel, specify num_processes
210- > 1 and this number of workers will be spun up, the work
211- split up amongst them evenly. Default: 1
212214 """
213215 logger .info ("Performing cluster()" )
214216
@@ -221,7 +223,11 @@ def cluster(self, matrix=None, level=None, sequence=None, num_processes=None):
221223 # if the matrix only has two rows left, we are done
222224 while len (matrix ) > 2 or matrix == []:
223225
224- matrix = genmatrix (self ._data , self .linkage , True , 0 , num_processes )
226+ matrix = genmatrix (self ._data ,
227+ self .linkage ,
228+ True ,
229+ 0 ,
230+ self .num_processes )
225231
226232 smallestpair = None
227233 mindistance = None
0 commit comments