Skip to content

Commit 34b4845

Browse files
committed
Moved num_processes param to HierarchicalClustering constructor
1 parent b14fed0 commit 34b4845

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

cluster/method/hierarchical.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,18 +41,25 @@ class HierarchicalClustering(BaseClusterMethod):
4141
Note that all of the returned clusters are more than 90 apart
4242
"""
4343

44-
def __init__(self, data, distance_function, linkage=None):
44+
def __init__(self, data, distance_function, linkage=None, num_processes=1):
4545
"""
4646
Constructor
4747
4848
See BaseClusterMethod.__init__ for more details.
49+
50+
num_processes
51+
- If you want to use multiprocessing to split up the work
52+
and run genmatrix() in parallel, specify num_processes
53+
> 1 and this number of workers will be spun up, the work
54+
split up amongst them evenly. Default: 1
4955
"""
5056
if not linkage:
5157
linkage = 'single'
5258
logger.info("Initializing HierarchicalClustering object with linkage "
5359
"method %s", linkage)
5460
BaseClusterMethod.__init__(self, data, distance_function)
5561
self.set_linkage_method(linkage)
62+
self.num_processes = num_processes
5663
self.__cluster_created = False
5764

5865
def set_linkage_method(self, method):
@@ -193,7 +200,7 @@ def single_linkage_distance(self, x, y):
193200

194201
return mindist
195202

196-
def cluster(self, matrix=None, level=None, sequence=None, num_processes=None):
203+
def cluster(self, matrix=None, level=None, sequence=None):
197204
"""
198205
Perform hierarchical clustering. This method is automatically called
199206
by the constructor so you should not need to call it explicitly.
@@ -204,11 +211,6 @@ def cluster(self, matrix=None, level=None, sequence=None, num_processes=None):
204211
other
205212
level - The current level of clustering
206213
sequence - The sequence number of the clustering
207-
num_processes
208-
- If you want to use multiprocessing to split up the work
209-
and run genmatrix() in parallel, specify num_processes
210-
> 1 and this number of workers will be spun up, the work
211-
split up amongst them evenly. Default: 1
212214
"""
213215
logger.info("Performing cluster()")
214216

@@ -221,7 +223,11 @@ def cluster(self, matrix=None, level=None, sequence=None, num_processes=None):
221223
# if the matrix only has two rows left, we are done
222224
while len(matrix) > 2 or matrix == []:
223225

224-
matrix = genmatrix(self._data, self.linkage, True, 0, num_processes)
226+
matrix = genmatrix(self._data,
227+
self.linkage,
228+
True,
229+
0,
230+
self.num_processes)
225231

226232
smallestpair = None
227233
mindistance = None

0 commit comments

Comments
 (0)