@@ -57,16 +57,22 @@ class HierarchicalClustering(BaseClusterMethod):
5757 evenly.
5858 """
5959
def __init__(self, data, distance_function, linkage=None, num_processes=1,
             progress_callback=None):
    """
    Set up the clustering object.

    :param data: The items to cluster. They are sorted before being handed
        to ``BaseClusterMethod.__init__``.
    :param distance_function: Passed through unchanged to
        ``BaseClusterMethod.__init__``.
    :param linkage: The linkage method, forwarded to
        ``set_linkage_method``. Any falsy value (including the default
        ``None``) selects ``single``.
    :param num_processes: Stored as ``self.num_processes``; it is not used
        any further inside this constructor.
    :param progress_callback: Optional callable stored as
        ``self.progress_callback``. ``publish_progress`` invokes it as
        ``progress_callback(total, current)``.
    """
    if not linkage:
        linkage = single
    logger.info("Initializing HierarchicalClustering object with linkage "
                "method %s", linkage)
    # The base class receives a sorted copy, so the caller's own sequence
    # is left untouched.
    BaseClusterMethod.__init__(self, sorted(data), distance_function)
    self.set_linkage_method(linkage)
    self.num_processes = num_processes
    self.progress_callback = progress_callback
    self.__cluster_created = False
6971
def publish_progress(self, total, current):
    """
    Report progress to the callback stored on this instance, if there is
    one.

    :param total: Forwarded to the callback as its first positional
        argument.
    :param current: Forwarded to the callback as its second positional
        argument.
    """
    callback = self.progress_callback
    # Test against None instead of truthiness: a callable object may
    # legitimately evaluate as false (for example a callable container
    # that is still empty) and must not be skipped silently.
    if callback is not None:
        callback(total, current)
75+
7076 def set_linkage_method (self , method ):
7177 """
7278 Sets the method to determine the distance between two clusters.
@@ -107,6 +113,7 @@ def cluster(self, matrix=None, level=None, sequence=None):
107113
108114 # if the matrix only has two rows left, we are done
109115 linkage = partial (self .linkage , distance_function = self .distance )
116+ initial_element_count = len (self ._data )
110117 while len (matrix ) > 2 or matrix == []:
111118
112119 item_item_matrix = Matrix (self ._data ,
@@ -151,6 +158,8 @@ def cluster(self, matrix=None, level=None, sequence=None):
151158 smallestpair [1 ])]) # remove item 2
152159 self ._data .append (cluster ) # append item 1 and 2 combined
153160
161+ self .publish_progress (initial_element_count , len (self ._data ))
162+
154163 # all the data is in one single cluster. We return that and stop
155164 self .__cluster_created = True
156165 logger .info ("Call to cluster() is complete" )
0 commit comments