1515# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1616#
1717
18+ from functools import partial
1819import logging
1920
2021from cluster .cluster import Cluster
2122from cluster .matrix import Matrix
2223from cluster .method .base import BaseClusterMethod
2324from cluster .util import median , mean , fullyflatten
25+ from cluster .linkage import single , complete , average , uclus
2426
2527
2628logger = logging .getLogger (__name__ )
@@ -58,7 +60,7 @@ class HierarchicalClustering(BaseClusterMethod):
5860
5961 def __init__ (self , data , distance_function , linkage = None , num_processes = 1 ):
6062 if not linkage :
61- linkage = ' single'
63+ linkage = single
6264 logger .info ("Initializing HierarchicalClustering object with linkage "
6365 "method %s" , linkage )
6466 BaseClusterMethod .__init__ (self , sorted (data ), distance_function )
@@ -74,131 +76,19 @@ def set_linkage_method(self, method):
7476 ``'single'``, ``'complete'``, ``'average'`` or ``'uclus'``.
7577 """
7678 if method == 'single' :
77- self .linkage = self . single_linkage_distance
79+ self .linkage = single
7880 elif method == 'complete' :
79- self .linkage = self . complete_linkage_distance
81+ self .linkage = complete
8082 elif method == 'average' :
81- self .linkage = self . average_linkage_distance
83+ self .linkage = average
8284 elif method == 'uclus' :
83- self .linkage = self .uclus_distance
85+ self .linkage = uclus
86+ elif hasattr (method , '__call__' ):
87+ self .linkage = method
8488 else :
8589 raise ValueError ('distance method must be one of single, '
8690 'complete, average of uclus' )
8791
88- def uclus_distance (self , x , y ):
89- """
90- The method to determine the distance between one cluster and another
91- item/cluster. The distance equals the *average* (median) distance
92- from any member of one cluster to any member of the other cluster.
93-
94- :param x: first cluster/item.
95- :param y: second cluster/item.
96- """
97- # create a flat list of all the items in <x>
98- if not isinstance (x , Cluster ):
99- x = [x ]
100- else :
101- x = fullyflatten (x .items )
102-
103- # create a flat list of all the items in <y>
104- if not isinstance (y , Cluster ):
105- y = [y ]
106- else :
107- y = fullyflatten (y .items )
108-
109- distances = []
110- for k in x :
111- for l in y :
112- distances .append (self .distance (k , l ))
113- return median (distances )
114-
115- def average_linkage_distance (self , x , y ):
116- """
117- The method to determine the distance between one cluster and another
118- item/cluster. The distance equals the *average* (mean) distance
119- from any member of one cluster to any member of the other cluster.
120-
121- :param x: first cluster/item.
122- :param y: second cluster/item.
123- """
124- # create a flat list of all the items in <x>
125- if not isinstance (x , Cluster ):
126- x = [x ]
127- else :
128- x = fullyflatten (x .items )
129-
130- # create a flat list of all the items in <y>
131- if not isinstance (y , Cluster ):
132- y = [y ]
133- else :
134- y = fullyflatten (y .items )
135-
136- distances = []
137- for k in x :
138- for l in y :
139- distances .append (self .distance (k , l ))
140- return mean (distances )
141-
142- def complete_linkage_distance (self , x , y ):
143- """
144- The method to determine the distance between one cluster and another
145- item/cluster. The distance equals the *longest* distance from any
146- member of one cluster to any member of the other cluster.
147-
148- :param x: first cluster/item.
149- :param y: second cluster/item.
150- """
151-
152- # create a flat list of all the items in <x>
153- if not isinstance (x , Cluster ):
154- x = [x ]
155- else :
156- x = fullyflatten (x .items )
157-
158- # create a flat list of all the items in <y>
159- if not isinstance (y , Cluster ):
160- y = [y ]
161- else :
162- y = fullyflatten (y .items )
163-
164- # retrieve the maximum distance (complete-linkage)
165- maxdist = self .distance (x [0 ], y [0 ])
166- for k in x :
167- for l in y :
168- maxdist = max (maxdist , self .distance (k , l ))
169-
170- return maxdist
171-
172- def single_linkage_distance (self , x , y ):
173- """
174- The method to determine the distance between one cluster and another
175- item/cluster. The distance equals the *shortest* distance from any
176- member of one cluster to any member of the other cluster.
177-
178- :param x: first cluster/item.
179- :param y: second cluster/item.
180- """
181-
182- # create a flat list of all the items in <x>
183- if not isinstance (x , Cluster ):
184- x = [x ]
185- else :
186- x = fullyflatten (x .items )
187-
188- # create a flat list of all the items in <y>
189- if not isinstance (y , Cluster ):
190- y = [y ]
191- else :
192- y = fullyflatten (y .items )
193-
194- # retrieve the minimum distance (single-linkage)
195- mindist = self .distance (x [0 ], y [0 ])
196- for k in x :
197- for l in y :
198- mindist = min (mindist , self .distance (k , l ))
199-
200- return mindist
201-
20292 def cluster (self , matrix = None , level = None , sequence = None ):
20393 """
20494 Perform hierarchical clustering.
@@ -217,10 +107,11 @@ def cluster(self, matrix=None, level=None, sequence=None):
217107 matrix = []
218108
219109 # if the matrix only has two rows left, we are done
110+ linkage = partial (self .linkage , distance_function = self .distance )
220111 while len (matrix ) > 2 or matrix == []:
221112
222113 item_item_matrix = Matrix (self ._data ,
223- self . linkage ,
114+ linkage ,
224115 True ,
225116 0 )
226117 item_item_matrix .genmatrix (self .num_processes )
0 commit comments