@@ -124,7 +124,7 @@ def _silhouette(a, b):
124124 if a is None :
125125 # a is infinite, i.e. only one element
126126 # in the cluster and thus no distances?
127- return 0.
127+ return 0.0
128128
129129 # for the formula, see [Desgraupes2013].
130130 try :
@@ -167,10 +167,10 @@ def distance(self, i, j):
167167 return d
168168
169169 def matrix (self ):
170- raise ValueError (' LazyDistances does not support matrix()' )
170+ raise ValueError (" LazyDistances does not support matrix()" )
171171
172172 def _compute_distance (self , i , j ):
173- raise ValueError (' _compute_distance was not implemented' )
173+ raise ValueError (" _compute_distance was not implemented" )
174174
175175
176176def _shuffled_range (n ):
@@ -288,7 +288,8 @@ def __init__(self, last_criterion=None):
288288 def should_split (self , siblings , merged , unmerged , distance ):
289289 try :
290290 criterion = self ._approximate_global_silhouette_index (
291- siblings + unmerged , distance )
291+ siblings + unmerged , distance
292+ )
292293 except InfiniteSilhouette :
293294 # zero distance between two clusters, do not accept split.
294295 return False , None
@@ -302,9 +303,11 @@ def should_split(self, siblings, merged, unmerged, distance):
302303
303304 def _approximate_global_silhouette_index (self , clusters , distance ):
304305 if len (clusters ) <= 1 :
305- return - 1.
306+ return - 1.0
306307 else :
307- return fsum (self ._approximate_mean_silhouette_widths (clusters , distance )) / len (clusters )
308+ return fsum (
309+ self ._approximate_mean_silhouette_widths (clusters , distance )
310+ ) / len (clusters )
308311
309312 def _approximate_mean_silhouette_widths (self , clusters , distance ):
310313 d_in = self ._approximate_within_distances (clusters , distance )
@@ -341,6 +344,7 @@ def other_members(i):
341344 for i , a in medoids ():
342345 yield robust_min ((distance (a , b ) for b in other (i )))
343346
347+
344348AutomaticSplitCriterion = ApproximateSilhouetteSplitCriterion
345349
346350
@@ -443,10 +447,10 @@ def swap(self):
443447 try :
444448 i , h = self ._next_random_swap ()
445449 except StopIteration :
446- self ._debug (' all swaps tried' )
450+ self ._debug (" all swaps tried" )
447451 return False
448452
449- self ._debug (' eval swap' , i , h )
453+ self ._debug (" eval swap" , i , h )
450454
451455 # try to swap medoid i with non-medoid h
452456 clusters = self ._clusters
@@ -493,7 +497,7 @@ def calculate_t():
493497 t = fsum (calculate_t ())
494498
495499 if t < 0 : # swap is an improvement?
496- self ._debug (' ACCEPT swap t:%f' % t , i , h )
500+ self ._debug (" ACCEPT swap t:%f" % t , i , h )
497501 self ._cost += t
498502
499503 selected = self ._selected
@@ -565,16 +569,16 @@ def with_k(self, k):
565569 for i in range (num_local ):
566570 medoids = _Medoids (self , k )
567571
568- self .debug (' new local %f' % medoids .cost ())
572+ self .debug (" new local %f" % medoids .cost ())
569573 j = 1
570574 while j <= max_neighbours :
571575 if medoids .swap ():
572- self .debug (' NEW MEDOIDS (%f)' % medoids .cost (), * medoids ._selected )
576+ self .debug (" NEW MEDOIDS (%f)" % medoids .cost (), * medoids ._selected )
573577 j = 1
574578 else :
575579 j += 1
576580
577- self .debug (' end local %f' % medoids .cost ())
581+ self .debug (" end local %f" % medoids .cost ())
578582 if min_cost_medoids is None or medoids .cost () < min_cost_medoids .cost ():
579583 min_cost_medoids = medoids
580584
@@ -599,10 +603,15 @@ def without_k(self, criterion):
599603 merged = _Cluster (self ._medoid0 , list (chain (* [c .members for c in clusters0 ])))
600604
601605 split , new_criterion = criterion .should_split (
602- self ._siblings , merged , clusters0 , self ._d0 )
606+ self ._siblings , merged , clusters0 , self ._d0
607+ )
603608
604609 if self .debug_output :
605- print ([[self ._p0 [i ] for i in c .members ] for c in self ._siblings + clusters0 ], split , new_criterion )
610+ print (
611+ [[self ._p0 [i ] for i in c .members ] for c in self ._siblings + clusters0 ],
612+ split ,
613+ new_criterion ,
614+ )
606615
607616 if not split :
608617 return [[i_to_i0 [i ] for i in range (n )]]
@@ -612,12 +621,19 @@ def without_k(self, criterion):
612621 t = clusters0 [i ].members
613622 d = clusters0 [1 - i ]
614623
615- sub = _Clusterer (len (t ), t , clusters0 [i ].medoid , self ._siblings + [d ], self ._p0 , self ._d0 )
624+ sub = _Clusterer (
625+ len (t ),
626+ t ,
627+ clusters0 [i ].medoid ,
628+ self ._siblings + [d ],
629+ self ._p0 ,
630+ self ._d0 ,
631+ )
616632 r .extend (sub .without_k (new_criterion ))
617633 return r
618634
619635
620- def optimize (p , k , distances , mode = ' clusters' , seed = 12345 , granularity = 1. ):
636+ def optimize (p , k , distances , mode = " clusters" , seed = 12345 , granularity = 1.0 ):
621637 if k == 1 :
622638 return [p ]
623639
@@ -628,7 +644,8 @@ def optimize(p, k, distances, mode='clusters', seed=12345, granularity=1.):
628644 random .seed (seed )
629645
630646 clusterer = _Clusterer (
631- len (p ), tuple (range (len (p ))), None , [], p , distances .distance )
647+ len (p ), tuple (range (len (p ))), None , [], p , distances .distance
648+ )
632649
633650 if isinstance (k , tuple ) and len (k ) == 2 :
634651 criterion = k [0 ](** k [1 ])
@@ -642,12 +659,12 @@ def optimize(p, k, distances, mode='clusters', seed=12345, granularity=1.):
642659 # sort clusters by order of their first element in the original list.
643660 clusters = sorted (clusters , key = lambda c : c [0 ])
644661
645- if mode == ' clusters' :
662+ if mode == " clusters" :
646663 return list (map (lambda c : map (lambda i : p [i ], c ), clusters ))
647- elif mode == ' components' :
664+ elif mode == " components" :
648665 return _components (clusters , len (p ))
649666 else :
650- raise ValueError (' illegal mode %s' % mode )
667+ raise ValueError (" illegal mode %s" % mode )
651668 finally :
652669 random .setstate (random_state )
653670
@@ -684,8 +701,8 @@ def __init__(self, distances, n, merge_limit=None):
684701
685702 super (_DunnMergeCriterion , self ).__init__ (distances , n )
686703
687- self ._diameters = [0. ] * n
688- self ._max_diameter = 0.
704+ self ._diameters = [0.0 ] * n
705+ self ._max_diameter = 0.0
689706 self ._best_dunn = None
690707
691708 self ._merge_limit = merge_limit
@@ -696,7 +713,7 @@ def merge(self, clusters, i, j, d_min, save):
696713 dunn = (d_min , self ._max_diameter )
697714 if self ._best_dunn is None or _ratio_bigger_than (dunn , self ._best_dunn ):
698715 save ()
699- if self ._max_diameter > 0. :
716+ if self ._max_diameter > 0.0 :
700717 self ._best_dunn = dunn
701718
702719 # now perform the merge.
@@ -718,7 +735,7 @@ def merge(self, clusters, i, j, d_min, save):
718735AutomaticMergeCriterion = _DunnMergeCriterion
719736
720737
721- def agglomerate (points_and_weights , k , distances , mode = ' clusters' ):
738+ def agglomerate (points_and_weights , k , distances , mode = " clusters" ):
722739 # this is an implementation of heap-based clustering as described
723740 # by [Kurita1991].
724741
@@ -745,7 +762,7 @@ def agglomerate(points_and_weights, k, distances, mode='clusters'):
745762 # representant of each cluster only, 'components' returns the index of
746763 # the cluster each element is in for each element.
747764
748- if mode == ' dominant' :
765+ if mode == " dominant" :
749766 points , weight_ = points_and_weights
750767 weight = [x for x in weight_ ]
751768 else :
@@ -858,8 +875,7 @@ def reduce():
858875 lookup = [(i , j ) for i in range (n ) for j in range (i )]
859876
860877 where = list (range (len (triangular_distance_matrix )))
861- heap = [(d , z , u ) for d , z , u in zip (
862- triangular_distance_matrix , where , pairs )]
878+ heap = [(d , z , u ) for d , z , u in zip (triangular_distance_matrix , where , pairs )]
863879
864880 for s in range (len (heap ) // 2 - 1 , - 1 , - 1 ): # ..., 1, 0
865881 shiftdown (s , heap , where )
@@ -870,7 +886,7 @@ def reduce():
870886 # save() allows to put a different configuration into "best" and keep on clustering and
871887 # return the "best" configuration later on as result.
872888
873- if mode in (' clusters' , ' components' ):
889+ if mode in (" clusters" , " components" ):
874890 dominant = False
875891 best = [clusters ]
876892
@@ -883,11 +899,12 @@ def result():
883899 # sort, so clusters appear in order of their first element appearance in the original list.
884900 r = sorted ([sorted (c ) for c in best_clusters if c ], key = lambda c : c [0 ])
885901
886- if mode == ' components' :
902+ if mode == " components" :
887903 return _components (r , n )
888- elif mode == ' clusters' :
904+ elif mode == " clusters" :
889905 return [[points [i ] for i in c ] for c in r ]
890- elif mode == 'dominant' :
906+
907+ elif mode == "dominant" :
891908 dominant = True
892909 best = [clusters , weight ]
893910
@@ -897,18 +914,27 @@ def save(): # save current configuration
897914
898915 def result ():
899916 best_clusters , best_weight = best
900- prototypes = [(points [i ], best_weight [i ], c ) for i , c in enumerate (best_clusters ) if c is not None ]
901- return sorted (prototypes , key = lambda t : t [1 ], reverse = True ) # most weighted first
917+ prototypes = [
918+ (points [i ], best_weight [i ], c )
919+ for i , c in enumerate (best_clusters )
920+ if c is not None
921+ ]
922+ return sorted (
923+ prototypes , key = lambda t : t [1 ], reverse = True
924+ ) # most weighted first
925+
902926 else :
903- raise ValueError (' illegal mode %s' % mode )
927+ raise ValueError (" illegal mode %s" % mode )
904928
905929 while len (heap ) > 0 and n_clusters > n_clusters_target :
906930 d , p , _ = heap [0 ]
907931
908932 i , j = lookup [p ]
909933
910934 if dominant :
911- if weight [j ] > weight [i ]: # always merge smaller (j) into larger, dominant (i)
935+ if (
936+ weight [j ] > weight [i ]
937+ ): # always merge smaller (j) into larger, dominant (i)
912938 i , j = j , i
913939 elif i > j :
914940 i , j = j , i # merge later chunk to earlier one to preserve order
@@ -1060,7 +1086,7 @@ def _kmeans(self, k):
10601086
10611087 # find new assignments
10621088 for i , ai in enumerate (a ):
1063- m = max (s [ai ] / 2. , l [i ])
1089+ m = max (s [ai ] / 2.0 , l [i ])
10641090
10651091 if u [i ] > m :
10661092 xi = x [i ]
@@ -1111,7 +1137,7 @@ def _kmeans(self, k):
11111137 within [ai ] += d (x [i ], c [ai ])
11121138 for j in range (len (c )):
11131139 if q [j ] == 1 :
1114- return a , - 1. # no good config
1140+ return a , - 1.0 # no good config
11151141 within [j ] /= q [j ] - 1
11161142
11171143 silhouette = fsum (_silhouette (a , b ) for a , b in zip (within , s )) / len (c )
@@ -1182,9 +1208,9 @@ def kmeans(x, x_repr, k, mode, seed, epsilon):
11821208 assert 1 < k < len (x )
11831209 a , _ , k = km .with_k (k )
11841210
1185- if mode == ' clusters' :
1211+ if mode == " clusters" :
11861212 return _clusters (x_repr , a , k )
1187- elif mode == ' components' :
1213+ elif mode == " components" :
11881214 return a
11891215 else :
1190- raise ValueError (' illegal mode %s' % mode )
1216+ raise ValueError (" illegal mode %s" % mode )
0 commit comments