@@ -95,6 +95,8 @@ def fit(self, X, y):
9595 and len (indices1 ) >= self .bahc_min_cluster_size
9696 ):
9797 # We calculate the discrimination scores using formula (1) in [1]
98+ # TODO: Move y[indices0] and y[indices1] into separate variables
99+ # to avoid recomputing them
98100 mask0 = np .ones (n_samples , dtype = bool )
99101 mask0 [indices0 ] = False
100102 score0 = np .mean (y [mask0 ]) - np .mean (y [indices0 ])
@@ -129,16 +131,15 @@ def fit(self, X, y):
129131 scores = np .array (scores )
130132
131133 # We sort clusters by decreasing scores
132- indices = np .argsort (- scores )
133- self .scores_ = scores [indices ]
134+ sorted_indices = np .argsort (- scores )
135+ self .scores_ = scores [sorted_indices ]
134136 leaf_labels = np .array ([leaf .label for leaf in leaves ])
135- leaf_labels = leaf_labels [indices ]
136- # TODO: Check this!!!
137- for i , leaf in enumerate (leaves ):
138- leaf .label = leaf_labels [i ]
137+ leaf_labels = leaf_labels [sorted_indices ]
139138 label_mapping = np .zeros (self .n_clusters_ , dtype = np .uint32 )
140139 label_mapping [leaf_labels ] = np .arange (self .n_clusters_ , dtype = np .uint32 )
141140 self .labels_ = label_mapping [labels ]
141+ for leaf in leaves :
142+ leaf .label = label_mapping [leaf .label ]
142143 return self
143144
144145 def predict (self , X ):
0 commit comments