Skip to content

Commit be17bb7

Browse files
Krsto Proroković authored and Krsto Proroković committed
Fix computing labels
1 parent 960d13b commit be17bb7

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"""unsupervised-bias-detection."""
1+
"""unsupervised-bias-detection."""

unsupervised_bias_detection/cluster/_bahc.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def fit(self, X, y):
9595
and len(indices1) >= self.bahc_min_cluster_size
9696
):
9797
# We calculate the discrimination scores using formula (1) in [1]
98+
# TODO: Move y[indices0] and y[indices1] into separate variables
99+
# to avoid recomputing them
98100
mask0 = np.ones(n_samples, dtype=bool)
99101
mask0[indices0] = False
100102
score0 = np.mean(y[mask0]) - np.mean(y[indices0])
@@ -129,16 +131,15 @@ def fit(self, X, y):
129131
scores = np.array(scores)
130132

131133
# We sort clusters by decreasing scores
132-
indices = np.argsort(-scores)
133-
self.scores_ = scores[indices]
134+
sorted_indices = np.argsort(-scores)
135+
self.scores_ = scores[sorted_indices]
134136
leaf_labels = np.array([leaf.label for leaf in leaves])
135-
leaf_labels = leaf_labels[indices]
136-
# TODO: Check this!!!
137-
for i, leaf in enumerate(leaves):
138-
leaf.label = leaf_labels[i]
137+
leaf_labels = leaf_labels[sorted_indices]
139138
label_mapping = np.zeros(self.n_clusters_, dtype=np.uint32)
140139
label_mapping[leaf_labels] = np.arange(self.n_clusters_, dtype=np.uint32)
141140
self.labels_ = label_mapping[labels]
141+
for leaf in leaves:
142+
leaf.label = label_mapping[leaf.label]
142143
return self
143144

144145
def predict(self, X):

0 commit comments

Comments
 (0)