Skip to content

Commit 0bc19df

Browse files
authored
Merge pull request NGO-Algorithm-Audit#5 from krstopro/master
Docs, more tests, release workflow, new name
2 parents 082e595 + 83521f9 commit 0bc19df

File tree

12 files changed

+180
-80
lines changed

12 files changed

+180
-80
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,16 @@
11
name: Continuous Integration Workflow
22

33
on:
4-
workflow_dispatch:
5-
push:
6-
branches:
7-
- master
8-
- dev
9-
pull_request:
10-
branches:
11-
- master
12-
- dev
4+
workflow_dispatch:
5+
push:
6+
pull_request:
137

148
jobs:
159
main:
1610
runs-on: ubuntu-latest
1711
strategy:
1812
matrix:
19-
python-version: ['3.11']
13+
python-version: ["3.11"]
2014
steps:
2115
- name: Checkout repository
2216
uses: actions/checkout@v4

.github/workflows/release.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Release Workflow
2+
3+
on:
4+
release:
5+
types: [created]
6+
7+
jobs:
8+
main:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- name: Continuous integration
12+
uses: ./.github/workflows/ci.yml
13+
14+
- name: Checkout repository
15+
uses: actions/checkout@v4
16+
17+
- name: Install poetry
18+
run: |
19+
pipx install poetry
20+
# poetry config virtualenvs.path .virtualenvs
21+
22+
- name: Publish to PyPI
23+
run: poetry publish --build --username "__token__" --password ${{ secrets.PIPY_TOKEN }}

bias_detection_tool/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""bias-detection-tool."""

bias_scan/clustering/__init__.py renamed to bias_detection_tool/clustering/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""The :mod:`bias_detection_tool.clustering` module implements bias-aware clustering algorithms."""
2+
13
from ._kmeans import BiasAwareHierarchicalKMeans
24
from ._kmodes import BiasAwareHierarchicalKModes
35

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ class BiasAwareHierarchicalClustering(ABC, BaseEstimator, ClusterMixin):
88
"""
99
Base class for Bias-Aware Hierarchical Clustering.
1010
11-
This abstract class specifies an interface for all bias-aware hierarchical
12-
clustering classes.
11+
This abstract class specifies an interface for all bias-aware hierarchical clustering classes.
1312
"""
1413

1514
def __init__(self, max_iter, min_cluster_size):
@@ -32,6 +31,9 @@ def fit(self, X, y):
3231
self : object
3332
Fitted estimator.
3433
"""
34+
X, y = self._validate_data(
35+
X, y, reset=False, accept_large_sparse=False, dtype=[np.float32, np.float64], order="C"
36+
)
3537
n_samples, _ = X.shape
3638
self.n_clusters_ = 1
3739
labels = np.zeros(n_samples, dtype=np.uint32)
@@ -40,15 +42,13 @@ def fit(self, X, y):
4042
label = 0
4143
bias = -np.mean(y)
4244
heap = [(None, label, bias)]
43-
print(labels)
4445
for _ in range(self.max_iter):
4546
if not heap:
4647
break
4748
_, label, bias = heapq.heappop(heap)
4849
cluster_indices = np.nonzero(labels == label)[0]
4950
cluster = X[cluster_indices]
5051
cluster_labels = self._split(cluster)
51-
# TODO: Maybe check if cluster_labels are 0s and 1s
5252
indices0 = cluster_indices[np.nonzero(cluster_labels == 0)[0]]
5353
indices1 = cluster_indices[np.nonzero(cluster_labels == 1)[0]]
5454
if (
@@ -74,13 +74,8 @@ def fit(self, X, y):
7474
else:
7575
clusters.append(label)
7676
biases.append(bias)
77-
print(labels)
78-
print(heap)
79-
print(clusters)
8077
clusters = np.array(clusters + [label for _, label, _ in heap])
8178
biases = np.array(biases + [bias for _, _, bias in heap])
82-
print(clusters)
83-
print(biases)
8479
indices = np.argsort(-biases)
8580
clusters = clusters[indices]
8681
self.biases_ = biases[indices]
@@ -91,15 +86,16 @@ def fit(self, X, y):
9186

9287
@abstractmethod
9388
def _split(self, X):
94-
"""Splits the data into two clusters.
89+
"""Split the data into two clusters.
9590
9691
Parameters
9792
----------
98-
X : array-like of shape (n_samples, n_features)
93+
X : ndarray of shape (n_samples, n_features)
9994
10095
Returns
10196
-------
102-
labels : (n_samples)
103-
ndarray of shape (n_samples,)
97+
labels : ndarray of shape (n_samples,)
98+
Cluster labels for each point. Every label is either 0 or 1, indicating
99+
that the point belongs to the first or the second cluster, respectively.
104100
"""
105101
pass
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from ._bahc import BiasAwareHierarchicalClustering
2+
from sklearn.cluster import KMeans
3+
4+
5+
class BiasAwareHierarchicalKMeans(BiasAwareHierarchicalClustering):
6+
"""Bias-Aware Hierarchical k-Means Clustering.
7+
8+
Parameters
9+
----------
10+
max_iter : int
11+
Maximum number of iterations.
12+
min_cluster_size : int
13+
Minimum size of a cluster.
14+
kmeans_params : dict
15+
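Keyword arguments passed to ``sklearn.cluster.KMeans``. If ``n_clusters`` is given it must be 2; ``n_init`` defaults to ``"auto"`` when omitted.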
16+
17+
Attributes
18+
----------
19+
n_clusters_ : int
20+
The number of clusters found by the algorithm.
21+
labels_ : ndarray of shape (n_samples,)
22+
Cluster labels for each point.
23+
biases_ : ndarray of shape (n_clusters_,)
24+
Bias values for each cluster.
25+
26+
References
27+
----------
28+
.. [1] J. Misztal-Radecka, B. Indurkhya, "Bias-Aware Hierarchical Clustering for detecting the discriminated
29+
groups of users in recommendation systems", Information Processing & Management, vol. 58, no. 3, May 2021.
30+
31+
Examples
32+
--------
33+
>>> from bias_detection_tool.clustering import BiasAwareHierarchicalKMeans
34+
>>> import numpy as np
35+
>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
36+
>>> y = np.array([0, 0, 0, 10, 10, 10])
37+
>>> bias_aware_kmeans = BiasAwareHierarchicalKMeans(max_iter=1, min_cluster_size=1, random_state=12).fit(X, y)
38+
>>> bias_aware_kmeans.labels_
39+
array([0, 0, 0, 1, 1, 1], dtype=uint32)
40+
>>> bias_aware_kmeans.biases_
41+
array([ 10., -10.])
42+
"""
43+
44+
def __init__(
45+
self,
46+
max_iter,
47+
min_cluster_size,
48+
**kmeans_params,
49+
):
50+
super().__init__(max_iter, min_cluster_size)
51+
52+
if "n_clusters" in kmeans_params and kmeans_params["n_clusters"] != 2:
53+
raise ValueError(
54+
f"The parameter `n_clusters` should be 2, got {kmeans_params['n_clusters']}."
55+
)
56+
else:
57+
kmeans_params["n_clusters"] = 2
58+
59+
if "n_init" not in kmeans_params:
60+
kmeans_params["n_init"] = "auto"
61+
62+
self.kmeans = KMeans(**kmeans_params)
63+
64+
def _split(self, X):
65+
return self.kmeans.fit_predict(X)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from ._bahc import BiasAwareHierarchicalClustering
2+
from kmodes.kmodes import KModes
3+
4+
5+
class BiasAwareHierarchicalKModes(BiasAwareHierarchicalClustering):
6+
"""Bias-Aware Hierarchical k-Modes Clustering.
7+
8+
Parameters
9+
----------
10+
max_iter : int
11+
Maximum number of iterations.
12+
min_cluster_size : int
13+
Minimum size of a cluster.
14+
kmodes_params : dict
15+
Keyword arguments passed to ``kmodes.kmodes.KModes``. If ``n_clusters`` is given it must be 2.
16+
17+
Attributes
18+
----------
19+
n_clusters_ : int
20+
The number of clusters found by the algorithm.
21+
labels_ : ndarray of shape (n_samples,)
22+
Cluster labels for each point.
23+
biases_ : ndarray of shape (n_clusters_,)
24+
Bias values for each cluster.
25+
26+
References
27+
----------
28+
.. [1] J. Misztal-Radecka, B. Indurkhya, "Bias-Aware Hierarchical Clustering for detecting the discriminated
29+
groups of users in recommendation systems", Information Processing & Management, vol. 58, no. 3, May 2021.
30+
31+
Examples
32+
--------
33+
>>> from bias_detection_tool.clustering import BiasAwareHierarchicalKModes
34+
>>> import numpy as np
35+
>>> X = np.array([[0, 1], [0, 2], [0, 0], [1, 4], [1, 5], [1, 3]])
36+
>>> y = np.array([0, 0, 0, 10, 10, 10])
37+
>>> bias_aware_kmodes = BiasAwareHierarchicalKModes(max_iter=1, min_cluster_size=1, random_state=12).fit(X, y)
38+
>>> bias_aware_kmodes.labels_
39+
array([0, 0, 0, 1, 1, 1], dtype=uint32)
40+
>>> bias_aware_kmodes.biases_
41+
array([ 10., -10.])
42+
"""
43+
44+
def __init__(self, max_iter, min_cluster_size, **kmodes_params):
45+
super().__init__(max_iter, min_cluster_size)
46+
47+
if "n_clusters" in kmodes_params and kmodes_params["n_clusters"] != 2:
48+
raise ValueError(
49+
f"The parameter `n_clusters` should be 2, got {kmodes_params['n_clusters']}."
50+
)
51+
else:
52+
kmodes_params["n_clusters"] = 2
53+
54+
self.kmodes = KModes(**kmodes_params)
55+
56+
def _split(self, X):
57+
return self.kmodes.fit_predict(X)

bias_scan/clustering/_kmeans.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

bias_scan/clustering/_kmodes.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

bias_scan/clustering/utils.py

Whitespace-only changes.

0 commit comments

Comments
 (0)