Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
ce4c567
chore: update files for new gitflow (#227)
crismunoz Sep 23, 2024
b883add
chore: update readme (#229)
crismunoz Sep 23, 2024
830b660
docs: update tutorials
crismunoz Sep 25, 2024
40fba24
docs: update tutorials (#230)
crismunoz Sep 25, 2024
f9316f4
feat: new datasets and docs (#231)
Kleyt0n Sep 25, 2024
48c613d
Merge branch 'develop' of github.com:holistic-ai/holisticai into docs…
crismunoz Sep 25, 2024
5b6e36d
docs: rename lib (#234)
crismunoz Sep 26, 2024
b5b8ff4
fix: datasets labels (#245)
Kleyt0n Oct 7, 2024
922d341
feature: update dataset_shift module (#248)
andrelfnovaes Oct 16, 2024
ea1eece
chore(deps): bump actions/checkout from 4.1.7 to 4.2.2 (#250)
dependabot[bot] Oct 30, 2024
26e6b5c
chore(deps): bump actions/setup-python from 5.2.0 to 5.3.0 (#251)
dependabot[bot] Oct 30, 2024
55e3223
feat: New Robustness Regression Attacker: Ridge Poisoner (#241)
fracarfer5 Oct 31, 2024
91f6a00
feat: new methods for measuring security privacy risk score (#254)
fracarfer5 Oct 31, 2024
1c5293e
feat: new xai metrics based on global feature importance, local featur…
crismunoz Oct 31, 2024
a57247c
chore: Improve Accuracy Degradation Profile Documentation (#249)
andrelfnovaes Nov 1, 2024
a4aa7e0
docs: update tutorials (#257)
crismunoz Nov 1, 2024
8cf0fb0
docs: updating docstrings for regression poisoners (#256)
fracarfer5 Nov 1, 2024
7d93e93
fix: load dataset (#259)
Kleyt0n Nov 1, 2024
a833ee0
Merge branch 'main' of github.com:holistic-ai/holisticai into develop
crismunoz Nov 1, 2024
8e42bc6
Delete paper directory (#262)
crismunoz Nov 4, 2024
4a180e5
Delete README.rst
crismunoz Nov 4, 2024
df99fb8
Delete workflows directory (#263)
crismunoz Nov 4, 2024
f80f467
Features/improvements (#265)
Kleyt0n Nov 5, 2024
59950e3
add: bias tradeoff example (#266)
Kleyt0n Nov 5, 2024
832afc1
add: bank marketing description (#267)
Kleyt0n Nov 6, 2024
4593297
chore: updating hackathon notebook with privacy risk score interpretat…
fracarfer5 Nov 6, 2024
3084f57
docs: update documentation (#274)
crismunoz Nov 6, 2024
accea56
chore: fix: conflicts merge develop main
crismunoz Nov 6, 2024
0457704
fix: merge conflicts
crismunoz Nov 6, 2024
31c18e7
Features/update documentation (#276)
crismunoz Nov 7, 2024
5cb050b
feat: remove dataset dependence of sklearn (#277)
Kleyt0n Nov 8, 2024
8d52e71
Merge branch 'main' into develop
crismunoz Nov 19, 2024
590dbb4
fix: format (#280)
Kleyt0n Nov 19, 2024
c7f2bbb
chore(deps): bump numpy from 1.26 to 2.2.0 (#281)
dependabot[bot] Dec 12, 2024
841f16e
chore(deps): bump slackapi/slack-github-action from 1.27.0 to 2.0.0 (…
dependabot[bot] Dec 12, 2024
74048f9
Rename rs_fair_topk_fa*ir_algorithm.rst to rs_fair_topk_fair_algorith…
crismunoz Feb 10, 2025
a47d40d
Hotfix (#286)
crismunoz Feb 10, 2025
b89b7d2
chore: Split dependencies (#293)
crismunoz Feb 12, 2025
5d6c02f
Update install.rst
crismunoz Feb 12, 2025
1ccb768
Merge branch 'main' into develop
crismunoz Feb 12, 2025
6b383ab
chore: update workflow to create namespace holisticai (#296)
crismunoz Mar 3, 2025
fd9cb65
Merge branch 'main' into develop
crismunoz Mar 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/publish-testpypi-workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Manually-triggered workflow that builds the distribution with hatch and
# uploads it to TestPyPI (note `hatch publish --repo test` below).
# Renamed from "Publish PyPI": the old name was misleading — this workflow
# never touches the production index.
name: Publish TestPyPI

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Mint an installation token for the GitHub App so later steps can
      # act with the App's permissions instead of the default GITHUB_TOKEN.
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ secrets.APPLICATION_ID }}
          private-key: ${{ secrets.APPLICATION_PRIVATE_KEY }}

      - name: Checkout code
        uses: actions/checkout@v4.2.2
        with:
          fetch-tags: true # required so `git describe --tags` can see release tags
          fetch-depth: 0 # full history: `git rev-list --tags` needs it
          ref: main
          token: ${{ steps.app-token.outputs.token }} # Needed to trigger other actions

      - name: Set up Python
        uses: actions/setup-python@v5.3.0
        with:
          python-version: "3.9"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hatch

      # Resolve the most recent tag; exported via GITHUB_ENV so any
      # follow-up step added later can read ${{ env.tag }}.
      - name: Get Latest Release Version
        id: get_release
        run: |
          latest_tag=$(git describe --tags `git rev-list --tags --max-count=1`)
          echo "Latest release version: $latest_tag"
          echo "tag=$latest_tag" >> $GITHUB_ENV # Save the tag to an environment variable

      - name: Publish to TestPyPI
        env:
          HATCH_INDEX_USER: __token__
          HATCH_INDEX_AUTH: ${{ secrets.PYPI_TOKEN }}
        run: |
          hatch build
          hatch publish --repo test
12 changes: 11 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,14 @@ foo = ["which python"]
env-vars = { PYTHONPATH = "src" }

[tool.hatch.build.targets.wheel]
packages = ["src/holisticai"]
packages = [
"src/holisticai/bias",
"src/holisticai/explainability",
"src/holisticai/robustness",
"src/holisticai/efficacy",
"src/holisticai/inspection",
"src/holisticai/pipeline",
"src/holisticai/security",
"src/holisticai/typing",
"src/holisticai/utils"
]
2 changes: 1 addition & 1 deletion ruff_default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ select = [
"ICN001",
"ICN002",
"ICN003",
"INP001",
#"INP001",
"INT001",
"INT002",
"INT003",
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from typing import Optional

from holisticai.bias.mitigation.commons.disparate_impact_remover._categorical_repairer import CategoricalRepairer
from holisticai.bias.mitigation.commons.disparate_impact_remover._utils import (
freedman_diaconis_bin_size as bin_calculator,
Expand Down Expand Up @@ -46,7 +44,7 @@ def __init__(
feature_to_repair: int,
repair_level: float,
kdd: bool = False,
features_to_ignore: Optional[list[str]] = None,
features_to_ignore: list[str] | None = None,
):
if features_to_ignore is None:
features_to_ignore = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import math
import random
from copy import deepcopy
from typing import Union

import numpy as np
from holisticai.bias.mitigation.commons.disparate_impact_remover._categorical_feature import CategoricalFeature
Expand Down Expand Up @@ -32,7 +31,7 @@ def get_categories_count_norm(categories, all_stratified_groups, count_dict, gro
dict
The dictionary containing the normalized count for each category.
"""
norm = {
return {
cat: SparseList(
data=(
count_dict[cat][i] * (1.0 / len(group_features[group].data)) if group_features[group].data else 0.0
Expand All @@ -41,7 +40,6 @@ def get_categories_count_norm(categories, all_stratified_groups, count_dict, gro
)
for cat in categories
}
return norm


def gen_desired_dist(group_index, cat, col_id, median, repair_level, norm_counts, feature_to_remove, mode):
Expand Down Expand Up @@ -158,7 +156,7 @@ def get_categories_count(categories, all_stratified_groups, group_feature):
dict
The dictionary containing the count for each category.
"""
count_dict = {
return {
cat: SparseList(
data=(
group_feature[group].category_count[cat] if cat in group_feature[group].category_count else 0
Expand All @@ -168,8 +166,6 @@ def get_categories_count(categories, all_stratified_groups, group_feature):
for cat in categories
}

return count_dict


def gen_desired_count(group_index, group, category, median, group_features, repair_level, categories_count):
"""
Expand Down Expand Up @@ -200,8 +196,7 @@ def gen_desired_count(group_index, group, category, median, group_features, repa
med = median[category]
size = len(group_features[group].data)
count = categories_count[category][group_index]
des_count = math.floor(((1 - repair_level) * count) + (repair_level) * med * size)
return des_count
return math.floor(((1 - repair_level) * count) + (repair_level) * med * size)


def flow_on_group_features(all_stratified_groups, group_features, repair_generator):
Expand Down Expand Up @@ -283,7 +278,7 @@ def get_count_norm(count, group_feature_data):
return 0.0


def get_column_type(values: Union[list, np.ndarray]):
def get_column_type(values: list | np.ndarray):
"""
Get the type of the column.

Expand Down Expand Up @@ -322,7 +317,8 @@ def get_median(values, kdd):
The median of the list of values.
"""
if not values:
raise ValueError("Cannot calculate median of list with no values!")
msg = "Cannot calculate median of list with no values!"
raise ValueError(msg)

sorted_values = deepcopy(values)
sorted_values.sort() # Not calling `sorted` b/c `sorted_values` may not be list.
Expand Down Expand Up @@ -450,9 +446,7 @@ def make_histogram_bins(bin_size_calculator, data, col_id):
index_bins[bin_num].append(row_index)
break

index_bins = [b for b in index_bins if b]

return index_bins
return [b for b in index_bins if b]


def freedman_diaconis_bin_size(feature_values):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def assign(self):
list
The mapping of points to their closest centers.
"""
mapping = [
return [
(
i,
sorted(
Expand All @@ -95,5 +95,3 @@ def assign(self):
)
for i in range(len(self.data))
]

return mapping
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def _check_nonnegative_int(self, value, desc, strict=True):
"""
negative = value is None or value <= 0 if strict else value is None or value < 0
if negative or not isinstance(value, (int, np.integer)):
raise ValueError(f"{desc} should be a nonnegative integer. " f"{value} was given")
msg = f"{desc} should be a nonnegative integer. " f"{value} was given"
raise ValueError(msg)

def _check_init_args(self):
"""
Expand Down Expand Up @@ -366,9 +367,7 @@ def _compute_inertia(self, distances):

# Define inertia as the sum of the sample-distances
# to closest cluster centers
inertia = np.sum(np.min(distances, axis=1))

return inertia
return np.sum(np.min(distances, axis=1))

def _initialize_medoids(self, D, n_clusters, random_state_):
"""
Expand Down Expand Up @@ -399,7 +398,8 @@ def _initialize_medoids(self, D, n_clusters, random_state_):
# to every other point. These are the initial medoids.
medoids = np.argpartition(np.sum(D, axis=1), n_clusters - 1)[:n_clusters]
else:
raise ValueError(f"init value '{self.init}' not recognized")
msg = f"init value '{self.init}' not recognized"
raise ValueError(msg)

return medoids

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ def decompose(self, node, dataset, donelist, depth):

if sum(R) == 0 or sum(B) == 0:
if sum(R) == 0 and sum(B) == 0:
raise ValueError("One color class became empty for this node while the other did not")
msg = "One color class became empty for this node while the other did not"
raise ValueError(msg)
return 0

NR = 0
Expand Down Expand Up @@ -226,7 +227,8 @@ def decompose(self, node, dataset, donelist, depth):
NB += excess_blue

if self.balanced(p, q, NR, NB):
raise ValueError("Constructed node sets are unbalanced")
msg = "Constructed node sets are unbalanced"
raise ValueError(msg)

reds = []
blues = []
Expand All @@ -239,7 +241,8 @@ def decompose(self, node, dataset, donelist, depth):
donelist[j] = 1

if len(reds) == NR and len(blues) == NB:
raise ValueError("Something went horribly wrong")
msg = "Something went horribly wrong"
raise ValueError(msg)

return super().decompose(blues, reds, dataset) + sum(
[self.decompose(child, dataset, donelist, depth + 1) for child in node.children]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ def _decompose(self, node, dataset, donelist, depth):
NB += excess_blue

if self.balanced(p, q, NR, NB):
raise ValueError("Constructed node sets are unbalanced")
msg = "Constructed node sets are unbalanced"
raise ValueError(msg)

reds = []
blues = []
Expand All @@ -235,7 +236,8 @@ def _decompose(self, node, dataset, donelist, depth):
donelist[j] = 1

if len(reds) == NR and len(blues) == NB:
raise ValueError("Something went horribly wrong")
msg = "Something went horribly wrong"
raise ValueError(msg)

return super()._decompose(blues, reds, dataset) + sum(
[self._decompose(child, dataset, donelist, depth + 1) for child in node.children]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ def adv_loss_fn(adversarial_params, classifier_params, batch, rng):
_, y_logits = cls_state.apply_fn({"params": classifier_params}, x, trainable=True, rngs=rngs)
_, z_logits = adv_state.apply_fn({"params": adversarial_params}, y_logits, y, trainable=True, rngs=rngs)

loss_adv = optax.sigmoid_binary_cross_entropy(z_logits, group).mean()
return loss_adv
return optax.sigmoid_binary_cross_entropy(z_logits, group).mean()

(loss, (loss_cls, loss_adv)), grads = jax.value_and_grad(loss_fn, argnums=(0), has_aux=True)(
cls_state.params, adv_state.params, batch, rng
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from typing import Optional

import jax
import jax.numpy as jnp
Expand All @@ -26,7 +25,8 @@ def is_numeric(df):
return all(pd.api.types.is_numeric_dtype(df[col]) for col in df.columns)
if isinstance(df, np.ndarray):
return np.issubdtype(df.dtype, np.number)
raise ValueError("Input must be a pandas DataFrame or numpy array.")
msg = "Input must be a pandas DataFrame or numpy array."
raise ValueError(msg)


class AdversarialDebiasing(BMImp):
Expand Down Expand Up @@ -95,19 +95,19 @@ class AdversarialDebiasing(BMImp):

def __init__(
self,
features_dim: Optional[int] = None,
keep_prob: Optional[float] = 0.1,
hidden_size: Optional[int] = 128,
batch_size: Optional[int] = 32,
shuffle: Optional[bool] = True,
epochs: Optional[int] = 10,
learning_rate: Optional[float] = 0.01,
use_debias: Optional[bool] = True,
adversary_loss_weight: Optional[float] = 0.1,
verbose: Optional[int] = 1,
print_interval: Optional[int] = 100,
device: Optional[str] = "cpu",
seed: Optional[int] = None,
features_dim: int | None = None,
keep_prob: float | None = 0.1,
hidden_size: int | None = 128,
batch_size: int | None = 32,
shuffle: bool | None = True,
epochs: int | None = 10,
learning_rate: float | None = 0.01,
use_debias: bool | None = True,
adversary_loss_weight: float | None = 0.1,
verbose: int | None = 1,
print_interval: int | None = 100,
device: str | None = "cpu",
seed: int | None = None,
):
# default classifier config
self.features_dim = features_dim
Expand Down Expand Up @@ -179,7 +179,8 @@ def fit(
params = self._load_data(X=X, y=y, group_a=group_a, group_b=group_b)
x = pd.DataFrame(params["X"])
if not is_numeric(x):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

y = pd.Series(params["y"])
group_a = pd.Series(params["group_a"])
Expand Down Expand Up @@ -245,7 +246,8 @@ def predict(self, X):
np.ndarray: Predicted output per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)
p = self.predict_proba(X)
return np.argmax(p, axis=1).ravel()

Expand All @@ -268,7 +270,8 @@ def predict_proba(self, X):
np.ndarray: Predicted matrix probability per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

proba = np.empty((X.shape[0], 2))
proba[:, 1] = self._predict_proba(X)
Expand All @@ -294,7 +297,7 @@ def predict_score(self, X):
np.ndarray: Predicted probability per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

p = self._predict(X).reshape([-1])
return p
return self._predict(X).reshape([-1])
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,11 @@ def project_lambda(self, lambda_vec):
lambda_neg = -lambda_pos
lambda_pos[lambda_pos < 0.0] = 0.0
lambda_neg[lambda_neg < 0.0] = 0.0
lambda_projected = pd.concat(
return pd.concat(
[lambda_pos, lambda_neg],
keys=["+", "-"],
names=[_SIGNED, _EVENT, _GROUP_ID],
)
return lambda_projected
return lambda_vec

def bound(self):
Expand Down
Loading