Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
ce4c567
chore: update files for new gitflow (#227)
crismunoz Sep 23, 2024
b883add
chore: update readme (#229)
crismunoz Sep 23, 2024
830b660
docs: update tutorials
crismunoz Sep 25, 2024
40fba24
docs: update tutorials (#230)
crismunoz Sep 25, 2024
f9316f4
feat: new datasets and docs (#231)
Kleyt0n Sep 25, 2024
48c613d
Merge branch 'develop' of github.com:holistic-ai/holisticai into docs…
crismunoz Sep 25, 2024
5b6e36d
docs: rename lib (#234)
crismunoz Sep 26, 2024
b5b8ff4
fix: datasets labels (#245)
Kleyt0n Oct 7, 2024
922d341
feature: update dataset_shift module (#248)
andrelfnovaes Oct 16, 2024
ea1eece
chore(deps): bump actions/checkout from 4.1.7 to 4.2.2 (#250)
dependabot[bot] Oct 30, 2024
26e6b5c
chore(deps): bump actions/setup-python from 5.2.0 to 5.3.0 (#251)
dependabot[bot] Oct 30, 2024
55e3223
feat: New Robustness Regression Attacker: Ridge Poisoner (#241)
fracarfer5 Oct 31, 2024
91f6a00
feat: new methods for measuring security privacy risk score (#254)
fracarfer5 Oct 31, 2024
1c5293e
feat: new xai metrics based on global feature importance, local featur…
crismunoz Oct 31, 2024
a57247c
chore: Improve Accuracy Degradation Profile Documentation (#249)
andrelfnovaes Nov 1, 2024
a4aa7e0
docs: update tutorials (#257)
crismunoz Nov 1, 2024
8cf0fb0
docs: updating docstrings for regression poisoners (#256)
fracarfer5 Nov 1, 2024
7d93e93
fix: load dataset (#259)
Kleyt0n Nov 1, 2024
a833ee0
Merge branch 'main' of github.com:holistic-ai/holisticai into develop
crismunoz Nov 1, 2024
8e42bc6
Delete paper directory (#262)
crismunoz Nov 4, 2024
4a180e5
Delete README.rst
crismunoz Nov 4, 2024
df99fb8
Delete workflows directory (#263)
crismunoz Nov 4, 2024
f80f467
Features/improvements (#265)
Kleyt0n Nov 5, 2024
59950e3
add: bias tradeoff example (#266)
Kleyt0n Nov 5, 2024
832afc1
add: bank marketing description (#267)
Kleyt0n Nov 6, 2024
4593297
chore: updating hackathon notebook with privacy risk score interpretat…
fracarfer5 Nov 6, 2024
3084f57
docs: update documentation (#274)
crismunoz Nov 6, 2024
accea56
chore: fix: conflicts merge develop main
crismunoz Nov 6, 2024
0457704
fix: merge conflicts
crismunoz Nov 6, 2024
31c18e7
Features/update documentation (#276)
crismunoz Nov 7, 2024
5cb050b
feat: remove dataset dependence of sklearn (#277)
Kleyt0n Nov 8, 2024
8d52e71
Merge branch 'main' into develop
crismunoz Nov 19, 2024
590dbb4
fix: format (#280)
Kleyt0n Nov 19, 2024
c7f2bbb
chore(deps): bump numpy from 1.26 to 2.2.0 (#281)
dependabot[bot] Dec 12, 2024
841f16e
chore(deps): bump slackapi/slack-github-action from 1.27.0 to 2.0.0 (…
dependabot[bot] Dec 12, 2024
74048f9
Rename rs_fair_topk_fa*ir_algorithm.rst to rs_fair_topk_fair_algorith…
crismunoz Feb 10, 2025
a47d40d
Hotfix (#286)
crismunoz Feb 10, 2025
b89b7d2
chore: Split dependencies (#293)
crismunoz Feb 12, 2025
5d6c02f
Update install.rst
crismunoz Feb 12, 2025
1ccb768
Merge branch 'main' into develop
crismunoz Feb 12, 2025
6b383ab
chore: update workflow to create namespace holisticai (#296)
crismunoz Mar 3, 2025
fd9cb65
Merge branch 'main' into develop
crismunoz Mar 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/publish-testpypi-workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Manually-triggered workflow that builds the distribution with hatch and
# uploads it to TestPyPI (note `hatch publish --repo test` below).
# Renamed from "Publish PyPI": the old name was misleading — this workflow
# never touches the production index.
name: Publish TestPyPI

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Mint an installation token for the GitHub App so later steps can
      # act with the App's permissions instead of the default GITHUB_TOKEN.
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ secrets.APPLICATION_ID }}
          private-key: ${{ secrets.APPLICATION_PRIVATE_KEY }}

      - name: Checkout code
        uses: actions/checkout@v4.2.2
        with:
          fetch-tags: true # required so `git describe --tags` can see release tags
          fetch-depth: 0 # full history: `git rev-list --tags` needs it
          ref: main
          token: ${{ steps.app-token.outputs.token }} # Needed to trigger other actions

      - name: Set up Python
        uses: actions/setup-python@v5.3.0
        with:
          python-version: "3.9"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hatch

      # Resolve the most recent tag; exported via GITHUB_ENV so any
      # follow-up step added later can read ${{ env.tag }}.
      - name: Get Latest Release Version
        id: get_release
        run: |
          latest_tag=$(git describe --tags `git rev-list --tags --max-count=1`)
          echo "Latest release version: $latest_tag"
          echo "tag=$latest_tag" >> $GITHUB_ENV # Save the tag to an environment variable

      - name: Publish to TestPyPI
        env:
          HATCH_INDEX_USER: __token__
          HATCH_INDEX_AUTH: ${{ secrets.PYPI_TOKEN }}
        run: |
          hatch build
          hatch publish --repo test
12 changes: 11 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,14 @@ foo = ["which python"]
env-vars = { PYTHONPATH = "src" }

[tool.hatch.build.targets.wheel]
packages = ["src/holisticai"]
packages = [
"src/holisticai/bias",
"src/holisticai/explainability",
"src/holisticai/robustness",
"src/holisticai/efficacy",
"src/holisticai/inspection",
"src/holisticai/pipeline",
"src/holisticai/security",
"src/holisticai/typing",
"src/holisticai/utils"
]
2 changes: 1 addition & 1 deletion ruff_default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ select = [
"ICN001",
"ICN002",
"ICN003",
"INP001",
#"INP001",
"INT001",
"INT002",
"INT003",
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from typing import Optional

from holisticai.bias.mitigation.commons.disparate_impact_remover._categorical_repairer import CategoricalRepairer
from holisticai.bias.mitigation.commons.disparate_impact_remover._utils import (
freedman_diaconis_bin_size as bin_calculator,
Expand Down Expand Up @@ -46,7 +44,7 @@ def __init__(
feature_to_repair: int,
repair_level: float,
kdd: bool = False,
features_to_ignore: Optional[list[str]] = None,
features_to_ignore: list[str] | None = None,
):
if features_to_ignore is None:
features_to_ignore = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import math
import random
from copy import deepcopy
from typing import Union

import numpy as np
from holisticai.bias.mitigation.commons.disparate_impact_remover._categorical_feature import CategoricalFeature
Expand Down Expand Up @@ -32,7 +31,7 @@ def get_categories_count_norm(categories, all_stratified_groups, count_dict, gro
dict
The dictionary containing the normalized count for each category.
"""
norm = {
return {
cat: SparseList(
data=(
count_dict[cat][i] * (1.0 / len(group_features[group].data)) if group_features[group].data else 0.0
Expand All @@ -41,7 +40,6 @@ def get_categories_count_norm(categories, all_stratified_groups, count_dict, gro
)
for cat in categories
}
return norm


def gen_desired_dist(group_index, cat, col_id, median, repair_level, norm_counts, feature_to_remove, mode):
Expand Down Expand Up @@ -158,7 +156,7 @@ def get_categories_count(categories, all_stratified_groups, group_feature):
dict
The dictionary containing the count for each category.
"""
count_dict = {
return {
cat: SparseList(
data=(
group_feature[group].category_count[cat] if cat in group_feature[group].category_count else 0
Expand All @@ -168,8 +166,6 @@ def get_categories_count(categories, all_stratified_groups, group_feature):
for cat in categories
}

return count_dict


def gen_desired_count(group_index, group, category, median, group_features, repair_level, categories_count):
"""
Expand Down Expand Up @@ -200,8 +196,7 @@ def gen_desired_count(group_index, group, category, median, group_features, repa
med = median[category]
size = len(group_features[group].data)
count = categories_count[category][group_index]
des_count = math.floor(((1 - repair_level) * count) + (repair_level) * med * size)
return des_count
return math.floor(((1 - repair_level) * count) + (repair_level) * med * size)


def flow_on_group_features(all_stratified_groups, group_features, repair_generator):
Expand Down Expand Up @@ -283,7 +278,7 @@ def get_count_norm(count, group_feature_data):
return 0.0


def get_column_type(values: Union[list, np.ndarray]):
def get_column_type(values: list | np.ndarray):
"""
Get the type of the column.

Expand Down Expand Up @@ -322,7 +317,8 @@ def get_median(values, kdd):
The median of the list of values.
"""
if not values:
raise ValueError("Cannot calculate median of list with no values!")
msg = "Cannot calculate median of list with no values!"
raise ValueError(msg)

sorted_values = deepcopy(values)
sorted_values.sort() # Not calling `sorted` b/c `sorted_values` may not be list.
Expand Down Expand Up @@ -450,9 +446,7 @@ def make_histogram_bins(bin_size_calculator, data, col_id):
index_bins[bin_num].append(row_index)
break

index_bins = [b for b in index_bins if b]

return index_bins
return [b for b in index_bins if b]


def freedman_diaconis_bin_size(feature_values):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def assign(self):
list
The mapping of points to their closest centers.
"""
mapping = [
return [
(
i,
sorted(
Expand All @@ -95,5 +95,3 @@ def assign(self):
)
for i in range(len(self.data))
]

return mapping
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def _check_nonnegative_int(self, value, desc, strict=True):
"""
negative = value is None or value <= 0 if strict else value is None or value < 0
if negative or not isinstance(value, (int, np.integer)):
raise ValueError(f"{desc} should be a nonnegative integer. " f"{value} was given")
msg = f"{desc} should be a nonnegative integer. " f"{value} was given"
raise ValueError(msg)

def _check_init_args(self):
"""
Expand Down Expand Up @@ -366,9 +367,7 @@ def _compute_inertia(self, distances):

# Define inertia as the sum of the sample-distances
# to closest cluster centers
inertia = np.sum(np.min(distances, axis=1))

return inertia
return np.sum(np.min(distances, axis=1))

def _initialize_medoids(self, D, n_clusters, random_state_):
"""
Expand Down Expand Up @@ -399,7 +398,8 @@ def _initialize_medoids(self, D, n_clusters, random_state_):
# to every other point. These are the initial medoids.
medoids = np.argpartition(np.sum(D, axis=1), n_clusters - 1)[:n_clusters]
else:
raise ValueError(f"init value '{self.init}' not recognized")
msg = f"init value '{self.init}' not recognized"
raise ValueError(msg)

return medoids

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ def decompose(self, node, dataset, donelist, depth):

if sum(R) == 0 or sum(B) == 0:
if sum(R) == 0 and sum(B) == 0:
raise ValueError("One color class became empty for this node while the other did not")
msg = "One color class became empty for this node while the other did not"
raise ValueError(msg)
return 0

NR = 0
Expand Down Expand Up @@ -226,7 +227,8 @@ def decompose(self, node, dataset, donelist, depth):
NB += excess_blue

if self.balanced(p, q, NR, NB):
raise ValueError("Constructed node sets are unbalanced")
msg = "Constructed node sets are unbalanced"
raise ValueError(msg)

reds = []
blues = []
Expand All @@ -239,7 +241,8 @@ def decompose(self, node, dataset, donelist, depth):
donelist[j] = 1

if len(reds) == NR and len(blues) == NB:
raise ValueError("Something went horribly wrong")
msg = "Something went horribly wrong"
raise ValueError(msg)

return super().decompose(blues, reds, dataset) + sum(
[self.decompose(child, dataset, donelist, depth + 1) for child in node.children]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ def _decompose(self, node, dataset, donelist, depth):
NB += excess_blue

if self.balanced(p, q, NR, NB):
raise ValueError("Constructed node sets are unbalanced")
msg = "Constructed node sets are unbalanced"
raise ValueError(msg)

reds = []
blues = []
Expand All @@ -235,7 +236,8 @@ def _decompose(self, node, dataset, donelist, depth):
donelist[j] = 1

if len(reds) == NR and len(blues) == NB:
raise ValueError("Something went horribly wrong")
msg = "Something went horribly wrong"
raise ValueError(msg)

return super()._decompose(blues, reds, dataset) + sum(
[self._decompose(child, dataset, donelist, depth + 1) for child in node.children]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ def adv_loss_fn(adversarial_params, classifier_params, batch, rng):
_, y_logits = cls_state.apply_fn({"params": classifier_params}, x, trainable=True, rngs=rngs)
_, z_logits = adv_state.apply_fn({"params": adversarial_params}, y_logits, y, trainable=True, rngs=rngs)

loss_adv = optax.sigmoid_binary_cross_entropy(z_logits, group).mean()
return loss_adv
return optax.sigmoid_binary_cross_entropy(z_logits, group).mean()

(loss, (loss_cls, loss_adv)), grads = jax.value_and_grad(loss_fn, argnums=(0), has_aux=True)(
cls_state.params, adv_state.params, batch, rng
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from typing import Optional

import jax
import jax.numpy as jnp
Expand All @@ -26,7 +25,8 @@ def is_numeric(df):
return all(pd.api.types.is_numeric_dtype(df[col]) for col in df.columns)
if isinstance(df, np.ndarray):
return np.issubdtype(df.dtype, np.number)
raise ValueError("Input must be a pandas DataFrame or numpy array.")
msg = "Input must be a pandas DataFrame or numpy array."
raise ValueError(msg)


class AdversarialDebiasing(BMImp):
Expand Down Expand Up @@ -95,19 +95,19 @@ class AdversarialDebiasing(BMImp):

def __init__(
self,
features_dim: Optional[int] = None,
keep_prob: Optional[float] = 0.1,
hidden_size: Optional[int] = 128,
batch_size: Optional[int] = 32,
shuffle: Optional[bool] = True,
epochs: Optional[int] = 10,
learning_rate: Optional[float] = 0.01,
use_debias: Optional[bool] = True,
adversary_loss_weight: Optional[float] = 0.1,
verbose: Optional[int] = 1,
print_interval: Optional[int] = 100,
device: Optional[str] = "cpu",
seed: Optional[int] = None,
features_dim: int | None = None,
keep_prob: float | None = 0.1,
hidden_size: int | None = 128,
batch_size: int | None = 32,
shuffle: bool | None = True,
epochs: int | None = 10,
learning_rate: float | None = 0.01,
use_debias: bool | None = True,
adversary_loss_weight: float | None = 0.1,
verbose: int | None = 1,
print_interval: int | None = 100,
device: str | None = "cpu",
seed: int | None = None,
):
# default classifier config
self.features_dim = features_dim
Expand Down Expand Up @@ -179,7 +179,8 @@ def fit(
params = self._load_data(X=X, y=y, group_a=group_a, group_b=group_b)
x = pd.DataFrame(params["X"])
if not is_numeric(x):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

y = pd.Series(params["y"])
group_a = pd.Series(params["group_a"])
Expand Down Expand Up @@ -245,7 +246,8 @@ def predict(self, X):
np.ndarray: Predicted output per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)
p = self.predict_proba(X)
return np.argmax(p, axis=1).ravel()

Expand All @@ -268,7 +270,8 @@ def predict_proba(self, X):
np.ndarray: Predicted matrix probability per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

proba = np.empty((X.shape[0], 2))
proba[:, 1] = self._predict_proba(X)
Expand All @@ -294,7 +297,7 @@ def predict_score(self, X):
np.ndarray: Predicted probability per sample.
"""
if not is_numeric(X):
raise ValueError("Adversarial Debiasing only works with numeric features.")
msg = "Adversarial Debiasing only works with numeric features."
raise ValueError(msg)

p = self._predict(X).reshape([-1])
return p
return self._predict(X).reshape([-1])
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,11 @@ def project_lambda(self, lambda_vec):
lambda_neg = -lambda_pos
lambda_pos[lambda_pos < 0.0] = 0.0
lambda_neg[lambda_neg < 0.0] = 0.0
lambda_projected = pd.concat(
return pd.concat(
[lambda_pos, lambda_neg],
keys=["+", "-"],
names=[_SIGNED, _EVENT, _GROUP_ID],
)
return lambda_projected
return lambda_vec

def bound(self):
Expand Down
Loading