aai-institute
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 3 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎.test_durations‎
Lines changed: 12 additions & 12 deletions b/‎.test_durations‎
Lines changed: 12 additions & 12 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 32 additions & 7 deletions b/‎CHANGELOG.md‎
Lines changed: 32 additions & 7 deletions
diff --git a/‎docs/value/classwise-shapley.md‎
Lines changed: 2 additions & 1 deletion b/‎docs/value/classwise-shapley.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎notebooks/data_oob.ipynb‎
Lines changed: 15 additions & 5 deletions b/‎notebooks/data_oob.ipynb‎
Lines changed: 15 additions & 5 deletions
@@ -8,11 +8,10 @@ repos:
         # HACK: ruff-pre-commit ignores pyproject.toml
         # https://github.com/astral-sh/ruff-pre-commit/issues/54
         args: [ "--extend-per-file-ignores", "tests/**/*.py:F811",
-                "--extend-per-file-ignores", "tests/**/*.py:F401",
-                "--fix" ]
+                "--extend-per-file-ignores", "tests/**/*.py:F401" ]
       - id: ruff-format
   - repo: https://github.com/kynan/nbstripout
     rev: 0.6.1
     hooks:
       - id: nbstripout
-        args: ["--keep-output", "--keep-count", "--drop-empty-cells", "--extra-keys", "metadata.pycharm cell.metadata.pycharm"]
+        args: [ "--keep-output", "--keep-count", "--drop-empty-cells", "--extra-keys", "metadata.pycharm cell.metadata.pycharm" ]
@@ -1493,22 +1493,22 @@
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[None-sampler_kwargs4-GroupTestingShapleyValuation-valuation_kwargs4-0.1-0.01-test_game1]": 3.596974375000002,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[OwenSampler-sampler_kwargs2-OwenShapleyValuation-valuation_kwargs2-0.2-0.0001-test_game0]": 2.566003500000022,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[OwenSampler-sampler_kwargs2-OwenShapleyValuation-valuation_kwargs2-0.2-0.0001-test_game1]": 3.0255352490000007,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[PermutationSampler-sampler_kwargs0-DataShapleyValuation-valuation_kwargs0-0.2-0.0001-test_game0]": 2.536671957999971,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[PermutationSampler-sampler_kwargs0-DataShapleyValuation-valuation_kwargs0-0.2-0.0001-test_game1]": 0.37417354199996566,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[UniformSampler-sampler_kwargs1-DataShapleyValuation-valuation_kwargs1-0.2-0.0001-test_game0]": 3.23137270899997,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[UniformSampler-sampler_kwargs1-DataShapleyValuation-valuation_kwargs1-0.2-0.0001-test_game1]": 3.4768419569999764,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[PermutationSampler-sampler_kwargs0-ShapleyValuation-valuation_kwargs0-0.2-0.0001-test_game0]": 2.536671957999971,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[PermutationSampler-sampler_kwargs0-ShapleyValuation-valuation_kwargs0-0.2-0.0001-test_game1]": 0.37417354199996566,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[UniformSampler-sampler_kwargs1-ShapleyValuation-valuation_kwargs1-0.2-0.0001-test_game0]": 3.23137270899997,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_games[UniformSampler-sampler_kwargs1-ShapleyValuation-valuation_kwargs1-0.2-0.0001-test_game1]": 3.4768419569999764,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_grouped_linear_montecarlo_shapley[PermutationSampler-kwargs0-2-0-21-2]": 0.7323970420000023,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_hoeffding_bound_montecarlo[PermutationSampler-6-0.1-0.1]": 22.979635875999975,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_hoeffding_bound_montecarlo[UniformSampler-6-0.1-0.1]": 26.41515983300002,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_linear_montecarlo_with_outlier[AntitheticOwenSampler-sampler_kwargs2-OwenShapleyValuation-valuation_kwargs2-2-0-21]": 14.262070917000017,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_linear_montecarlo_with_outlier[None-sampler_kwargs3-GroupTestingShapleyValuation-valuation_kwargs3-2-0-21]": 19.76072416599999,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_linear_montecarlo_with_outlier[OwenSampler-sampler_kwargs1-OwenShapleyValuation-valuation_kwargs1-2-0-21]": 9.141020416000003,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_linear_montecarlo_with_outlier[PermutationSampler-sampler_kwargs0-DataShapleyValuation-valuation_kwargs0-2-0-21]": 3.0010637080000038,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_linear_montecarlo_with_outlier[PermutationSampler-sampler_kwargs0-ShapleyValuation-valuation_kwargs0-2-0-21]": 3.0010637080000038,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[AntitheticOwenSampler-sampler_kwargs3-OwenShapleyValuation-valuation_kwargs3-test_game0]": 0.25927716699999337,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[None-sampler_kwargs4-GroupTestingShapleyValuation-valuation_kwargs4-test_game0]": 0.3608742090000021,
     "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[OwenSampler-sampler_kwargs2-OwenShapleyValuation-valuation_kwargs2-test_game0]": 0.10323104100001501,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[PermutationSampler-sampler_kwargs0-DataShapleyValuation-valuation_kwargs0-test_game0]": 0.006297582000001967,
-    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[UniformSampler-sampler_kwargs1-DataShapleyValuation-valuation_kwargs1-test_game0]": 0.00704366599998707,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[PermutationSampler-sampler_kwargs0-ShapleyValuation-valuation_kwargs0-test_game0]": 0.006297582000001967,
+    "tests/valuation/methods/test_montecarlo_shapley_valuations.py::test_seed[UniformSampler-sampler_kwargs1-ShapleyValuation-valuation_kwargs1-test_game0]": 0.00704366599998707,
     "tests/valuation/methods/test_semivalues.py::test_banzhaf[AntitheticPermutationSampler-5]": 3.0455862080000005,
     "tests/valuation/methods/test_semivalues.py::test_banzhaf[AntitheticSampler-5]": 0.4238251659999994,
     "tests/valuation/methods/test_semivalues.py::test_banzhaf[DeterministicPermutationSampler-5]": 0.006310583000000314,
@@ -1523,8 +1523,8 @@
     "tests/valuation/methods/test_semivalues.py::test_coefficients[BetaShapleyValuation-kwargs2-10]": 0.003863207999984297,
     "tests/valuation/methods/test_semivalues.py::test_coefficients[DataBanzhafValuation-kwargs3-100]": 0.001800666000065121,
     "tests/valuation/methods/test_semivalues.py::test_coefficients[DataBanzhafValuation-kwargs3-10]": 0.0016530420000435697,
-    "tests/valuation/methods/test_semivalues.py::test_coefficients[DataShapleyValuation-kwargs4-100]": 0.0018769589999578784,
-    "tests/valuation/methods/test_semivalues.py::test_coefficients[DataShapleyValuation-kwargs4-10]": 0.0016063749999375432,
+    "tests/valuation/methods/test_semivalues.py::test_coefficients[ShapleyValuation-kwargs4-100]": 0.0018769589999578784,
+    "tests/valuation/methods/test_semivalues.py::test_coefficients[ShapleyValuation-kwargs4-10]": 0.0016063749999375432,
     "tests/valuation/methods/test_semivalues.py::test_msr_banzhaf[5]": 9.342398666999998,
     "tests/valuation/methods/test_semivalues.py::test_shapley_batch_size[1-test_game0]": 0.07176091700006282,
     "tests/valuation/methods/test_semivalues.py::test_shapley_batch_size[2-test_game0]": 3.8395362910000586,
@@ -1640,8 +1640,8 @@
     "tests/valuation/test_interface.py::test_data_banzhaf_valuation[2]": 1.2780167490000025,
     "tests/valuation/test_interface.py::test_data_beta_shapley_valuation[1]": 4.139234666999997,
     "tests/valuation/test_interface.py::test_data_beta_shapley_valuation[2]": 3.603092916999998,
-    "tests/valuation/test_interface.py::test_data_shapley_valuation[1]": 0.27120083299999465,
-    "tests/valuation/test_interface.py::test_data_shapley_valuation[2]": 0.15037520699999618,
+    "tests/valuation/test_interface.py::test_shapley_valuation[1]": 0.27120083299999465,
+    "tests/valuation/test_interface.py::test_shapley_valuation[2]": 0.15037520699999618,
     "tests/valuation/test_interface.py::test_data_utility_learning[1]": 0.026216332999993597,
     "tests/valuation/test_interface.py::test_data_utility_learning[2]": 0.06457645800000478,
     "tests/valuation/test_interface.py::test_delta_shapley_valuation[1]": 3.562169998999977,
@@ -1941,4 +1941,4 @@
     "tests/value/test_stopping.py::test_standard_error": 0.0020545429999856424,
     "tests/value/test_stopping.py::test_stopping_criterion": 0.0016162080000015067,
     "tests/value/test_stopping.py::test_stopping_criterion_composition": 0.0024397500000077343
-}
+}
@@ -2,8 +2,16 @@
 
 ## Unreleased
 
+
 ### Added
 
+- Introduced the concept of `ResultUpdater` in order to allow samplers to
+  declare the proper strategy to be used by valuations
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Added precomputed Banzhaf values to some games.
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Introduced new `IndexIterations`, for consistent usage across all
+  `PowersetSamplers` [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
 - Added `run_removal_experiment` for easy removal experiments
   [PR #636](https://github.com/aai-institute/pyDVL/pull/636)
 - Refactor Classwise Shapley valuation with the interfaces and sampler
@@ -12,22 +20,24 @@
   [PR #610](https://github.com/aai-institute/pyDVL/pull/610)
 - Refactor MSR Banzhaf semivalues with the new sampler architecture.
   [PR #605](https://github.com/aai-institute/pyDVL/pull/605)
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
 - Refactor group-testing shapley values with new sampler architecture
   [PR #602](https://github.com/aai-institute/pyDVL/pull/602)
 - Refactor least-core data valuation methods with more supported sampling
   methods and consistent interface.
   [PR #580](https://github.com/aai-institute/pyDVL/pull/580)
-- Refactor Owen-Shapley valuation with new sampler architecture
+- Refactor Owen-Shapley valuation with new sampler architecture. Enable use of
+  `OwenSamplers` with all semi-values
   [PR #597](https://github.com/aai-institute/pyDVL/pull/597)
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
 - New method `InverseHarmonicMeanInfluence`, implementation for the paper
   `DataInf: Efficiently Estimating Data Influence in LoRA-tuned LLMs and
     Diffusion Models`
   [PR #582](https://github.com/aai-institute/pyDVL/pull/582)
-- Add new backend implementations for influence computation
-  to account for block-diagonal approximations
+- Add new backend implementations for influence computation to account for
+  block-diagonal approximations
   [PR #582](https://github.com/aai-institute/pyDVL/pull/582)
-- Extend `DirectInfluence` with block-diagonal and Gauss-Newton
-  approximation
+- Extend `DirectInfluence` with block-diagonal and Gauss-Newton approximation
   [PR #591](https://github.com/aai-institute/pyDVL/pull/591)
 - Extend `LissaInfluence` with block-diagonal and Gauss-Newton approximation
   [PR #593](https://github.com/aai-institute/pyDVL/pull/593)
@@ -37,12 +47,19 @@
 - Extend `ArnoldiInfluence` with block-diagonal and Gauss-Newton
   approximation
   [PR #598](https://github.com/aai-institute/pyDVL/pull/598)
-- Extend `CgInfluence` with block-diagonal and Gauss-Newton
-  approximation
+- Extend `CgInfluence` with block-diagonal and Gauss-Newton approximation
   [PR #601](https://github.com/aai-institute/pyDVL/pull/601)
 
 ### Fixed
 
+- Fixed several bugs in various stopping criteria, including iteration counts,
+  computation of completion, and resetting
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Fixed the weights of all samplers to ensure that mixing and matching samplers
+  and semi-value methods always works, for all possible combinations
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Fixed a bug whereby progress bars would not report the last step and remain
+  incomplete [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
 - Fixed the analysis of the adult dataset in the Data-OOB notebook
   [PR #636](https://github.com/aai-institute/pyDVL/pull/636)
 - Replace `np.float_` with `np.float64` and `np.alltrue` with `np.all`,
@@ -59,6 +76,14 @@
 
 ### Changed
 
+- Updated and rewrote parts of the MSR Banzhaf notebook
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Updated Least-Core notebook
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
+- Restructured and generalized `StratifiedSampler` to allow using heuristics,
+  thus subsuming Variance-Reduced stratified sampling into a unified framework.
+  Implemented the heuristics proposed in that paper
+  [PR #641](https://github.com/aai-institute/pyDVL/pull/641)
 - Changed the way semi-value coefficients are composed with sampler weights in
   order to avoid `OverflowError` for very small or large values
   [PR #639](https://github.com/aai-institute/pyDVL/pull/639)
 
@@ -1,8 +1,9 @@
 ---
 title: Class-wise Shapley
+alias: classwise-shapley
 ---
 
-# Class-wise Shapley
+# Class-wise Shapley { #intro-to-cw-shapley }
 
 Class-wise Shapley (CWS) [@schoch_csshapley_2022] offers a Shapley framework
 tailored for classification problems.  Given a sample $x_i$ with label $y_i \in
 
@@ -127,7 +127,7 @@
     "    train, test = load_adult_data(\n",
     "        train_size=train_size, subsample=0.01, random_state=random_state\n",
     "    )\n",
-    "    n_jobs = 2\n",
+    "    n_jobs = 1\n",
     "    n_runs = 1\n",
     "    n_est = 10"
    ]
@@ -536,6 +536,20 @@
     "removal_percentages = np.arange(0, 0.51, 0.02)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "hide"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "if is_CI:\n",
+    "    removal_percentages = np.arange(0, 0.51, 0.3)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 15,
@@ -769,10 +783,6 @@
    "source": [
     "from support.common import ConstantBinaryClassifier\n",
     "\n",
-    "train, test = load_adult_data(\n",
-    "    train_size=train_size, subsample=0.2, random_state=random_state\n",
-    ")\n",
-    "\n",
     "probs = [0.01, 0.5, 0.99]\n",
     "all_values = []\n",
     "for p in probs:\n",