
Commit 0971b04

Merge branch 'main' into setup
2 parents 51c4375 + 495fdd5

18 files changed: +872 -20 lines

.github/workflows/ci.yml

Lines changed: 78 additions & 0 deletions

@@ -0,0 +1,78 @@
+name: Build
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+  workflow_dispatch:
+
+
+permissions:
+  contents: write
+
+jobs:
+  linting:
+    name: Linting
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.12
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          pip install ".[tests]"
+
+      - name: Run code analysis (black, mypy, flake8, pylint)
+        run: |
+          make code-analysis
+
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        # NOTE: macos-13 is a workaround for an issue with the latest version
+        os: [ubuntu-latest, macos-13, windows-latest]
+        python: [3.9, "3.10", 3.11, 3.12]
+    name: ${{ matrix.os }} Python ${{ matrix.python }}
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python }}
+          architecture: x64
+
+      - name: Install
+        run: |
+          # NOTE: pip, setuptools and wheel should be included with any python
+          # installation. It's being installed/upgraded here because the
+          # setup-python action is not including setuptools with Python 3.12
+          pip install --upgrade pip setuptools wheel
+          pip install .[tests,optional,docs]
+
+      - name: Test library
+        run: |
+          make test
+
+      - name: Upload coverage to codecov
+        uses: codecov/codecov-action@v4
+        with:
+          files: ./coverage.xml
+          fail_ci_if_error: false
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: DataResponsibly/sharp
+
+      - name: Test Docs
+        run: |
+          cd doc
+          make html

xai_ranking/benchmarks/_hilw.py

Lines changed: 54 additions & 1 deletion

@@ -1,11 +1,40 @@
-import numpy as np
+"""
+Methods were adapted from the following paper:
+Jun Yuan and Aritra Dasgupta. 2023. A Human-in-the-loop Workflow for Multi-Factorial
+Sensitivity Analysis of Algorithmic Rankers. In Proceedings of the Workshop on
+Human-In-the-Loop Data Analytics (HILDA '23). Association for Computing Machinery,
+New York, NY, USA, Article 5, 1-5. https://doi.org/10.1145/3597465.3605221
+"""
 
+import numpy as np
 from xai_ranking.benchmarks.hilw import hilw_contributions, hilw_batch_contributions
 
 
 def human_in_the_loop_experiment(
     X, score_function, upper_bound=1, lower_bound=None, *args, **kwargs
 ):
+    """
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for the experiment.
+    score_function : callable
+        The function used to score the input data.
+    upper_bound : int, optional
+        The upper bound for the rank of the items (default is 1).
+    lower_bound : int, optional
+        The lower bound for the rank of the items. If None, it defaults to
+        the number of rows in X.
+    *args : tuple
+        Additional positional arguments to pass to the hilw_contributions function.
+    **kwargs : dict
+        Additional keyword arguments to pass to the hilw_contributions function.
+
+    Returns
+    -------
+    pandas.Series
+        The contributions of the features.
+    """
     if lower_bound is None:
         lower_bound = X.shape[0]
 
@@ -17,6 +46,30 @@ def human_in_the_loop_experiment(
 def human_in_the_loop_batch_experiment(
     X, score_function, upper_bound=1, lower_bound=None, random_state=42, *args, **kwargs
 ):
+    """
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for the experiment.
+    score_function : callable
+        The function used to score the input data.
+    upper_bound : int, optional
+        The upper bound for the rank of the items (default is 1).
+    lower_bound : int, optional
+        The lower bound for the rank of the items. If None, it defaults to
+        the number of rows in X.
+    random_state : int, optional
+        The seed used by the random number generator. Default is 42.
+    *args : tuple
+        Additional positional arguments to pass to the hilw_batch_contributions function.
+    **kwargs : dict
+        Additional keyword arguments to pass to the hilw_batch_contributions function.
+
+    Returns
+    -------
+    pandas.Series
+        The contributions of the features.
+    """
     batch_size = (
         np.ceil(0.1 * len(X)).astype(int)
         if "batch_size" not in kwargs

xai_ranking/benchmarks/_hre.py

Lines changed: 46 additions & 6 deletions

@@ -1,8 +1,4 @@
 """
-Local Explanations of Global Rankings: Insights for Competitive Rankings
-
-Hierarchical Ranking Explanation (HRE) framework.
-
 Anahideh, H., & Mohabbati-Kalejahi, N. (2022). Local explanations of global
 rankings: insights for competitive rankings. IEEE Access, 10, 30676-30693.
 
@@ -24,7 +20,28 @@ def hierarchical_ranking_explanation(
     X, score_function, model_type="OLS", s=5, *args, **kwargs
 ):
     """
-    `model_type` can be one of "DT", "LR", "OLS", "PLS".
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for which explanations are to be generated.
+    score_function : callable
+        A function that takes the input data X and returns scores.
+    model_type : str, optional
+        The type of model to use for feature importance calculation.
+        Can be one of "DT" (Decision Tree), "LR" (Logistic Regression),
+        "OLS" (Ordinary Least Squares), or "PLS" (Partial Least Squares).
+        Default is "OLS".
+    s : int, optional
+        A parameter for the feature importance function. Default is 5.
+    *args : tuple
+        Additional arguments to pass to the feature importance function.
+    **kwargs : dict
+        Additional keyword arguments to pass to the feature importance function.
+
+    Returns
+    -------
+    numpy.ndarray
+        An array of contributions for each observation in the input data.
     """
     # index = X.index
     X = X.copy().reset_index(drop=True)
@@ -52,7 +69,30 @@ def hierarchical_ranking_batch_explanation(
     **kwargs,
 ):
     """
-    `model_type` can be one of "DT", "LR", "OLS", "PLS".
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for which explanations are to be generated.
+    score_function : callable
+        A function that takes the input data X and returns scores.
+    model_type : str, optional
+        The type of model to use for feature importance calculation.
+        Can be one of "DT" (Decision Tree), "LR" (Logistic Regression),
+        "OLS" (Ordinary Least Squares), or "PLS" (Partial Least Squares).
+        Default is "OLS".
+    s : int, optional
+        A parameter for the feature importance function. Default is 5.
+    random_state : int, optional
+        The seed used by the random number generator. Default is 42.
+    *args : tuple
+        Additional arguments to pass to the feature importance function.
+    **kwargs : dict
+        Additional keyword arguments to pass to the feature importance function.
+
+    Returns
+    -------
+    numpy.ndarray
+        An array of contributions for each observation in the input data.
     """
     batch_size = (
         np.ceil(0.1 * len(X)).astype(int)
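
A hedged sketch of calling hierarchical_ranking_explanation (not from the commit): the data and scorer are invented, the private import path is an assumption, and `s` is left at its default since the diff documents it only as "a parameter for the feature importance function".

    import numpy as np
    import pandas as pd

    from xai_ranking.benchmarks._hre import hierarchical_ranking_explanation

    # Toy data: 40 observations with four features (invented for illustration).
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.random((40, 4)), columns=["f1", "f2", "f3", "f4"])

    def score_function(data):
        # Hypothetical scorer: unweighted sum of the features, one score per row.
        return data.sum(axis=1)

    # model_type selects the surrogate: "DT", "LR", "OLS", or "PLS".
    contributions = hierarchical_ranking_explanation(X, score_function, model_type="OLS")
    print(contributions.shape)  # one row of feature contributions per observation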

xai_ranking/benchmarks/_lime.py

Lines changed: 32 additions & 2 deletions

@@ -5,7 +5,21 @@
 
 
 def lime_experiment(X, score_function, mode="regression", **kwargs):
     """
-    `mode` can be one of `[classification, regression]`.
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for which explanations are to be generated.
+    score_function : callable
+        The function used to score the data.
+    mode : str, default="regression"
+        The mode of the experiment. It can be either "classification" or "regression".
+    **kwargs : dict
+        Additional keyword arguments to be passed to the LIME explainer.
+
+    Returns
+    -------
+    lime_values : array-like
+        The LIME attributions for the input data `X`.
     """
     explainer = LimeTabular(
         score_function,
@@ -20,7 +34,23 @@ def lime_batch_experiment(
     X, score_function, mode="regression", random_state=42, **kwargs
 ):
     """
-    `mode` can be one of `[classification, regression]`.
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data for which explanations are to be generated.
+    score_function : callable
+        The function used to score the data.
+    mode : str, default="regression"
+        The mode of the experiment. It can be either "classification" or "regression".
+    random_state : int, optional
+        The seed used by the random number generator. Default is 42.
+    **kwargs : dict
+        Additional keyword arguments to be passed to the LIME explainer.
+
+    Returns
+    -------
+    lime_values : array-like
+        The LIME attributions for the input data `X`.
     """
     batch_size = (
         np.ceil(0.1 * len(X)).astype(int)
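
A sketch of lime_experiment usage under the same assumptions as above (invented data and scorer; private import path):

    import numpy as np
    import pandas as pd

    from xai_ranking.benchmarks._lime import lime_experiment

    # Toy data: 50 rows, three features (invented for illustration).
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.random((50, 3)), columns=["a", "b", "c"])

    def score_function(data):
        # Hypothetical real-valued scorer, matching mode="regression".
        return np.asarray(data).mean(axis=1)

    lime_values = lime_experiment(X, score_function, mode="regression")

Per the docstring, mode="classification" would be used instead when the score function returns class probabilities.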

xai_ranking/benchmarks/_participation.py

Lines changed: 55 additions & 0 deletions

@@ -11,6 +11,23 @@
 
 
 def participation_score(X, ranks, top_k=10):
+    """
+    Computes the participation score for the top_k items.
+
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data.
+    ranks : pandas.Series
+        The ranks of the items.
+    top_k : int, optional
+        The number of top items to consider. Default is 10.
+
+    Returns
+    -------
+    pandas.Series
+        The participation score for each feature.
+    """
     mask = ranks <= top_k
     X_top = X[mask]
     # thresh = score_function(X_top).min()
@@ -20,6 +37,25 @@ def participation_score(X, ranks, top_k=10):
 
 
 def weighted_participation_score(X, ranks, weights, top_k=10):
+    """
+    Computes the weighted participation score for the top_k items.
+
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data.
+    ranks : pandas.Series
+        The ranks of the items.
+    weights : pandas.Series
+        The weights for each item.
+    top_k : int, optional
+        The number of top items to consider. Default is 10.
+
+    Returns
+    -------
+    pandas.Series
+        The weighted participation score for each feature.
+    """
     mask = ranks <= top_k
     X_top = X[mask].mul(weights)
     # thresh = score_function(X_top).min()
@@ -29,6 +65,25 @@ def weighted_participation_score(X, ranks, weights, top_k=10):
 
 
 def participation_experiment(X, score_function, top_k=10, weights=None):
+    """
+    Runs the participation score experiment.
+
+    Parameters
+    ----------
+    X : pandas.DataFrame
+        The input data.
+    score_function : callable
+        The function to compute scores.
+    top_k : int, optional
+        The number of top items to consider. Default is 10.
+    weights : pandas.Series, optional
+        The weights for each item. Default is None.
+
+    Returns
+    -------
+    pandas.Series
+        The participation score or weighted participation score for each feature.
+    """
     ranks = scores_to_ordering(score_function(X))
     if weights is not None:
         return weighted_participation_score(X, ranks, weights=weights, top_k=top_k)
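
A sketch of the documented experiment entry point (not from the commit); the data, scorer, and import path are assumptions for illustration:

    import numpy as np
    import pandas as pd

    from xai_ranking.benchmarks._participation import participation_experiment

    # Toy data: 50 items, three features (invented for illustration).
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.random((50, 3)), columns=["a", "b", "c"])

    def score_function(data):
        # Hypothetical scorer: higher feature values rank an item higher.
        return data["a"] + data["b"] + data["c"]

    # Per-feature participation over the top-10 ranked items; passing a
    # `weights` argument would route to weighted_participation_score instead.
    scores = participation_experiment(X, score_function, top_k=10)
    print(scores)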

xai_ranking/benchmarks/_rank_lime.py

Lines changed: 16 additions & 0 deletions

@@ -4,6 +4,22 @@
 
 
 def rank_lime_experiment(X, score_function, **kwargs):
+    """
+    Parameters
+    ----------
+    X : array-like
+        The input data for which the attributions are to be computed.
+    score_function : callable
+        The model or function used to score the input data.
+    **kwargs : dict
+        Additional keyword arguments to be passed to the RankingLIME constructor.
+
+    Returns
+    -------
+    numpy.ndarray
+        A 2D array where each element represents the attribution score for
+        a specific feature in a specific document.
+    """
     xai = RankingLIME(
         background_data=np.array(X), original_model=score_function, **kwargs
     )
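
A sketch of rank_lime_experiment usage; the array shape, scorer, and import path are assumptions, and RankingLIME's constructor kwargs are left at their defaults since the diff does not show them:

    import numpy as np

    from xai_ranking.benchmarks._rank_lime import rank_lime_experiment

    # Toy data: 30 documents with 4 features each (invented for illustration).
    rng = np.random.default_rng(0)
    X = rng.random((30, 4))

    def score_function(data):
        # Hypothetical scorer producing one relevance score per document.
        return np.asarray(data).sum(axis=1)

    attributions = rank_lime_experiment(X, score_function)
    # attributions[i, j]: attribution of feature j for document i.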
