@@ -16,7 +16,8 @@ def hilw_contributions(
1616 """
1717 Based on Dasgupta's original implementation.
1818
19- `method` should be one of the following: `shapley`, `standardized shapley`, `rank-relevance shapley`
19+ `method` should be one of the following: `shapley`,
20+ `standardized shapley`, `rank-relevance shapley`
2021
2122 hilw contributions for the entire population (no groupings, no batches).
2223 """
@@ -60,7 +61,7 @@ def hilw_contributions(
         df[["attention"]]
     )  # scale the attention back to 0 to 1
 
-    ## the raw payout is the score_std
+    # the raw payout is the score_std
     df["score_std"] = sum(
         [weights[num_attr] * df[attr] for num_attr, attr in enumerate(features)]
     )
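To make the `score_std` line concrete: it is a weighted sum over feature columns, with `weights` indexed positionally via `enumerate`. A self-contained toy trace (column names and weights are made up):

```python
import pandas as pd

# illustrative data; "f1"/"f2" and the weights are not from the diff
df = pd.DataFrame({"f1": [0.2, 0.8], "f2": [0.5, 0.1]})
features = ["f1", "f2"]
weights = [0.7, 0.3]  # positional weights, matched to features by enumerate()

# same pattern as the hunk above: sum of weight-scaled feature columns
df["score_std"] = sum(
    weights[num_attr] * df[attr] for num_attr, attr in enumerate(features)
)
print(df["score_std"].tolist())  # [0.29, 0.59]
```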
@@ -155,7 +156,8 @@ def shapley_values(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[[x for x in features]] = MinMaxScaler().fit_transform(group[[x for x in features]])
+        # group[[x for x in features]] = MinMaxScaler()\
+        #     .fit_transform(group[[x for x in features]])
         avg_attributes = dict()
         for attr in features:
             avg_attributes[attr + "_avg"] = group.loc[:, attr].mean()
@@ -165,7 +167,7 @@ def shapley_values(
 
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
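The "topN" subset keeps only rows whose rank falls in the `[upper_bound, lower_bound]` window (rank 1 being best, hence the naming). The f-string interpolates the bounds into the expression evaluated by `DataFrame.query`; a toy trace:

```python
import pandas as pd

dff = pd.DataFrame({"rank": [1, 2, 3, 4, 5], "score": [9, 7, 5, 3, 1]})
upper_bound, lower_bound = 2, 4

# f-string interpolation builds the boolean expression for query()
subset = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
print(subset["rank"].tolist())  # [2, 3, 4]
```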
@@ -209,7 +211,7 @@ def shapley_values(
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
 
 
-### standardized shapley values
+# standardized shapley values
 def competing_powers(
     d, weights, upper_bound, lower_bound, features, num_batches, group_feature="_N"
 ):
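For orientation, a hedged call of the signature above. The column layout of `d` (per-item `score`, `rank`, feature columns, and a `_N` group id) and the dict-style `weights` are assumptions inferred from the function body, not something the diff states:

```python
import pandas as pd

# assumed input layout, inferred from the body of competing_powers
d = pd.DataFrame({
    "_N": [0, 0, 1, 1],             # group id (the default group_feature)
    "rank": [1, 2, 1, 2],
    "score": [0.9, 0.4, 0.8, 0.3],
    "f1": [0.5, 0.2, 0.6, 0.1],
    "f2": [0.4, 0.2, 0.2, 0.2],
})
weights = {"f1": 0.7, "f2": 0.3}    # keyed by feature name, per weights[attr]

contri, contri_priv, contri_prot = competing_powers(
    d, weights, upper_bound=1, lower_bound=2,
    features=["f1", "f2"], num_batches=1,
)
```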
@@ -221,14 +223,15 @@ def competing_powers(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler().fit_transform(group[['Attribute 1', 'Attribute 2']])
+        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler()\
+        #     .fit_transform(group[['Attribute 1', 'Attribute 2']])
         score_sum = group.loc[:, "score"].sum()
 
         for attr in features:
             group[attr + "_contri"] = weights[attr] * group[attr] / score_sum
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
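The standardized variant normalizes each attribute's weighted value by the group's total score, so contributions are comparable across groups of different score scales. A toy check of the per-group arithmetic (numbers are illustrative):

```python
import pandas as pd

group = pd.DataFrame({"score": [4.0, 6.0], "f1": [1.0, 3.0]})
weights = {"f1": 0.5}

score_sum = group.loc[:, "score"].sum()          # 10.0
group["f1_contri"] = weights["f1"] * group["f1"] / score_sum
print(group["f1_contri"].tolist())               # [0.05, 0.15]
```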
@@ -272,7 +275,7 @@ def competing_powers(
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
 
 
-### Rank-relevance Shapley values
+# Rank-relevance Shapley values
 def competing_powers2(
     d,
     weights,
@@ -292,25 +295,27 @@ def competing_powers2(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler().fit_transform(group[['Attribute 1', 'Attribute 2']])
-        rank_sum = group.loc[:, "rank"].sum()
+        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler()\
+        #     .fit_transform(group[['Attribute 1', 'Attribute 2']])
+        # rank_sum = group.loc[:, "rank"].sum()
         rank_max = group.loc[:, "rank"].max()
 
-        # calculate the attention of the item based on reverse of the rank over the rank_sum, with optional exponential magnifier par
+        # calculate the item's attention from the reverse of its rank over
+        # rank_max, with an optional exponential magnifier parameter
         group["attention"] = (1 - group["rank"] / rank_max) ** exponential
         group[["attention"]] = MinMaxScaler().fit_transform(
             group[["attention"]]
         )  # scale the attention back to 0 to 1
 
-        ## the raw payout is the score_std
+        # the raw payout is the score_std
         group["score_std"] = sum([weights[attr] * group[attr] for attr in features])
         for attr in features:
             group[attr + "_contri"] = (
                 weights[attr] * group[attr] * group["attention"] / group["score_std"]
             )
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
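The attention weighting decays with rank: rank 1 gets the most attention, the worst rank gets zero, and `exponential` sharpens the drop-off before the MinMax rescale restores the [0, 1] range. A toy trace of just these two lines:

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

group = pd.DataFrame({"rank": [1, 2, 3, 4]})
exponential = 2
rank_max = group.loc[:, "rank"].max()  # 4

# (1 - 1/4)^2 = 0.5625, (1 - 2/4)^2 = 0.25, 0.0625, 0.0
group["attention"] = (1 - group["rank"] / rank_max) ** exponential

# rescale so the best rank maps to 1 and the worst to 0
group[["attention"]] = MinMaxScaler().fit_transform(group[["attention"]])
print(group["attention"].round(4).tolist())  # [1.0, 0.4444, 0.1111, 0.0]
```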
@@ -352,41 +357,3 @@ def competing_powers2(
     df_mean_contri_protected = transform_df(df_mean_contri_protected)
 
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
-
-
-# box plots
-# def box_plot_competing_power3(df1, df2, df3, df_names):
-#     dfs = [df1, df2, df3]
-#     fig = go.Figure()
-#
-#     marker_colors = ["#FF851B", "#1b23ff", "#00FF00", "#FF00FF", "#FFFF00", "#00FFFF"]
-#
-#     for idx, df in enumerate(dfs):
-#         df = df.T.copy()
-#         # df.columns = [f'Attribute {i+1}' for i in range(len(df.columns))]
-#         # df = pd.melt(df, id_vars=None, value_vars=[f'Attribute {i+1}' for i in range(len(df.columns))])
-#         df = pd.melt(df, id_vars=None, value_vars=df.columns)
-#         df = df.rename(columns={"variable": "average contribution"})
-#
-#         y = df["average contribution"].values
-#
-#         fig.add_trace(
-#             go.Box(
-#                 y=df["value"].values,
-#                 x=y,
-#                 name=df_names[idx],
-#                 boxpoints="all",
-#                 jitter=0.5,
-#                 whiskerwidth=0.2,
-#                 marker_color=marker_colors[idx % len(marker_colors)],
-#                 marker_size=2,
-#                 line_width=1,
-#             )
-#         )
-#
-#     fig.update_layout(
-#         xaxis=dict(title="average contribution", zeroline=False),
-#         boxmode="group",
-#     )
-#
-#     return fig
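For reference, a minimal runnable sketch of what the deleted helper did (melt each contribution frame, then draw grouped plotly box traces). The name and the trimmed parameter list here are illustrative, not a drop-in restoration of the removed code:

```python
import pandas as pd
import plotly.graph_objects as go

def box_plot_contributions(dfs, df_names):
    """Grouped box plot of average contributions, one trace per frame."""
    fig = go.Figure()
    colors = ["#FF851B", "#1b23ff", "#00FF00"]
    for idx, df in enumerate(dfs):
        # transpose so attributes become columns, then melt to long form
        melted = pd.melt(df.T.copy(), value_vars=list(df.T.columns))
        melted = melted.rename(columns={"variable": "average contribution"})
        fig.add_trace(
            go.Box(
                y=melted["value"].values,
                x=melted["average contribution"].values,
                name=df_names[idx],
                boxpoints="all",
                jitter=0.5,
                marker_color=colors[idx % len(colors)],
            )
        )
    fig.update_layout(
        xaxis=dict(title="average contribution", zeroline=False),
        boxmode="group",
    )
    return fig
```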