@@ -16,7 +16,8 @@ def hilw_contributions(
1616 """
1717 Based on Dasgupta's original implementation.
1818
19- `method` should be one of the following: `shapley`, `standardized shapley`, `rank-relevance shapley`
19+ `method` should be one of the following: `shapley`,
20+ `standardized shapley`, `rank-relevance shapley`
2021
2122 hilw contributions for the entire population (no groupings, no batches).
2223 """
@@ -60,7 +61,7 @@ def hilw_contributions(
         df[["attention"]]
     )  # scale the attention back to 0 to 1
 
-    ## the raw payout is the score_std
+    # the raw payout is the score_std
     df["score_std"] = sum(
         [weights[num_attr] * df[attr] for num_attr, attr in enumerate(features)]
     )
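To make the `score_std` line concrete: it is a weighted sum over feature columns, with `weights` indexed positionally via `enumerate`. A self-contained toy trace (column names and weights are made up):

```python
import pandas as pd

# illustrative data; "f1"/"f2" and the weights are not from the diff
df = pd.DataFrame({"f1": [0.2, 0.8], "f2": [0.5, 0.1]})
features = ["f1", "f2"]
weights = [0.7, 0.3]  # positional weights, matched to features by enumerate()

# same pattern as the hunk above: sum of weight-scaled feature columns
df["score_std"] = sum(
    weights[num_attr] * df[attr] for num_attr, attr in enumerate(features)
)
print(df["score_std"].tolist())  # [0.29, 0.59]
```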
@@ -155,7 +156,8 @@ def shapley_values(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[[x for x in features]] = MinMaxScaler().fit_transform(group[[x for x in features]])
+        # group[[x for x in features]] = MinMaxScaler()\
+        #     .fit_transform(group[[x for x in features]])
         avg_attributes = dict()
         for attr in features:
             avg_attributes[attr + "_avg"] = group.loc[:, attr].mean()
@@ -165,7 +167,7 @@ def shapley_values(
 
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
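The "topN" subset keeps only rows whose rank falls in the `[upper_bound, lower_bound]` window (rank 1 being best, hence the naming). The f-string interpolates the bounds into the expression evaluated by `DataFrame.query`; a toy trace:

```python
import pandas as pd

dff = pd.DataFrame({"rank": [1, 2, 3, 4, 5], "score": [9, 7, 5, 3, 1]})
upper_bound, lower_bound = 2, 4

# f-string interpolation builds the boolean expression for query()
subset = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
print(subset["rank"].tolist())  # [2, 3, 4]
```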
@@ -209,7 +211,7 @@ def shapley_values(
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
 
 
-### standardized shapley values
+# standardized shapley values
 def competing_powers(
     d, weights, upper_bound, lower_bound, features, num_batches, group_feature="_N"
 ):
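For orientation, a hedged call of the signature above. The column layout of `d` (per-item `score`, `rank`, feature columns, and a `_N` group id) and the dict-style `weights` are assumptions inferred from the function body, not something the diff states:

```python
import pandas as pd

# assumed input layout, inferred from the body of competing_powers
d = pd.DataFrame({
    "_N": [0, 0, 1, 1],             # group id (the default group_feature)
    "rank": [1, 2, 1, 2],
    "score": [0.9, 0.4, 0.8, 0.3],
    "f1": [0.5, 0.2, 0.6, 0.1],
    "f2": [0.4, 0.2, 0.2, 0.2],
})
weights = {"f1": 0.7, "f2": 0.3}    # keyed by feature name, per weights[attr]

contri, contri_priv, contri_prot = competing_powers(
    d, weights, upper_bound=1, lower_bound=2,
    features=["f1", "f2"], num_batches=1,
)
```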
@@ -221,14 +223,15 @@ def competing_powers(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler().fit_transform(group[['Attribute 1', 'Attribute 2']])
+        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler()\
+        #     .fit_transform(group[['Attribute 1', 'Attribute 2']])
         score_sum = group.loc[:, "score"].sum()
 
         for attr in features:
             group[attr + "_contri"] = weights[attr] * group[attr] / score_sum
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
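The standardized variant normalizes each attribute's weighted value by the group's total score, so contributions are comparable across groups of different score scales. A toy check of the per-group arithmetic (numbers are illustrative):

```python
import pandas as pd

group = pd.DataFrame({"score": [4.0, 6.0], "f1": [1.0, 3.0]})
weights = {"f1": 0.5}

score_sum = group.loc[:, "score"].sum()          # 10.0
group["f1_contri"] = weights["f1"] * group["f1"] / score_sum
print(group["f1_contri"].tolist())               # [0.05, 0.15]
```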
@@ -272,7 +275,7 @@ def competing_powers(
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
 
 
-### Rank-relevance Shapley values
+# Rank-relevance Shapley values
 def competing_powers2(
     d,
     weights,
@@ -292,25 +295,27 @@ def competing_powers2(
     grouped = df.groupby(group_feature)
 
     for n, group in grouped:
-        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler().fit_transform(group[['Attribute 1', 'Attribute 2']])
-        rank_sum = group.loc[:, "rank"].sum()
+        # group[['Attribute 1', 'Attribute 2']] = MinMaxScaler()\
+        #     .fit_transform(group[['Attribute 1', 'Attribute 2']])
+        # rank_sum = group.loc[:, "rank"].sum()
         rank_max = group.loc[:, "rank"].max()
 
-        # calculate the attention of the item based on reverse of the rank over the rank_sum, with optional exponential magnifier par
+        # calculate the item's attention from the reverse of its rank over
+        # rank_max, with an optional exponential magnifier parameter
         group["attention"] = (1 - group["rank"] / rank_max) ** exponential
         group[["attention"]] = MinMaxScaler().fit_transform(
             group[["attention"]]
         )  # scale the attention back to 0 to 1
 
-        ## the raw payout is the score_std
+        # the raw payout is the score_std
         group["score_std"] = sum([weights[attr] * group[attr] for attr in features])
         for attr in features:
             group[attr + "_contri"] = (
                 weights[attr] * group[attr] * group["attention"] / group["score_std"]
             )
         dff = pd.concat([dff, group], axis=0)
 
-    ### use topN to subset the data
+    # use topN to subset the data
     dff = dff.query(f"{upper_bound} <= rank <= {lower_bound}")
     grouped = dff.groupby(group_feature)
 
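The attention weighting decays with rank: rank 1 gets the most attention, the worst rank gets zero, and `exponential` sharpens the drop-off before the MinMax rescale restores the [0, 1] range. A toy trace of just these two lines:

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

group = pd.DataFrame({"rank": [1, 2, 3, 4]})
exponential = 2
rank_max = group.loc[:, "rank"].max()  # 4

# (1 - 1/4)^2 = 0.5625, (1 - 2/4)^2 = 0.25, 0.0625, 0.0
group["attention"] = (1 - group["rank"] / rank_max) ** exponential

# rescale so the best rank maps to 1 and the worst to 0
group[["attention"]] = MinMaxScaler().fit_transform(group[["attention"]])
print(group["attention"].round(4).tolist())  # [1.0, 0.4444, 0.1111, 0.0]
```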
@@ -352,41 +357,3 @@ def competing_powers2(
     df_mean_contri_protected = transform_df(df_mean_contri_protected)
 
     return df_mean_contri, df_mean_contri_privileged, df_mean_contri_protected
-
-
-# box plots
-# def box_plot_competing_power3(df1, df2, df3, df_names):
-#     dfs = [df1, df2, df3]
-#     fig = go.Figure()
-#
-#     marker_colors = ["#FF851B", "#1b23ff", "#00FF00", "#FF00FF", "#FFFF00", "#00FFFF"]
-#
-#     for idx, df in enumerate(dfs):
-#         df = df.T.copy()
-#         # df.columns = [f'Attribute {i+1}' for i in range(len(df.columns))]
-#         # df = pd.melt(df, id_vars=None, value_vars=[f'Attribute {i+1}' for i in range(len(df.columns))])
-#         df = pd.melt(df, id_vars=None, value_vars=df.columns)
-#         df = df.rename(columns={"variable": "average contribution"})
-#
-#         y = df["average contribution"].values
-#
-#         fig.add_trace(
-#             go.Box(
-#                 y=df["value"].values,
-#                 x=y,
-#                 name=df_names[idx],
-#                 boxpoints="all",
-#                 jitter=0.5,
-#                 whiskerwidth=0.2,
-#                 marker_color=marker_colors[idx % len(marker_colors)],
-#                 marker_size=2,
-#                 line_width=1,
-#             )
-#         )
-#
-#     fig.update_layout(
-#         xaxis=dict(title="average contribution", zeroline=False),
-#         boxmode="group",
-#     )
-#
-#     return fig
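For reference, a minimal runnable sketch of what the deleted helper did (melt each contribution frame, then draw grouped plotly box traces). The name and the trimmed parameter list here are illustrative, not a drop-in restoration of the removed code:

```python
import pandas as pd
import plotly.graph_objects as go

def box_plot_contributions(dfs, df_names):
    """Grouped box plot of average contributions, one trace per frame."""
    fig = go.Figure()
    colors = ["#FF851B", "#1b23ff", "#00FF00"]
    for idx, df in enumerate(dfs):
        # transpose so attributes become columns, then melt to long form
        melted = pd.melt(df.T.copy(), value_vars=list(df.T.columns))
        melted = melted.rename(columns={"variable": "average contribution"})
        fig.add_trace(
            go.Box(
                y=melted["value"].values,
                x=melted["average contribution"].values,
                name=df_names[idx],
                boxpoints="all",
                jitter=0.5,
                marker_color=colors[idx % len(colors)],
            )
        )
    fig.update_layout(
        xaxis=dict(title="average contribution", zeroline=False),
        boxmode="group",
    )
    return fig
```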