Skip to content

Commit ff38526

Browse files
authored
Feature/debias wrapper (#152)
Added DebiasWrapper for metrics
1 parent e682847 commit ff38526

File tree

13 files changed

+1008
-79
lines changed

13 files changed

+1008
-79
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
## Unreleased
1010

11+
### Added
12+
- `Debias` mechanism for classification, ranking and AUC metrics. New parameter `is_debiased` added to the `calc_from_confusion_df`, `calc_per_user_from_confusion_df` methods of classification metrics, the `calc_from_fitted`, `calc_per_user_from_fitted` methods of AUC and ranking (`MAP`) metrics, and the `calc_from_merged`, `calc_per_user_from_merged` methods of ranking (`NDCG`, `MRR`) metrics. ([#152](https://github.com/MobileTeleSystems/RecTools/pull/152))
1113

1214
## [0.7.0] - 29.07.2024
1315

rectools/metrics/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,13 @@
4747
`metrics.PairwiseDistanceCalculator`
4848
`metrics.PairwiseHammingDistanceCalculator`
4949
`metrics.SparsePairwiseHammingDistanceCalculator`
50+
`metrics.DebiasConfig`
51+
`metrics.debias_interactions`
5052
"""
5153

5254
from .auc import PAP, PartialAUC
5355
from .classification import MCC, Accuracy, F1Beta, HitRate, Precision, Recall
56+
from .debias import DebiasConfig, debias_interactions
5457
from .distances import (
5558
PairwiseDistanceCalculator,
5659
PairwiseHammingDistanceCalculator,
@@ -89,4 +92,6 @@
8992
"SufficientReco",
9093
"UnrepeatedReco",
9194
"CoveredUsers",
95+
"DebiasConfig",
96+
"debias_interactions",
9297
)

rectools/metrics/auc.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from attrs import define, field
2222

2323
from rectools import Columns
24-
from rectools.metrics.base import MetricAtK, outer_merge_reco
24+
from rectools.metrics.base import outer_merge_reco
25+
from rectools.metrics.debias import DebiasableMetrikAtK, calc_debiased_fit_task, debias_interactions
2526

2627

2728
class InsufficientHandling(str, Enum):
@@ -58,7 +59,7 @@ class AUCFitted:
5859

5960

6061
@define
61-
class _AUCMetric(MetricAtK):
62+
class _AUCMetric(DebiasableMetrikAtK):
6263
"""
6364
ROC AUC based metric base class.
6465
@@ -88,6 +89,8 @@ class _AUCMetric(MetricAtK):
8889
until the model has non-zero scores for the item in item-item similarity matrix. So with
8990
small `K` for neighbours in ItemKNN and big `K` for `recommend` and AUC based metric you
9091
will still get an error when `insufficient_handling` is set to `raise`.
92+
debias_config : DebiasConfig, optional, default None
93+
Config with debias method parameters (iqr_coef, random_state).
9194
"""
9295

9396
insufficient_handling: str = field(default="ignore")
@@ -217,36 +220,45 @@ def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Se
217220
pd.Series
218221
Values of metric (index - user id, values - metric value for every user).
219222
"""
223+
is_debiased = False
224+
if self.debias_config is not None:
225+
interactions = debias_interactions(interactions, self.debias_config)
226+
is_debiased = True
227+
220228
self._check(reco, interactions=interactions)
221229
insufficient_handling_needed = self.insufficient_handling != InsufficientHandling.IGNORE
222230
fitted = self.fit(reco, interactions, self.k, insufficient_handling_needed)
223-
return self.calc_per_user_from_fitted(fitted)
231+
return self.calc_per_user_from_fitted(fitted, is_debiased)
224232

225-
def calc_from_fitted(self, fitted: AUCFitted) -> float:
233+
def calc_from_fitted(self, fitted: AUCFitted, is_debiased: bool = False) -> float:
226234
"""
227235
Calculate metric value from fitted data.
228236
229237
Parameters
230238
----------
231239
fitted : AUCFitted
232240
Meta data that got from `.fit` method.
241+
is_debiased : bool, default False
242+
An indicator of whether the debias transformation has been applied before or not.
233243
234244
Returns
235245
-------
236246
float
237247
Value of metric (average between users).
238248
"""
239-
per_user = self.calc_per_user_from_fitted(fitted)
249+
per_user = self.calc_per_user_from_fitted(fitted, is_debiased)
240250
return per_user.mean()
241251

242-
def calc_per_user_from_fitted(self, fitted: AUCFitted) -> pd.Series:
252+
def calc_per_user_from_fitted(self, fitted: AUCFitted, is_debiased: bool = False) -> pd.Series:
243253
"""
244254
Calculate metric values for all users from fitted data.
245255
246256
Parameters
247257
----------
248258
fitted : AUCFitted
249259
Meta data that got from `.fit` method.
260+
is_debiased : bool, default False
261+
An indicator of whether the debias transformation has been applied before or not.
250262
251263
Returns
252264
-------
@@ -307,6 +319,8 @@ class PartialAUC(_AUCMetric):
307319
until the model has non-zero scores for the item in item-item similarity matrix. So with
308320
small `K` for neighbours in ItemKNN and big `K` for `recommend` and AUC based metric you
309321
will still get an error when `insufficient_handling` is set to `raise`.
322+
debias_config : DebiasConfig, optional, default None
323+
Config with debias method parameters (iqr_coef, random_state).
310324
311325
Examples
312326
--------
@@ -339,25 +353,26 @@ def _get_sufficient_reco_explanation(self) -> str:
339353
not too high.
340354
"""
341355

342-
def calc_per_user_from_fitted(self, fitted: AUCFitted) -> pd.Series:
356+
def calc_per_user_from_fitted(self, fitted: AUCFitted, is_debiased: bool = False) -> pd.Series:
343357
"""
344358
Calculate metric values for all users from fitted data.
345359
346360
Parameters
347361
----------
348362
fitted : AUCFitted
349363
Meta data that got from `.fit` method.
364+
is_debiased : bool, default False
365+
An indicator of whether the debias transformation has been applied before or not.
350366
351367
Returns
352368
-------
353369
pd.Series
354370
Values of metric (index - user id, values - metric value for every user).
355371
"""
372+
self._check_debias(is_debiased, obj_name="AUCFitted")
356373
outer_merged = fitted.outer_merged_enriched
357-
358374
# Keep k first false positives for roc auc computation, keep all predicted test positives
359375
cropped = outer_merged[(outer_merged["__fp_cumsum"] < self.k) & (~outer_merged[Columns.Rank].isna())]
360-
361376
cropped_suf, n_pos_suf = self._handle_insufficient_cases(
362377
outer_merged=cropped, n_pos=fitted.n_pos, n_fp_insufficient=fitted.n_fp_insufficient
363378
)
@@ -415,6 +430,8 @@ class PAP(_AUCMetric):
415430
until the model has non-zero scores for the item in item-item similarity matrix. So with
416431
small `K` for neighbours in ItemKNN and big `K` for `recommend` and AUC based metric you
417432
will still get an error when `insufficient_handling` is set to `raise`.
433+
debias_config : DebiasConfig, optional, default None
434+
Config with debias method parameters (iqr_coef, random_state).
418435
419436
Examples
420437
--------
@@ -447,22 +464,24 @@ def _get_sufficient_reco_explanation(self) -> str:
447464
for all users.
448465
"""
449466

450-
def calc_per_user_from_fitted(self, fitted: AUCFitted) -> pd.Series:
467+
def calc_per_user_from_fitted(self, fitted: AUCFitted, is_debiased: bool = False) -> pd.Series:
451468
"""
452469
Calculate metric values for all users from outer merged recommendations.
453470
454471
Parameters
455472
----------
456473
fitted : AUCFitted
457474
Meta data that got from `.fit` method.
475+
is_debiased : bool, default False
476+
An indicator of whether the debias transformation has been applied before or not.
458477
459478
Returns
460479
-------
461480
pd.Series
462481
Values of metric (index - user id, values - metric value for every user).
463482
"""
483+
self._check_debias(is_debiased, obj_name="AUCFitted")
464484
outer_merged = fitted.outer_merged_enriched
465-
466485
# Keep k first false positives and k first predicted test positives for roc auc computation
467486
cropped = outer_merged[
468487
(outer_merged["__test_pos_cumsum"] <= self.k)
@@ -513,12 +532,22 @@ def calc_auc_metrics(
513532
"""
514533
results = {}
515534

516-
k_max = max(metric.k for metric in metrics.values())
517535
insufficient_handling_needed = any(
518536
metric.insufficient_handling != InsufficientHandling.IGNORE for metric in metrics.values()
519537
)
520-
fitted = _AUCMetric.fit(reco, interactions, k_max, insufficient_handling_needed)
538+
539+
debiased_fit_task = calc_debiased_fit_task(metrics.values(), interactions)
540+
fitted_debiased = {}
541+
for debias_config_name, (k_max_d, interactions_d) in debiased_fit_task.items():
542+
fitted_debiased[debias_config_name] = _AUCMetric.fit(
543+
reco, interactions_d, k_max_d, insufficient_handling_needed
544+
)
545+
521546
for name, metric in metrics.items():
522-
results[name] = metric.calc_from_fitted(fitted)
547+
is_debiased = metric.debias_config is not None
548+
results[name] = metric.calc_from_fitted(
549+
fitted=fitted_debiased[metric.debias_config],
550+
is_debiased=is_debiased,
551+
)
523552

524553
return results

0 commit comments

Comments
 (0)