
Commit f7160bb

Feature/idcg and coverage (#266)

- Added `CatalogCoverage`
- Added `divide_by_achievable` argument for `NDCG`

1 parent e8728b3 commit f7160bb

8 files changed: +213 −33 lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## Unreleased
+
+### Added
+- `CatalogCoverage` metric ([#266](https://github.com/MobileTeleSystems/RecTools/pull/266))
+- `divide_by_achievable` argument to `NDCG` metric ([#266](https://github.com/MobileTeleSystems/RecTools/pull/266))
+
 ## [0.11.0] - 17.02.2025
 
 ### Added

rectools/metrics/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2022-2024 MTS (Mobile Telesystems)
+# Copyright 2022-2025 MTS (Mobile Telesystems)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -40,6 +40,7 @@
 `metrics.SufficientReco`
 `metrics.UnrepeatedReco`
 `metrics.CoveredUsers`
+`metrics.CatalogCoverage`
 
 Tools
 -----
@@ -52,6 +53,7 @@
 """
 
 from .auc import PAP, PartialAUC
+from .catalog import CatalogCoverage
 from .classification import MCC, Accuracy, F1Beta, HitRate, Precision, Recall
 from .debias import DebiasConfig, debias_interactions
 from .distances import (
@@ -80,6 +82,7 @@
     "PartialAUC",
     "PAP",
     "MRR",
+    "CatalogCoverage",
     "MeanInvUserFreq",
     "IntraListDiversity",
     "AvgRecPopularity",

rectools/metrics/catalog.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+# Copyright 2025 MTS (Mobile Telesystems)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Catalog statistics metrics for recommendations."""
+
+import typing as tp
+
+import pandas as pd
+
+from rectools import Columns
+
+from .base import Catalog, MetricAtK
+
+
+class CatalogCoverage(MetricAtK):
+    """
+    Share of catalog items that are present in recommendations across all users.
+
+    Parameters
+    ----------
+    k : int
+        Number of items at the top of recommendations list that will be used to calculate metric.
+    """
+
+    def calc(self, reco: pd.DataFrame, catalog: Catalog) -> float:
+        """
+        Calculate metric value.
+
+        Parameters
+        ----------
+        reco : pd.DataFrame
+            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
+        catalog : collection
+            Collection of unique item ids that could be used for recommendations.
+
+        Returns
+        -------
+        float
+            Value of metric (aggregated for all users).
+        """
+        return reco.loc[reco[Columns.Rank] <= self.k, Columns.Item].nunique() / len(catalog)
+
+
+CatalogMetric = CatalogCoverage
+
+
+def calc_catalog_metrics(
+    metrics: tp.Dict[str, CatalogMetric],
+    reco: pd.DataFrame,
+    catalog: Catalog,
+) -> tp.Dict[str, float]:
+    """
+    Calculate metrics of catalog statistics for recommendations.
+
+    Warning: It is not recommended to use this function directly.
+    Use `calc_metrics` instead.
+
+    Parameters
+    ----------
+    metrics : dict(str -> CatalogMetric)
+        Dict of metric objects to calculate,
+        where key is a metric name and value is a metric object.
+    reco : pd.DataFrame
+        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
+    catalog : collection
+        Collection of unique item ids that could be used for recommendations.
+
+    Returns
+    -------
+    dict(str -> float)
+        Dictionary where keys are the same as keys in `metrics`
+        and values are metric calculation results.
+    """
+    return {metric_name: metric.calc(reco, catalog) for metric_name, metric in metrics.items()}
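
A quick usage sketch of the new class (not part of the commit; the data is illustrative, while `CatalogCoverage(k=...).calc(reco, catalog)` follows the signature defined above):

```python
import numpy as np
import pandas as pd

from rectools import Columns
from rectools.metrics import CatalogCoverage

# Illustrative recommendations: within the top k=2, only items {1, 2} appear.
reco = pd.DataFrame(
    {
        Columns.User: [1, 1, 2, 2],
        Columns.Item: [1, 2, 1, 3],
        Columns.Rank: [1, 2, 1, 3],
    }
)
catalog = np.arange(5)  # 5 items could have been recommended

print(CatalogCoverage(k=2).calc(reco, catalog))  # 2 unique items / 5 -> 0.4
```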

rectools/metrics/ranking.py

Lines changed: 54 additions & 22 deletions
@@ -1,4 +1,4 @@
-# Copyright 2022-2024 MTS (Mobile Telesystems)
+# Copyright 2022-2025 MTS (Mobile Telesystems)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -317,27 +317,37 @@ class NDCG(_RankingMetric):
     Estimates relevance of recommendations taking into account their order.
 
     .. math::
-        NDCG@k = DCG@k / IDCG@k
-    where :math:`DCG@k = \sum_{i=1}^{k+1} rel(i) / \log(i+1)` -
-    Discounted Cumulative Gain at k, main part of `NDCG@k`.
+        NDCG@k = \frac{1}{|U|} \sum_{u \in U} \frac{DCG_u@k}{IDCG_u@k}
 
-    The closer it is to the top the more weight it assigns to relevant items.
-    Here:
-        - `rel(i)` is an indicator function, it equals to ``1``
-          if an item at rank `i` is relevant, ``0`` otherwise;
-        - `log` - logarithm at any given base, usually ``2``.
-
-    and :math:`IDCG@k = \sum_{i=1}^{k+1} (1 / \log(i + 1))` -
-    `Ideal DCG@k`, maximum possible value of `DCG@k`, used as
-    normalization coefficient to ensure that `NDCG@k` values
-    lie in ``[0, 1]``.
+    where
+
+    - :math:`DCG_u@k` is the "Discounted Cumulative Gain" at k for user u;
+    - "Gain" stands for the relevance of the item at position i to the user: it equals ``1``
+      if the item is relevant, ``0`` otherwise;
+    - "Discounted Gain" means that the original item relevance is discounted based on the
+      item's rank: the closer the item is to the top, the more gain is achieved;
+    - "Discounted Cumulative Gain" means that the discounted gains are summed together;
+    - :math:`IDCG_u@k` is the "Ideal Discounted Cumulative Gain" at k for user u: the maximum
+      possible value of `DCG@k`, used as a normalization coefficient to ensure that `NDCG@k`
+      values lie in ``[0, 1]``.
+
+    When `divide_by_achievable` is set to ``False`` (default), `IDCG_u@k` is the same value for
+    all users and is equal to
+    :math:`IDCG_u@k = \sum_{i=1}^{k} \frac{1}{\log(i + 1)}`.
+    When `divide_by_achievable` is set to ``True``, IDCG depends on the number of relevant
+    items each user has in the test set:
+    :math:`IDCG_u@k = \sum_{i=1}^{\min(|R(u)|, k)} \frac{1}{\log(i + 1)}`.
 
     Parameters
     ----------
     k : int
         Number of items at the top of recommendations list that will be used to calculate metric.
     log_base : int, default ``2``
         Base of logarithm used to weight relevant items.
+    divide_by_achievable : bool, default ``False``
+        When set to ``False`` (default), IDCG is calculated as one value for all users and
+        equals the maximum gain achievable when all ``k`` positions are relevant.
+        When set to ``True``, IDCG is calculated for each user individually, considering
+        the maximum possible number of the user's test items in the top ``k`` positions.
     debias_config : DebiasConfig, optional, default None
         Config with debias method parameters (iqr_coef, random_state).
@@ -368,6 +378,7 @@ class NDCG(_RankingMetric):
     """
 
     log_base: int = attr.ib(default=2)
+    divide_by_achievable: bool = attr.ib(default=False)
 
     def calc_per_user(self, reco: pd.DataFrame, interactions: pd.DataFrame) -> pd.Series:
         """
@@ -429,15 +440,36 @@ def calc_per_user_from_merged(self, merged: pd.DataFrame, is_debiased: bool = Fa
         if not is_debiased and self.debias_config is not None:
             merged = debias_interactions(merged, self.debias_config)
 
-        dcg = (merged[Columns.Rank] <= self.k).astype(int) / log_at_base(merged[Columns.Rank] + 1, self.log_base)
-        idcg = (1 / log_at_base(np.arange(1, self.k + 1) + 1, self.log_base)).sum()
-        ndcg = (
-            pd.DataFrame({Columns.User: merged[Columns.User], "__ndcg": dcg / idcg})
-            .groupby(Columns.User, sort=False)["__ndcg"]
-            .sum()
-            .rename(None)
-        )
-        return ndcg
+        # DCG
+        # Avoid division by 0 with `+1` for rank value in denominator before taking logarithm
+        merged["__DCG"] = (merged[Columns.Rank] <= self.k).astype(int) / log_at_base(
+            merged[Columns.Rank] + 1, self.log_base
+        )
+        ranks = np.arange(1, self.k + 1)
+        discounted_gains = 1 / log_at_base(ranks + 1, self.log_base)
+
+        if self.divide_by_achievable:
+            grouped = merged.groupby(Columns.User, sort=False)
+            stats = grouped.agg(n_items=(Columns.Item, "count"), dcg=("__DCG", "sum"))
+
+            # IDCG
+            n_items_to_ndcg_map = dict(zip(ranks, discounted_gains.cumsum()))
+            n_items_to_ndcg_map[0] = 0
+            idcg = stats["n_items"].clip(upper=self.k).map(n_items_to_ndcg_map)
+
+            # NDCG
+            ndcg = stats["dcg"] / idcg
+
+        else:
+            idcg = discounted_gains.sum()
+            ndcg = (
+                pd.DataFrame({Columns.User: merged[Columns.User], "__ndcg": merged["__DCG"] / idcg})
+                .groupby(Columns.User, sort=False)["__ndcg"]
+                .sum()
+            )
+
+        del merged["__DCG"]
+        return ndcg.rename(None)
 
 
 class MRR(_RankingMetric):
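
A worked example of what the new flag changes (not part of the commit; it assumes the standard `calc(reco, interactions)` entry point of RecTools ranking metrics, and the data is illustrative):

```python
import numpy as np
import pandas as pd

from rectools import Columns
from rectools.metrics import NDCG

# One user with a single relevant test item, recommended at rank 3.
reco = pd.DataFrame({Columns.User: [1, 1, 1], Columns.Item: [7, 8, 9], Columns.Rank: [1, 2, 3]})
interactions = pd.DataFrame({Columns.User: [1], Columns.Item: [9]})

dcg = 1 / np.log2(3 + 1)  # single hit at rank 3

# Default: IDCG assumes all k positions could hold relevant items.
idcg_full = 1 / np.log2(2) + 1 / np.log2(3) + 1 / np.log2(4)
assert np.isclose(NDCG(k=3).calc(reco, interactions), dcg / idcg_full)  # ~0.235

# divide_by_achievable=True: the ideal ranking puts the one relevant item at rank 1.
idcg_achievable = 1 / np.log2(2)  # == 1
assert np.isclose(
    NDCG(k=3, divide_by_achievable=True).calc(reco, interactions), dcg / idcg_achievable
)  # 0.5
```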

rectools/metrics/scoring.py

Lines changed: 10 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2022-2024 MTS (Mobile Telesystems)
+# Copyright 2022-2025 MTS (Mobile Telesystems)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 
 from .auc import AucMetric, calc_auc_metrics
 from .base import Catalog, MetricAtK, merge_reco
+from .catalog import CatalogMetric, calc_catalog_metrics
 from .classification import ClassificationMetric, SimpleClassificationMetric, calc_classification_metrics
 from .diversity import DiversityMetric, calc_diversity_metrics
 from .dq import CrossDQMetric, RecoDQMetric, calc_cross_dq_metrics, calc_reco_dq_metrics
@@ -150,6 +151,14 @@ def calc_metrics(  # noqa  # pylint: disable=too-many-branches,too-many-locals,t
     novelty_values = calc_novelty_metrics(novelty_metrics, reco, prev_interactions)
     results.update(novelty_values)
 
+    # Catalog
+    catalog_metrics = select_by_type(metrics, CatalogMetric)
+    if catalog_metrics:
+        if catalog is None:
+            raise ValueError("For calculating catalog metrics it's necessary to set 'catalog'")
+        catalog_values = calc_catalog_metrics(catalog_metrics, reco, catalog)
+        results.update(catalog_values)
+
     # Popularity
     popularity_metrics = select_by_type(metrics, PopularityMetric)
     if popularity_metrics:
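
A minimal sketch of reaching the new branch through `calc_metrics` (not part of the commit; it assumes the `metrics`, `reco` and `catalog` keyword arguments visible in the function body above, and the data is illustrative):

```python
import numpy as np
import pandas as pd

from rectools import Columns
from rectools.metrics import CatalogCoverage, calc_metrics

reco = pd.DataFrame(
    {
        Columns.User: [1, 1, 2],
        Columns.Item: [3, 4, 3],
        Columns.Rank: [1, 2, 1],
    }
)

# Omitting `catalog` here would raise the ValueError added above.
results = calc_metrics(
    metrics={"catalog_coverage": CatalogCoverage(k=2)},
    reco=reco,
    catalog=np.arange(10),
)
print(results)  # {'catalog_coverage': 0.2}
```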

tests/metrics/test_catalog.py

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+# Copyright 2025 MTS (Mobile Telesystems)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=attribute-defined-outside-init
+
+import numpy as np
+import pandas as pd
+
+from rectools import Columns
+from rectools.metrics import CatalogCoverage
+
+
+class TestCatalogCoverage:
+    def setup_method(self) -> None:
+        self.metric = CatalogCoverage(k=2)
+        self.reco = pd.DataFrame(
+            {
+                Columns.User: [1, 1, 1, 2, 2, 3, 4],
+                Columns.Item: [1, 2, 3, 1, 2, 1, 1],
+                Columns.Rank: [1, 2, 3, 1, 1, 3, 2],
+            }
+        )
+
+    def test_calc(self) -> None:
+        catalog = np.arange(5)
+        expected = 0.4
+        assert self.metric.calc(self.reco, catalog) == expected
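
For reference, the expected value follows directly from the definition: within the top k=2 the recommendations cover only the unique items {1, 2}, and the catalog holds 5 items, so coverage is 2 / 5 = 0.4.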

tests/metrics/test_ranking.py

Lines changed: 10 additions & 8 deletions
@@ -1,4 +1,4 @@
-# Copyright 2022-2024 MTS (Mobile Telesystems)
+# Copyright 2022-2025 MTS (Mobile Telesystems)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -99,13 +99,15 @@ class TestNDCG:
     _idcg_at_3 = 1 / np.log2(2) + 1 / np.log2(3) + 1 / np.log2(4)
 
     @pytest.mark.parametrize(
-        "k,expected_ndcg",
+        "k,divide_by_achievable,expected_ndcg",
         (
-            (1, [0, 0, 1, 1, 0]),
-            (3, [0, 0, 1, 1 / _idcg_at_3, 0.5 / _idcg_at_3]),
+            (1, False, [0, 0, 1, 1, 0]),
+            (3, False, [0, 0, 1, 1 / _idcg_at_3, 0.5 / _idcg_at_3]),
+            (1, True, [0, 0, 1, 1, 0]),
+            (3, True, [0, 0, 1, 1, (1 / np.log2(4)) / (1 / np.log2(2))]),
         ),
     )
-    def test_calc(self, k: int, expected_ndcg: tp.List[float]) -> None:
+    def test_calc(self, k: int, divide_by_achievable: bool, expected_ndcg: tp.List[float]) -> None:
         reco = pd.DataFrame(
             {
                 Columns.User: [1, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6],
@@ -115,12 +117,12 @@ def test_calc(self, k: int, expected_ndcg: tp.List[float]) -> None:
         )
         interactions = pd.DataFrame(
             {
-                Columns.User: [1, 2, 3, 3, 3, 4, 5, 5, 5, 5],
-                Columns.Item: [1, 1, 1, 2, 3, 1, 1, 2, 3, 4],
+                Columns.User: [1, 2, 3, 3, 3, 4, 5],
+                Columns.Item: [1, 1, 1, 2, 3, 1, 1],
             }
         )
 
-        metric = NDCG(k=k)
+        metric = NDCG(k=k, divide_by_achievable=divide_by_achievable)
         expected_metric_per_user = pd.Series(
             expected_ndcg,
             index=pd.Series([1, 2, 3, 4, 5], name=Columns.User),
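
The two new `True` cases differ from their `False` counterparts only for users 4 and 5, each of whom now has a single relevant test item: achievable IDCG is then capped at 1 / log2(2) = 1, so user 4's expected value rises from 1 / _idcg_at_3 to 1, and user 5's from 0.5 / _idcg_at_3 to (1 / np.log2(4)) / (1 / np.log2(2)) = 0.5.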

tests/metrics/test_scoring.py

Lines changed: 5 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2022-2024 MTS (Mobile Telesystems)
+# Copyright 2022-2025 MTS (Mobile Telesystems)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
     PAP,
     Accuracy,
     AvgRecPopularity,
+    CatalogCoverage,
     CoveredUsers,
     DebiasConfig,
     F1Beta,
@@ -118,6 +119,7 @@ def test_success(self) -> None:
             "sufficient": SufficientReco(k=2),
             "unrepeated": UnrepeatedReco(k=2),
             "covered_users": CoveredUsers(k=2),
+            "catalog_coverage": CatalogCoverage(k=2),
         }
         with pytest.warns(UserWarning, match="Custom metrics are not supported"):
             actual = calc_metrics(
@@ -147,6 +149,7 @@ def test_success(self) -> None:
             "sufficient": 0.25,
             "unrepeated": 1,
             "covered_users": 0.75,
+            "catalog_coverage": 0.2,
         }
         assert actual == expected
 
@@ -164,6 +167,7 @@ def test_success(self) -> None:
             (PartialAUC(k=1), ["reco"]),
             (Intersection(k=1), ["reco"]),
             (CoveredUsers(k=1), ["reco"]),
+            (CatalogCoverage(k=1), ["reco"]),
         ),
     )
     def test_raises(self, metric: MetricAtK, arg_names: tp.List[str]) -> None:
