
Commit dbf91ab

chore: remove deprecated functions (#2412)
1 parent 8824c4c

5 files changed: +21 additions, −125 deletions

src/ragas/metrics/_answer_correctness.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -257,8 +257,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         else:
             assert self.answer_similarity is not None, "AnswerSimilarity must be set"
 
-            similarity_score = await self.answer_similarity.ascore(
-                row, callbacks=callbacks
+            similarity_score = await self.answer_similarity.single_turn_ascore(
+                SingleTurnSample(**row), callbacks=callbacks
             )
 
         score = np.average(
```
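This hunk is the whole migration in miniature: the removed `ascore` took a plain row dict, while `single_turn_ascore` takes a typed `SingleTurnSample`. A minimal sketch of the same change from the caller's side; it assumes an `answer_correctness` instance whose LLM and embeddings are already configured, which this commit does not show:

```python
import asyncio

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import answer_correctness  # assumes its llm/embeddings are set up

row = {
    "user_input": "When was the first Super Bowl?",
    "response": "The first Super Bowl was held on January 15, 1967.",
    "reference": "The first Super Bowl took place on January 15, 1967.",
}

# Removed in this commit: await answer_correctness.ascore(row)
# Surviving API: wrap the row dict in a SingleTurnSample first.
score = asyncio.run(answer_correctness.single_turn_ascore(SingleTurnSample(**row)))
```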

src/ragas/metrics/_context_precision.py

Lines changed: 0 additions & 13 deletions
```diff
@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -317,12 +316,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)
 
-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 @dataclass
 class ContextUtilization(LLMContextPrecisionWithoutReference):
@@ -333,12 +326,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)
 
-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithoutReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 context_precision = ContextPrecision()
 context_utilization = ContextUtilization()
```
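With the dict-based `_ascore` overrides gone, `ContextPrecision` and `ContextUtilization` score only through the single-turn path they inherit. A hedged sketch of the surviving call pattern on the module-level `context_precision` instance; `evaluator_llm` is a placeholder for a configured ragas LLM wrapper, not something defined in this commit:

```python
import asyncio

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import context_precision  # the instance created at the bottom of this file

context_precision.llm = evaluator_llm  # placeholder; any ragas-wrapped LLM

sample = SingleTurnSample(
    user_input="Where is the Eiffel Tower?",
    reference="The Eiffel Tower is in Paris.",
    retrieved_contexts=["The Eiffel Tower is located in Paris, France."],
)

score = asyncio.run(context_precision.single_turn_ascore(sample))
```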

src/ragas/metrics/_context_recall.py

Lines changed: 0 additions & 12 deletions
```diff
@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -161,17 +160,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
 class ContextRecall(LLMContextRecall):
     name: str = "context_recall"
 
-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _single_turn_ascore(
-        self, sample: SingleTurnSample, callbacks: Callbacks
-    ) -> float:
-        row = sample.to_dict()
-        return await self._ascore(row, callbacks)
-
-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 @dataclass
 class NonLLMContextRecall(SingleTurnMetric):
```
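`ContextRecall` now contributes nothing beyond its legacy name on top of `LLMContextRecall`, and the batch evaluation path never touched the removed methods. For completeness, a sketch of that path under the same placeholder-LLM assumption (the sample contents are illustrative):

```python
from ragas import EvaluationDataset, SingleTurnSample, evaluate
from ragas.metrics import context_recall

dataset = EvaluationDataset(
    samples=[
        SingleTurnSample(
            user_input="Where is the Eiffel Tower?",
            reference="The Eiffel Tower is in Paris.",
            retrieved_contexts=["The Eiffel Tower is located in Paris, France."],
        )
    ]
)

# evaluate() drives single-turn metrics through single_turn_ascore internally,
# so it is unaffected by this removal. evaluator_llm is a placeholder.
result = evaluate(dataset, metrics=[context_recall], llm=evaluator_llm)
```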

src/ragas/metrics/base.py

Lines changed: 1 addition & 74 deletions
```diff
@@ -20,7 +20,7 @@
 from ragas.metrics.validators import AllowedValuesType
 from ragas.prompt import FewShotPydanticPrompt, PromptMixin
 from ragas.run_config import RunConfig
-from ragas.utils import camel_to_snake, deprecated, get_metric_language
+from ragas.utils import camel_to_snake, get_metric_language
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -153,79 +153,6 @@ def init(self, run_config: RunConfig) -> None:
         """
         ...
 
-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    def score(self, row: t.Dict, callbacks: Callbacks = None) -> float:
-        """
-        Calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` or `multi_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-
-        async def _async_wrapper():
-            try:
-                result = await self._ascore(row=row, callbacks=group_cm)
-            except Exception as e:
-                if not group_cm.ended:
-                    rm.on_chain_error(e)
-                raise e
-            else:
-                if not group_cm.ended:
-                    rm.on_chain_end({"output": result})
-                return result
-
-        # Apply nest_asyncio logic to ensure compatibility in notebook/Jupyter environments.
-        apply_nest_asyncio()
-        return run(_async_wrapper)
-
-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    async def ascore(
-        self,
-        row: t.Dict,
-        callbacks: Callbacks = None,
-        timeout: t.Optional[float] = None,
-    ) -> float:
-        """
-        Asynchronously calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-        try:
-            score = await asyncio.wait_for(
-                self._ascore(row=row, callbacks=group_cm),
-                timeout=timeout,
-            )
-        except Exception as e:
-            if not group_cm.ended:
-                rm.on_chain_error(e)
-            raise e
-        else:
-            if not group_cm.ended:
-                rm.on_chain_end({"output": score})
-            return score
-
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        raise NotImplementedError(
-            f"Metric '{self.name}' has no implementation for _ascore. score() is deprecated and will be removed in 0.3. Please use single_turn_ascore or multi_turn_ascore instead."
-        )
-
 
 @dataclass
 class MetricWithLLM(Metric, PromptMixin):
```
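This is the core of the commit: `score` was the only synchronous entry point on the abstract `Metric`, and `ascore` the only dict-based async one, so callers that leaned on either now build a sample and drive the event loop themselves. A minimal replacement sketch, where `metric` stands for any configured single-turn metric (the call shape matches the notebook change below):

```python
import asyncio

from ragas.dataset_schema import SingleTurnSample

row = {"user_input": "a", "response": "b"}

# Removed (sync):  metric.score(row)
# Removed (async): await metric.ascore(row, timeout=30)
# Surviving API:
score = asyncio.run(metric.single_turn_ascore(SingleTurnSample(**row)))
```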

tests/unit/test_executor_in_jupyter.ipynb

Lines changed: 18 additions & 24 deletions
```diff
@@ -56,20 +56,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "async def _run():\n",
-    "    results = []\n",
-    "    for t in as_completed([echo(1), echo(2), echo(3)], 3):\n",
-    "        r = await t\n",
-    "        results.append(r)\n",
-    "    return results\n",
-    "\n",
-    "\n",
-    "results = await _run()\n",
-    "\n",
-    "expected = [1, 2, 3]\n",
-    "assert results == expected, f\"got: {results}, expected: {expected}\""
-   ]
+   "source": "async def _run():\n    results = []\n    for task in as_completed([echo(1), echo(2), echo(3)], 3):\n        r = await task\n        results.append(r)\n    return results\n\n\nresults = await _run()\n\nexpected = [1, 2, 3]\nassert results == expected, f\"got: {results}, expected: {expected}\""
   },
   {
    "cell_type": "markdown",
```
```diff
@@ -215,18 +202,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from ragas.metrics.base import Metric\n",
+    "import typing as t\n",
+    "from dataclasses import dataclass, field\n",
+    "\n",
+    "from ragas.dataset_schema import SingleTurnSample\n",
+    "from ragas.metrics.base import MetricType, SingleTurnMetric\n",
     "\n",
     "\n",
-    "class FakeMetric(Metric):\n",
-    "    name = \"fake_metric\"\n",
-    "    _required_columns = (\"user_input\", \"response\")\n",
+    "@dataclass\n",
+    "class FakeMetric(SingleTurnMetric):\n",
+    "    name: str = \"fake_metric\"\n",
+    "    _required_columns: t.Dict[MetricType, t.Set[str]] = field(\n",
+    "        default_factory=lambda: {MetricType.SINGLE_TURN: {\"user_input\", \"response\"}}\n",
+    "    )\n",
     "\n",
-    "    def init(self):\n",
+    "    def init(self, run_config=None):\n",
     "        pass\n",
     "\n",
-    "    async def _ascore(self, row, callbacks) -> float:\n",
-    "        return 0\n",
+    "    async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks) -> float:\n",
+    "        return 0.0\n",
     "\n",
     "\n",
     "fm = FakeMetric()"
```
```diff
@@ -238,8 +232,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "score = fm.score({\"user_input\": \"a\", \"response\": \"b\"})\n",
-    "assert score == 0"
+    "score = await fm.single_turn_ascore(SingleTurnSample(user_input=\"a\", response=\"b\"))\n",
+    "assert score == 0.0"
    ]
   },
   {
```
```diff
@@ -326,4 +320,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
```
