diff --git a/src/ragas/metrics/_answer_correctness.py b/src/ragas/metrics/_answer_correctness.py
index 8242e9ffe..bd5f1539e 100644
--- a/src/ragas/metrics/_answer_correctness.py
+++ b/src/ragas/metrics/_answer_correctness.py
@@ -257,8 +257,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         else:
             assert self.answer_similarity is not None, "AnswerSimilarity must be set"
-            similarity_score = await self.answer_similarity.ascore(
-                row, callbacks=callbacks
+            similarity_score = await self.answer_similarity.single_turn_ascore(
+                SingleTurnSample(**row), callbacks=callbacks
             )
 
         score = np.average(
diff --git a/src/ragas/metrics/_context_precision.py b/src/ragas/metrics/_context_precision.py
index ad88cb905..ea68dc820 100644
--- a/src/ragas/metrics/_context_precision.py
+++ b/src/ragas/metrics/_context_precision.py
@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -317,12 +316,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)
 
-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 @dataclass
 class ContextUtilization(LLMContextPrecisionWithoutReference):
@@ -333,12 +326,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)
 
-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithoutReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 context_precision = ContextPrecision()
 context_utilization = ContextUtilization()
diff --git a/src/ragas/metrics/_context_recall.py b/src/ragas/metrics/_context_recall.py
index 774e8717d..e3456a65c 100644
--- a/src/ragas/metrics/_context_recall.py
+++ b/src/ragas/metrics/_context_recall.py
@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -161,17 +160,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
 class ContextRecall(LLMContextRecall):
     name: str = "context_recall"
 
-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _single_turn_ascore(
-        self, sample: SingleTurnSample, callbacks: Callbacks
-    ) -> float:
-        row = sample.to_dict()
-        return await self._ascore(row, callbacks)
-
-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-
 
 @dataclass
 class NonLLMContextRecall(SingleTurnMetric):
diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py
index 7ed249e1e..ada9bcfd3 100644
--- a/src/ragas/metrics/base.py
+++ b/src/ragas/metrics/base.py
@@ -20,7 +20,7 @@
 from ragas.metrics.validators import AllowedValuesType
 from ragas.prompt import FewShotPydanticPrompt, PromptMixin
 from ragas.run_config import RunConfig
-from ragas.utils import camel_to_snake, deprecated, get_metric_language
+from ragas.utils import camel_to_snake, get_metric_language
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -150,79 +150,6 @@ def init(self, run_config: RunConfig) -> None:
         """
         ...
 
-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    def score(self, row: t.Dict, callbacks: Callbacks = None) -> float:
-        """
-        Calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` or `multi_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-
-        async def _async_wrapper():
-            try:
-                result = await self._ascore(row=row, callbacks=group_cm)
-            except Exception as e:
-                if not group_cm.ended:
-                    rm.on_chain_error(e)
-                raise e
-            else:
-                if not group_cm.ended:
-                    rm.on_chain_end({"output": result})
-                return result
-
-        # Apply nest_asyncio logic to ensure compatibility in notebook/Jupyter environments.
-        apply_nest_asyncio()
-        return run(_async_wrapper)
-
-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    async def ascore(
-        self,
-        row: t.Dict,
-        callbacks: Callbacks = None,
-        timeout: t.Optional[float] = None,
-    ) -> float:
-        """
-        Asynchronously calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-        try:
-            score = await asyncio.wait_for(
-                self._ascore(row=row, callbacks=group_cm),
-                timeout=timeout,
-            )
-        except Exception as e:
-            if not group_cm.ended:
-                rm.on_chain_error(e)
-            raise e
-        else:
-            if not group_cm.ended:
-                rm.on_chain_end({"output": score})
-            return score
-
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        raise NotImplementedError(
-            f"Metric '{self.name}' has no implementation for _ascore. score() is deprecated and will be removed in 0.3. Please use single_turn_ascore or multi_turn_ascore instead."
-        )
-
 
 @dataclass
 class MetricWithLLM(Metric, PromptMixin):
diff --git a/tests/unit/test_executor_in_jupyter.ipynb b/tests/unit/test_executor_in_jupyter.ipynb
index 5cd4b88d8..dd7710393 100644
--- a/tests/unit/test_executor_in_jupyter.ipynb
+++ b/tests/unit/test_executor_in_jupyter.ipynb
@@ -56,20 +56,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "async def _run():\n",
-    "    results = []\n",
-    "    for t in as_completed([echo(1), echo(2), echo(3)], 3):\n",
-    "        r = await t\n",
-    "        results.append(r)\n",
-    "    return results\n",
-    "\n",
-    "\n",
-    "results = await _run()\n",
-    "\n",
-    "expected = [1, 2, 3]\n",
-    "assert results == expected, f\"got: {results}, expected: {expected}\""
-   ]
+   "source": "async def _run():\n    results = []\n    for task in as_completed([echo(1), echo(2), echo(3)], 3):\n        r = await task\n        results.append(r)\n    return results\n\n\nresults = await _run()\n\nexpected = [1, 2, 3]\nassert results == expected, f\"got: {results}, expected: {expected}\""
   },
   {
    "cell_type": "markdown",
@@ -215,18 +202,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from ragas.metrics.base import Metric\n",
+    "import typing as t\n",
+    "from dataclasses import dataclass, field\n",
+    "\n",
+    "from ragas.dataset_schema import SingleTurnSample\n",
+    "from ragas.metrics.base import MetricType, SingleTurnMetric\n",
     "\n",
     "\n",
-    "class FakeMetric(Metric):\n",
-    "    name = \"fake_metric\"\n",
-    "    _required_columns = (\"user_input\", \"response\")\n",
+    "@dataclass\n",
+    "class FakeMetric(SingleTurnMetric):\n",
+    "    name: str = \"fake_metric\"\n",
+    "    _required_columns: t.Dict[MetricType, t.Set[str]] = field(\n",
+    "        default_factory=lambda: {MetricType.SINGLE_TURN: {\"user_input\", \"response\"}}\n",
+    "    )\n",
     "\n",
-    "    def init(self):\n",
+    "    def init(self, run_config=None):\n",
     "        pass\n",
     "\n",
-    "    async def _ascore(self, row, callbacks) -> float:\n",
-    "        return 0\n",
+    "    async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks) -> float:\n",
+    "        return 0.0\n",
     "\n",
     "\n",
    "fm = FakeMetric()"
@@ -238,8 +232,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "score = fm.score({\"user_input\": \"a\", \"response\": \"b\"})\n",
-    "assert score == 0"
+    "score = await fm.single_turn_ascore(SingleTurnSample(user_input=\"a\", response=\"b\"))\n",
+    "assert score == 0.0"
    ]
   },
   {
@@ -326,4 +320,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
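Reviewer note (not part of the diff): the hunks above drop the deprecated row-dict `score()` / `ascore()` entry points and route everything through the sample-based `single_turn_ascore()` API. A minimal caller-side sketch of that migration, reusing the `FakeMetric` pattern from the updated notebook cells; the sample values are illustrative only.

```python
# Sketch of the caller-side change implied by this PR: build a SingleTurnSample
# and await single_turn_ascore instead of passing a row dict to score()/ascore().
import asyncio
import typing as t
from dataclasses import dataclass, field

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, SingleTurnMetric


@dataclass
class FakeMetric(SingleTurnMetric):
    # Same stub metric as in tests/unit/test_executor_in_jupyter.ipynb.
    name: str = "fake_metric"
    _required_columns: t.Dict[MetricType, t.Set[str]] = field(
        default_factory=lambda: {MetricType.SINGLE_TURN: {"user_input", "response"}}
    )

    def init(self, run_config=None):
        pass

    async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks) -> float:
        return 0.0


async def main() -> None:
    fm = FakeMetric()
    # Before this PR: fm.score({"user_input": "a", "response": "b"}) or await fm.ascore(row)
    # After this PR: wrap the row in a SingleTurnSample and await single_turn_ascore.
    score = await fm.single_turn_ascore(SingleTurnSample(user_input="a", response="b"))
    assert score == 0.0


asyncio.run(main())
```

Real metrics such as `context_precision` or `context_recall` migrate the same way, with an evaluator LLM configured on the metric before scoring.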