4 changes: 2 additions & 2 deletions src/ragas/metrics/_answer_correctness.py

@@ -257,8 +257,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         else:
             assert self.answer_similarity is not None, "AnswerSimilarity must be set"

-            similarity_score = await self.answer_similarity.ascore(
-                row, callbacks=callbacks
+            similarity_score = await self.answer_similarity.single_turn_ascore(
+                SingleTurnSample(**row), callbacks=callbacks
             )

         score = np.average(
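The hunk above swaps the deprecated dict-based `ascore` call for the single-turn API, wrapping the raw row in a `SingleTurnSample` before scoring. A minimal sketch of the same call shape from caller code, assuming an initialized `AnswerSimilarity` instance named `answer_similarity` and illustrative field values:

from ragas.dataset_schema import SingleTurnSample

# Hypothetical row; AnswerSimilarity compares the generated response
# against the reference answer.
row = {
    "response": "Paris is the capital of France.",
    "reference": "The capital of France is Paris.",
}

# Old (removed in this PR): await answer_similarity.ascore(row, callbacks=callbacks)
# New: wrap the row in a typed sample and score through the single-turn API.
similarity_score = await answer_similarity.single_turn_ascore(SingleTurnSample(**row))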
13 changes: 0 additions & 13 deletions src/ragas/metrics/_context_precision.py

@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated

 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks

@@ -317,12 +316,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)

-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-

 @dataclass
 class ContextUtilization(LLMContextPrecisionWithoutReference):

@@ -333,12 +326,6 @@ async def _single_turn_ascore(
     ) -> float:
         return await super()._single_turn_ascore(sample, callbacks)

-    @deprecated(
-        since="0.2", removal="0.3", alternative="LLMContextPrecisionWithoutReference"
-    )
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-

 context_precision = ContextPrecision()
 context_utilization = ContextUtilization()
12 changes: 0 additions & 12 deletions src/ragas/metrics/_context_recall.py

@@ -18,7 +18,6 @@
 )
 from ragas.prompt import PydanticPrompt
 from ragas.run_config import RunConfig
-from ragas.utils import deprecated

 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks

@@ -161,17 +160,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
 class ContextRecall(LLMContextRecall):
     name: str = "context_recall"

-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _single_turn_ascore(
-        self, sample: SingleTurnSample, callbacks: Callbacks
-    ) -> float:
-        row = sample.to_dict()
-        return await self._ascore(row, callbacks)
-
-    @deprecated(since="0.2", removal="0.3", alternative="LLMContextRecall")
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        return await super()._ascore(row, callbacks)
-

 @dataclass
 class NonLLMContextRecall(SingleTurnMetric):
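Both metric modules above lose only their `@deprecated` dict-based shims; the module-level instances (`context_precision`, `context_utilization`, and the `ContextRecall` family) keep the single-turn entry point inherited from the base class. A minimal usage sketch under the surviving API, with illustrative sample values and assuming the usual `user_input` / `retrieved_contexts` / `reference` fields:

import asyncio

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import context_precision  # module-level instance from the diff above

# Hypothetical sample; context precision checks whether the retrieved
# chunks that support the reference are ranked near the top.
sample = SingleTurnSample(
    user_input="What is the capital of France?",
    retrieved_contexts=["Paris is the capital and largest city of France."],
    reference="Paris",
)

# An evaluator LLM must still be configured on the metric before scoring
# (omitted here); the deprecated dict-based path removed above is gone.
score = asyncio.run(context_precision.single_turn_ascore(sample))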
75 changes: 1 addition & 74 deletions src/ragas/metrics/base.py

@@ -20,7 +20,7 @@
 from ragas.metrics.validators import AllowedValuesType
 from ragas.prompt import FewShotPydanticPrompt, PromptMixin
 from ragas.run_config import RunConfig
-from ragas.utils import camel_to_snake, deprecated, get_metric_language
+from ragas.utils import camel_to_snake, get_metric_language

 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks

@@ -150,79 +150,6 @@ def init(self, run_config: RunConfig) -> None:
         """
         ...

-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    def score(self, row: t.Dict, callbacks: Callbacks = None) -> float:
-        """
-        Calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` or `multi_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-
-        async def _async_wrapper():
-            try:
-                result = await self._ascore(row=row, callbacks=group_cm)
-            except Exception as e:
-                if not group_cm.ended:
-                    rm.on_chain_error(e)
-                raise e
-            else:
-                if not group_cm.ended:
-                    rm.on_chain_end({"output": result})
-                return result
-
-        # Apply nest_asyncio logic to ensure compatibility in notebook/Jupyter environments.
-        apply_nest_asyncio()
-        return run(_async_wrapper)
-
-    @deprecated("0.2", removal="0.3", alternative="single_turn_ascore")
-    async def ascore(
-        self,
-        row: t.Dict,
-        callbacks: Callbacks = None,
-        timeout: t.Optional[float] = None,
-    ) -> float:
-        """
-        Asynchronously calculates the score for a single row of data.
-
-        Note
-        ----
-        This method is deprecated and will be removed in 0.3. Please use `single_turn_ascore` instead.
-        """
-        callbacks = callbacks or []
-        rm, group_cm = new_group(
-            self.name,
-            inputs=row,
-            callbacks=callbacks,
-            metadata={"type": ChainType.METRIC},
-        )
-        try:
-            score = await asyncio.wait_for(
-                self._ascore(row=row, callbacks=group_cm),
-                timeout=timeout,
-            )
-        except Exception as e:
-            if not group_cm.ended:
-                rm.on_chain_error(e)
-            raise e
-        else:
-            if not group_cm.ended:
-                rm.on_chain_end({"output": score})
-            return score
-
-    async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        raise NotImplementedError(
-            f"Metric '{self.name}' has no implementation for _ascore. score() is deprecated and will be removed in 0.3. Please use single_turn_ascore or multi_turn_ascore instead."
-        )
-

 @dataclass
 class MetricWithLLM(Metric, PromptMixin):
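With `score`, `ascore`, and the fallback `_ascore` removed from the base `Metric`, the synchronous wrapper and its nest_asyncio handling leave the library, and callers own the event loop. A hedged migration sketch (metric and field names are illustrative):

import asyncio

from ragas.dataset_schema import SingleTurnSample

sample = SingleTurnSample(user_input="a question", response="an answer")

async def run_metric(metric):
    # `ascore(row, timeout=...)` used to apply the timeout internally via
    # asyncio.wait_for; after this PR the caller wraps the call instead.
    return await asyncio.wait_for(metric.single_turn_ascore(sample), timeout=30)

# Before: score = metric.score({"user_input": "a question", "response": "an answer"})
# After:  score = asyncio.run(run_metric(metric))  # metric: any SingleTurnMetric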
42 changes: 18 additions & 24 deletions tests/unit/test_executor_in_jupyter.ipynb

@@ -56,20 +56,7 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": [
- "async def _run():\n",
- "    results = []\n",
- "    for t in as_completed([echo(1), echo(2), echo(3)], 3):\n",
- "        r = await t\n",
- "        results.append(r)\n",
- "    return results\n",
- "\n",
- "\n",
- "results = await _run()\n",
- "\n",
- "expected = [1, 2, 3]\n",
- "assert results == expected, f\"got: {results}, expected: {expected}\""
-]
+"source": "async def _run():\n    results = []\n    for task in as_completed([echo(1), echo(2), echo(3)], 3):\n        r = await task\n        results.append(r)\n    return results\n\n\nresults = await _run()\n\nexpected = [1, 2, 3]\nassert results == expected, f\"got: {results}, expected: {expected}\""
 },
 {
 "cell_type": "markdown",

@@ -215,18 +202,25 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from ragas.metrics.base import Metric\n",
+"import typing as t\n",
+"from dataclasses import dataclass, field\n",
+"\n",
+"from ragas.dataset_schema import SingleTurnSample\n",
+"from ragas.metrics.base import MetricType, SingleTurnMetric\n",
 "\n",
 "\n",
-"class FakeMetric(Metric):\n",
-"    name = \"fake_metric\"\n",
-"    _required_columns = (\"user_input\", \"response\")\n",
+"@dataclass\n",
+"class FakeMetric(SingleTurnMetric):\n",
+"    name: str = \"fake_metric\"\n",
+"    _required_columns: t.Dict[MetricType, t.Set[str]] = field(\n",
+"        default_factory=lambda: {MetricType.SINGLE_TURN: {\"user_input\", \"response\"}}\n",
+"    )\n",
 "\n",
-"    def init(self):\n",
+"    def init(self, run_config=None):\n",
 "        pass\n",
 "\n",
-"    async def _ascore(self, row, callbacks) -> float:\n",
-"        return 0\n",
+"    async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks) -> float:\n",
+"        return 0.0\n",
 "\n",
 "\n",
 "fm = FakeMetric()"

@@ -238,8 +232,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"score = await fm.score({\"user_input\": \"a\", \"response\": \"b\"})\n",
-"assert score == 0"
+"score = await fm.single_turn_ascore(SingleTurnSample(user_input=\"a\", response=\"b\"))\n",
+"assert score == 0.0"
 ]
 },
 {

@@ -326,4 +320,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}