
Commit 36564a4

feat: added option to provide experiment_name to evaluate() (#1843)

fixes: #967

1 parent 5c2cb19 commit 36564a4

File tree

5 files changed (+15 -11 lines changed):

.readthedocs.yml
src/ragas/dataset_schema.py
src/ragas/evaluation.py
src/ragas/metrics/_domain_specific_rubrics.py
src/ragas/metrics/base.py

.readthedocs.yml

Lines changed: 3 additions & 0 deletions

@@ -1,5 +1,8 @@
 version: 2
 
+mkdocs:
+  configuration: mkdocs.yml
+
 build:
   os: ubuntu-22.04
   tools:

src/ragas/dataset_schema.py

Lines changed: 4 additions & 4 deletions

@@ -13,21 +13,21 @@
 from datasets import Dataset as HFDataset
 from pydantic import BaseModel, field_validator
 
+from ragas._version import __version__
 from ragas.callbacks import ChainRunEncoder, parse_run_traces
 from ragas.cost import CostCallbackHandler
 from ragas.exceptions import UploadException
 from ragas.messages import AIMessage, HumanMessage, ToolCall, ToolMessage
 from ragas.sdk import (
-    upload_packet,
     RAGAS_API_SOURCE,
-    get_app_token,
-    check_api_response,
     build_evaluation_app_url,
+    check_api_response,
     get_api_url,
+    get_app_token,
     get_app_url,
+    upload_packet,
 )
 from ragas.utils import safe_nanmean
-from ragas._version import __version__
 
 if t.TYPE_CHECKING:
     from pathlib import Path

src/ragas/evaluation.py

Lines changed: 4 additions & 1 deletion

@@ -58,6 +58,7 @@ def evaluate(
     metrics: t.Optional[t.Sequence[Metric]] = None,
     llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
     embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
+    experiment_name: t.Optional[str] = None,
     callbacks: Callbacks = None,
     run_config: t.Optional[RunConfig] = None,
     token_usage_parser: t.Optional[TokenUsageParser] = None,
@@ -87,6 +88,8 @@ def evaluate(
         The embeddings to use for the metrics. If not provided then ragas will use
         the default embeddings for metrics which require embeddings. This can we overridden by the embeddings specified in
         the metric level with `metric.embeddings`.
+    experiment_name: str, optional
+        The name of the experiment to track. This is used to track the evaluation in the tracing tools.
     callbacks: Callbacks, optional
         Lifecycle Langchain Callbacks to run during evaluation. Check the
         [langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
@@ -246,7 +249,7 @@ def evaluate(
     # new evaluation chain
     row_run_managers = []
     evaluation_rm, evaluation_group_cm = new_group(
-        name=RAGAS_EVALUATION_CHAIN_NAME,
+        name=experiment_name or RAGAS_EVALUATION_CHAIN_NAME,
         inputs={},
         callbacks=callbacks,
         metadata={"type": ChainType.EVALUATION},
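As a usage sketch, here is how the new argument might be called; the one-row dataset, the Faithfulness metric, and a default LLM configured via OPENAI_API_KEY are illustrative assumptions, not part of this commit:

from ragas import evaluate
from ragas.dataset_schema import EvaluationDataset, SingleTurnSample
from ragas.metrics import Faithfulness

# Illustrative one-row dataset; any evaluation dataset works the same way.
dataset = EvaluationDataset(
    samples=[
        SingleTurnSample(
            user_input="When was the first Super Bowl played?",
            retrieved_contexts=[
                "The First AFL-NFL World Championship Game was played on January 15, 1967."
            ],
            response="The first Super Bowl was played on January 15, 1967.",
        )
    ]
)

# With experiment_name set, the root evaluation chain is created as
# new_group(name="baseline-v1", ...) rather than under the default
# RAGAS_EVALUATION_CHAIN_NAME, so tracing tools show the run under that name.
result = evaluate(dataset, metrics=[Faithfulness()], experiment_name="baseline-v1")

Omitting experiment_name keeps the previous behavior, since experiment_name or RAGAS_EVALUATION_CHAIN_NAME falls back to the default chain name.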

src/ragas/metrics/_domain_specific_rubrics.py

Lines changed: 3 additions & 1 deletion

@@ -113,7 +113,9 @@ def __init__(
         }
 
         # Add rubrics to the scoring prompts
-        rubrics_text = "\n".join(f"{key}: {value}" for key, value in self.rubrics.items())
+        rubrics_text = "\n".join(
+            f"{key}: {value}" for key, value in self.rubrics.items()
+        )
         self.single_turn_scoring_prompt.instruction = f"{self.single_turn_scoring_prompt.instruction}\n\nScoring Rubrics:\n{rubrics_text}\n"
         self.multi_turn_scoring_prompt.instruction = f"{self.multi_turn_scoring_prompt.instruction}\n\nScoring Rubrics:\n{rubrics_text}\n"
 
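The wrapping above is purely cosmetic. To illustrate what the join produces, here is a hypothetical rubrics dict (keys and descriptions are made up, not from this repository):

# Hypothetical rubric dict, mirroring the self.rubrics.items() join above.
rubrics = {
    "score1_description": "The response is incorrect or irrelevant.",
    "score5_description": "The response is fully correct and complete.",
}
rubrics_text = "\n".join(f"{key}: {value}" for key, value in rubrics.items())
print(rubrics_text)
# score1_description: The response is incorrect or irrelevant.
# score5_description: The response is fully correct and complete.

This rubrics_text is then appended to both scoring prompt instructions under a "Scoring Rubrics:" heading.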

src/ragas/metrics/base.py

Lines changed: 1 addition & 5 deletions

@@ -13,11 +13,7 @@
 
 from ragas._analytics import EvaluationEvent, _analytics_batcher
 from ragas.callbacks import ChainType, new_group
-from ragas.dataset_schema import (
-    MetricAnnotation,
-    MultiTurnSample,
-    SingleTurnSample,
-)
+from ragas.dataset_schema import MetricAnnotation, MultiTurnSample, SingleTurnSample
 from ragas.executor import is_event_loop_running
 from ragas.losses import BinaryMetricLoss, MSELoss
 from ragas.prompt import FewShotPydanticPrompt, PromptMixin
