
Commit 500219e

refactor: experiment function to use model as additional optional arg (#2165)
1 parent: 6273d92

File tree

4 files changed: +141 -3 lines

docs/experimental/core_concepts/experimentation.md

Lines changed: 103 additions & 0 deletions

````diff
@@ -60,6 +60,109 @@ async def my_experiment(row):
 my_experiment.arun(dataset)
 ```
 
+### Passing Additional Parameters
+
+You can pass additional parameters to your experiment function through `arun()`. This is useful for models, configurations, or anything else your experiment needs. Pass them by keyword, since positional arguments after `dataset` are consumed by `arun()`'s own `name` and `backend` parameters:
+
+```python
+@experiment
+async def my_experiment(row, model):
+    # Process the query with the specified model
+    response = my_app(row.query, model=model)
+
+    # Calculate the metric
+    metric = my_metric.score(response, row.ground_truth)
+
+    # Return results
+    return {**row, "response": response, "accuracy": metric.value}
+
+# Run with a specific model, passed as a keyword argument
+my_experiment.arun(dataset, model="gpt-4o")
+```
+
+### Using Data Models
+
+You can specify a data model for your experiment results at the decorator level:
+
+```python
+from pydantic import BaseModel
+
+class ExperimentResult(BaseModel):
+    response: str
+    accuracy: float
+    model_used: str
+
+@experiment(experiment_model=ExperimentResult)
+async def my_experiment(row, model):
+    response = my_app(row.query, model=model)
+    metric = my_metric.score(response, row.ground_truth)
+    return ExperimentResult(
+        response=response,
+        accuracy=metric.value,
+        model_used=model,
+    )
+
+# Run the experiment with a specific model
+my_experiment.arun(dataset, model="gpt-4o")
+```
+
+### Complete Example: LLM Parameter Passing
+
+Here's a complete example showing how to pass different LLM models to your experiment function:
+
+```python
+import time
+
+from pydantic import BaseModel
+from ragas.experimental import experiment, Dataset
+
+class ExperimentResult(BaseModel):
+    query: str
+    response: str
+    accuracy: float
+    model_used: str
+    latency_ms: float
+
+@experiment(experiment_model=ExperimentResult)
+async def llm_experiment(row, llm_model, temperature=0.7):
+    """Experiment function that accepts an LLM model and other parameters."""
+    start_time = time.time()
+
+    # Use the passed LLM model
+    response = await my_llm_app(
+        query=row.query,
+        model=llm_model,
+        temperature=temperature,
+    )
+
+    # Calculate metrics
+    metric = my_metric.score(response, row.ground_truth)
+    end_time = time.time()
+
+    return ExperimentResult(
+        query=row.query,
+        response=response,
+        accuracy=metric.value,
+        model_used=llm_model,
+        latency_ms=(end_time - start_time) * 1000,
+    )
+
+# Run experiments with different models
+gpt4_results = await llm_experiment.arun(
+    dataset,
+    llm_model="gpt-4o",
+    temperature=0.1,
+)
+
+claude_results = await llm_experiment.arun(
+    dataset,
+    llm_model="claude-4-sonnet",
+    temperature=0.7,
+)
+```
+
 ## Result Storage
 
 Once executed, Ragas processes each row in the dataset, runs it through the function, and stores the results in the `experiments` folder. The storage backend can be configured based on your preferences.
````
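The `arun()` signature touched in this commit is where that configuration hook lives: `backend` accepts either a `BaseBackend` instance or a registered backend name. A minimal, hedged sketch of a configured run; the backend name `local/csv` is an assumption for illustration, not something this diff confirms:

```python
# Hedged sketch: choose a run name and storage backend explicitly, and pass
# the experiment's own parameter by keyword. "local/csv" is an assumed
# registered backend name; substitute whatever backends your install
# exposes via the "ragas.backends" entry-point group.
await my_experiment.arun(
    dataset,
    name="baseline",
    backend="local/csv",
    model="gpt-4o",
)
```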

docs/experimental/tutorials/prompt.md

Lines changed: 28 additions & 1 deletion

````diff
@@ -74,7 +74,34 @@ async def run_experiment(row):
     return experiment_view
 ```
 
-Now whenever you make a change to your prompt, you can run the experiment and see how it affects the performance of your prompt.
+Now whenever you make a change to your prompt, you can run the experiment and see how the change affects its performance.
+
+### Passing Additional Parameters
+
+You can pass additional parameters like models or configurations to your experiment function. Pass them by keyword, since positional arguments after `dataset` are consumed by `arun()`'s own `name` and `backend` parameters:
+
+```python
+@experiment()
+async def run_experiment(row, model):
+    response = run_prompt(row["text"], model=model)
+    score = my_metric.score(
+        prediction=response,
+        actual=row["label"],
+    )
+
+    experiment_view = {
+        **row,
+        "response": response,
+        "score": score.result,
+    }
+    return experiment_view
+
+# Run with a specific model, passed as a keyword argument
+run_experiment.arun(dataset, model="gpt-4o")
+```
 
 
 ## Running the example end to end
````
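Because every extra keyword now flows through `arun()` into the function, comparing models becomes a short loop. A hedged usage sketch, assuming the `run_experiment` and `dataset` objects from the tutorial above; the model names are illustrative placeholders:

```python
import asyncio

# Sweep the same prompt experiment across several models and collect the
# per-model results keyed by model name.
async def sweep(models):
    results = {}
    for model in models:
        results[model] = await run_experiment.arun(dataset, model=model)
    return results

all_results = asyncio.run(sweep(["gpt-4o", "gpt-4o-mini"]))
```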

ragas/src/ragas/experimental/backends/registry.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -120,7 +120,11 @@ def _discover_backends(self) -> None:
             entry_points = entry_points_result.select(group="ragas.backends")
         else:
             # Python 3.9 compatibility
-            entry_points = entry_points_result.get("ragas.backends", [])
+            entry_points = (
+                entry_points_result.get("ragas.backends", [])
+                if isinstance(entry_points_result, dict)
+                else []
+            )
 
         for entry_point in entry_points:
             try:
```
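For context, the guard exists because `importlib.metadata.entry_points()` changed shape across Python versions: on 3.10+ it returns an object with a `.select()` method, while on 3.9 it returns a plain dict mapping group names to entry-point lists. A self-contained, hedged sketch of the same pattern; the function name is illustrative, not the library's API:

```python
from importlib.metadata import entry_points

def discover_ragas_backends():
    eps = entry_points()
    if hasattr(eps, "select"):
        # Python 3.10+: entry_points() returns an object with select()
        selected = eps.select(group="ragas.backends")
    else:
        # Python 3.9: entry_points() returns a dict of group -> entry points
        selected = eps.get("ragas.backends", []) if isinstance(eps, dict) else []
    return {ep.name: ep for ep in selected}
```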

ragas/src/ragas/experimental/experiment.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -81,6 +81,8 @@ async def arun(
         dataset: Dataset,
         name: t.Optional[str] = None,
         backend: t.Optional[t.Union[BaseBackend, str]] = None,
+        *args,
+        **kwargs,
     ) -> "Experiment": ...
 
 
@@ -114,6 +116,8 @@ async def arun(
         dataset: Dataset,
         name: t.Optional[str] = None,
         backend: t.Optional[t.Union[BaseBackend, str]] = None,
+        *args,
+        **kwargs,
     ) -> "Experiment":
         """Run the experiment against a dataset."""
         # Generate name if not provided
@@ -139,7 +143,7 @@ async def arun(
         # Create tasks for all items
         tasks = []
         for item in dataset:
-            tasks.append(self(item))
+            tasks.append(self(item, *args, **kwargs))
 
         progress_bar = None
         try:
```
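Taken together, the three hunks mean anything passed to `arun()` beyond `dataset`, `name`, and `backend` is replayed into every per-row call. A stripped-down, hedged sketch of just that plumbing; `ExperimentSketch` is an illustrative stand-in, not the library's class, and the real implementation also resolves names, backends, and a progress bar:

```python
import asyncio
import typing as t

class ExperimentSketch:
    """Illustrative stand-in modeling only the *args/**kwargs forwarding."""

    def __init__(self, func: t.Callable[..., t.Awaitable[t.Any]]):
        self.func = func

    async def __call__(self, row: t.Any, *args: t.Any, **kwargs: t.Any) -> t.Any:
        return await self.func(row, *args, **kwargs)

    async def arun(self, dataset, name=None, backend=None, *args, **kwargs):
        # Extra arguments are forwarded to every row; bare positionals after
        # `dataset` land in `name` and `backend` first, so pass by keyword.
        tasks = [self(item, *args, **kwargs) for item in dataset]
        return await asyncio.gather(*tasks)

async def scored(row, model):
    return {"row": row, "model": model}

# The `model` keyword reaches the wrapped function for each row.
results = asyncio.run(ExperimentSketch(scored).arun([1, 2, 3], model="gpt-4o"))
```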
