
Commit 8e41fe3

fix: import error for TestsetGeneration and small fixes (#1516)
1 parent 9408d10 commit 8e41fe3

File tree: 4 files changed, +197 additions, -86 deletions


src/ragas/dataset_schema.py

Lines changed: 66 additions & 48 deletions
@@ -2,6 +2,7 @@
 
 import json
 import typing as t
+from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 
 from datasets import Dataset as HFDataset
@@ -12,6 +13,8 @@
 from ragas.utils import safe_nanmean
 
 if t.TYPE_CHECKING:
+    from pathlib import Path
+
     from datasets import Dataset as HFDataset
     from pandas import DataFrame as PandasDataframe
 
@@ -136,9 +139,20 @@ def pretty_repr(self):
 Sample = t.TypeVar("Sample", bound=BaseSample)
 
 
-class RagasDataset(BaseModel, t.Generic[Sample]):
+class RagasDataset(ABC, BaseModel, t.Generic[Sample]):
     samples: t.List[Sample]
 
+    @abstractmethod
+    def to_list(self) -> t.List[t.Dict]:
+        """Converts the samples to a list of dictionaries."""
+        pass
+
+    @classmethod
+    @abstractmethod
+    def from_list(cls, data: t.List[t.Dict]) -> RagasDataset[Sample]:
+        """Creates an EvaluationDataset from a list of dictionaries."""
+        pass
+
     @field_validator("samples")
     def validate_samples(cls, samples: t.List[BaseSample]) -> t.List[BaseSample]:
         """Validates that all samples are of the same type."""
@@ -155,20 +169,6 @@ def get_sample_type(self) -> t.Type[Sample]:
         """Returns the type of the samples in the dataset."""
         return type(self.samples[0])
 
-    def _to_list(self) -> t.List[t.Dict]:
-        """Converts the samples to a list of dictionaries."""
-        rows = [sample.to_dict() for sample in self.samples]
-
-        if self.get_sample_type() == MultiTurnSample:
-            for sample in rows:
-                for item in sample["user_input"]:
-                    if not isinstance(item["content"], str):
-                        item["content"] = json.dumps(
-                            item["content"], ensure_ascii=False
-                        )
-
-        return rows
-
     def to_hf_dataset(self) -> HFDataset:
         """Converts the dataset to a Hugging Face Dataset."""
         try:
@@ -178,7 +178,7 @@ def to_hf_dataset(self) -> HFDataset:
                 "datasets is not installed. Please install it to use this function."
             )
 
-        return HFDataset.from_list(self._to_list())
+        return HFDataset.from_list(self.to_list())
 
     @classmethod
     def from_hf_dataset(cls, dataset: HFDataset):
@@ -194,26 +194,13 @@ def to_pandas(self) -> PandasDataframe:
                 "pandas is not installed. Please install it to use this function."
            )
 
-        data = self._to_list()
+        data = self.to_list()
         return pd.DataFrame(data)
 
     def features(self):
         """Returns the features of the samples."""
         return self.samples[0].get_features()
 
-    @classmethod
-    def from_list(cls, mapping: t.List[t.Dict]):
-        """Creates an EvaluationDataset from a list of dictionaries."""
-        samples = []
-        if all(
-            "user_input" in item and isinstance(mapping[0]["user_input"], list)
-            for item in mapping
-        ):
-            samples.extend(MultiTurnSample(**sample) for sample in mapping)
-        else:
-            samples.extend(SingleTurnSample(**sample) for sample in mapping)
-        return cls(samples=samples)
-
     @classmethod
     def from_dict(cls, mapping: t.Dict):
         """Creates an EvaluationDataset from a dictionary."""
@@ -227,40 +214,30 @@ def from_dict(cls, mapping: t.Dict):
             samples.extend(SingleTurnSample(**sample) for sample in mapping)
         return cls(samples=samples)
 
-    @classmethod
-    def from_csv(cls, path: str):
-        """Creates an EvaluationDataset from a CSV file."""
-        import csv
-
-        with open(path, "r", newline="") as csvfile:
-            reader = csv.DictReader(csvfile)
-            data = [row for row in reader]
-        return cls.from_list(data)
-
-    def to_csv(self, path: str):
+    def to_csv(self, path: t.Union[str, Path]):
         """Converts the dataset to a CSV file."""
         import csv
 
-        data = self._to_list()
+        data = self.to_list()
         if not data:
             return
 
-        fieldnames = self.features()
+        fieldnames = data[0].keys()
 
         with open(path, "w", newline="") as csvfile:
             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
             writer.writeheader()
             for row in data:
                 writer.writerow(row)
 
-    def to_jsonl(self, path: str):
+    def to_jsonl(self, path: t.Union[str, Path]):
         """Converts the dataset to a JSONL file."""
         with open(path, "w") as jsonlfile:
             for sample in self.samples:
                 jsonlfile.write(json.dumps(sample.to_dict(), ensure_ascii=False) + "\n")
 
     @classmethod
-    def from_jsonl(cls, path: str):
+    def from_jsonl(cls, path: t.Union[str, Path]):
         """Creates an EvaluationDataset from a JSONL file."""
         with open(path, "r") as jsonlfile:
             data = [json.loads(line) for line in jsonlfile]
@@ -307,8 +284,6 @@ class EvaluationDataset(RagasDataset[SingleTurnSampleOrMultiTurnSample]):
         Creates an EvaluationDataset from a list of dictionaries.
     from_dict(mapping)
         Creates an EvaluationDataset from a dictionary.
-    from_csv(path)
-        Creates an EvaluationDataset from a CSV file.
     to_csv(path)
         Converts the dataset to a CSV file.
     to_jsonl(path)
@@ -333,6 +308,37 @@ def __getitem__(
         else:
             raise TypeError("Index must be int or slice")
 
+    def to_list(self) -> t.List[t.Dict]:
+        rows = [sample.to_dict() for sample in self.samples]
+
+        if self.get_sample_type() == MultiTurnSample:
+            for sample in rows:
+                for item in sample["user_input"]:
+                    if not isinstance(item["content"], str):
+                        item["content"] = json.dumps(
+                            item["content"], ensure_ascii=False
+                        )
+
+        return rows
+
+    @classmethod
+    def from_list(cls, data: t.List[t.Dict]) -> EvaluationDataset:
+        samples = []
+        if all(
+            "user_input" in item and isinstance(data[0]["user_input"], list)
+            for item in data
+        ):
+            samples.extend(MultiTurnSample(**sample) for sample in data)
+        else:
+            samples.extend(SingleTurnSample(**sample) for sample in data)
+        return cls(samples=samples)
+
+
+class EvaluationResultRow(BaseModel):
+    dataset_row: t.Dict
+    scores: t.Dict[str, t.Any]
+    trace: t.Dict[str, t.Any] = field(default_factory=dict)  # none for now
+
 
 @dataclass
 class EvaluationResult:
@@ -352,7 +358,7 @@ class EvaluationResult:
     """
 
     scores: t.List[t.Dict[str, t.Any]]
-    dataset: t.Optional[EvaluationDataset] = None
+    dataset: EvaluationDataset
     binary_columns: t.List[str] = field(default_factory=list)
    cost_cb: t.Optional[CostCallbackHandler] = None
 
@@ -407,6 +413,18 @@ def to_pandas(self, batch_size: int | None = None, batched: bool = False):
         dataset_df = self.dataset.to_pandas()
         return pd.concat([dataset_df, scores_df], axis=1)
 
+    def serialized(self) -> t.List[EvaluationResultRow]:
+        """
+        Convert the result to a list of EvaluationResultRow.
+        """
+        return [
+            EvaluationResultRow(
+                dataset_row=self.dataset[i].to_dict(),
+                scores=self.scores[i],
+            )
+            for i in range(len(self.scores))
+        ]
+
     def total_tokens(self) -> t.Union[t.List[TokenUsage], TokenUsage]:
         """
         Compute the total tokens used in the evaluation.

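Taken together, the dataset_schema.py changes make RagasDataset an abstract base class whose subclasses must implement to_list()/from_list(), remove from_csv(), accept str or pathlib.Path in to_csv/to_jsonl/from_jsonl, and add EvaluationResultRow plus EvaluationResult.serialized(). A minimal sketch of the resulting round-trip API; the sample content and output paths below are illustrative, not from the commit:

```python
# Sketch only: exercises the to_list/from_list contract introduced in this commit.
# Sample values and file names are made up for illustration.
from ragas.dataset_schema import EvaluationDataset, SingleTurnSample

dataset = EvaluationDataset(
    samples=[SingleTurnSample(user_input="What is X", response="Y")]
)

rows = dataset.to_list()                      # one dict per sample
restored = EvaluationDataset.from_list(rows)  # rebuilds Single/MultiTurn samples
assert restored == dataset

dataset.to_csv("eval.csv")      # paths may now be str or pathlib.Path
dataset.to_jsonl("eval.jsonl")  # from_csv() is gone; from_jsonl() remains for loading
```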
src/ragas/testset/synthesizers/testset_schema.py

Lines changed: 39 additions & 16 deletions
@@ -2,14 +2,16 @@
 
 import typing as t
 
-from ragas.dataset_schema import BaseSample, RagasDataset
+from ragas.dataset_schema import (
+    BaseSample,
+    EvaluationDataset,
+    MultiTurnSample,
+    RagasDataset,
+    SingleTurnSample,
+)
 
 if t.TYPE_CHECKING:
-    from ragas.dataset_schema import (
-        EvaluationDataset,
-        MultiTurnSample,
-        SingleTurnSample,
-    )
+    from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
 
 
 class TestsetSample(BaseSample):
@@ -48,13 +50,34 @@ def to_evaluation_dataset(self) -> EvaluationDataset:
             samples=[sample.eval_sample for sample in self.samples]
         )
 
-    def _to_list(self) -> t.List[t.Dict]:
-        eval_list = self.to_evaluation_dataset()._to_list()
-        testset_list_without_eval_sample = [
-            sample.model_dump(exclude={"eval_sample"}) for sample in self.samples
-        ]
-        testset_list = [
-            {**eval_sample, **sample}
-            for eval_sample, sample in zip(eval_list, testset_list_without_eval_sample)
-        ]
-        return testset_list
+    def to_list(self) -> t.List[t.Dict]:
+        """
+        Converts the Testset to a list of dictionaries.
+        """
+        return [sample.model_dump() for sample in self.samples]
+
+    @classmethod
+    def from_list(cls, data: t.List[t.Dict]) -> Testset:
+        """
+        Converts a list of dictionaries to a Testset.
+        """
+        # first create the samples
+        samples = []
+        for sample in data:
+            eval_sample = sample["eval_sample"]
+
+            # if user_input is a list it is MultiTurnSample
+            if "user_input" in eval_sample and not isinstance(
+                eval_sample.get("user_input"), list
+            ):
+                eval_sample = SingleTurnSample(**sample["eval_sample"])
+            else:
+                eval_sample = MultiTurnSample(**sample["eval_sample"])
+
+            samples.append(
+                TestsetSample(
+                    eval_sample=eval_sample, synthesizer_name=sample["synthesizer_name"]
+                )
+            )
+        # then create the testset
+        return Testset(samples=samples)

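With Testset now implementing the abstract pair itself, to_list() keeps each sample's nested eval_sample dict and from_list() rebuilds it, picking MultiTurnSample when user_input is a list. A minimal sketch, assuming Testset and TestsetSample are imported from this module; the sample values and synthesizer_name are illustrative only:

```python
# Sketch only: round-trips a Testset through the new to_list/from_list pair.
from ragas.dataset_schema import SingleTurnSample
from ragas.testset.synthesizers.testset_schema import Testset, TestsetSample

testset = Testset(
    samples=[
        TestsetSample(
            eval_sample=SingleTurnSample(user_input="What is X", response="Y"),
            synthesizer_name="example_synthesizer",  # illustrative name
        )
    ]
)

data = testset.to_list()            # each dict keeps the nested "eval_sample"
restored = Testset.from_list(data)  # rebuilds Single/MultiTurnSample from "user_input"
assert restored == testset
```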
tests/unit/test_dataset_schema.py

Lines changed: 42 additions & 22 deletions
@@ -3,46 +3,66 @@
 import pytest
 from pydantic import ValidationError
 
-from ragas.dataset_schema import EvaluationDataset, MultiTurnSample, SingleTurnSample
-
-
-def test_evaluation_dataset():
-    single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
-
-    dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
+from ragas.dataset_schema import (
+    EvaluationDataset,
+    HumanMessage,
+    MultiTurnSample,
+    SingleTurnSample,
+)
+
+samples = [
+    SingleTurnSample(user_input="What is X", response="Y"),
+    MultiTurnSample(
+        user_input=[HumanMessage(content="What is X")],
+        reference="Y",
+    ),
+]
+
+
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset(eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
     hf_dataset = dataset.to_hf_dataset()
 
-    assert dataset.get_sample_type() == SingleTurnSample
+    assert dataset.get_sample_type() is type(eval_sample)
     assert len(hf_dataset) == 2
-    assert dataset.features() == ["user_input", "response"]
     assert len(dataset) == 2
-    assert dataset[0] == single_turn_sample
+    assert dataset[0] == eval_sample
 
 
-def test_evaluation_dataset_save_load(tmpdir):
-    single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_save_load_csv(tmpdir, eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
-    dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
+    # save and load to csv
+    csv_path = tmpdir / "csvfile.csv"
+    dataset.to_csv(csv_path)
 
-    hf_dataset = dataset.to_hf_dataset()
 
-    # save and load to csv
-    dataset.to_csv(tmpdir / "csvfile.csv")
-    loaded_dataset = EvaluationDataset.from_csv(tmpdir / "csvfile.csv")
-    assert loaded_dataset == dataset
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_save_load_jsonl(tmpdir, eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
     # save and load to jsonl
-    dataset.to_jsonl(tmpdir / "jsonlfile.jsonl")
-    loaded_dataset = EvaluationDataset.from_jsonl(tmpdir / "jsonlfile.jsonl")
+    jsonl_path = tmpdir / "jsonlfile.jsonl"
+    dataset.to_jsonl(jsonl_path)
+    loaded_dataset = EvaluationDataset.from_jsonl(jsonl_path)
     assert loaded_dataset == dataset
 
-    # load from hf dataset
+
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_load_from_hf(eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
+
+    # convert to and load from hf dataset
+    hf_dataset = dataset.to_hf_dataset()
     loaded_dataset = EvaluationDataset.from_hf_dataset(hf_dataset)
     assert loaded_dataset == dataset
 
 
-def test_single_type_evaluation_dataset():
+@pytest.mark.parametrize("eval_sample", samples)
+def test_single_type_evaluation_dataset(eval_sample):
     single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
     multi_turn_sample = MultiTurnSample(
         user_input=[{"content": "What is X"}],

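The rewritten tests parametrize every case over both a SingleTurnSample and a MultiTurnSample, so each save/load path is exercised for both sample types. A small sketch of running just this module with a standard pytest invocation (not part of the commit):

```python
# Sketch: run only the rewritten dataset-schema tests; the path comes from the diff.
import pytest

raise SystemExit(pytest.main(["-q", "tests/unit/test_dataset_schema.py"]))
```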