22
33import json
44import typing as t
5+ from abc import ABC , abstractmethod
56from dataclasses import dataclass , field
67
78from datasets import Dataset as HFDataset
1213from ragas .utils import safe_nanmean
1314
1415if t .TYPE_CHECKING :
16+ from pathlib import Path
17+
1518 from datasets import Dataset as HFDataset
1619 from pandas import DataFrame as PandasDataframe
1720
@@ -136,9 +139,20 @@ def pretty_repr(self):
136139Sample = t .TypeVar ("Sample" , bound = BaseSample )
137140
138141
139- class RagasDataset (BaseModel , t .Generic [Sample ]):
142+ class RagasDataset (ABC , BaseModel , t .Generic [Sample ]):
140143 samples : t .List [Sample ]
141144
@abstractmethod
def to_list(self) -> t.List[t.Dict]:
    """Return the dataset's samples as a list of dictionaries.

    Abstract hook: every concrete dataset class supplies its own
    conversion. The export helpers of this class (``to_hf_dataset``,
    ``to_pandas``, ``to_csv``) obtain their rows through this method.
    """
149+
@classmethod
@abstractmethod
def from_list(cls, data: t.List[t.Dict]) -> RagasDataset[Sample]:
    """Build a dataset from a list of dictionaries.

    Abstract alternate constructor: each concrete dataset class decides
    how the dictionaries in ``data`` are turned into samples.
    """
155+
142156 @field_validator ("samples" )
143157 def validate_samples (cls , samples : t .List [BaseSample ]) -> t .List [BaseSample ]:
144158 """Validates that all samples are of the same type."""
@@ -155,20 +169,6 @@ def get_sample_type(self) -> t.Type[Sample]:
155169 """Returns the type of the samples in the dataset."""
156170 return type (self .samples [0 ])
157171
158- def _to_list (self ) -> t .List [t .Dict ]:
159- """Converts the samples to a list of dictionaries."""
160- rows = [sample .to_dict () for sample in self .samples ]
161-
162- if self .get_sample_type () == MultiTurnSample :
163- for sample in rows :
164- for item in sample ["user_input" ]:
165- if not isinstance (item ["content" ], str ):
166- item ["content" ] = json .dumps (
167- item ["content" ], ensure_ascii = False
168- )
169-
170- return rows
171-
172172 def to_hf_dataset (self ) -> HFDataset :
173173 """Converts the dataset to a Hugging Face Dataset."""
174174 try :
@@ -178,7 +178,7 @@ def to_hf_dataset(self) -> HFDataset:
178178 "datasets is not installed. Please install it to use this function."
179179 )
180180
181- return HFDataset .from_list (self ._to_list ())
181+ return HFDataset .from_list (self .to_list ())
182182
183183 @classmethod
184184 def from_hf_dataset (cls , dataset : HFDataset ):
@@ -194,26 +194,13 @@ def to_pandas(self) -> PandasDataframe:
194194 "pandas is not installed. Please install it to use this function."
195195 )
196196
197- data = self ._to_list ()
197+ data = self .to_list ()
198198 return pd .DataFrame (data )
199199
200200 def features (self ):
201201 """Returns the features of the samples."""
202202 return self .samples [0 ].get_features ()
203203
204- @classmethod
205- def from_list (cls , mapping : t .List [t .Dict ]):
206- """Creates an EvaluationDataset from a list of dictionaries."""
207- samples = []
208- if all (
209- "user_input" in item and isinstance (mapping [0 ]["user_input" ], list )
210- for item in mapping
211- ):
212- samples .extend (MultiTurnSample (** sample ) for sample in mapping )
213- else :
214- samples .extend (SingleTurnSample (** sample ) for sample in mapping )
215- return cls (samples = samples )
216-
217204 @classmethod
218205 def from_dict (cls , mapping : t .Dict ):
219206 """Creates an EvaluationDataset from a dictionary."""
@@ -227,40 +214,30 @@ def from_dict(cls, mapping: t.Dict):
227214 samples .extend (SingleTurnSample (** sample ) for sample in mapping )
228215 return cls (samples = samples )
229216
230- @classmethod
231- def from_csv (cls , path : str ):
232- """Creates an EvaluationDataset from a CSV file."""
233- import csv
234-
235- with open (path , "r" , newline = "" ) as csvfile :
236- reader = csv .DictReader (csvfile )
237- data = [row for row in reader ]
238- return cls .from_list (data )
239-
def to_csv(self, path: t.Union[str, Path]):
    """Write the dataset to ``path`` as a CSV file.

    The header row is taken from the keys of the first row returned by
    ``to_list()``. When the dataset yields no rows, nothing is written
    and the file is not opened.
    """
    import csv

    rows = self.to_list()
    if not rows:
        return

    header = rows[0].keys()

    with open(path, "w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=header)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
255232
def to_jsonl(self, path: t.Union[str, Path]):
    """Write the dataset to ``path`` in JSON Lines format.

    Each sample is serialized from its ``to_dict()`` output as one JSON
    object per line; non-ASCII characters are written unescaped.
    """
    with open(path, "w") as out:
        out.writelines(
            json.dumps(sample.to_dict(), ensure_ascii=False) + "\n"
            for sample in self.samples
        )
261238
262239 @classmethod
263- def from_jsonl (cls , path : str ):
240+ def from_jsonl (cls , path : t . Union [ str , Path ] ):
264241 """Creates an EvaluationDataset from a JSONL file."""
265242 with open (path , "r" ) as jsonlfile :
266243 data = [json .loads (line ) for line in jsonlfile ]
@@ -307,8 +284,6 @@ class EvaluationDataset(RagasDataset[SingleTurnSampleOrMultiTurnSample]):
307284 Creates an EvaluationDataset from a list of dictionaries.
308285 from_dict(mapping)
309286 Creates an EvaluationDataset from a dictionary.
310- from_csv(path)
311- Creates an EvaluationDataset from a CSV file.
312287 to_csv(path)
313288 Converts the dataset to a CSV file.
314289 to_jsonl(path)
@@ -333,6 +308,37 @@ def __getitem__(
333308 else :
334309 raise TypeError ("Index must be int or slice" )
335310
def to_list(self) -> t.List[t.Dict]:
    """Convert the samples to a list of dictionaries.

    Each sample is converted with its ``to_dict()`` method. For a
    multi-turn dataset, the ``content`` of every ``user_input`` message
    that is not already a string is replaced by its JSON encoding, so
    all ``content`` values in the result are strings.

    Returns
    -------
    list of dict
        One dictionary per sample; an empty list for an empty dataset.
    """
    rows = [sample.to_dict() for sample in self.samples]

    # get_sample_type() indexes samples[0]; check for samples first so an
    # empty dataset yields [] instead of raising IndexError.
    if self.samples and self.get_sample_type() == MultiTurnSample:
        for row in rows:
            for message in row["user_input"]:
                if not isinstance(message["content"], str):
                    message["content"] = json.dumps(
                        message["content"], ensure_ascii=False
                    )

    return rows
323+
@classmethod
def from_list(cls, data: t.List[t.Dict]) -> EvaluationDataset:
    """Create an EvaluationDataset from a list of dictionaries.

    Parameters
    ----------
    data : list of dict
        One dictionary of sample fields per row.

    Returns
    -------
    EvaluationDataset
        Built from ``MultiTurnSample`` objects when every row has a
        ``user_input`` that is a list, otherwise from
        ``SingleTurnSample`` objects.
    """
    # Inspect each row's own "user_input"; the previous check looked at
    # data[0] on every iteration, so only the first row was ever tested.
    is_multi_turn = all(
        "user_input" in item and isinstance(item["user_input"], list)
        for item in data
    )
    sample_cls = MultiTurnSample if is_multi_turn else SingleTurnSample
    return cls(samples=[sample_cls(**item) for item in data])
335+
336+
class EvaluationResultRow(BaseModel):
    # One evaluated row: a dataset sample paired with its scores.
    # Instances are built by EvaluationResult.serialized().
    dataset_row: t.Dict  # the sample as a dictionary (from sample.to_dict())
    scores: t.Dict[str, t.Any]  # the score entry belonging to this row
    # NOTE(review): `field` is the dataclasses helper imported at the top of
    # the file, used here inside a BaseModel (presumably pydantic) - confirm
    # it produces the intended empty-dict default.
    trace: t.Dict[str, t.Any] = field(default_factory=dict)  # none for now
341+
336342
337343@dataclass
338344class EvaluationResult :
@@ -352,7 +358,7 @@ class EvaluationResult:
352358 """
353359
354360 scores : t .List [t .Dict [str , t .Any ]]
355- dataset : t . Optional [ EvaluationDataset ] = None
361+ dataset : EvaluationDataset
356362 binary_columns : t .List [str ] = field (default_factory = list )
357363 cost_cb : t .Optional [CostCallbackHandler ] = None
358364
@@ -407,6 +413,18 @@ def to_pandas(self, batch_size: int | None = None, batched: bool = False):
407413 dataset_df = self .dataset .to_pandas ()
408414 return pd .concat ([dataset_df , scores_df ], axis = 1 )
409415
def serialized(self) -> t.List[EvaluationResultRow]:
    """
    Pair every score entry with its dataset row.

    Returns one EvaluationResultRow per entry in ``self.scores``, holding
    the dictionary form of the sample at the same index of
    ``self.dataset`` together with that score entry.
    """
    result_rows = []
    for index, row_scores in enumerate(self.scores):
        sample = self.dataset[index]
        result_rows.append(
            EvaluationResultRow(
                dataset_row=sample.to_dict(),
                scores=row_scores,
            )
        )
    return result_rows
427+
410428 def total_tokens (self ) -> t .Union [t .List [TokenUsage ], TokenUsage ]:
411429 """
412430 Compute the total tokens used in the evaluation.
0 commit comments