 def evaluate(
     dataset: Dataset,
     metrics: list[Metric] | None = None,
+    column_map: dict[str, str] = {
+        "question": "question",
+        "contexts": "contexts",
+        "answer": "answer",
+        "ground_truths": "ground_truths",
+    },
 ) -> Result:
     """
     Run the evaluation on the dataset with different metrics
@@ -26,6 +32,10 @@ def evaluate(
     metrics : list[Metric], optional
         List of metrics to use for evaluation. If not provided then ragas will run the
         evaluation on the best set of metrics to give a complete view.
+    column_map : dict[str, str], optional
+        The column names of the dataset to use for evaluation. If the column names of
+        the dataset are different from the default ones then you can provide the
+        mapping as a dictionary here.
 
     Returns
     -------
@@ -66,6 +76,9 @@ def evaluate(
 
     metrics = [answer_relevancy, context_relevancy, faithfulness]
 
+    # select columns from the dataset
+    dataset = dataset.from_dict({k: dataset[v] for k, v in column_map.items()})
+
     # validation
     validate_evaluation_modes(dataset, metrics)
     validate_column_dtypes(dataset)
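
To make the new column_map parameter concrete, here is a minimal usage sketch. The dataset column names (query, retrieved_chunks, model_answer, reference_answers) and the sample row are hypothetical, and the metric imports assume the ragas.metrics names already referenced in this diff; the keys of column_map are the default names that evaluate() expects.

    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import answer_relevancy, faithfulness

    # hypothetical dataset whose column names differ from the ragas defaults
    ds = Dataset.from_dict({
        "query": ["When was the Eiffel Tower completed?"],
        "retrieved_chunks": [["The Eiffel Tower was completed in 1889."]],
        "model_answer": ["It was completed in 1889."],
        "reference_answers": [["The Eiffel Tower was completed in 1889."]],
    })

    # map the default column names expected by evaluate() to the dataset's own names
    result = evaluate(
        ds,
        metrics=[answer_relevancy, faithfulness],
        column_map={
            "question": "query",
            "contexts": "retrieved_chunks",
            "answer": "model_answer",
            "ground_truths": "reference_answers",
        },
    )
    print(result)

With this mapping, the dataset.from_dict step added in the diff rebuilds the dataset under the default column names before validation runs, so callers do not have to rename their columns up front.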