|
1 | 1 | import typing as t |
2 | 2 |
|
3 | | -from datasets import Dataset, load_dataset |
| 3 | +from datasets import Dataset, arrow_dataset, load_dataset |
4 | 4 | from torch.cuda import is_available |
5 | 5 | from tqdm import tqdm |
6 | 6 | from utils import print_table, timeit |
7 | 7 |
|
8 | | -from ragas.metrics import ( |
9 | | - EditDistance, |
10 | | - EditRatio, |
11 | | - EntailmentScore, |
12 | | - Evaluation, |
13 | | - Rouge1, |
14 | | - Rouge2, |
15 | | - RougeL, |
16 | | - SBERTScore, |
17 | | -) |
| 8 | +from ragas.metrics import Evaluation, edit_distance, edit_ratio, rouge1, rouge2, rougeL |
18 | 9 |
|
# --- Benchmark configuration -------------------------------------------------

# Run model-based metrics on GPU when one is available.
DEVICE = "cuda" if is_available() else "cpu"
# Batch sizes to benchmark; 0 means "no batching" for the evaluation loop.
BATCHES = [0, 1]

# Name -> metric mapping used by the benchmark. The heavier, model-based
# metrics are kept commented out so the default run stays cheap; uncomment
# (and construct them, e.g. SBERTScore / EntailmentScore) to include them.
METRICS = {
    "Rouge1": rouge1,
    "Rouge2": rouge2,
    "RougeL": rougeL,
    "EditRatio": edit_ratio,
    "EditDistance": edit_distance,
    # "SBERTScore": sbert_score,
    # "EntailmentScore": entail,
}

DS = load_dataset("explodinggradients/eli5-test", split="test_eli5")
# `load_dataset` can return several container types depending on its
# arguments; validate explicitly rather than with `assert`, which is
# stripped under `python -O` and would let a wrong type slip through.
if not isinstance(DS, arrow_dataset.Dataset):
    raise TypeError(
        f"Expected an arrow Dataset, got {type(DS).__name__}"
    )
# Keep the benchmark fast by evaluating on the first 100 rows only.
DS = DS.select(range(100))
|
35 | 26 |
|
36 | 27 | def setup() -> t.Iterator[tuple[str, Evaluation, Dataset]]: |
|
0 commit comments