Commit 34a7914

Added evaluation benchmark (#29)
* add evaluation benchmark
* added eval benchmark
* revert file
1 parent b8fe059 commit 34a7914

File tree

1 file changed (22 additions, 0 deletions)


tests/benchmarks/benchmark_eval.py

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
from datasets import arrow_dataset, load_dataset
from torch.cuda import is_available

from ragas.metrics import Evaluation, bert_score, edit_ratio, rougeL
from ragas.metrics.factual import EntailmentScore

# Run the model-based metric on GPU when one is available.
DEVICE = "cuda" if is_available() else "cpu"
entailment_score = EntailmentScore(device=DEVICE, batch_size=2)
# q_square = Qsquare(device=DEVICE, batch_size=2)

# Benchmark against the first 500 rows of the ragas-webgpt dataset.
DS = load_dataset("explodinggradients/ragas-webgpt", split="train")
assert isinstance(DS, arrow_dataset.Dataset), "Not an arrow_dataset"
DS = DS.select(range(500))

if __name__ == "__main__":
    e = Evaluation(
        metrics=[rougeL, edit_ratio, bert_score, entailment_score],
        batched=True,
        batch_size=64,
    )
    # Score generated answers against the ground-truth references.
    result = e.eval(DS["ground_truth"], DS["generated_text"])
    print(result)
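
The script is meant to be run directly, e.g. python tests/benchmarks/benchmark_eval.py from the repository root. As written it only prints the metric scores; a hypothetical wrapper (using the standard-library time.perf_counter, not part of this commit) would be a minimal way to also get wall-clock numbers:

import time

start = time.perf_counter()
# Same call as in the script above, wrapped in a wall-clock timer.
result = e.eval(DS["ground_truth"], DS["generated_text"])
print(f"eval took {time.perf_counter() - start:.2f}s")
print(result)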
