forked from confident-ai/deepeval
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaaa.py
More file actions
43 lines (41 loc) · 1.23 KB
/
aaa.py
File metadata and controls
43 lines (41 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from deepeval.evaluate.configs import AsyncConfig
from deepeval.test_case import ArenaTestCase, LLMTestCase, LLMTestCaseParams
from deepeval.metrics import ArenaGEval
from deepeval import compare
# Script body: build two arena test cases, define a "Friendly" ArenaGEval
# metric, and hand a repeated list of those cases to compare().


def _capital_of_france_case():
    """Return a new ArenaTestCase with a "GPT-4" and a "Claude-4" contestant.

    Both contestants receive the same question; only their answers differ.
    Every call constructs fresh ArenaTestCase / LLMTestCase objects.
    """
    question = "What is the capital of France?"
    short_answer = LLMTestCase(input=question, actual_output="Paris")
    full_sentence_answer = LLMTestCase(
        input=question,
        actual_output="Paris is the capital of France.",
    )
    return ArenaTestCase(
        contestants={
            "GPT-4": short_answer,
            "Claude-4": full_sentence_answer,
        },
    )


# Two separate (but equal-content) arena cases, kept under their original names.
a_test_case = _capital_of_france_case()
a_test_case2 = _capital_of_france_case()

# The metric is given only the input and the actual output of each contestant.
metric = ArenaGEval(
    name="Friendly",
    criteria="Choose the winner of the more friendly contestant based on the input and actual output",
    evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
)

# List repetition yields 20 entries that alternate between the same two
# case objects (no copies are made).
_arena_cases = [a_test_case, a_test_case2] * 10

compare(
    test_cases=_arena_cases,
    metric=metric,
    async_config=AsyncConfig(run_async=True),
)