-
Notifications
You must be signed in to change notification settings - Fork 238
Expand file tree
/
Copy pathgoogle.py
More file actions
66 lines (52 loc) · 2.47 KB
/
google.py
File metadata and controls
66 lines (52 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
from typing import Optional

from langchain.evaluation import load_evaluator
from langchain_google_genai import ChatGoogleGenerativeAI

from .evaluator import Evaluator
class GoogleEvaluator(Evaluator):
    """Evaluator that grades a model's answer against a reference using a Google Gemini judge.

    Uses LangChain's ``labeled_score_string`` evaluator with a 1-10 accuracy
    rubric: the judge model compares the response to the stored true answer
    in the context of the question asked.
    """

    # temperature=0 so the judge scores deterministically.
    DEFAULT_MODEL_KWARGS: dict = dict(temperature=0)

    # Grading rubric handed to the LangChain evaluator; the judge is asked
    # to reply with a bare numerical score.
    CRITERIA = {"accuracy": """
                Score 1: The answer is completely unrelated to the reference.
                Score 3: The answer has minor relevance but does not align with the reference.
                Score 5: The answer has moderate relevance but contains inaccuracies.
                Score 7: The answer aligns with the reference but has minor omissions.
                Score 10: The answer is completely accurate and aligns perfectly with the reference.
                Only respond with a numerical score"""}

    def __init__(self,
                 model_name: str = "gemini-1.5-pro",
                 model_kwargs: Optional[dict] = None,
                 true_answer: Optional[str] = None,
                 question_asked: Optional[str] = None):
        """
        :param model_name: The name of the Gemini model used as the judge.
        :param model_kwargs: Model configuration. Default is {temperature: 0}.
        :param true_answer: The true answer to the question asked. Required.
        :param question_asked: The question asked to the model. Required.
        :raises ValueError: If true_answer or question_asked is missing, or if
            the NIAH_EVALUATOR_API_KEY environment variable is not set.
        """
        if (not true_answer) or (not question_asked):
            raise ValueError("true_answer and question_asked must be supplied with init.")

        self.model_name = model_name
        # Copy the class-level default so instances never share (or mutate)
        # the same dict — avoids the mutable-default-argument pitfall.
        self.model_kwargs = dict(self.DEFAULT_MODEL_KWARGS) if model_kwargs is None else model_kwargs
        self.true_answer = true_answer
        self.question_asked = question_asked

        api_key = os.getenv('NIAH_EVALUATOR_API_KEY')
        if not api_key:
            raise ValueError("NIAH_EVALUATOR_API_KEY must be in env for using google evaluator.")
        self.api_key = api_key

        # Judge LLM client; model_kwargs (e.g. temperature) are forwarded as-is.
        self.evaluator = ChatGoogleGenerativeAI(model=self.model_name,
                                                google_api_key=self.api_key,
                                                **self.model_kwargs)

    def evaluate_response(self, response: str) -> int:
        """Grade ``response`` against the stored reference answer.

        :param response: The model output to be graded.
        :return: Integer score (per the rubric, 1-10) extracted from the
            judge's evaluation result.
        """
        evaluator = load_evaluator(
            "labeled_score_string",
            criteria=self.CRITERIA,
            llm=self.evaluator,
        )
        eval_result = evaluator.evaluate_strings(
            # The model's response being graded
            prediction=response,
            # The reference (ground-truth) answer
            reference=self.true_answer,
            # The original question
            input=self.question_asked,
        )
        return int(eval_result['score'])