Skip to content

Commit 581ff43

Browse files
Author BastienZim committed: "fixed typo in openai evaluator"
1 parent 9f403de commit 581ff43

File tree

2 files changed

+1
-36
lines changed

2 files changed

+1
-36
lines changed

needlehaystack/evaluators/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class OpenAIEvaluator(Evaluator):
1313
Score 5: The answer has moderate relevance but contains inaccuracies.
1414
Score 7: The answer aligns with the reference but has minor omissions.
1515
Score 10: The answer is completely accurate and aligns perfectly with the reference.
16-
Only respond with a numberical score"""}
16+
Only respond with a numerical score"""}
1717

1818
def __init__(self,
1919
model_name: str = "gpt-3.5-turbo-0125",

needlehaystack/llm_needle_haystack_tester.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -278,41 +278,6 @@ def insert_needle(self, context, depth_percent, context_length):
278278
new_context = self.model_to_test.decode_tokens(tokens_new_context)
279279
return new_context
280280

281-
def evaluate_response(self, response):
282-
accuracy_criteria = {
283-
"accuracy": """
284-
Score 1: The answer is completely unrelated to the reference.
285-
Score 3: The answer has minor relevance but does not align with the reference.
286-
Score 5: The answer has moderate relevance but contains inaccuracies.
287-
Score 7: The answer aligns with the reference but has minor omissions.
288-
Score 10: The answer is completely accurate and aligns perfectly with the reference.
289-
Only respond with a numerical score
290-
"""
291-
}
292-
293-
# Using GPT-4 to evaluate
294-
evaluator = load_evaluator(
295-
"labeled_score_string",
296-
criteria=accuracy_criteria,
297-
llm=self.evaluation_model,
298-
)
299-
300-
eval_result = evaluator.evaluate_strings(
301-
# The models response
302-
prediction=response,
303-
304-
# The actual answer
305-
reference=self.needle,
306-
307-
# The question asked
308-
input=self.retrieval_question,
309-
)
310-
311-
return int(eval_result['score'])
312-
313-
def get_context_length_in_tokens(self, context):
314-
return len(self.model_to_test.encode_text_to_tokens(context))
315-
316281
def read_context_files(self):
317282
context = ""
318283
max_context_length = max(self.context_lengths)

0 commit comments

Comments (0)