Commit a18f157

Update _nv_metrics.py (#2053)

Remove the hard 7,000-character context length limit on the `nv_relevance` and `nv_groundedness` metrics. Updated the Judge x Human Correlation Leaderboard.

1 parent 3d1a0d2 commit a18f157
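In practice the change is the same one-line fix at four call sites: the joined retrieved contexts were previously sliced to their first 7,000 characters before reaching the judge prompt. A minimal sketch of the before/after behavior; `contexts` is a hypothetical stand-in for `sample.retrieved_contexts` as used in the diff below:

```python
# Before vs. after this commit; `contexts` stands in for
# sample.retrieved_contexts from the diff below.
contexts = ["passage one ...", "passage two ..."]

# Old behavior: the joined context was silently cut to 7,000 characters.
old_context = "\n".join(contexts)[:7000]

# New behavior: the full joined context reaches the judge prompt.
new_context = "\n".join(contexts)
```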

File tree

1 file changed: +15 -15 lines changed

src/ragas/metrics/_nv_metrics.py

Lines changed: 15 additions & 15 deletions
```diff
@@ -21,17 +21,17 @@ class AnswerAccuracy(MetricWithLLM, SingleTurnMetric):
     This metric averages two distinct judge prompts to evaluate.
 
     Top10, Zero-shoot LLM-as-a-Judge Leaderboard:
-    1)- mistralai/mixtral-8x22b-instruct-v0.1
-    2)- mistralai/mixtral-8x7b-instruct-v0.1
-    3)- meta/llama-3.1-70b-instruct
-    4)- meta/llama-3.3-70b-instruct
-    5)- meta/llama-3.1-405b-instruct
-    6)- mistralai/mistral-nemo-12b-instruct
-    7)- nvidia/llama-3.1-nemotron-70b-instruct
-    8)- meta/llama-3.1-8b-instruct
-    9)- google/gemma-2-2b-it
-    10)- nvidia/nemotron-mini-4b-instruct
-    The top1 LB model have high correlation with human judges (~0.90).
+    1)- nvidia/Llama-3_3-Nemotron-Super-49B-v1
+    2)- mistralai/mixtral-8x22b-instruct-v0.1
+    3)- mistralai/mixtral-8x7b-instruct-v0.1
+    4)- meta/llama-3.1-70b-instruct
+    5)- meta/llama-3.3-70b-instruct
+    6)- meta/llama-3.1-405b-instruct
+    7)- mistralai/mistral-nemo-12b-instruct
+    8)- nvidia/llama-3.1-nemotron-70b-instruct
+    9)- meta/llama-3.1-8b-instruct
+    10)- google/gemma-2-2b-it
+    The top1 LB model have high correlation with human judges (~0.92).
 
     Attributes
     ----------
```
```diff
@@ -252,7 +252,7 @@ async def _single_turn_ascore(
             formatted_prompt = StringPromptValue(
                 text=self.template_relevance1.format(
                     query=sample.user_input,
-                    context="\n".join(sample.retrieved_contexts)[:7000],
+                    context="\n".join(sample.retrieved_contexts),
                 )
             )
             req = self.llm.agenerate_text(
```
```diff
@@ -271,7 +271,7 @@ async def _single_turn_ascore(
             formatted_prompt = StringPromptValue(
                 text=self.template_relevance2.format(
                     query=sample.user_input,
-                    context="\n".join(sample.retrieved_contexts)[:7000],
+                    context="\n".join(sample.retrieved_contexts),
                 )
             )
             req = self.llm.agenerate_text(
```
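Callers who relied on the old cap to keep judge prompts bounded can reapply it on the sample before scoring. A minimal sketch, assuming the `ContextRelevance` metric this file defines and ragas' `SingleTurnSample`; `evaluator_llm` is a placeholder for an already-configured ragas LLM wrapper:

```python
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import ContextRelevance

MAX_CONTEXT_CHARS = 7000  # the cap this commit removes

async def score_with_old_cap(evaluator_llm):
    sample = SingleTurnSample(
        user_input="What is the capital of France?",
        retrieved_contexts=["Paris is the capital of France. ..."] * 100,
    )
    # Reproduce the old behavior explicitly: cap the *joined* context,
    # since the removed slice applied after "\n".join(...).
    joined = "\n".join(sample.retrieved_contexts)[:MAX_CONTEXT_CHARS]
    sample.retrieved_contexts = [joined]

    metric = ContextRelevance(llm=evaluator_llm)
    return await metric.single_turn_ascore(sample)

# run with: asyncio.run(score_with_old_cap(evaluator_llm))
```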
```diff
@@ -385,7 +385,7 @@ async def _single_turn_ascore(
         for retry in range(self.retry):
             formatted_prompt = StringPromptValue(
                 text=self.template_groundedness1.format(
-                    context="\n".join(sample.retrieved_contexts)[:7000],
+                    context="\n".join(sample.retrieved_contexts),
                     response=sample.response,
                 )
             )
```
```diff
@@ -404,7 +404,7 @@ async def _single_turn_ascore(
         for retry in range(self.retry):
             formatted_prompt = StringPromptValue(
                 text=self.template_groundedness2.format(
-                    context="\n".join(sample.retrieved_contexts)[:7000],
+                    context="\n".join(sample.retrieved_contexts),
                     response=sample.response,
                 )
             )
```
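Each call site above follows the same dual-prompt pattern: the metric formats two template variants (`template_groundedness1` and `template_groundedness2`) over the same context and response, scores each with the judge LLM, and combines the results (the AnswerAccuracy docstring above describes averaging; it is assumed here for groundedness as well). A simplified sketch of that pattern; the template strings, the 0-2 rating scale, the score parsing, and the `llm.complete` call are illustrative assumptions, not the file's actual implementation:

```python
# Illustrative stand-ins for the file's two groundedness templates.
TEMPLATE_A = (
    "Rate how grounded the response is in the context (0-2).\n"
    "Context: {context}\nResponse: {response}\nScore:"
)
TEMPLATE_B = (
    "Is every claim in the response supported by the context? Answer 0-2.\n"
    "Context: {context}\nResponse: {response}\nScore:"
)

async def dual_judge_groundedness(llm, contexts: list[str], response: str) -> float:
    # No [:7000] slice after this commit: the judge sees the full context.
    context = "\n".join(contexts)
    scores = []
    for template in (TEMPLATE_A, TEMPLATE_B):
        # `llm.complete` is a hypothetical async text-generation call.
        raw = await llm.complete(template.format(context=context, response=response))
        scores.append(float(raw.strip()) / 2.0)  # map the 0-2 rating onto [0, 1]
    return sum(scores) / len(scores)  # average the two judge prompts
```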
