relari-ai · ellipsis-dev · Jun 11, 2024
diff --git a/continuous_eval/metrics/generation/text/llm_based.py b/continuous_eval/metrics/generation/text/llm_based.py
@@ -32,11 +32,12 @@ def __call__(self, answer: str, retrieved_context: List[str], question: str, **k
             answer (str): the generated answer
             retrieved_context (List[str]): the retrieved contexts
             question (str): the question
-        """ """"""
+        """ """"
         if self.classify_by_statement:
             # Context coverage uses the same prompt as faithfulness because it calculates what proportion of statements in the answer can be attributed to the context.
             # The difference is that faithfulness uses the generated answer, while context coverage uses the ground truth answer (to evaluate context).
-            context_coverage = LLMBasedContextCoverage(use_few_shot=self.use_few_shot)
+            model = self.model if self.model is not None else None
+            context_coverage = LLMBasedContextCoverage(model=model, use_few_shot=self.use_few_shot)
             results = context_coverage(question, retrieved_context, answer)
             score = results["LLM_based_context_coverage"]
             reasoning = results["LLM_based_context_statements"]
@@ -228,6 +229,7 @@ def __call__(self, answer: str, ground_truth_answers: Union[List[str], str], **k
 Response:
 2.5
 The generated answer is more brief and doesn't have the formality and empathetic tone in the reference answer.
+
 """
         else:
             few_shot_prompt = ""
@@ -257,4 +259,4 @@ def __call__(self, answer: str, ground_truth_answers: Union[List[str], str], **k
         return {
             "LLM_based_style_consistency": normalized_score,
             "LLM_based_style_consistency_reasoning": reasoning,
-        }
+        }