Fix llm_context_recall scoring and context recall entry validation

bderusha · bderusha · commit 7c0756916272 · 2024-10-23T19:21:42.000Z
diff --git a/rag_experiment_accelerator/evaluation/llm_based_metrics.py b/rag_experiment_accelerator/evaluation/llm_based_metrics.py
@@ -128,25 +128,26 @@ def llm_context_recall(
         double: The context recall score generated between the ground truth (expected) and context.
     """
     context = "\n".join(retrieved_contexts)
-    prompt = (
-        "\nquestion: "
-        + question
-        + "\ncontext: "
-        + context
-        + "\nanswer: "
-        + groundtruth_answer
-    )
-    result = response_generator.generate_response(
-        sys_message=llm_context_recall_instruction,
-        prompt=prompt,
+
+    result: list | None = response_generator.generate_response(
+        llm_context_recall_instruction,
+        context=context,
+        question=question,
+        answer=groundtruth_answer,
     )
-    good_response = '"Attributed": "1"'
-    bad_response = '"Attributed": "0"'
 
-    return (
-        result.count(good_response)
-        / (result.count(good_response) + result.count(bad_response))
-    ) * 100
+    good_responses = 0
+
+    for response in result:
+        try:
+            score = response.get("attributed", 0)
+            good_responses += int(score)
+        except ValueError:
+            logger.warning(f"Unable to parse {score} as int.")
+    if not result:
+        return -1
+    else:
+        return (good_responses / len(result)) * 100
 
 
 def compute_llm_based_score(
diff --git a/rag_experiment_accelerator/llm/prompt/ragas_prompts.py b/rag_experiment_accelerator/llm/prompt/ragas_prompts.py
@@ -16,11 +16,10 @@ def validate_context_recall(text: str) -> bool:
 
     def is_valid_entry(entry):
         statement_key_pattern = re.compile(r"^statement_\d+$")
-        for key in entry.keys():
-            if key not in ["reason", "attributed"] or not statement_key_pattern.match(
-                key
-            ):
-                return False
+        return all(
+            key in ["reason", "attributed"] or statement_key_pattern.match(key)
+            for key in entry.keys()
+        )
 
     return isinstance(json_text, list) and all(
         is_valid_entry(entry) for entry in json_text