load HHEM on specified device (#1235)

Miaoranmmm · Miaoran · jjmachan · web-flow · commit fa864a678eff · 2024-09-02T23:26:43.000+05:30
Allow users to specify the device on which the HHEM model is loaded, and add
`_create_batch` so that pairs are scored in batches to avoid OOM errors.

---------

Co-authored-by: Miaoran <miaoran@vectara.com>
Co-authored-by: jjmachan <jamesjithin97@gmail.com>
diff --git a/src/ragas/metrics/_faithfulness.py b/src/ragas/metrics/_faithfulness.py
@@ -17,11 +17,9 @@
 
     from ragas.llms.prompt import PromptValue
 
-from typing import Any, Protocol
 
-
-class HasSegmentMethod(Protocol):
-    def segment(self, text) -> Any:
+class HasSegmentMethod(t.Protocol):
+    def segment(self, text) -> t.Any:
         ...
 
 
@@ -316,6 +314,8 @@ def save(self, cache_dir: t.Optional[str] = None) -> None:
 @dataclass
 class FaithulnesswithHHEM(Faithfulness):
     name: str = "faithfulness_with_hhem"  # type: ignore
+    device: str = "cpu"
+    batch_size: int = 10
 
     def __post_init__(self):
         try:
@@ -327,6 +327,7 @@ def __post_init__(self):
         self.nli_classifier = AutoModelForSequenceClassification.from_pretrained(
             "vectara/hallucination_evaluation_model", trust_remote_code=True
         )
+        self.nli_classifier.to(self.device)
         super().__post_init__()
 
     def _create_pairs(
@@ -339,6 +340,13 @@ def _create_pairs(
         pairs = [(premise, statement) for statement in statements]
         return pairs
 
+    def _create_batch(
+        self, pairs: t.List[t.Tuple[str, str]]
+    ) -> t.Generator[t.List[t.Tuple[str, str]], None, None]:
+        length_of_pairs = len(pairs)
+        for ndx in range(0, length_of_pairs, self.batch_size):
+            yield pairs[ndx : min(ndx + self.batch_size, length_of_pairs)]
+
     async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float:
         """
         returns the NLI score for each (q, c, a) pair
@@ -362,9 +370,14 @@ async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float:
 
         assert isinstance(statements, t.List), "statements must be a list"
 
+        scores = []
         pairs = self._create_pairs(row, statements)
-        scores = self.nli_classifier.predict(pairs).detach().numpy().round()
-        return scores.sum() / len(scores)
+        for input_pairs in self._create_batch(pairs):  # to avoid OOM
+            batch_scores = (
+                self.nli_classifier.predict(input_pairs).cpu().detach().round()
+            )
+            scores += batch_scores
+        return sum(scores) / len(scores)
 
 
 faithfulness = Faithfulness()