Skip to content

Commit 5531890

Browse files
authored
Support multiple sentences as input (#4)
1 parent cf0057b commit 5531890

File tree

3 files changed

+87
-23
lines changed

3 files changed

+87
-23
lines changed

lm_scorer/models/abc/base.py

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,79 @@ class LMScorer(ABC):
1010
def __init__(self, model_name: str, **kwargs: Any) -> None:
    """Create a scorer for the given model.

    Args:
        model_name: Name of the language model to load.
        **kwargs: Extra options forwarded verbatim to the subclass
            `_build` hook (e.g. device selection — see subclasses).
    """
    # All construction work is delegated to the subclass-specific hook.
    self._build(model_name, kwargs)
1212

13+
@overload
def sentence_score(
    self, text: str, log: bool = False, reduce: str = "prod"
) -> float:
    ...

@overload
def sentence_score(
    self, text: List[str], log: bool = False, reduce: str = "prod"
) -> List[float]:
    ...

def sentence_score(
    self, text: Union[str, List[str]], log: bool = False, reduce: str = "prod",
) -> Union[float, List[float]]:
    """Reduce each sentence's token log-probabilities to a single score.

    Args:
        text: One sentence, or a list of sentences.
        log: When True, return log-probabilities instead of probabilities.
        reduce: Reduction strategy over the token log-probs — "prod"
            (joint probability), "mean" (log of the arithmetic mean),
            "gmean" (log of the geometric mean) or "hmean" (log of the
            harmonic mean).

    Returns:
        A single float when `text` is a string, otherwise one float per
        sentence. An empty input list yields an empty list.

    Raises:
        ValueError: If `reduce` names an unknown strategy.
    """
    single_input = isinstance(text, str)
    sentences = [text] if single_input else text
    if not sentences:
        return []

    def reduce_log_probs(log_probs) -> float:
        # Number of scored tokens in this sentence.
        tlen = log_probs.shape[0]
        if reduce == "prod":
            reduced = log_probs.sum()
        elif reduce == "mean":
            # log of the arithmetic mean of the token probabilities.
            reduced = log_probs.logsumexp(0) - math.log(tlen)
        elif reduce == "gmean":
            # log of the geometric mean of the token probabilities.
            reduced = log_probs.mean(0)
        elif reduce == "hmean":
            # log of the harmonic mean of the token probabilities.
            reduced = log_probs.neg().logsumexp(0).neg() + math.log(tlen)
        else:
            raise ValueError("Unrecognized scoring strategy: %s" % reduce)
        # Exponentiate unless the caller asked for the log score.
        return (reduced if log else reduced.exp()).item()

    scores = [
        reduce_log_probs(output[0]) for output in self._tokens_log_prob(sentences)
    ]
    return scores[0] if single_input else scores
56+
57+
@overload
def tokens_score(
    self, text: str, log: bool = False
) -> Tuple[List[float], List[int], List[str]]:
    ...

@overload
def tokens_score(
    self, text: List[str], log: bool = False
) -> List[Tuple[List[float], List[int], List[str]]]:
    ...

def tokens_score(
    self, text: Union[str, List[str]], log: bool = False
) -> Union[
    Tuple[List[float], List[int], List[str]],
    List[Tuple[List[float], List[int], List[str]]],
]:
    """Return per-token scores, ids and token strings for each sentence.

    Args:
        text: One sentence, or a list of sentences.
        log: When True, token scores are log-probabilities; otherwise
            they are probabilities.

    Returns:
        A `(scores, ids, tokens)` tuple when `text` is a string,
        otherwise one such tuple per sentence. An empty input list
        yields an empty list.
    """
    single_input = isinstance(text, str)
    sentences = [text] if single_input else text
    if not sentences:
        return []

    results = [
        (
            # Keep log-probs as-is, or exponentiate into probabilities.
            (log_probs if log else log_probs.exp()).tolist(),
            ids.tolist(),
            tokens,
        )
        for log_probs, ids, tokens in self._tokens_log_prob(sentences)
    ]
    return results[0] if single_input else results
4586

4687
@classmethod
4788
def supported_model_names(cls) -> Iterable[str]:
@@ -53,8 +94,8 @@ def _build(self, model_name: str, options: Dict[str, Any]) -> None:
5394

5495
@abstractmethod
def _tokens_log_prob(
    self, text: List[str]
) -> List[Tuple[torch.FloatTensor, torch.LongTensor, List[str]]]:
    """Compute per-token log-probabilities for each sentence.

    Subclass hook used by `sentence_score` and `tokens_score`.

    Args:
        text: A list of sentences to score.

    Returns:
        One `(log_probs, ids, tokens)` tuple per input sentence, where
        `log_probs` holds the token log-probabilities, `ids` the token
        ids and `tokens` the token strings.
    """
    ...  # pragma: no cover
59100

60101
@classmethod

lm_scorer/models/gpt2.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def _build(self, model_name: str, options: Dict[str, Any]) -> None:
1919
self.model.to(options["device"])
2020

2121
# @overrides
22-
def _tokens_log_prob(
22+
def _tokens_log_prob_single_sentence(
2323
self, text: str
2424
) -> Tuple[torch.FloatTensor, torch.LongTensor, List[str]]:
2525
device = self.model.device
@@ -57,6 +57,11 @@ def _tokens_log_prob(
5757

5858
return log_probs[0], ids[0], tokens # type: ignore
5959

60+
def _tokens_log_prob(
61+
self, text: List[str]
62+
) -> List[Tuple[torch.FloatTensor, torch.LongTensor, List[str]]]:
63+
return list(map(self._tokens_log_prob_single_sentence, text))
64+
6065
# @overrides
6166
@classmethod
6267
def _supported_model_names(cls) -> Iterable[str]:

tests/models/test_gpt2.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,21 @@ def should_work_on_an_empty_sentence():
5151
score = scorer.sentence_score("", reduce="hmean", log=True)
5252
assert score <= 0.0
5353

54+
def should_work_on_an_empty_list():
    # No sentences in — no scores out.
    empty_scores = scorer.sentence_score([])
    assert empty_scores == []
56+
57+
def should_give_same_results_independently_of_input_type():
    # Batch scoring must match scoring each sentence on its own.
    sentences = [
        "I have a big amount of money.",
        "This is the best day of my life.",
        "I think this game is easier than the one we played yesterday.",
    ]

    sentences_scores = scorer.sentence_score(sentences)

    for sentence, expected_score in zip(sentences, sentences_scores):
        assert scorer.sentence_score(sentence) == expected_score
68+
5469
# TODO: Test the various reducing strategies by mocking the _tokens_log_prob call.
5570

5671

@@ -70,6 +85,9 @@ def should_work_on_an_empty_sentence():
7085
assert len(tokens) == 1, tokens
7186
assert scores[0] <= 0.0
7287

88+
def should_work_on_an_empty_list():
    # No sentences in — no per-token outputs out.
    empty_outputs = scorer.tokens_score([])
    assert empty_outputs == []
90+
7391

7492
def describe_sentence_score_for_english():
7593
scorer = GPT2LMScorer("gpt2")

0 commit comments

Comments
 (0)