Commit 536e6a0

Bump transformers minor version; fix TypicalLogitsWarper
1 parent a39b8a8 commit 536e6a0

File tree

3 files changed: +9 −8 lines changed


server/poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default.

server/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ bitsandbytes = { version = "^0.41.1", optional = true }
 scipy = { version = "^1.11.2", optional = true }
 safetensors = "^0.4.0"
 sentencepiece = "^0.1.99"
-transformers = "4.34.0"
+transformers = "4.34.1"
 optimum = { version = "1.13.2", extras = ["onnxruntime-gpu"], optional = true }
 onnxruntime = { version = "1.16.0", optional = true }
 onnxruntime-gpu = { version = "1.16.0", optional = true }

server/text_generation_server/utils/logits_process.py

Lines changed: 4 additions & 3 deletions
@@ -419,7 +419,8 @@ def filter(self, indices):
         return None


-# This is a fixed version of the class in transformers. Can be moved once we contribute back the fix and upgrade.
+# This is a fixed version of the class in transformers, see https://github.com/huggingface/transformers/pull/26579.
+# Can be removed after upgrading to transformers v4.35+
 class TypicalLogitsWarper(LogitsWarper):
     r"""
     [`LogitsWarper`] that performs typical decoding. See [Typical Decoding for Natural Language
@@ -456,8 +457,8 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)

         # Remove tokens with cumulative mass above the threshold
-        last_ind = (cumulative_probs < self.mass).sum(dim=1)
-        last_ind.clamp_(0, sorted_scores.shape[-1] - 1)
+        last_ind = (cumulative_probs < self.mass).sum(dim=1) - 1
+        last_ind.clamp_(min=0)
         sorted_indices_to_remove = sorted_scores > sorted_scores.gather(1, last_ind.view(-1, 1))
         # Keep at least min_tokens_to_keep (set to min_tokens_to_keep-1 because we add the first one below)
         sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
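
For context, a minimal sketch (not part of the commit) of how the old and new last_ind computations differ on a toy batch. The mass value and cumulative_probs tensor below are made up for illustration, standing in for the typicality-sorted cumulative probabilities computed earlier in __call__:

import torch

mass = 0.9  # typicality mass threshold (self.mass in the warper)

# Toy cumulative probabilities over typicality-sorted tokens, one row per sequence.
cumulative_probs = torch.tensor([
    [0.30, 0.60, 0.95, 1.00],  # crosses the mass threshold at index 2
    [0.92, 0.97, 0.99, 1.00],  # already above the threshold at index 0
])

# Previous computation: count entries below the threshold, then clamp into range.
old_last_ind = (cumulative_probs < mass).sum(dim=1)
old_last_ind.clamp_(0, cumulative_probs.shape[-1] - 1)

# Fixed computation from this commit: step back one position, then clamp so the
# index never goes below 0 and the most typical token is always kept.
new_last_ind = (cumulative_probs < mass).sum(dim=1) - 1
new_last_ind.clamp_(min=0)

print(old_last_ind)  # tensor([2, 0])
print(new_last_ind)  # tensor([1, 0])

Both versions then use last_ind in sorted_scores.gather(1, last_ind.view(-1, 1)) to find the score cutoff; with the fix, the index stays in bounds by construction (the count is at most the vocabulary size, so count − 1 never exceeds the last position) rather than by clamping to the top of the range.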
