From 6c538a9aa706c3661c1c8c38b9c7e827fc285c02 Mon Sep 17 00:00:00 2001
From: Masataro Asai <guicho2.71828@gmail.com>
Date: Mon, 15 Sep 2025 18:28:13 -0400
Subject: [PATCH 1/2] test: add a failing test for missing input padding

---
 test/backends/test_huggingface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/backends/test_huggingface.py b/test/backends/test_huggingface.py
index 6859099a..b7b7d15f 100644
--- a/test/backends/test_huggingface.py
+++ b/test/backends/test_huggingface.py
@@ -179,7 +179,7 @@ class Email(pydantic.BaseModel):
 
 @pytest.mark.qualitative
 def test_generate_from_raw(session):
-    prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
+    prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?", "what is 4+2+2?"]
 
     results = session.backend._generate_from_raw(
         actions=[CBlock(value=prompt) for prompt in prompts], generate_logs=None

From bd318fce1346f9a48b44c42181223a2c713cfee1 Mon Sep 17 00:00:00 2001
From: Masataro Asai <guicho2.71828@gmail.com>
Date: Mon, 15 Sep 2025 18:29:12 -0400
Subject: [PATCH 2/2] fix: pad batched inputs in the huggingface backend

---
 mellea/backends/huggingface.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py
index 360437bf..badeba75 100644
--- a/mellea/backends/huggingface.py
+++ b/mellea/backends/huggingface.py
@@ -447,7 +447,9 @@ def _generate_from_raw(
         prompts = [self.formatter.print(action) for action in actions]
 
         # batch-encoding call is deprecated in favor of this
-        inputs = self._tokenizer(prompts, return_tensors="pt").to(self._device)
+        inputs = self._tokenizer(prompts, return_tensors="pt", padding=True).to(
+            self._device
+        )
 
         if format is None:
             outputs = self._model.generate(  # type: ignore