From e3d54d3698fbb08e9655a5f39a2f8dabcec1b936 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=C3=B8rstad?=
Date: Mon, 22 Sep 2025 16:51:50 -0500
Subject: [PATCH 1/3] Support empty response for Completions and
 ChatCompletions API

---
 lm_eval/models/openai_completions.py | 11 ++++++++---
 tests/models/test_api.py             | 24 ++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index d89f63d31e3..32a088e8d75 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -8,7 +8,6 @@
 from lm_eval.models.api_models import TemplateAPI
 from lm_eval.models.utils import handle_stop_sequences

-
 eval_logger = logging.getLogger(__name__)


@@ -95,7 +94,10 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                tmp[choices["index"]] = choices["text"]
+                x = ""
+                if choices["text"] is not None:
+                    x = choices["text"]
+                tmp[choices["index"]] = x
             res = res + tmp
         return res

@@ -167,7 +169,10 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                tmp[choices["index"]] = choices["message"]["content"]
+                x = ""
+                if choices["message"]["content"] is not None:
+                    x = choices["message"]["content"]
+                tmp[choices["index"]] = x
             res = res + tmp
         return res

diff --git a/tests/models/test_api.py b/tests/models/test_api.py
index 2db22377470..4ae368d83c8 100644
--- a/tests/models/test_api.py
+++ b/tests/models/test_api.py
@@ -3,6 +3,7 @@

 import pytest

+from lm_eval.api.instance import Instance
 from lm_eval.models.openai_completions import LocalCompletionsAPI


@@ -161,6 +162,29 @@ def test_model_tokenized_call_usage(
     assert result == {"result": "success"}


+def test_generate_until_with_null_message_content(api):
+    with patch("requests.post") as mock_post:
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "choices": [
+                {
+                    "index": 0,
+                    "text": None,
+                }
+            ]
+        }
+        mock_response.ok = True
+        mock_post.return_value = mock_response
+        request = Instance(
+            request_type="generate_until",
+            doc={},
+            arguments=("Test prompt", {"max_gen_toks": 10}),
+            idx=0,
+        )
+
+        _ = api.generate_until([request])
+
+
 class DummyAsyncContextManager:
     def __init__(self, result):
         self.result = result

From 05ee49186920c48ccb44ec3c20300446be861a01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=C3=B8rstad?=
Date: Fri, 3 Oct 2025 11:39:07 +0200
Subject: [PATCH 2/3] Add warnings for empty/None API responses

- Add warning logs when the API returns None/empty responses in parse_generations
- Helps users identify when reasoning models consume the entire token budget
- Applied pre-commit formatting

Addresses review feedback from @baberabb
---
 lm_eval/models/openai_completions.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index 32a088e8d75..13bd5cf44f3 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -8,6 +8,7 @@
 from lm_eval.models.api_models import TemplateAPI
 from lm_eval.models.utils import handle_stop_sequences

+
 eval_logger = logging.getLogger(__name__)


@@ -97,6 +98,12 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
                 x = ""
                 if choices["text"] is not None:
                     x = choices["text"]
+                else:
+                    eval_logger.warning(
+                        f"Received empty response for choice {choices['index']}. "
+                        "This can happen when using reasoning models if the model spends all the token limit on reasoning. "
+                        "Consider increasing the number of allowed tokens."
+                    )
                 tmp[choices["index"]] = x
             res = res + tmp
         return res

@@ -172,6 +179,12 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
                 x = ""
                 if choices["message"]["content"] is not None:
                     x = choices["message"]["content"]
+                else:
+                    eval_logger.warning(
+                        f"Received empty response for choice {choices['index']}. "
+                        "This can happen when using reasoning models if the model spends all the token limit on reasoning. "
+                        "Consider increasing the number of allowed tokens."
+                    )
                 tmp[choices["index"]] = x
             res = res + tmp
         return res

From c01f509e2e5430f5dc894434d2394e49a64b575b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20B=C3=B8rstad?=
Date: Fri, 3 Oct 2025 11:53:28 +0200
Subject: [PATCH 3/3] Improve code readability and check for empty string
 instead of just None

---
 lm_eval/models/openai_completions.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index 13bd5cf44f3..96aa7ea0f08 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -95,16 +95,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                x = ""
-                if choices["text"] is not None:
-                    x = choices["text"]
-                else:
+                x = choices["text"]
+                content = x if x is not None else ""
+                if not content:
                     eval_logger.warning(
                         f"Received empty response for choice {choices['index']}. "
-                        "This can happen when using reasoning models if the model spends all the token limit on reasoning. "
+                        "This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
                         "Consider increasing the number of allowed tokens."
                     )
-                tmp[choices["index"]] = x
+                tmp[choices["index"]] = content
             res = res + tmp
         return res

@@ -176,15 +175,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
         for out in outputs:
             tmp = [None] * len(out["choices"])
             for choices in out["choices"]:
-                x = ""
-                if choices["message"]["content"] is not None:
-                    x = choices["message"]["content"]
-                else:
+                x = choices["message"]["content"]
+                content = x if x is not None else ""
+                if not content:
                     eval_logger.warning(
                         f"Received empty response for choice {choices['index']}. "
-                        "This can happen when using reasoning models if the model spends all the token limit on reasoning. "
+                        "This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
                         "Consider increasing the number of allowed tokens."
                     )
-                tmp[choices["index"]] = x
+                tmp[choices["index"]] = content
             res = res + tmp
         return res
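
Illustration (not part of the patch series): a minimal, self-contained sketch
of the Completions-side behavior the final patch converges on, so the
None-to-empty-string coercion and the warning can be exercised outside the
harness. The function name parse_choice_texts and the sample payload are
hypothetical stand-ins for the real parse_generations and an actual API
response; only the coercion and warning logic mirror the patched code.

import logging
from typing import Dict, List, Union

logging.basicConfig(level=logging.WARNING)
eval_logger = logging.getLogger(__name__)


def parse_choice_texts(outputs: Union[Dict, List[Dict]]) -> List[str]:
    # Hypothetical stand-in for the patched parse_generations (Completions).
    res = []
    if not isinstance(outputs, list):
        outputs = [outputs]
    for out in outputs:
        # Place each choice by its "index" field, as the harness does.
        tmp = [None] * len(out["choices"])
        for choice in out["choices"]:
            x = choice["text"]
            # Coerce None to "" so downstream scoring never sees None; warn,
            # since an empty completion often means a reasoning model spent
            # its entire token budget on reasoning.
            content = x if x is not None else ""
            if not content:
                eval_logger.warning(
                    f"Received empty response for choice {choice['index']}."
                )
            tmp[choice["index"]] = content
        res = res + tmp
    return res


# A choice whose "text" is None (token budget exhausted) now parses to "".
payload = {"choices": [{"index": 0, "text": None}, {"index": 1, "text": "42"}]}
assert parse_choice_texts(payload) == ["", "42"]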