Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llm/default_plugins/openai_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from llm import AsyncKeyModel, EmbeddingModel, KeyModel, hookimpl
import llm
from llm.utils import (
asyncify,
dicts_to_table_string,
remove_dict_none_values,
logging_client,
Expand Down Expand Up @@ -753,7 +754,7 @@ async def execute(
) -> AsyncGenerator[str, None]:
if prompt.system and not self.allows_system_prompt:
raise NotImplementedError("Model does not support system prompts")
messages = self.build_messages(prompt, conversation)
messages = await asyncify(self.build_messages, prompt, conversation)
kwargs = self.build_kwargs(prompt, stream)
client = self.get_client(key, async_=True)
usage = None
Expand Down
5 changes: 5 additions & 0 deletions llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import threading
import time
from typing import Final
import asyncio

from ulid import ULID

Expand Down Expand Up @@ -734,3 +735,7 @@ def _fresh(ms: int) -> bytes:
timestamp = int.to_bytes(ms, TIMESTAMP_LEN, "big")
randomness = os.urandom(RANDOMNESS_LEN)
return timestamp + randomness


async def asyncify(func, *args, **kwargs):
    """Run a blocking callable in a worker thread without blocking the event loop.

    Thin convenience wrapper around :func:`asyncio.to_thread` that also
    forwards keyword arguments. Used by async model code paths (e.g.
    ``AsyncChat.execute``) to offload synchronous work such as
    ``build_messages``, which may perform blocking HTTP requests while
    resolving attachment types.

    :param func: the synchronous callable to execute
    :param args: positional arguments passed through to ``func``
    :param kwargs: keyword arguments passed through to ``func``
    :return: whatever ``func`` returns
    :raises: re-raises any exception raised by ``func``
    """
    return await asyncio.to_thread(func, *args, **kwargs)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ test = [
"types-PyYAML",
"types-setuptools",
"llm-echo==0.3a3",
"pyleak>=0.1.13",
]

[build-system]
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,3 +484,10 @@ def extract_braces(s):
if first != -1 and last != -1 and first < last:
return s[first : last + 1]
return None

def pytest_configure(config):
    """Pytest hook: register the custom ``no_leaks`` marker.

    Registering the marker keeps ``pytest`` from emitting
    ``PytestUnknownMarkWarning`` (or failing under ``--strict-markers``)
    for tests decorated with ``@pytest.mark.no_leaks``, which the
    ``pyleak`` plugin uses to detect leaked asyncio tasks, leaked
    threads, and event-loop blocking.

    :param config: the pytest ``Config`` object for this test session
    """
    config.addinivalue_line(
        "markers",
        "no_leaks: detect asyncio task leaks, thread leaks, and event loop blocking"
    )

68 changes: 68 additions & 0 deletions tests/test_cli_openai_models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import time
import random
from click.testing import CliRunner
import httpx
from llm.cli import cli
import pytest
import sqlite_utils

from llm.default_plugins.openai_models import AsyncChat
from llm.models import Attachment

@pytest.fixture
def mocked_models(httpx_mock):
Expand Down Expand Up @@ -199,3 +204,66 @@ def test_gpt4o_mini_sync_and_async(monkeypatch, tmpdir, httpx_mock, async_, usag
assert db["responses"].count == 1
row = next(db["responses"].rows)
assert row["response"] == "Ho ho ho"


@pytest.mark.asyncio
@pytest.mark.no_leaks
async def test_async_chat_with_attachment_non_blocking(httpx_mock):
    """An attachment whose type is resolved via a slow HEAD request must not
    block the event loop during an async prompt.

    ``AsyncChat.execute`` offloads ``build_messages`` (which performs the
    blocking HEAD request to sniff the attachment's content type) to a worker
    thread via ``asyncify``; the ``no_leaks`` marker fails this test if the
    event loop is blocked or tasks/threads leak.
    """

    def head_response_with_delay(request: httpx.Request):
        # Simulate a slow (300-500ms) HEAD request used to resolve the
        # attachment's content type — long enough that blocking the event
        # loop here would be detected by the no_leaks marker.
        time.sleep(random.uniform(0.3, 0.5))
        return httpx.Response(
            status_code=200,
            content=b"",
            headers={"Content-Type": "image/png"},
        )

    httpx_mock.add_callback(
        head_response_with_delay,
        method="HEAD",
        url="https://www.example.com/example.png",
        is_reusable=True,
    )

    httpx_mock.add_response(
        method="POST",
        # chat completion request
        url="https://api.openai.com/v1/chat/completions",
        json={
            "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh",
            "object": "chat.completion",
            "created": 1730871958,
            "model": "gpt-4.1-2025-04-14",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "It's a dummy example image",
                        "refusal": None,
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": 1000,
                "completion_tokens": 2000,
                "total_tokens": 12,
            },
            "system_fingerprint": "fp_49254d0e9b",
        },
        headers={"Content-Type": "application/json"},
    )

    model = AsyncChat(
        model_id="gpt-4.1-2025-04-14",
        api_base="https://api.openai.com/v1",
        key="x",
    )
    conversation = model.conversation()
    await conversation.prompt(
        prompt="What is this image?",
        attachments=[Attachment(url="https://www.example.com/example.png")],
        stream=False,
    )
    assert await conversation.responses[0].text() == "It's a dummy example image"