Skip to content

Commit 1769c04

Browse files
fix(openai): async pagination for OpenAI list methods [backport 3.17] (#15067)
Backport 52929aa from #14911 to 3.17. Hey, so this is an attempt to fix #14574 where doing `async for model in client.models.list()` would fail with `TypeError: 'async for' requires an object with __aiter__ method, got coroutine`. ### The Problem Methods like `AsyncModels.list()` and `AsyncFiles.list()` don't actually return coroutines - they return `AsyncPaginator` objects that you can either: - `await` to get the first page (what existing code does) - Use with `async for` to iterate through all items (what was broken) But our wrapper in `_patched_endpoint_async` was converting everything into coroutines, which broke the `async for` use case. ### What I Tried First attempt was using `inspect.iscoroutinefunction()` to detect which methods are actually async vs just returning async objects. That got messy fast because checking unbound methods from classes didn't work reliably. Then I tried just using the sync wrapper for list methods: ```python if method_name == "list": wrap(openai, async_method, _patched_endpoint(openai, endpoint_hook)) ``` This looked promising - the pagination tests passed! But it broke `test_model_alist` and `test_file_alist` because those tests do `await client.models.list()` and expect full tracing with response metadata like `openai.response.count`. Using the sync wrapper meant we lost all that when the paginator was awaited. Also tried returning the paginator directly without any wrapping, but that meant we lost tracing entirely when someone did `async for`. Not acceptable. ### A Solution Created a `_TracedAsyncPaginator` wrapper class that implements both `__aiter__` and `__await__`. 
This way: - When you do `await client.models.list()` -> calls `__await__`, traces properly, returns first page (existing behavior preserved) - When you do `async for model in client.models.list()` -> calls `__aiter__`, traces on first iteration, yields items (fixes the bug) The wrapper is ~50 lines but it's the minimal solution that preserves 100% backward compatibility while fixing the breaking bug. Had to use `finally` blocks to ensure traces complete even if iteration stops early. ### Testing Added two new pagination tests (`test_model_list_pagination` and `test_model_alist_pagination`) that specifically test the `async for` pattern. Co-authored-by: Alexandre Choura <[email protected]>
1 parent ac57ec7 commit 1769c04

File tree

6 files changed

+200
-25
lines changed

6 files changed

+200
-25
lines changed

ddtrace/contrib/internal/openai/_endpoint_hooks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,8 @@ def _record_response(self, pin, integration, span, args, kwargs, resp, error):
260260
resp = super()._record_response(pin, integration, span, args, kwargs, resp, error)
261261
if not resp:
262262
return
263-
span.set_metric("openai.response.count", len(resp.data or []))
263+
if hasattr(resp, "data"):
264+
span.set_metric("openai.response.count", len(resp.data or []))
264265
return resp
265266

266267

ddtrace/contrib/internal/openai/patch.py

Lines changed: 101 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -286,40 +286,117 @@ def patched_endpoint(openai, pin, func, instance, args, kwargs):
286286
return patched_endpoint(openai)
287287

288288

289+
class _TracedAsyncPaginator:
290+
"""Wrapper for AsyncPaginator objects to enable tracing for both await and async for usage."""
291+
292+
def __init__(self, paginator, pin, integration, patch_hook, instance, args, kwargs):
293+
self._paginator = paginator
294+
self._pin = pin
295+
self._integration = integration
296+
self._patch_hook = patch_hook
297+
self._instance = instance
298+
self._args = args
299+
self._kwargs = kwargs
300+
301+
def __aiter__(self):
302+
async def _traced_aiter():
303+
g = _traced_endpoint(
304+
self._patch_hook, self._integration, self._instance, self._pin, self._args, self._kwargs
305+
)
306+
g.send(None)
307+
err = None
308+
completed = False
309+
try:
310+
iterator = self._paginator.__aiter__()
311+
# Fetch first item to trigger trace completion before iteration starts.
312+
# This ensures the span is recorded even if iteration stops early.
313+
first_item = await iterator.__anext__()
314+
try:
315+
g.send((None, None))
316+
completed = True
317+
except StopIteration:
318+
completed = True
319+
yield first_item
320+
async for item in iterator:
321+
yield item
322+
except StopAsyncIteration:
323+
pass
324+
except BaseException as e:
325+
err = e
326+
raise
327+
finally:
328+
if not completed:
329+
try:
330+
g.send((None, err))
331+
except StopIteration:
332+
pass
333+
334+
return _traced_aiter()
335+
336+
def __await__(self):
337+
async def _trace_and_await():
338+
g = _traced_endpoint(
339+
self._patch_hook, self._integration, self._instance, self._pin, self._args, self._kwargs
340+
)
341+
g.send(None)
342+
resp, err = None, None
343+
try:
344+
resp = await self._paginator
345+
except BaseException as e:
346+
err = e
347+
raise
348+
finally:
349+
try:
350+
g.send((resp, err))
351+
except StopIteration as e:
352+
if err is None:
353+
return e.value
354+
return resp
355+
356+
return _trace_and_await().__await__()
357+
358+
289359
def _patched_endpoint_async(openai, patch_hook):
    """Build the async wrapper applied to OpenAI client endpoint methods.

    Unlike the sync variant, the wrapped function is NOT declared ``async``:
    methods such as ``AsyncModels.list`` return an ``AsyncPaginator`` (both
    awaitable and async-iterable), and wrapping them in a coroutine would
    break ``async for`` usage. Paginators are therefore wrapped in
    ``_TracedAsyncPaginator``; everything else is awaited inside a traced
    coroutine.
    """

    @with_traced_module
    def patched_endpoint(openai, pin, func, instance, args, kwargs):
        # with_raw_response endpoints: tag the call and skip tracing here
        # (the raw-response hook handles it).
        if (
            patch_hook is _endpoint_hooks._ChatCompletionWithRawResponseHook
            or patch_hook is _endpoint_hooks._CompletionWithRawResponseHook
        ):
            kwargs[OPENAI_WITH_RAW_RESPONSE_ARG] = True
            return func(*args, **kwargs)
        # Streaming raw responses are also passed through untraced.
        if kwargs.pop(OPENAI_WITH_RAW_RESPONSE_ARG, False) and kwargs.get("stream", False):
            return func(*args, **kwargs)

        result = func(*args, **kwargs)
        # AsyncPaginator detection: an object exposing both __aiter__ and
        # __await__. It must be returned directly — not awaited — so that
        # `async for` over it keeps working.
        if hasattr(result, "__aiter__") and hasattr(result, "__await__"):
            return _TracedAsyncPaginator(result, pin, openai._datadog_integration, patch_hook, instance, args, kwargs)

        async def async_wrapper():
            # Standard coroutine path: drive the _traced_endpoint generator
            # around the awaited call.
            integration = openai._datadog_integration
            trace_gen = _traced_endpoint(patch_hook, integration, instance, pin, args, kwargs)
            trace_gen.send(None)
            response, error = None, None
            replacement = None
            try:
                response = await result
            except BaseException as exc:
                error = exc
                raise
            finally:
                try:
                    trace_gen.send((response, error))
                except StopIteration as stop:
                    if error is None:
                        # A value returned by the hook takes priority over
                        # the raw response.
                        replacement = stop.value

            return replacement if replacement is not None else response

        return async_wrapper()

    return patched_endpoint(openai)
325402

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
openai: This fix resolves an issue where using async iteration with paginated methods (e.g., ``async for model in client.models.list()``) caused a ``TypeError: 'async for' requires an object with __aiter__ method, got coroutine``. See `issue #14574 <https://github.com/DataDog/dd-trace-py/issues/14574>`_.

tests/contrib/openai/test_openai_v1.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@ def test_model_list(api_key_in_env, request_api_key, openai, openai_vcr, snapsho
3535
client.models.list()
3636

3737

38+
@pytest.mark.parametrize("api_key_in_env", [True, False])
def test_model_list_pagination(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
    """Sync pagination: iterating ``client.models.list()`` with ``for`` must
    yield items and produce the expected snapshot span, even when iteration
    stops before the page is exhausted."""
    with snapshot_context(
        token="tests.contrib.openai.test_openai.test_model_list_pagination",
        ignores=["meta.http.useragent", "meta.openai.api_type", "meta.openai.api_base", "meta.openai.request.user"],
    ):
        with openai_vcr.use_cassette("model_list.yaml"):
            client = openai.OpenAI(api_key=request_api_key)
            seen = 0
            for _model in client.models.list():
                seen += 1
                # Break early on purpose: the span must still be recorded.
                if seen >= 2:
                    break
            assert seen >= 2
52+
53+
3854
@pytest.mark.parametrize("api_key_in_env", [True, False])
3955
async def test_model_alist(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
4056
with snapshot_context(
@@ -46,6 +62,22 @@ async def test_model_alist(api_key_in_env, request_api_key, openai, openai_vcr,
4662
await client.models.list()
4763

4864

65+
@pytest.mark.parametrize("api_key_in_env", [True, False])
async def test_model_alist_pagination(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
    """Async pagination: ``async for`` over ``client.models.list()`` must not
    raise (regression test for the coroutine-instead-of-paginator bug) and
    must still trace, even when iteration stops early."""
    with snapshot_context(
        token="tests.contrib.openai.test_openai.test_model_alist_pagination",
        ignores=["meta.http.useragent", "meta.openai.api_type", "meta.openai.api_base", "meta.openai.request.user"],
    ):
        with openai_vcr.use_cassette("model_alist.yaml"):
            client = openai.AsyncOpenAI(api_key=request_api_key)
            seen = 0
            async for _model in client.models.list():
                seen += 1
                # Break early on purpose: the span must still be recorded.
                if seen >= 2:
                    break
            assert seen >= 2
79+
80+
4981
@pytest.mark.parametrize("api_key_in_env", [True, False])
5082
def test_model_retrieve(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
5183
with snapshot_context(
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[[
2+
{
3+
"name": "openai.request",
4+
"service": "tests.contrib.openai",
5+
"resource": "listModels",
6+
"trace_id": 0,
7+
"span_id": 1,
8+
"parent_id": 0,
9+
"type": "",
10+
"error": 0,
11+
"meta": {
12+
"_dd.p.dm": "-0",
13+
"_dd.p.tid": "68f0b1d700000000",
14+
"component": "openai",
15+
"language": "python",
16+
"openai.request.endpoint": "/v1/models",
17+
"openai.request.method": "GET",
18+
"openai.request.provider": "OpenAI",
19+
"runtime-id": "1e2a3154601a494f8f219a4327b659c2"
20+
},
21+
"metrics": {
22+
"_dd.measured": 1,
23+
"_dd.top_level": 1,
24+
"_dd.tracer_kr": 1.0,
25+
"_sampling_priority_v1": 1,
26+
"process_id": 573
27+
},
28+
"duration": 1683125,
29+
"start": 1760604631675824507
30+
}]]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[[
2+
{
3+
"name": "openai.request",
4+
"service": "tests.contrib.openai",
5+
"resource": "listModels",
6+
"trace_id": 0,
7+
"span_id": 1,
8+
"parent_id": 0,
9+
"type": "",
10+
"error": 0,
11+
"meta": {
12+
"_dd.p.dm": "-0",
13+
"_dd.p.tid": "68f0b1d500000000",
14+
"component": "openai",
15+
"language": "python",
16+
"openai.request.endpoint": "/v1/models",
17+
"openai.request.method": "GET",
18+
"openai.request.provider": "OpenAI",
19+
"runtime-id": "1e2a3154601a494f8f219a4327b659c2"
20+
},
21+
"metrics": {
22+
"_dd.measured": 1,
23+
"_dd.top_level": 1,
24+
"_dd.tracer_kr": 1.0,
25+
"_sampling_priority_v1": 1,
26+
"openai.response.count": 112,
27+
"process_id": 573
28+
},
29+
"duration": 13777416,
30+
"start": 1760604629974266007
31+
}]]

0 commit comments

Comments
 (0)