Integrate turnstile into OpenAI ext

igalshilman · igalshilman · commit 73a79f2390a0 · 2025-12-16T13:12:36.000+01:00
diff --git a/python/restate/ext/openai/runner_wrapper.py b/python/restate/ext/openai/runner_wrapper.py
@@ -12,7 +12,6 @@
 This module contains the optional OpenAI integration for Restate.
 """
 
-import asyncio
 import dataclasses
 import typing
 
@@ -41,11 +40,19 @@
 from agents.tool_context import ToolContext
 from pydantic import BaseModel
 from restate.exceptions import SdkInternalBaseException
+from restate.ext.turnstile import Turnstile
 from restate.extensions import current_context
 
 from restate import RunOptions, ObjectContext, TerminalError
 
 
+class State:  # Per-run holder created in run() and shared by the model wrapper and the tool wrappers.
+    __slots__ = ("turnstile",)  # The only attribute; reassigned after each LLM response.
+
+    def __init__(self) -> None:
+        self.turnstile = Turnstile([])  # Starts with no call ids; replaced once a response lists function calls.
+
+
 # The OpenAI ModelResponse class is a dataclass with Pydantic fields.
 # The Restate SDK cannot serialize this, so we turn the ModelResponse into a Pydantic model.
 class RestateModelResponse(BaseModel):
@@ -71,23 +78,25 @@ class DurableModelCalls(MultiProvider):
     A Restate model provider that wraps the OpenAI SDK's default MultiProvider.
     """
 
-    def __init__(self, max_retries: int | None = 3):
+    def __init__(self, state: State, max_retries: int | None = 3):
         super().__init__()
         self.max_retries = max_retries
+        self.state = state
 
     def get_model(self, model_name: str | None) -> Model:
-        return RestateModelWrapper(super().get_model(model_name or None), self.max_retries)
+        return RestateModelWrapper(super().get_model(model_name or None), self.state, self.max_retries)
 
 
 class RestateModelWrapper(Model):
     """
     A wrapper around the OpenAI SDK's Model that persists LLM calls in the Restate journal.
     """
 
-    def __init__(self, model: Model, max_retries: int | None = 3):
+    def __init__(self, model: Model, state: State, max_retries: int | None = 3):
         self.model = model
         self.model_name = "RestateModelWrapper"
         self.max_retries = max_retries
+        self.state = state
 
     async def get_response(self, *args, **kwargs) -> ModelResponse:
         async def call_llm() -> RestateModelResponse:
@@ -104,6 +113,15 @@ async def call_llm() -> RestateModelResponse:
             raise RuntimeError("No current Restate context found, make sure to run inside a Restate handler")
         result = await ctx.run_typed("call LLM", call_llm, RunOptions(max_attempts=self.max_retries))
         # convert back to original ModelResponse
+        # find all the tool calls
+        ids = []
+        for item in result.output:
+            if item.type == "function_call":
+                ids.append(item.call_id)
+            # TODO: handle other types of tool calls if any are added in the future
+
+        self.state.turnstile = Turnstile(ids)
+
         return ModelResponse(
             output=result.output,
             usage=result.usage,
@@ -212,38 +230,33 @@ async def run(
         Returns:
             The result from Runner.run
         """
-
+        state = State()
         current_run_config = run_config or RunConfig()
         new_run_config = dataclasses.replace(
             current_run_config,
-            model_provider=DurableModelCalls(),
+            model_provider=DurableModelCalls(state),
         )
-        restate_agent = sequentialize_and_wrap_tools(starting_agent, disable_tool_autowrapping)
+        restate_agent = sequentialize_and_wrap_tools(starting_agent, disable_tool_autowrapping, state)
         return await OpenAIRunner.run(restate_agent, *args, run_config=new_run_config, **kwargs)
 
 
 def sequentialize_and_wrap_tools(
     agent: Agent[TContext],
     disable_tool_autowrapping: bool,
+    state: State,
 ) -> Agent[TContext]:
     """
     Wrap the tools of an agent to use the Restate error handling.
 
     Returns:
         A new agent with wrapped tools.
     """
-
-    # Restate does not allow parallel tool calls, so we use a lock to ensure sequential execution.
-    # This lock only affects tools for this agent; handoff agents are wrapped recursively.
-    sequential_tools_lock = asyncio.Lock()
     wrapped_tools: list[Tool] = []
     for tool in agent.tools:
         if isinstance(tool, FunctionTool):
 
             def create_wrapper(captured_tool):
                 async def on_invoke_tool_wrapper(tool_context: ToolContext[Any], tool_input: Any) -> Any:
-                    await sequential_tools_lock.acquire()
-
                     async def invoke():
                         result = await captured_tool.on_invoke_tool(tool_context, tool_input)
                         # Ensure Pydantic objects are serialized to dict for LLM compatibility
@@ -253,7 +266,10 @@ async def invoke():
                             return result.dict()
                         return result
 
+                    turnstile = state.turnstile
+                    call_id = tool_context.tool_call_id
                     try:
+                        await turnstile.wait_for(call_id)
                         if disable_tool_autowrapping:
                             return await invoke()
 
@@ -264,7 +280,7 @@ async def invoke():
                             )
                         return await ctx.run_typed(captured_tool.name, invoke)
                     finally:
-                        sequential_tools_lock.release()
+                        turnstile.allow_next_after(call_id)
 
                 return on_invoke_tool_wrapper