feat: add async generative slots

jakelorocco · jakelorocco · commit 9a88669e9cf3 · 2025-10-06T09:38:52.000-04:00
diff --git a/mellea/stdlib/genslot.py b/mellea/stdlib/genslot.py
@@ -1,8 +1,9 @@
 """A method to generate outputs based on python functions and a Generative Slot function."""
 
+import asyncio
 import functools
 import inspect
-from collections.abc import Callable
+from collections.abc import Callable, Coroutine
 from copy import deepcopy
 from typing import Any, Generic, ParamSpec, TypedDict, TypeVar, get_type_hints
 
@@ -168,14 +169,13 @@ def __call__(
             **kwargs: Additional Kwargs to be passed to the func.
 
         Returns:
-            ModelOutputThunk: Output with generated Thunk.
+            R: an object with the original return type of the function
         """
         if m is None:
             m = get_session()
         slot_copy = deepcopy(self)
         arguments = bind_function_arguments(self._function._func, *args, **kwargs)
         if arguments:
-            # slot_copy._arguments = []
             for key, val in arguments.items():
                 annotation = get_annotation(slot_copy._function._func, key, val)
                 slot_copy._arguments.append(Argument(annotation, key, val))
@@ -207,6 +207,52 @@ def format_for_llm(self) -> TemplateRepresentation:
         )
 
 
+class AsyncGenerativeSlot(GenerativeSlot, Generic[P, R]):
+    """A generative slot component that generates asynchronously and returns a coroutine.
+
+    Subclass of `GenerativeSlot` whose `__call__` hands back an awaitable
+    instead of the final result.
+    """
+
+    def __call__(
+        self,
+        m: MelleaSession | None = None,
+        model_options: dict | None = None,
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Coroutine[Any, Any, R]:
+        """Call the async generative slot.
+
+        Session lookup, the slot copy, argument binding, and the response
+        format are all prepared synchronously when this method is called.
+        Only the model request and the validation of its output run once the
+        returned coroutine is awaited.
+
+        Args:
+            m: MelleaSession: A mellea session (optional, uses context if None)
+            model_options: Passed through unchanged as `model_options` to `m.aact` (optional)
+            *args: Additional positional args, bound to the func's parameters together with kwargs
+            **kwargs: Additional Kwargs to be passed to the func
+
+        Returns:
+            Coroutine[Any, Any, R]: a coroutine that returns an object with the original return type of the function
+        """
+        if m is None:
+            m = get_session()
+        # Arguments are appended to a deep copy, so the list being mutated
+        # belongs to the copy rather than to `self`.
+        slot_copy = deepcopy(self)
+        arguments = bind_function_arguments(self._function._func, *args, **kwargs)
+        if arguments:
+            for key, val in arguments.items():
+                annotation = get_annotation(slot_copy._function._func, key, val)
+                slot_copy._arguments.append(Argument(annotation, key, val))
+
+        # Used twice below: as the requested output format and to validate
+        # the JSON that comes back.
+        response_model = create_response_format(self._function._func)
+
+        # AsyncGenerativeSlots are used with async functions. In order to support that behavior,
+        # they must return a coroutine object.
+        async def __async_call__() -> R:
+            # Use the async act func so that control flow doesn't get stuck here in async event loops.
+            response = await m.aact(
+                slot_copy, format=response_model, model_options=model_options
+            )
+
+            # Parse the raw response value into the response model, then
+            # unwrap its `result` field for the caller.
+            function_response: FunctionResponse[R] = response_model.model_validate_json(
+                response.value  # type: ignore
+            )
+            return function_response.result
+
+        # Calling the inner coroutine function creates the coroutine object
+        # without running it; the caller is responsible for awaiting it.
+        return __async_call__()
+
+
 def generative(func: Callable[P, R]) -> GenerativeSlot[P, R]:
     """Convert a function into an AI-powered function.
 
@@ -216,6 +262,8 @@ def generative(func: Callable[P, R]) -> GenerativeSlot[P, R]:
     that function's behavior. The output is guaranteed to match the return type
     annotation using structured outputs and automatic validation.
 
+    Note: Works with async functions as well.
+
     Tip: Write the function and docstring in the most Pythonic way possible, not
     like a prompt. This ensures the function is well-documented, easily understood,
     and familiar to any Python developer. The more natural and conventional your
@@ -248,7 +296,7 @@ def generative(func: Callable[P, R]) -> GenerativeSlot[P, R]:
         ...     estimated_hours: float
         >>>
         >>> @generative
-        ... def create_project_tasks(project_desc: str, count: int) -> List[Task]:
+        ... async def create_project_tasks(project_desc: str, count: int) -> List[Task]:
         ...     '''Generate a list of realistic tasks for a project.
         ...
         ...     Args:
@@ -260,7 +308,7 @@ def generative(func: Callable[P, R]) -> GenerativeSlot[P, R]:
         ...     '''
         ...     ...
         >>>
-        >>> tasks = create_project_tasks(session, "Build a web app", 5)
+        >>> tasks = await create_project_tasks(session, "Build a web app", 5)
 
         >>> @generative
         ... def analyze_code_quality(code: str) -> Dict[str, Any]:
@@ -304,8 +352,46 @@ def generative(func: Callable[P, R]) -> GenerativeSlot[P, R]:
         >>>
         >>> reasoning = generate_chain_of_thought(session, "How to optimize a slow database query?")
     """
-    return GenerativeSlot(func)
+    if inspect.iscoroutinefunction(func):
+        return AsyncGenerativeSlot(func)
+    else:
+        return GenerativeSlot(func)
 
 
 # Export the decorator as the interface
 __all__ = ["generative"]
+
+
+if __name__ == "__main__":
+    # Manual smoke script: runs one sync and one async generative slot inside
+    # a `start_session()` context. Only executed when the module is run
+    # directly, never on import.
+    from mellea import start_session
+
+    with start_session():
+        # An earlier draft also created a bare `asyncly()` coroutine here that
+        # was never awaited; it is removed because it did nothing except emit
+        # "RuntimeWarning: coroutine ... was never awaited".
+
+        @generative
+        async def test_async(num: int) -> bool: ...
+
+        @generative
+        def test_sync(truthy: bool) -> int: ...
+
+        # `m=None` makes each slot look the session up via `get_session()`.
+        print("running sync")
+        print(test_sync(m=None, model_options=None, truthy=False))
+
+        async def runmany():
+            """Await the async slot three times in sequence, then three at once."""
+            print(await test_async(m=None, model_options=None, num=6))
+            print(await test_async(m=None, model_options=None, num=4))
+            print(await test_async(m=None, model_options=None, num=5))
+
+            # Build the coroutines first, then hand them to gather together.
+            coros = [
+                test_async(m=None, model_options=None, num=1),
+                test_async(m=None, model_options=None, num=2),
+                test_async(m=None, model_options=None, num=3),
+            ]
+            results = await asyncio.gather(*coros)
+            print(results)
+
+        print("running async")
+        asyncio.run(runmany())
diff --git a/test/stdlib_basics/test_genslot.py b/test/stdlib_basics/test_genslot.py
@@ -1,6 +1,8 @@
+import asyncio
 import pytest
 from typing import Literal
 from mellea import generative, start_session
+from mellea.stdlib.genslot import AsyncGenerativeSlot, GenerativeSlot
 
 
 @generative
@@ -10,6 +12,8 @@ def classify_sentiment(text: str) -> Literal["positive", "negative"]: ...
 @generative
 def write_me_an_email() -> str: ...
 
+# Async fixture slot: decorating a coroutine function with `generative`
+# yields an AsyncGenerativeSlot (asserted in test_async_gen_slot).
+@generative
+async def async_write_short_sentence(topic: str) -> str: ...
 
 @pytest.fixture(scope="function")
 def session():
@@ -29,6 +33,7 @@ def test_gen_slot_output(classify_sentiment_output):
 
 
 def test_func(session):
+    assert isinstance(write_me_an_email, GenerativeSlot) and not isinstance(write_me_an_email, AsyncGenerativeSlot)
     write_email_component = write_me_an_email(session)
     assert isinstance(write_email_component, str)
 
@@ -43,5 +48,18 @@ def test_gen_slot_logs(classify_sentiment_output, session):
     assert isinstance(last_prompt, dict)
     assert set(last_prompt.keys()) == {"role", "content", "images"}
 
+async def test_async_gen_slot(session):
+    """Check that an async-decorated function becomes an awaitable AsyncGenerativeSlot.
+
+    Covers both a directly awaited call and two calls awaited together
+    through `asyncio.gather`.
+    """
+    # NOTE(review): this is a bare `async def` test with no marker; it relies
+    # on the project's pytest async plugin configuration to be collected and
+    # run - confirm.
+    assert isinstance(async_write_short_sentence, AsyncGenerativeSlot)
+
+    # Calling the slot only creates coroutines; the model request is deferred
+    # until they are awaited below.
+    r1 = async_write_short_sentence(session, topic="cats")
+    r2 = async_write_short_sentence(session, topic="dogs")
+
+    r3 = await async_write_short_sentence(session, topic="fish")
+    results = await asyncio.gather(r1, r2)
+
+    assert isinstance(r3, str)
+    assert len(results) == 2
+    # gather always returns one entry per awaitable, so the length check alone
+    # proves nothing about the values; verify each gathered result as well.
+    assert all(isinstance(result, str) for result in results)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])