Implement tools and outputs for the MLXLM model

RobinPicard · RobinPicard · commit 7f748f1c60a6 · 2025-09-11T16:59:03.000+02:00
diff --git a/outlines/models/mlxlm.py b/outlines/models/mlxlm.py
@@ -6,7 +6,9 @@
 from outlines.inputs import Chat
 from outlines.models.base import Model, ModelTypeAdapter
 from outlines.models.transformers import TransformerTokenizer
+from outlines.outputs import Output, StreamingOutput
 from outlines.processors import OutlinesLogitsProcessor
+from outlines.tools import ToolDef
 
 if TYPE_CHECKING:
     import mlx.nn as nn
@@ -37,7 +39,7 @@ def format_input(self, model_input):
 
         """
         raise NotImplementedError(
-            f"The input type {input} is not available with mlx-lm. "
+            f"The input type {model_input} is not available with mlx-lm. "
             "The available types are `str` and `Chat`."
         )
 
@@ -63,7 +65,7 @@ def format_chat_input(self, model_input: Chat) -> str:
         )
 
     def format_output_type(
-        self, output_type: Optional[OutlinesLogitsProcessor] = None,
+        self, output_type: Optional[OutlinesLogitsProcessor],
     ) -> Optional[List[OutlinesLogitsProcessor]]:
         """Generate the logits processor argument to pass to the model.
 
@@ -83,6 +85,14 @@ def format_output_type(
         return [output_type]
 
 
+    def format_tools(self, tools):
+        """Raise if any tools are given; MLXLM does not support them."""
+        if tools:
+            raise NotImplementedError(
+                "MLXLM does not support tools."
+            )
+
+
 class MLXLM(Model):
     """Thin wrapper around an `mlx_lm` model.
 
@@ -118,9 +128,10 @@ def __init__(
     def generate(
         self,
         model_input: str,
-        output_type: Optional[OutlinesLogitsProcessor] = None,
+        output_type: Optional[OutlinesLogitsProcessor],
+        tools: Optional[List[ToolDef]],
         **kwargs,
-    ) -> str:
+    ) -> Output:
         """Generate text using `mlx-lm`.
 
         Parameters
@@ -130,29 +141,36 @@ def generate(
         output_type
             The logits processor the model will use to constrain the format of
             the generated text.
+        tools
+            The tools to use for the generation.
         kwargs
             Additional keyword arguments to pass to the `mlx-lm` library.
 
         Returns
         -------
-        str
+        Output
             The text generated by the model.
 
         """
         from mlx_lm import generate
 
-        return generate(
+        self.type_adapter.format_tools(tools)
+
+        result = generate(
             self.model,
             self.mlx_tokenizer,
             self.type_adapter.format_input(model_input),
             logits_processors=self.type_adapter.format_output_type(output_type),
             **kwargs,
         )
 
+        return Output(content=result)  # mlx_lm.generate returns a plain str
+
     def generate_batch(
         self,
         model_input,
-        output_type = None,
+        output_type,
+        tools,
         **kwargs,
     ):
         raise NotImplementedError(
@@ -162,9 +180,10 @@ def generate_batch(
     def generate_stream(
         self,
         model_input: str,
-        output_type: Optional[OutlinesLogitsProcessor] = None,
+        output_type: Optional[OutlinesLogitsProcessor],
+        tools: Optional[List[ToolDef]],
         **kwargs,
-    ) -> Iterator[str]:
+    ) -> Iterator[StreamingOutput]:
         """Stream text using `mlx-lm`.
 
         Parameters
@@ -174,25 +193,29 @@ def generate_stream(
         output_type
             The logits processor the model will use to constrain the format of
             the generated text.
+        tools
+            The tools to use for the generation.
         kwargs
             Additional keyword arguments to pass to the `mlx-lm` library.
 
         Returns
         -------
-        Iterator[str]
+        Iterator[StreamingOutput]
             An iterator that yields the text generated by the model.
 
         """
         from mlx_lm import stream_generate
 
+        self.type_adapter.format_tools(tools)
+
         for gen_response in stream_generate(
             self.model,
             self.mlx_tokenizer,
             self.type_adapter.format_input(model_input),
             logits_processors=self.type_adapter.format_output_type(output_type),
             **kwargs,
         ):
-            yield gen_response.text
+            yield StreamingOutput(content=gen_response.text)
 
 
 def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> MLXLM:
diff --git a/tests/models/test_mlxlm.py b/tests/models/test_mlxlm.py
@@ -3,6 +3,8 @@
 from enum import Enum
 from typing import Generator
 
+from pydantic import BaseModel
+
 import outlines
 from outlines.types import Regex
 from outlines.models.mlxlm import (
@@ -11,7 +13,7 @@
     from_mlxlm
 )
 from outlines.models.transformers import TransformerTokenizer
-from pydantic import BaseModel
+from outlines.outputs import Output, StreamingOutput
 
 try:
     import mlx_lm
@@ -55,14 +57,14 @@ def test_mlxlm_tokenizer(model):
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
 def test_mlxlm_simple(model):
-    result = model.generate("Respond with one word. Not more.", None)
-    assert isinstance(result, str)
+    result = model("Respond with one word. Not more.", None)
+    assert isinstance(result, Output)
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
 def test_mlxlm_call(model):
     result = model("Respond with one word. Not more.")
-    assert isinstance(result, str)
+    assert isinstance(result, Output)
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
@@ -80,15 +82,15 @@ def test_mlxlm_invalid_inference_kwargs(model):
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
 def test_mlxlm_inference_kwargs(model):
     result = model("Write a short story about a cat.", max_tokens=2)
-    assert isinstance(result, str)
-    assert len(result) < 20
+    assert isinstance(result, Output)
+    assert len(result.content) < 20
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
 def test_mlxlm_regex(model):
     result = model("Give a number between 0 and 9.", Regex(r"[0-9]"))
-    assert isinstance(result, str)
-    assert re.match(r"[0-9]", result)
+    assert isinstance(result, Output)
+    assert re.match(r"[0-9]", result.content)
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
@@ -97,7 +99,7 @@ class Character(BaseModel):
         name: str
 
     result = model("Create a character with a name.", Character)
-    assert "name" in result
+    assert "name" in result.content
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
@@ -107,7 +109,7 @@ class Foo(Enum):
         dog = "dog"
 
     result = model("Cat or dog?", Foo)
-    assert result in ["cat", "dog"]
+    assert result.content in ["cat", "dog"]
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
@@ -116,7 +118,7 @@ def test_mlxlm_stream_text_stop(model):
         "Respond with one word. Not more.", None, max_tokens=100
     )
     assert isinstance(generator, Generator)
-    assert isinstance(next(generator), str)
+    assert isinstance(next(generator), StreamingOutput)
 
 
 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
diff --git a/tests/models/test_mlxlm_type_adapter.py b/tests/models/test_mlxlm_type_adapter.py
@@ -1,12 +1,13 @@
-import pytest
 import io
+import pytest
 
 from outlines_core import Index, Vocabulary
 from PIL import Image as PILImage
 
 from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor
 from outlines.inputs import Chat, Image
 from outlines.models.mlxlm import MLXLMTypeAdapter
+from outlines.tools import ToolDef
 
 try:
     import mlx_lm
@@ -82,3 +83,16 @@ def test_mlxlm_type_adapter_format_output_type(adapter, logits_processor):
     assert isinstance(formatted, list)
     assert len(formatted) == 1
     assert isinstance(formatted[0], OutlinesCoreLogitsProcessor)
+
+
+@pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
+def test_mlxlm_type_adapter_tools(adapter):
+    with pytest.raises(  # a non-empty tool list must be rejected
+        NotImplementedError,
+        match="MLXLM does not support tools."
+    ):
+        adapter.format_tools(
+            [ToolDef(name="test", description="test", parameters={})]
+        )
+
+    adapter.format_tools(None)  # no tools supplied: must not raise