2 changes: 1 addition & 1 deletion integrations/llama_cpp/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.16.1", "llama-cpp-python>=0.2.87"]
dependencies = ["haystack-ai>=2.19.0", "llama-cpp-python>=0.2.87"]

# On macOS GitHub runners, we use a custom index to download pre-built wheels.
# Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
@@ -16,10 +16,10 @@
)
from haystack.dataclasses.streaming_chunk import FinishReason, StreamingChunk, SyncStreamingCallbackT
from haystack.tools import (
-Tool,
-Toolset,
+ToolsType,
_check_duplicate_tool_names,
deserialize_tools_or_toolset_inplace,
+flatten_tools_or_toolsets,
serialize_tools_or_toolset,
)
from haystack.utils import deserialize_callable, serialize_callable
@@ -196,7 +196,7 @@ def __init__(
model_kwargs: Optional[Dict[str, Any]] = None,
generation_kwargs: Optional[Dict[str, Any]] = None,
*,
-tools: Optional[Union[List[Tool], Toolset]] = None,
+tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
chat_handler_name: Optional[str] = None,
model_clip_path: Optional[str] = None,
@@ -215,8 +215,8 @@
For more information on the available kwargs, see
[llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
:param tools:
-A list of tools or a Toolset for which the model can prepare calls.
-This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
+A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+Each tool should have a unique name.
:param streaming_callback: A callback function that is called when a new token is received from the stream.
:param chat_handler_name: Name of the chat handler for multimodal models.
Common options include: "Llava16ChatHandler", "MoondreamChatHandler", "Qwen25VLChatHandler".
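With this change, `tools` accepts a plain list of `Tool` objects, a single `Toolset`, or a mix of both in one list. A minimal usage sketch (the import path, model path, and tool functions are illustrative and not taken from this PR):

```python
from haystack.tools import Toolset, create_tool_from_function
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator


def get_current_temperature(city: str) -> str:
    """Get the current temperature for a given city."""
    return f"The temperature in {city} is 20 degrees Celsius"


def population(city: str) -> str:
    """Get the population for a given city."""
    return f"The population of {city} is 2.2 million"


temperature_tool = create_tool_from_function(get_current_temperature)
city_toolset = Toolset([create_tool_from_function(population)])

# A standalone Tool and a Toolset can now be passed together in the same list.
generator = LlamaCppChatGenerator(
    model="models/qwen2.5-7b-instruct-q4_k_m.gguf",  # illustrative model path
    tools=[temperature_tool, city_toolset],
)
```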
@@ -235,7 +235,7 @@ def __init__(
model_kwargs.setdefault("n_ctx", n_ctx)
model_kwargs.setdefault("n_batch", n_batch)

-_check_duplicate_tool_names(list(tools or []))
+_check_duplicate_tool_names(flatten_tools_or_toolsets(tools))

handler: Optional[Llava15ChatHandler] = None
# Validate multimodal requirements
@@ -325,7 +325,7 @@ def run(
messages: List[ChatMessage],
generation_kwargs: Optional[Dict[str, Any]] = None,
*,
-tools: Optional[Union[List[Tool], Toolset]] = None,
+tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
) -> Dict[str, List[ChatMessage]]:
"""
@@ -337,8 +337,9 @@
For more information on the available kwargs, see
[llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
:param tools:
-A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
-parameter set during component initialization.
+A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+Each tool should have a unique name. If set, it will override the `tools` parameter set during
+component initialization.
:param streaming_callback: A callback function that is called when a new token is received from the stream.
If set, it will override the `streaming_callback` parameter set during component initialization.
:returns: A dictionary with the following keys:
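Tools passed to `run` take precedence over those configured at initialization. A short sketch continuing the example above (it assumes the component's usual `replies` output key):

```python
from haystack.dataclasses import ChatMessage

# `tools` here overrides the tools set in __init__ for this call only.
result = generator.run(
    messages=[ChatMessage.from_user("What is the population of Berlin?")],
    tools=[create_tool_from_function(population)],
)
print(result["replies"][0])
```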
@@ -355,13 +356,12 @@ def run(
formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]

tools = tools or self.tools
-if isinstance(tools, Toolset):
-    tools = list(tools)
-_check_duplicate_tool_names(tools)
+flattened_tools = flatten_tools_or_toolsets(tools)
+_check_duplicate_tool_names(flattened_tools)

llamacpp_tools: List[ChatCompletionTool] = []
-if tools:
-    for t in tools:
+if flattened_tools:
+    for t in flattened_tools:
llamacpp_tools.append(
{
"type": "function",
53 changes: 53 additions & 0 deletions integrations/llama_cpp/tests/test_chat_generator.py
@@ -723,6 +723,59 @@ def test_init_with_toolset(self, temperature_tool):
generator = LlamaCppChatGenerator(model="test_model.gguf", tools=toolset)
assert generator.tools == toolset

def test_init_with_mixed_tools(self, temperature_tool):
"""Test initialization with mixed Tool and Toolset objects."""

def population(city: str):
"""Get population for a given city."""
return f"The population of {city} is 2.2 million"

population_tool = create_tool_from_function(population)
toolset = Toolset([population_tool])

generator = LlamaCppChatGenerator(model="test_model.gguf", tools=[temperature_tool, toolset])
assert generator.tools == [temperature_tool, toolset]

def test_run_with_mixed_tools(self, temperature_tool):
"""Test run method with mixed Tool and Toolset objects."""

def population(city: str):
"""Get population for a given city."""
return f"The population of {city} is 2.2 million"

population_tool = create_tool_from_function(population)
toolset = Toolset([population_tool])

generator = LlamaCppChatGenerator(model="test_model.gguf")

# Mock the model
mock_model = MagicMock()
mock_response = {
"choices": [{"message": {"content": "Generated text"}, "index": 0, "finish_reason": "stop"}],
"id": "test_id",
"model": "test_model",
"created": 1234567890,
"usage": {"prompt_tokens": 10, "completion_tokens": 5},
}
mock_model.create_chat_completion.return_value = mock_response
generator._model = mock_model

generator.run(
messages=[ChatMessage.from_user("What's the weather in Paris and population of Berlin?")],
tools=[temperature_tool, toolset],
)

# Verify the model was called with the correct tools
mock_model.create_chat_completion.assert_called_once()
call_args = mock_model.create_chat_completion.call_args[1]
assert "tools" in call_args
assert len(call_args["tools"]) == 2 # Both tools should be flattened

# Verify tool names
tool_names = {tool["function"]["name"] for tool in call_args["tools"]}
assert "get_current_temperature" in tool_names
assert "population" in tool_names

def test_init_with_multimodal_params(self):
"""Test initialization with multimodal parameters."""
generator = LlamaCppChatGenerator(