diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml
index 17451abc1b..2856f41473 100644
--- a/integrations/llama_cpp/pyproject.toml
+++ b/integrations/llama_cpp/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.16.1", "llama-cpp-python>=0.2.87"]
+dependencies = ["haystack-ai>=2.19.0", "llama-cpp-python>=0.2.87"]
 
 # On macOS GitHub runners, we use a custom index to download pre-built wheels.
 # Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
index 80e3f08ef7..2ef77f0278 100644
--- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
+++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -16,10 +16,10 @@
 )
 from haystack.dataclasses.streaming_chunk import FinishReason, StreamingChunk, SyncStreamingCallbackT
 from haystack.tools import (
-    Tool,
-    Toolset,
+    ToolsType,
     _check_duplicate_tool_names,
     deserialize_tools_or_toolset_inplace,
+    flatten_tools_or_toolsets,
     serialize_tools_or_toolset,
 )
 from haystack.utils import deserialize_callable, serialize_callable
@@ -196,7 +196,7 @@ def __init__(
         model_kwargs: Optional[Dict[str, Any]] = None,
         generation_kwargs: Optional[Dict[str, Any]] = None,
         *,
-        tools: Optional[Union[List[Tool], Toolset]] = None,
+        tools: Optional[ToolsType] = None,
         streaming_callback: Optional[StreamingCallbackT] = None,
         chat_handler_name: Optional[str] = None,
         model_clip_path: Optional[str] = None,
@@ -215,8 +215,8 @@ def __init__(
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of tools or a Toolset for which the model can prepare calls.
-            This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
         :param chat_handler_name: Name of the chat handler for multimodal models. Common options include:
             "Llava16ChatHandler", "MoondreamChatHandler", "Qwen25VLChatHandler".
@@ -235,7 +235,7 @@ def __init__(
         model_kwargs.setdefault("n_ctx", n_ctx)
         model_kwargs.setdefault("n_batch", n_batch)
 
-        _check_duplicate_tool_names(list(tools or []))
+        _check_duplicate_tool_names(flatten_tools_or_toolsets(tools))
 
         handler: Optional[Llava15ChatHandler] = None
         # Validate multimodal requirements
@@ -325,7 +325,7 @@ def run(
         messages: List[ChatMessage],
         generation_kwargs: Optional[Dict[str, Any]] = None,
         *,
-        tools: Optional[Union[List[Tool], Toolset]] = None,
+        tools: Optional[ToolsType] = None,
         streaming_callback: Optional[StreamingCallbackT] = None,
     ) -> Dict[str, List[ChatMessage]]:
         """
@@ -337,8 +337,9 @@ def run(
             For more information on the available kwargs, see
             [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
         :param tools:
-            A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
-            parameter set during component initialization.
+            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+            Each tool should have a unique name. If set, it will override the `tools` parameter set during
+            component initialization.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
             If set, it will override the `streaming_callback` parameter set during component initialization.
         :returns: A dictionary with the following keys:
@@ -355,13 +356,12 @@ def run(
         formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]
 
         tools = tools or self.tools
-        if isinstance(tools, Toolset):
-            tools = list(tools)
-        _check_duplicate_tool_names(tools)
+        flattened_tools = flatten_tools_or_toolsets(tools)
+        _check_duplicate_tool_names(flattened_tools)
 
         llamacpp_tools: List[ChatCompletionTool] = []
-        if tools:
-            for t in tools:
+        if flattened_tools:
+            for t in flattened_tools:
                 llamacpp_tools.append(
                     {
                         "type": "function",
diff --git a/integrations/llama_cpp/tests/test_chat_generator.py b/integrations/llama_cpp/tests/test_chat_generator.py
index 36fb7cbe0f..d574de53b8 100644
--- a/integrations/llama_cpp/tests/test_chat_generator.py
+++ b/integrations/llama_cpp/tests/test_chat_generator.py
@@ -723,6 +723,59 @@ def test_init_with_toolset(self, temperature_tool):
         generator = LlamaCppChatGenerator(model="test_model.gguf", tools=toolset)
         assert generator.tools == toolset
 
+    def test_init_with_mixed_tools(self, temperature_tool):
+        """Test initialization with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf", tools=[temperature_tool, toolset])
+        assert generator.tools == [temperature_tool, toolset]
+
+    def test_run_with_mixed_tools(self, temperature_tool):
+        """Test run method with mixed Tool and Toolset objects."""
+
+        def population(city: str):
+            """Get population for a given city."""
+            return f"The population of {city} is 2.2 million"
+
+        population_tool = create_tool_from_function(population)
+        toolset = Toolset([population_tool])
+
+        generator = LlamaCppChatGenerator(model="test_model.gguf")
+
+        # Mock the model
+        mock_model = MagicMock()
+        mock_response = {
+            "choices": [{"message": {"content": "Generated text"}, "index": 0, "finish_reason": "stop"}],
+            "id": "test_id",
+            "model": "test_model",
+            "created": 1234567890,
+            "usage": {"prompt_tokens": 10, "completion_tokens": 5},
+        }
+        mock_model.create_chat_completion.return_value = mock_response
+        generator._model = mock_model
+
+        generator.run(
+            messages=[ChatMessage.from_user("What's the weather in Paris and population of Berlin?")],
+            tools=[temperature_tool, toolset],
+        )
+
+        # Verify the model was called with the correct tools
+        mock_model.create_chat_completion.assert_called_once()
+        call_args = mock_model.create_chat_completion.call_args[1]
+        assert "tools" in call_args
+        assert len(call_args["tools"]) == 2  # Both tools should be flattened
+
+        # Verify tool names
+        tool_names = {tool["function"]["name"] for tool in call_args["tools"]}
+        assert "get_current_temperature" in tool_names
+        assert "population" in tool_names
+
     def test_init_with_multimodal_params(self):
         """Test initialization with multimodal parameters."""
         generator = LlamaCppChatGenerator(
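
For context, here is a minimal usage sketch of what this change enables: `tools` now accepts any mix of `Tool` and `Toolset` objects (the `ToolsType` alias), and the component flattens them before building the llama.cpp tool schema. The model path and the example tool functions below are placeholders, not part of this PR.

```python
from haystack.dataclasses import ChatMessage
from haystack.tools import Toolset, create_tool_from_function

from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator


def get_current_temperature(city: str) -> str:
    """Get the current temperature for a given city."""
    return f"The temperature in {city} is 22 degrees Celsius"


def population(city: str) -> str:
    """Get the population of a given city."""
    return f"The population of {city} is 2.2 million"


# A plain Tool and a Toolset can now be passed together in one list.
temperature_tool = create_tool_from_function(get_current_temperature)
population_toolset = Toolset([create_tool_from_function(population)])

generator = LlamaCppChatGenerator(
    model="model.gguf",  # placeholder: path to a local GGUF model
    tools=[temperature_tool, population_toolset],
)
generator.warm_up()  # load the model before the first run

result = generator.run(messages=[ChatMessage.from_user("What's the weather in Paris?")])
print(result["replies"][0].text)
```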