2 changes: 1 addition & 1 deletion integrations/llama_cpp/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.16.1", "llama-cpp-python>=0.2.87"]
dependencies = ["haystack-ai>=2.19.0", "llama-cpp-python>=0.2.87"]

# On macOS GitHub runners, we use a custom index to download pre-built wheels.
# Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
@@ -16,10 +16,10 @@
)
from haystack.dataclasses.streaming_chunk import FinishReason, StreamingChunk, SyncStreamingCallbackT
from haystack.tools import (
-Tool,
-Toolset,
+ToolsType,
_check_duplicate_tool_names,
deserialize_tools_or_toolset_inplace,
+flatten_tools_or_toolsets,
serialize_tools_or_toolset,
)
from haystack.utils import deserialize_callable, serialize_callable
@@ -196,7 +196,7 @@ def __init__(
model_kwargs: Optional[Dict[str, Any]] = None,
generation_kwargs: Optional[Dict[str, Any]] = None,
*,
-tools: Optional[Union[List[Tool], Toolset]] = None,
+tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
chat_handler_name: Optional[str] = None,
model_clip_path: Optional[str] = None,
@@ -215,8 +215,8 @@
For more information on the available kwargs, see
[llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
:param tools:
-A list of tools or a Toolset for which the model can prepare calls.
-This parameter can accept either a list of `Tool` objects or a `Toolset` instance.
+A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+Each tool should have a unique name.
:param streaming_callback: A callback function that is called when a new token is received from the stream.
:param chat_handler_name: Name of the chat handler for multimodal models.
Common options include: "Llava16ChatHandler", "MoondreamChatHandler", "Qwen25VLChatHandler".
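With this change, `tools` accepts a plain list of `Tool` objects, a single `Toolset`, or a mix of both in one list. A minimal usage sketch (the import path, model path, and tool functions are illustrative and not taken from this PR):

```python
from haystack.tools import Toolset, create_tool_from_function
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator


def get_current_temperature(city: str) -> str:
    """Get the current temperature for a given city."""
    return f"The temperature in {city} is 20 degrees Celsius"


def population(city: str) -> str:
    """Get the population for a given city."""
    return f"The population of {city} is 2.2 million"


temperature_tool = create_tool_from_function(get_current_temperature)
city_toolset = Toolset([create_tool_from_function(population)])

# A standalone Tool and a Toolset can now be passed together in the same list.
generator = LlamaCppChatGenerator(
    model="models/qwen2.5-7b-instruct-q4_k_m.gguf",  # illustrative model path
    tools=[temperature_tool, city_toolset],
)
```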
@@ -235,7 +235,7 @@ def __init__(
model_kwargs.setdefault("n_ctx", n_ctx)
model_kwargs.setdefault("n_batch", n_batch)

-_check_duplicate_tool_names(list(tools or []))
+_check_duplicate_tool_names(flatten_tools_or_toolsets(tools))

handler: Optional[Llava15ChatHandler] = None
# Validate multimodal requirements
@@ -325,7 +325,7 @@ def run(
messages: List[ChatMessage],
generation_kwargs: Optional[Dict[str, Any]] = None,
*,
-tools: Optional[Union[List[Tool], Toolset]] = None,
+tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
) -> Dict[str, List[ChatMessage]]:
"""
@@ -337,8 +337,9 @@
For more information on the available kwargs, see
[llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion).
:param tools:
-A list of tools or a Toolset for which the model can prepare calls. If set, it will override the `tools`
-parameter set during component initialization.
+A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
+Each tool should have a unique name. If set, it will override the `tools` parameter set during
+component initialization.
:param streaming_callback: A callback function that is called when a new token is received from the stream.
If set, it will override the `streaming_callback` parameter set during component initialization.
:returns: A dictionary with the following keys:
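Tools passed to `run` take precedence over those configured at initialization. A short sketch continuing the example above (it assumes the component's usual `replies` output key):

```python
from haystack.dataclasses import ChatMessage

# `tools` here overrides the tools set in __init__ for this call only.
result = generator.run(
    messages=[ChatMessage.from_user("What is the population of Berlin?")],
    tools=[create_tool_from_function(population)],
)
print(result["replies"][0])
```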
@@ -355,13 +356,12 @@ def run(
formatted_messages = [_convert_message_to_llamacpp_format(msg) for msg in messages]

tools = tools or self.tools
-if isinstance(tools, Toolset):
-    tools = list(tools)
-_check_duplicate_tool_names(tools)
+flattened_tools = flatten_tools_or_toolsets(tools)
+_check_duplicate_tool_names(flattened_tools)

llamacpp_tools: List[ChatCompletionTool] = []
-if tools:
-    for t in tools:
+if flattened_tools:
+    for t in flattened_tools:
llamacpp_tools.append(
{
"type": "function",
53 changes: 53 additions & 0 deletions integrations/llama_cpp/tests/test_chat_generator.py
@@ -723,6 +723,59 @@ def test_init_with_toolset(self, temperature_tool):
generator = LlamaCppChatGenerator(model="test_model.gguf", tools=toolset)
assert generator.tools == toolset

def test_init_with_mixed_tools(self, temperature_tool):
"""Test initialization with mixed Tool and Toolset objects."""

def population(city: str):
"""Get population for a given city."""
return f"The population of {city} is 2.2 million"

population_tool = create_tool_from_function(population)
toolset = Toolset([population_tool])

generator = LlamaCppChatGenerator(model="test_model.gguf", tools=[temperature_tool, toolset])
assert generator.tools == [temperature_tool, toolset]

def test_run_with_mixed_tools(self, temperature_tool):
"""Test run method with mixed Tool and Toolset objects."""

def population(city: str):
"""Get population for a given city."""
return f"The population of {city} is 2.2 million"

population_tool = create_tool_from_function(population)
toolset = Toolset([population_tool])

generator = LlamaCppChatGenerator(model="test_model.gguf")

# Mock the model
mock_model = MagicMock()
mock_response = {
"choices": [{"message": {"content": "Generated text"}, "index": 0, "finish_reason": "stop"}],
"id": "test_id",
"model": "test_model",
"created": 1234567890,
"usage": {"prompt_tokens": 10, "completion_tokens": 5},
}
mock_model.create_chat_completion.return_value = mock_response
generator._model = mock_model

generator.run(
messages=[ChatMessage.from_user("What's the weather in Paris and population of Berlin?")],
tools=[temperature_tool, toolset],
)

# Verify the model was called with the correct tools
mock_model.create_chat_completion.assert_called_once()
call_args = mock_model.create_chat_completion.call_args[1]
assert "tools" in call_args
assert len(call_args["tools"]) == 2 # Both tools should be flattened

# Verify tool names
tool_names = {tool["function"]["name"] for tool in call_args["tools"]}
assert "get_current_temperature" in tool_names
assert "population" in tool_names

def test_init_with_multimodal_params(self):
"""Test initialization with multimodal parameters."""
generator = LlamaCppChatGenerator(