From b799c93f51c55530f492bb78a4bcc87afae27e63 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Fri, 8 Aug 2025 09:36:34 +0200 Subject: [PATCH 01/14] save --- src/llm/io_processing/hermes3/tool_parser.cpp | 2 ++ src/llm/servable.cpp | 1 + src/llm/servable_initializer.cpp | 10 ++++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp index f8568b5980..955918c414 100644 --- a/src/llm/io_processing/hermes3/tool_parser.cpp +++ b/src/llm/io_processing/hermes3/tool_parser.cpp @@ -32,6 +32,8 @@ namespace ovms { void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + SPDLOG_INFO("AAAAAAAAAAAAAAAA:\n[{}]", parsedOutput.content); + std::vector tools; size_t pos = 0; size_t firstToolCallPos = parsedOutput.content.find("", pos); diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index e364fa5768..7673cec4ac 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -107,6 +107,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrapiHandler->getChatHistory(); constexpr bool add_generation_prompt = true; // confirm it should be hardcoded diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index a957d2a953..289dc27dc6 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -60,6 +60,8 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr Date: Mon, 11 Aug 2025 12:11:09 +0200 Subject: [PATCH 02/14] mistral --- src/llm/BUILD | 2 + src/llm/io_processing/mistral/tool_parser.cpp | 94 +++++++++++++++++++ src/llm/io_processing/mistral/tool_parser.hpp | 54 +++++++++++ src/llm/io_processing/output_parser.cpp | 3 + 4 files changed, 153 insertions(+) create mode 100644 src/llm/io_processing/mistral/tool_parser.cpp create mode 100644 src/llm/io_processing/mistral/tool_parser.hpp diff --git a/src/llm/BUILD b/src/llm/BUILD index acff1faae9..26bf208a95 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -114,6 +114,7 @@ cc_library( "io_processing/hermes3/tool_parser.hpp", "io_processing/llama3/tool_parser.hpp", "io_processing/phi4/tool_parser.hpp", + "io_processing/mistral/tool_parser.hpp", "io_processing/qwen3/reasoning_parser.hpp", "io_processing/output_parser.hpp", "io_processing/partial_json_builder.hpp", @@ -122,6 +123,7 @@ cc_library( "io_processing/hermes3/tool_parser.cpp", "io_processing/llama3/tool_parser.cpp", "io_processing/phi4/tool_parser.cpp", + "io_processing/mistral/tool_parser.cpp", "io_processing/qwen3/reasoning_parser.cpp", "io_processing/output_parser.cpp", "io_processing/partial_json_builder.cpp", diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp new file mode 100644 index 0000000000..685be9ced2 --- /dev/null +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -0,0 +1,94 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable : 6313) +#include +#include +#include +#pragma warning(pop) + +#include "../../../logging.hpp" +#include "tool_parser.hpp" +#include "../utils.hpp" + +namespace ovms { + +void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + std::vector tools; + + SPDLOG_INFO("DDDDDDDDD\n[{}]", parsedOutput.content); + + if (parsedOutput.content.empty()) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); + return; + } + + + // Mistral with vLLM template produces tool calls in the format: + // [{"name": [function name], "arguments": [function arguments as JSON]}, ...] + if (parsedOutput.content[0] == '[') { + // Extract the content before the tools part + // parsedOutput.content = decoded.substr(0, toolsStartPos); + // Extract the tools part, assuming it's all the remaining content after "[" + //std::string toolsString = parsedOutput.content.substr(toolsStartPos + toolsStartString.length()); + rapidjson::Document toolsDoc; + toolsDoc.Parse(parsedOutput.content.c_str()); + if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) { + for (auto& toolVal : toolsDoc.GetArray()) { + if (!toolVal.IsObject()) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object"); + continue; + } + ToolCall toolCall; + toolCall.id = generateRandomId(); // Generate a random ID for the tool call + if (toolVal.HasMember("name") && toolVal["name"].IsString()) { + toolCall.name = toolVal["name"].GetString(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field"); + continue; + } + + if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) { + rapidjson::StringBuffer sb; + rapidjson::Writer toolWriter(sb); + toolVal["arguments"].Accept(toolWriter); + toolCall.arguments = sb.GetString(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object"); + continue; + } + parsedOutput.toolCalls.push_back(toolCall); + } + // Remove the tools part from the content + parsedOutput.content.clear(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array"); + } + } +} + +std::optional MistralToolParser::parseChunk(const std::string& chunk) { + // Not implemented + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "MistralToolParser::parseChunk is not implemented"); + return std::nullopt; +} +} // namespace ovms diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp new file mode 100644 index 0000000000..21bb09b6c5 --- /dev/null +++ b/src/llm/io_processing/mistral/tool_parser.hpp @@ -0,0 +1,54 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable : 6313) +#include +#include +#include +#pragma warning(pop) + +#include "../base_output_parser.hpp" + +namespace ovms { +class MistralToolParser : public BaseOutputParser { +protected: + // Tools calls are expected to be the last part of the content, so we do not specify an end tag. + const std::string parsingStartTag = "functools"; + const std::string parsingEndTag = ""; + +public: + MistralToolParser() = delete; + explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) : + BaseOutputParser(tokenizer) {} + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk) override; + const std::string& getParsingStartTag() const override { + return parsingStartTag; + } + // Tools calls are expected to be the last part of the content, so we do not specify an end tag. + const std::string& getParsingEndTag() const override { + return parsingEndTag; + } +}; +} // namespace ovms diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index 115c1d9019..765266a44c 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -19,6 +19,7 @@ #include "llama3/tool_parser.hpp" #include "hermes3/tool_parser.hpp" #include "phi4/tool_parser.hpp" +#include "mistral/tool_parser.hpp" #include "qwen3/reasoning_parser.hpp" namespace ovms { @@ -46,6 +47,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to toolParser = std::make_unique(tokenizer); } else if (toolParserName == "phi4") { toolParser = std::make_unique(tokenizer); + } else if (toolParserName == "mistral") { + toolParser = std::make_unique(tokenizer); } else if (!toolParserName.empty()) { throw std::runtime_error("Unsupported tool parser: " + toolParserName); } From 5eed9cae9c55517490b9a12edf0756f65f8564f7 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 11 Aug 2025 12:33:09 +0200 Subject: [PATCH 03/14] save --- demos/common/export_models/export_model.py | 42 +++++++++++++++++-- src/llm/io_processing/mistral/tool_parser.cpp | 30 ++++++++++++- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index e892bd6a91..405df1bd23 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -376,6 +376,36 @@ def add_servable_to_config(config_path, mediapipe_name, base_path): json.dump(config_data, config_file, indent=4) print("Added servable to config file", config_path) +def apply_template_patches(template_content, model_type): + """Apply model-specific patches to the downloaded template.""" + patches = { + "phi4": [ + # Example patches for phi4 + ], + "llama3": [ + # Example patches for llama3 + ], + "hermes3": [ + # Hermes3 specific patches + ], + "mistral": [ + # Mistral specific patches - force model to produce single JSON array of tool calls + ( + "If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays.", + "If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array." + ), + ], + "qwen3": [ + # Qwen3 patches (if needed) + ] + } + + if model_type in patches: + for old_pattern, new_pattern in patches[model_type]: + template_content = template_content.replace(old_pattern, new_pattern) + + return template_content + def export_text_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path): model_path = "./" ### Export model @@ -464,6 +494,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name "phi4": "tool_chat_template_phi4_mini.jinja", "llama3": "tool_chat_template_llama3.1_json.jinja", "hermes3": "tool_chat_template_hermes.jinja", + "mistral": "tool_chat_template_mistral_parallel.jinja", "qwen3": None } template_name = template_mapping[task_parameters.get("tools_model_type")] @@ -472,9 +503,14 @@ def export_text_generation_model(model_repository_path, source_model, model_name import requests response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name) print(response.raise_for_status()) - with open(template_path, "wb") as f: - f.write(response.content) - print(f"Downloaded tuned chat template to {template_path}") + + # Apply patches to the template content + template_content = response.content.decode('utf-8') + template_content = apply_template_patches(template_content, task_parameters.get("tools_model_type")) + + with open(template_path, "w") as f: + f.write(template_content) + print(f"Downloaded and patched tuned chat template to {template_path}") add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path))) diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index 685be9ced2..7432a20074 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -42,16 +42,42 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector Date: Mon, 11 Aug 2025 12:47:04 +0200 Subject: [PATCH 04/14] save --- demos/common/export_models/export_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 405df1bd23..62a37a0cbc 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -52,7 +52,7 @@ def add_common_arguments(parser): 'Not effective if target device is not NPU', dest='max_prompt_len') parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding') parser_text.add_argument('--reasoning_parser', choices=["qwen3"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') -parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') +parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3","mistral"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. Requires setting tool_parser', dest='enable_tool_guided_generation') parser_embeddings = subparsers.add_parser('embeddings', help='[deprecated] export model for embeddings endpoint with models split into separate, versioned directories') @@ -488,7 +488,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name f.write(graph_content) print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt'))) - if template_parameters.get("tools_model_type") is not None: + if template_parameters.get("tool_parser") is not None: print("Adding tuned chat template") template_mapping = { "phi4": "tool_chat_template_phi4_mini.jinja", @@ -497,7 +497,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name "mistral": "tool_chat_template_mistral_parallel.jinja", "qwen3": None } - template_name = template_mapping[task_parameters.get("tools_model_type")] + template_name = template_mapping[task_parameters.get("tool_parser")] if template_name is not None: template_path = os.path.join(model_repository_path, model_name, "template.jinja") import requests @@ -506,7 +506,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name # Apply patches to the template content template_content = response.content.decode('utf-8') - template_content = apply_template_patches(template_content, task_parameters.get("tools_model_type")) + template_content = apply_template_patches(template_content, task_parameters.get("tool_parser")) with open(template_path, "w") as f: f.write(template_content) From eb9d1e6581bf85bd516ae54702747b61bc433d14 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 11 Aug 2025 14:32:34 +0200 Subject: [PATCH 05/14] save --- src/llm/io_processing/hermes3/tool_parser.cpp | 2 - src/llm/io_processing/mistral/tool_parser.cpp | 42 ++++++++----------- src/llm/io_processing/mistral/tool_parser.hpp | 7 +--- src/llm/servable.cpp | 1 - src/llm/servable_initializer.cpp | 1 + 5 files changed, 21 insertions(+), 32 deletions(-) diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp index 955918c414..f8568b5980 100644 --- a/src/llm/io_processing/hermes3/tool_parser.cpp +++ b/src/llm/io_processing/hermes3/tool_parser.cpp @@ -32,8 +32,6 @@ namespace ovms { void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { - SPDLOG_INFO("AAAAAAAAAAAAAAAA:\n[{}]", parsedOutput.content); - std::vector tools; size_t pos = 0; size_t firstToolCallPos = parsedOutput.content.find("", pos); diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index 7432a20074..a06743c925 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -32,52 +32,46 @@ namespace ovms { +static std::string::iterator skipToFirstNonWhitespaceCharacter(std::string::iterator it, const std::string::iterator& end) { + while (it != end && std::isspace(*it)) { + ++it; + } + return it; +} + void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { std::vector tools; - SPDLOG_INFO("DDDDDDDDD\n[{}]", parsedOutput.content); - if (parsedOutput.content.empty()) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); return; } + // Mistral with vLLM template produces tool calls in the format: + // [{"name": [function name], "arguments": [function arguments as JSON]}, ...] + // So we ensure if first non white characters are [ and { bool isToolGenerated = false; - // ignore first whitespaces auto begin = parsedOutput.content.begin(); - while (begin != parsedOutput.content.end() && std::isspace(*begin)) { - ++begin; - } - + begin = skipToFirstNonWhitespaceCharacter(begin, parsedOutput.content.end()); if (begin != parsedOutput.content.end() && *begin == '[') { - // If the content starts with '[', it indicates that tool calls might be present. - //isToolGenerated = true; - - while (begin != parsedOutput.content.end() && std::isspace(*begin)) { - ++begin; - } - + begin = skipToFirstNonWhitespaceCharacter(begin + 1, parsedOutput.content.end()); if (begin != parsedOutput.content.end() && *begin == '{') { + // If the content starts with '[{', it indicates that tool calls are present. isToolGenerated = true; } else { - // not a tool call + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model"); } } else { - // not a tool call + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model"); } - // Mistral with vLLM template produces tool calls in the format: - // [{"name": [function name], "arguments": [function arguments as JSON]}, ...] - // So we ensure if first non white characters are [ and { + if (isToolGenerated) { // Extract the content before the tools part // parsedOutput.content = decoded.substr(0, toolsStartPos); - // Extract the tools part, assuming it's all the remaining content after "[" - //std::string toolsString = parsedOutput.content.substr(toolsStartPos + toolsStartString.length()); + // Extract the tools part, assuming it's all the remaining content after "[{" rapidjson::Document toolsDoc; - std::string content = parsedOutput.content.substr(begin - parsedOutput.content.begin()); - //toolsDoc.Parse(parsedOutput.content.c_str()); - toolsDoc.Parse(content.c_str()); + toolsDoc.Parse(parsedOutput.content.c_str()); if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) { for (auto& toolVal : toolsDoc.GetArray()) { if (!toolVal.IsObject()) { diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp index 21bb09b6c5..8c2996d620 100644 --- a/src/llm/io_processing/mistral/tool_parser.hpp +++ b/src/llm/io_processing/mistral/tool_parser.hpp @@ -31,11 +31,6 @@ namespace ovms { class MistralToolParser : public BaseOutputParser { -protected: - // Tools calls are expected to be the last part of the content, so we do not specify an end tag. - const std::string parsingStartTag = "functools"; - const std::string parsingEndTag = ""; - public: MistralToolParser() = delete; explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) : @@ -44,10 +39,12 @@ class MistralToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk) override; const std::string& getParsingStartTag() const override { + static const std::string parsingStartTag = "[{"; return parsingStartTag; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. const std::string& getParsingEndTag() const override { + static const std::string parsingEndTag = "[{"; return parsingEndTag; } }; diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index 7673cec4ac..e364fa5768 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -107,7 +107,6 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrapiHandler->getChatHistory(); constexpr bool add_generation_prompt = true; // confirm it should be hardcoded diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index a7fc0dec67..ae2ff5f6ef 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -70,6 +70,7 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr Date: Mon, 11 Aug 2025 14:51:17 +0200 Subject: [PATCH 06/14] save --- demos/common/export_models/export_model.py | 74 +++++---------- .../tool_chat_template_mistral_parallel.jinja | 93 +++++++++++++++++++ 2 files changed, 117 insertions(+), 50 deletions(-) create mode 100644 demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 62a37a0cbc..2b33262c91 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -376,36 +376,6 @@ def add_servable_to_config(config_path, mediapipe_name, base_path): json.dump(config_data, config_file, indent=4) print("Added servable to config file", config_path) -def apply_template_patches(template_content, model_type): - """Apply model-specific patches to the downloaded template.""" - patches = { - "phi4": [ - # Example patches for phi4 - ], - "llama3": [ - # Example patches for llama3 - ], - "hermes3": [ - # Hermes3 specific patches - ], - "mistral": [ - # Mistral specific patches - force model to produce single JSON array of tool calls - ( - "If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays.", - "If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array." - ), - ], - "qwen3": [ - # Qwen3 patches (if needed) - ] - } - - if model_type in patches: - for old_pattern, new_pattern in patches[model_type]: - template_content = template_content.replace(old_pattern, new_pattern) - - return template_content - def export_text_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path): model_path = "./" ### Export model @@ -490,27 +460,31 @@ def export_text_generation_model(model_repository_path, source_model, model_name if template_parameters.get("tool_parser") is not None: print("Adding tuned chat template") - template_mapping = { - "phi4": "tool_chat_template_phi4_mini.jinja", - "llama3": "tool_chat_template_llama3.1_json.jinja", - "hermes3": "tool_chat_template_hermes.jinja", - "mistral": "tool_chat_template_mistral_parallel.jinja", - "qwen3": None - } - template_name = template_mapping[task_parameters.get("tool_parser")] - if template_name is not None: + # Custom Templates + if template_parameters.get("tool_parser") == "mistral": template_path = os.path.join(model_repository_path, model_name, "template.jinja") - import requests - response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name) - print(response.raise_for_status()) - - # Apply patches to the template content - template_content = response.content.decode('utf-8') - template_content = apply_template_patches(template_content, task_parameters.get("tool_parser")) - - with open(template_path, "w") as f: - f.write(template_content) - print(f"Downloaded and patched tuned chat template to {template_path}") + with open(template_path, "wb") as f: + # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja + with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file: + content = template_file.read() + f.write(content) + print(f"Added tuned chat template to {template_path}") + else: # VLLM templates + template_mapping = { + "phi4": "tool_chat_template_phi4_mini.jinja", + "llama3": "tool_chat_template_llama3.1_json.jinja", + "hermes3": "tool_chat_template_hermes.jinja", + "qwen3": None + } + template_name = template_mapping[task_parameters.get("tool_parser")] + if template_name is not None: + template_path = os.path.join(model_repository_path, model_name, "template.jinja") + import requests + response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name) + print(response.raise_for_status()) + with open(template_path, "wb") as f: + f.write(response.content) + print(f"Downloaded tuned chat template to {template_path}") add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path))) diff --git a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja new file mode 100644 index 0000000000..aec9f5ee0c --- /dev/null +++ b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja @@ -0,0 +1,93 @@ +{%- if messages[0]["role"] == "system" %} + {%- set system_message = messages[0]["content"] %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set loop_messages = messages %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- elif tools is not none %} + {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %} + {%- if system_message is defined %} + {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %} + {%- else %} + {%- set system_message = parallel_tool_prompt %} + {%- endif %} +{%- endif %} +{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} + +{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %} + {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %} + {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif %} +{%- endfor %} + +{{- bos_token }} +{%- for message in loop_messages %} + {%- if message["role"] == "user" %} + {%- if tools is not none and (message == user_messages[-1]) %} + {{- "[AVAILABLE_TOOLS] [" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- '{"type": "function", "function": {' }} + {%- for key, val in tool.items() if key != "return" %} + {%- if val is string %} + {{- '"' + key + '": "' + val + '"' }} + {%- else %} + {{- '"' + key + '": ' + val|tojson }} + {%- endif %} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- "}}" }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" }} + {%- endif %} + {%- endfor %} + {{- "[/AVAILABLE_TOOLS]" }} + {%- endif %} + {%- if loop.last and system_message is defined %} + {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }} + {%- else %} + {{- "[INST] " + message["content"] + "[/INST]" }} + {%- endif %} + {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %} + {%- if message.tool_calls is defined %} + {%- set tool_calls = message.tool_calls %} + {%- else %} + {%- set tool_calls = message.content %} + {%- endif %} + {{- "[TOOL_CALLS] [" }} + {%- for tool_call in tool_calls %} + {%- set out = tool_call.function|tojson %} + {{- out[:-1] }} + {%- if not tool_call.id is defined or tool_call.id|length < 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }} + {%- endif %} + {{- ', "id": "' + tool_call.id[-9:] + '"}' }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" + eos_token }} + {%- endif %} + {%- endfor %} + {%- elif message["role"] == "assistant" %} + {{- " " + message["content"] + eos_token }} + {%- elif message["role"] == "tool_results" or message["role"] == "tool" %} + {%- if message.content is defined and message.content.content is defined %} + {%- set content = message.content.content %} + {%- else %} + {%- set content = message.content %} + {%- endif %} + {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }} + {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }} + {%- endif %} + {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }} + {%- else %} + {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} + {%- endif %} +{%- endfor %} From fa66a4118acf1de66cb6175b230504039f5c9159 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 11 Aug 2025 14:57:09 +0200 Subject: [PATCH 07/14] save --- demos/common/export_models/export_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 2b33262c91..1de631dfbb 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -463,7 +463,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name # Custom Templates if template_parameters.get("tool_parser") == "mistral": template_path = os.path.join(model_repository_path, model_name, "template.jinja") - with open(template_path, "wb") as f: + with open(template_path, "w") as f: # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file: content = template_file.read() From ad2cb60bb638a18693d7f8a7b73ac4392f78e4c5 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 11 Aug 2025 15:32:32 +0200 Subject: [PATCH 08/14] unit test --- src/BUILD | 1 + .../mistral_output_parser_test.cpp | 99 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 src/test/llm/output_parsers/mistral_output_parser_test.cpp diff --git a/src/BUILD b/src/BUILD index 3080fffb74..6829e9008f 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2511,6 +2511,7 @@ cc_test( "test/llm/output_parsers/qwen3_output_parser_test.cpp", "test/llm/output_parsers/hermes3_output_parser_test.cpp", "test/llm/output_parsers/phi4_output_parser_test.cpp", + "test/llm/output_parsers/mistral_output_parser_test.cpp", "test/llm/output_parsers/partial_json_builder_test.cpp", ], "//:disable_python" : [], diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp new file mode 100644 index 0000000000..2154c3097c --- /dev/null +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -0,0 +1,99 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include + +#include "../../../llm/io_processing/base_output_parser.hpp" +#include "../../../llm/io_processing/output_parser.hpp" +#include "../../../llm/io_processing/mistral/tool_parser.hpp" +#include "../../test_utils.hpp" + +using namespace ovms; + +class MistralOutputParserTest : public ::testing::Test { +protected: + std::unique_ptr outputParser; + ov::genai::Tokenizer dummy_tokenizer; // not used + + void SetUp() override { + outputParser = std::make_unique(dummy_tokenizer); + } +}; + +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) { + ParsedOutput parsedOutput; + parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]"; + outputParser->parse(parsedOutput, {}); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls +} + +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCalls) { + ParsedOutput parsedOutput; + parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}},{\"name\": \"get_humidity\", \"arguments\": {\"location\": \"Madrid\"}}]"; + outputParser->parse(parsedOutput, {}); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 2); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); + EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls +} + +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsWithWhitespaces) { + ParsedOutput parsedOutput; + parsedOutput.content = R"( + [ + {"name": "get_weather", "arguments": {"location": "Paris"}}, + {"name": "get_humidity", "arguments": {"location": "Madrid"}} + ])"; + outputParser->parse(parsedOutput, {}); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 2); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); + EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls +} + +TEST_F(MistralOutputParserTest, Negative_ParseToolCallOutput) { + ParsedOutput parsedOutput; + std::string content = R"( + non whitespace [ + {"name": "get_weather", "arguments": {"location": "Paris"}}, + {"name": "get_humidity", "arguments": {"location": "Madrid"}} + ])"; + parsedOutput.content = content; + outputParser->parse(parsedOutput, {}); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + ASSERT_EQ(parsedOutput.content, content); // Content should remain unchanged +} From 5f02112805ccb4f3cad81753d5a4a441bb9a3ecb Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Thu, 14 Aug 2025 15:56:22 +0200 Subject: [PATCH 09/14] save --- prepare_llm_models.sh | 11 +- .../mistral_output_parser_test.cpp | 158 ++++++++++++------ 2 files changed, 121 insertions(+), 48 deletions(-) diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index cb3ffdd18f..b74980f43e 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -30,8 +30,9 @@ QWEN3_MODEL="Qwen/Qwen3-8B" LLAMA3_MODEL="meta-llama/Llama-3.1-8B-Instruct" HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B" PHI4_MODEL="microsoft/Phi-4-mini-instruct" +MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" -MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov") +MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$MISTRAL_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov") all_exist=true for model in "${MODELS[@]}"; do @@ -126,3 +127,11 @@ else mkdir -p $1/$PHI4_MODEL convert_tokenizer $PHI4_MODEL --with_detokenizer -o $1/$PHI4_MODEL fi + +if [ -d "$1/$MISTRAL_MODEL" ]; then + echo "Models directory $1/$MISTRAL_MODEL exists. Skipping downloading models." +else + mkdir -p $1/$MISTRAL_MODEL + convert_tokenizer $MISTRAL_MODEL --with_detokenizer -o $1/$MISTRAL_MODEL +fi + diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index 2154c3097c..b4e9be157d 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -20,80 +20,144 @@ #include "../../../llm/io_processing/base_output_parser.hpp" #include "../../../llm/io_processing/output_parser.hpp" -#include "../../../llm/io_processing/mistral/tool_parser.hpp" #include "../../test_utils.hpp" using namespace ovms; +#ifdef _WIN32 +const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\mistralai\\Mistral-7B-Instruct-v0.3"; +#else +// Hardcoded for usage in docker container +const std::string tokenizerPath = "/ovms/src/test/llm_testing/mistralai/Mistral-7B-Instruct-v0.3/"; +#endif + +static ov::genai::Tokenizer mistralTokenizer(tokenizerPath); + class MistralOutputParserTest : public ::testing::Test { protected: - std::unique_ptr outputParser; - ov::genai::Tokenizer dummy_tokenizer; // not used + std::unique_ptr outputParser; void SetUp() override { - outputParser = std::make_unique(dummy_tokenizer); + // For Phi4 model there is only tool parser available + outputParser = std::make_unique(mistralTokenizer, "mistral", ""); } }; TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) { - ParsedOutput parsedOutput; - parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]"; - outputParser->parse(parsedOutput, {}); + std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + SPDLOG_INFO("AAAAAAAAAAA: [{}]", generatedTokens); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 1); - EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); // Parser removes whitespaces, so we expect arguments value to be without spaces - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated - EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls } -TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCalls) { - ParsedOutput parsedOutput; - parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}},{\"name\": \"get_humidity\", \"arguments\": {\"location\": \"Madrid\"}}]"; - outputParser->parse(parsedOutput, {}); +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithThreeToolCalls) { + std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}," + "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}}," + "{\"name\": \"third_tool\", \"arguments\": {\"key\": \"value\"}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); - ASSERT_EQ(parsedOutput.toolCalls.size(), 2); - EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); - EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity"); + ASSERT_EQ(parsedOutput.toolCalls.size(), 3); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); // Parser removes whitespaces, so we expect arguments value to be without spaces - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); - EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + auto firstToolCallId = parsedOutput.toolCalls[0].id; + + EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}"); EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated - EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls + auto secondToolCallId = parsedOutput.toolCalls[1].id; + EXPECT_NE(firstToolCallId, secondToolCallId); // IDs should be different + + EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}"); + EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false); // ID should be generated + auto thirdToolCallId = parsedOutput.toolCalls[2].id; + EXPECT_NE(firstToolCallId, thirdToolCallId); // IDs should be different + EXPECT_NE(secondToolCallId, thirdToolCallId); // IDs should be different } -TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsWithWhitespaces) { - ParsedOutput parsedOutput; - parsedOutput.content = R"( - [ - {"name": "get_weather", "arguments": {"location": "Paris"}}, - {"name": "get_humidity", "arguments": {"location": "Madrid"}} - ])"; - outputParser->parse(parsedOutput, {}); - - ASSERT_EQ(parsedOutput.toolCalls.size(), 2); - EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather"); - EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity"); +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInvalid) { + std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}," + "{\"tool_name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}}," + "{\"name\": \"third_tool\", \"options\": {\"key\": \"value\"}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + // Only the first tool call is valid, the second one has an invalid name field and the third one has an invalid arguments + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); // Parser removes whitespaces, so we expect arguments value to be without spaces - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}"); - EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated - EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated - EXPECT_EQ(parsedOutput.content, ""); // Content should be empty after parsing tool calls + auto firstToolCallId = parsedOutput.toolCalls[0].id; } -TEST_F(MistralOutputParserTest, Negative_ParseToolCallOutput) { - ParsedOutput parsedOutput; - std::string content = R"( - non whitespace [ - {"name": "get_weather", "arguments": {"location": "Paris"}}, - {"name": "get_humidity", "arguments": {"location": "Madrid"}} - ])"; - parsedOutput.content = content; - outputParser->parse(parsedOutput, {}); - +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { + std::string input = "This is a regular model response without tool calls."; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); - ASSERT_EQ(parsedOutput.content, content); // Content should remain unchanged + EXPECT_EQ(parsedOutput.reasoning, ""); +} + +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { + std::string input = "This is a content part and next will be a tool call.\n\[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n"); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated +} +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsContentOnly) { + std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + // Content after 'functools' cannot be parsed as array of JSON objects, so it is treated as content + EXPECT_EQ(parsedOutput.content, "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); // No valid tool calls parsed +} + +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithArrayArguments) { + std::string input = "[TOOL_CALLS][{\"name\": \"extractLastTransactionId\", \"arguments\": { \"filepath\": \"/var/log/db.log\", \"status\": [\"completed\", \"failed\"], \"encoding\": \"utf-8\", \"processFunction\": \"processFunction\"}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "extractLastTransactionId"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated } From cca406b8baa34d9455f1de375db63847c6a85ba2 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 18 Aug 2025 12:46:10 +0200 Subject: [PATCH 10/14] skip special tokens false --- src/llm/io_processing/mistral/tool_parser.cpp | 47 +++++++++---------- src/llm/io_processing/mistral/tool_parser.hpp | 9 ++-- .../mistral_output_parser_test.cpp | 24 ++++++++-- 3 files changed, 46 insertions(+), 34 deletions(-) diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index a06743c925..de1c7f03d3 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -47,32 +47,30 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk) override; const std::string& getParsingStartTag() const override { - static const std::string parsingStartTag = "[{"; - return parsingStartTag; + return toolCallStartTag; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. const std::string& getParsingEndTag() const override { - static const std::string parsingEndTag = "[{"; - return parsingEndTag; + return toolCallEndTag; } }; } // namespace ovms diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index b4e9be157d..a9904bab61 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -122,7 +122,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { } TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { - std::string input = "This is a content part and next will be a tool call.\n\[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; + std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); @@ -135,13 +135,27 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated } -TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsContentOnly) { - std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"; +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) { + std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."; auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); - // Content after 'functools' cannot be parsed as array of JSON objects, so it is treated as content - EXPECT_EQ(parsedOutput.content, "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n This is a content part after tool call."); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated +} +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsContentOnly) { + std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"; + auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); + // Content after 'TOOL_CALLS' cannot be parsed as array of JSON objects, so it is treated as content + EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n [{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); // No valid tool calls parsed From 94597a124aceca2c25f76a53d5f81d126e54d7e6 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 18 Aug 2025 12:49:45 +0200 Subject: [PATCH 11/14] save --- .dockerignore | 1 + demos/common/export_models/export_model.py | 39 +++----- .../tool_chat_template_mistral_parallel.jinja | 93 ------------------- src/llm/io_processing/mistral/tool_parser.cpp | 8 -- .../mistral_output_parser_test.cpp | 1 - 5 files changed, 16 insertions(+), 126 deletions(-) delete mode 100644 demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja diff --git a/.dockerignore b/.dockerignore index 98c62c8c06..ab4fdc7ab2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,3 +4,4 @@ out demos/continuous_batching demos/embeddings +demos/common/export_models/models \ No newline at end of file diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 2593c507f6..921541ddb8 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -466,31 +466,22 @@ def export_text_generation_model(model_repository_path, source_model, model_name if template_parameters.get("tool_parser") is not None: print("Adding tuned chat template") - # Custom Templates - if template_parameters.get("tool_parser") == "mistral": + template_mapping = { + "phi4": "tool_chat_template_phi4_mini.jinja", + "llama3": "tool_chat_template_llama3.1_json.jinja", + "hermes3": "tool_chat_template_hermes.jinja", + "mistral": "tool_chat_template_mistral_parallel.jinja", + "qwen3": None + } + template_name = template_mapping[task_parameters.get("tool_parser")] + if template_name is not None: template_path = os.path.join(model_repository_path, model_name, "template.jinja") - with open(template_path, "w") as f: - # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja - with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file: - content = template_file.read() - f.write(content) - print(f"Added tuned chat template to {template_path}") - else: # VLLM templates - template_mapping = { - "phi4": "tool_chat_template_phi4_mini.jinja", - "llama3": "tool_chat_template_llama3.1_json.jinja", - "hermes3": "tool_chat_template_hermes.jinja", - "qwen3": None - } - template_name = template_mapping[task_parameters.get("tool_parser")] - if template_name is not None: - template_path = os.path.join(model_repository_path, model_name, "template.jinja") - import requests - response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name) - print(response.raise_for_status()) - with open(template_path, "wb") as f: - f.write(response.content) - print(f"Downloaded tuned chat template to {template_path}") + import requests + response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name) + print(response.raise_for_status()) + with open(template_path, "wb") as f: + f.write(response.content) + print(f"Downloaded tuned chat template to {template_path}") add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path))) diff --git a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja deleted file mode 100644 index aec9f5ee0c..0000000000 --- a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja +++ /dev/null @@ -1,93 +0,0 @@ -{%- if messages[0]["role"] == "system" %} - {%- set system_message = messages[0]["content"] %} - {%- set loop_messages = messages[1:] %} -{%- else %} - {%- set loop_messages = messages %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- elif tools is not none %} - {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %} - {%- if system_message is defined %} - {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %} - {%- else %} - {%- set system_message = parallel_tool_prompt %} - {%- endif %} -{%- endif %} -{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} - -{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %} - {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %} - {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} - {%- endif %} -{%- endfor %} - -{{- bos_token }} -{%- for message in loop_messages %} - {%- if message["role"] == "user" %} - {%- if tools is not none and (message == user_messages[-1]) %} - {{- "[AVAILABLE_TOOLS] [" }} - {%- for tool in tools %} - {%- set tool = tool.function %} - {{- '{"type": "function", "function": {' }} - {%- for key, val in tool.items() if key != "return" %} - {%- if val is string %} - {{- '"' + key + '": "' + val + '"' }} - {%- else %} - {{- '"' + key + '": ' + val|tojson }} - {%- endif %} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- "}}" }} - {%- if not loop.last %} - {{- ", " }} - {%- else %} - {{- "]" }} - {%- endif %} - {%- endfor %} - {{- "[/AVAILABLE_TOOLS]" }} - {%- endif %} - {%- if loop.last and system_message is defined %} - {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }} - {%- else %} - {{- "[INST] " + message["content"] + "[/INST]" }} - {%- endif %} - {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %} - {%- if message.tool_calls is defined %} - {%- set tool_calls = message.tool_calls %} - {%- else %} - {%- set tool_calls = message.content %} - {%- endif %} - {{- "[TOOL_CALLS] [" }} - {%- for tool_call in tool_calls %} - {%- set out = tool_call.function|tojson %} - {{- out[:-1] }} - {%- if not tool_call.id is defined or tool_call.id|length < 9 %} - {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }} - {%- endif %} - {{- ', "id": "' + tool_call.id[-9:] + '"}' }} - {%- if not loop.last %} - {{- ", " }} - {%- else %} - {{- "]" + eos_token }} - {%- endif %} - {%- endfor %} - {%- elif message["role"] == "assistant" %} - {{- " " + message["content"] + eos_token }} - {%- elif message["role"] == "tool_results" or message["role"] == "tool" %} - {%- if message.content is defined and message.content.content is defined %} - {%- set content = message.content.content %} - {%- else %} - {%- set content = message.content %} - {%- endif %} - {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }} - {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %} - {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }} - {%- endif %} - {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }} - {%- else %} - {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} - {%- endif %} -{%- endfor %} diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index de1c7f03d3..d945db1dca 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -32,13 +32,6 @@ namespace ovms { -static std::string::iterator skipToFirstNonWhitespaceCharacter(std::string::iterator it, const std::string::iterator& end) { - while (it != end && std::isspace(*it)) { - ++it; - } - return it; -} - void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { std::vector tools; @@ -58,7 +51,6 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector"; auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); - SPDLOG_INFO("AAAAAAAAAAA: [{}]", generatedTokens); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); EXPECT_EQ(parsedOutput.reasoning, ""); From 379bc6846ce63b20a7264a48960aeccb9a85b4fd Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 18 Aug 2025 13:11:59 +0200 Subject: [PATCH 12/14] save --- src/llm/io_processing/mistral/tool_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index d945db1dca..63507a8255 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -69,7 +69,6 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector Date: Tue, 19 Aug 2025 14:17:40 +0200 Subject: [PATCH 13/14] Milosz --- src/llm/io_processing/mistral/tool_parser.cpp | 81 ++++++++----------- src/llm/io_processing/mistral/tool_parser.hpp | 5 +- .../mistral_output_parser_test.cpp | 17 +--- 3 files changed, 40 insertions(+), 63 deletions(-) diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index 63507a8255..7313d019fb 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -35,63 +35,48 @@ namespace ovms { void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { std::vector tools; - if (parsedOutput.content.empty()) { + if (parsedOutput.content.empty() || generatedTokens.size() <= 0) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); return; } - std::string decoded = tokenizer.decode(generatedTokens, {ov::genai::skip_special_tokens(false)}); - - const std::string toolsStartString = getParsingStartTag(); - const std::string toolsStartEnd = getParsingEndTag(); - - size_t toolsStartPos = decoded.find(toolsStartString); - size_t toolsEndPos = decoded.find(toolsStartEnd); - - if (toolsStartPos != std::string::npos && toolsEndPos != std::string::npos) { - std::string remaining = decoded.substr(0, toolsStartPos) + decoded.substr(toolsEndPos + toolsStartEnd.length()); - - size_t toolsStartPos2 = remaining.find(toolsStartString); - size_t toolsEndPos2 = remaining.find(toolsStartEnd); - bool hasMoreSpecialTags = !(toolsStartPos2 == std::string::npos && toolsEndPos2 == std::string::npos); - - std::string toolsString = decoded.substr( - toolsStartPos + toolsStartString.length(), - toolsEndPos - toolsStartPos - toolsStartString.length()); + if (generatedTokens[0] != this->botTokenId) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array"); + return; + } - rapidjson::Document toolsDoc; - toolsDoc.Parse(toolsString.c_str()); + rapidjson::Document toolsDoc; + toolsDoc.Parse(parsedOutput.content.c_str()); - if (!toolsDoc.HasParseError() && toolsDoc.IsArray() && !hasMoreSpecialTags) { - for (auto& toolVal : toolsDoc.GetArray()) { - if (!toolVal.IsObject()) { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object"); - continue; - } - ToolCall toolCall; - if (toolVal.HasMember("name") && toolVal["name"].IsString()) { - toolCall.name = toolVal["name"].GetString(); - } else { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field"); - continue; - } + if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) { + for (auto& toolVal : toolsDoc.GetArray()) { + if (!toolVal.IsObject()) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object"); + continue; + } + ToolCall toolCall; + if (toolVal.HasMember("name") && toolVal["name"].IsString()) { + toolCall.name = toolVal["name"].GetString(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field"); + continue; + } - if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) { - rapidjson::StringBuffer sb; - rapidjson::Writer toolWriter(sb); - toolVal["arguments"].Accept(toolWriter); - toolCall.arguments = sb.GetString(); - } else { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object"); - continue; - } - toolCall.id = generateRandomId(); // Generate a random ID for the tool call - parsedOutput.toolCalls.push_back(toolCall); + if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) { + rapidjson::StringBuffer sb; + rapidjson::Writer toolWriter(sb); + toolVal["arguments"].Accept(toolWriter); + toolCall.arguments = sb.GetString(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object"); + continue; } - parsedOutput.content = remaining; - } else { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array"); + toolCall.id = generateRandomId(); // Generate a random ID for the tool call + parsedOutput.toolCalls.push_back(toolCall); } + parsedOutput.content.clear(); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array"); } } diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp index 28e81f7abf..fe715a890c 100644 --- a/src/llm/io_processing/mistral/tool_parser.hpp +++ b/src/llm/io_processing/mistral/tool_parser.hpp @@ -31,8 +31,7 @@ namespace ovms { class MistralToolParser : public BaseOutputParser { - const std::string toolCallStartTag = "[TOOL_CALLS]"; - const std::string toolCallEndTag = ""; + const int64_t botTokenId = 5; // [TOOL_CALLS] public: MistralToolParser() = delete; @@ -42,10 +41,12 @@ class MistralToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk) override; const std::string& getParsingStartTag() const override { + static const std::string toolCallStartTag = "[TOOL_CALLS]"; return toolCallStartTag; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. const std::string& getParsingEndTag() const override { + static const std::string toolCallEndTag = ""; return toolCallEndTag; } }; diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index 9d22b58571..8f5a3701aa 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -38,7 +38,6 @@ class MistralOutputParserTest : public ::testing::Test { std::unique_ptr outputParser; void SetUp() override { - // For Phi4 model there is only tool parser available outputParser = std::make_unique(mistralTokenizer, "mistral", ""); } }; @@ -125,28 +124,20 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n"); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); - ASSERT_EQ(parsedOutput.toolCalls.size(), 1); - EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); - // Parser removes whitespaces, so we expect arguments value to be without spaces - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); - EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."; auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n This is a content part after tool call."); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); EXPECT_EQ(parsedOutput.reasoning, ""); - ASSERT_EQ(parsedOutput.toolCalls.size(), 1); - EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); - // Parser removes whitespaces, so we expect arguments value to be without spaces - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); - EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsContentOnly) { std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"; From 360c5f3a9f966dd485b90661001669bc0f8ff59f Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Tue, 19 Aug 2025 16:29:58 +0200 Subject: [PATCH 14/14] prepare mistral tokenizer for unit tests on windows --- windows_prepare_llm_models.bat | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index 58521134b5..8ddcc7e96c 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -38,8 +38,9 @@ set "QWEN3_MODEL=Qwen/Qwen3-8B" set "LLAMA3_MODEL=meta-llama/Llama-3.1-8B-Instruct" set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B" set "PHI4_MODEL=microsoft/Phi-4-mini-instruct" +set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3" -set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL% +set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL% %MISTRAL_MODEL% set "ALL_EXIST=1" for %%M in ("%MODELS_LIST%") do ( @@ -152,4 +153,13 @@ if exist "%~1\%PHI4_MODEL%" ( if !errorlevel! neq 0 exit /b !errorlevel! ) +if exist "%~1\%MISTRAL_MODEL%" ( + echo Models directory %~1\%MISTRAL_MODEL% exists. Skipping downloading models. +) else ( + echo Downloading tokenizer and detokenizer for Mistral model to %~1\%MISTRAL_MODEL% directory. + mkdir "%~1\%MISTRAL_MODEL%" + convert_tokenizer "%MISTRAL_MODEL%" --with_detokenizer -o "%~1\%MISTRAL_MODEL%" + if !errorlevel! neq 0 exit /b !errorlevel! +) + endlocal