From b799c93f51c55530f492bb78a4bcc87afae27e63 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Fri, 8 Aug 2025 09:36:34 +0200
Subject: [PATCH 01/14] save

---
 src/llm/io_processing/hermes3/tool_parser.cpp |  2 ++
 src/llm/servable.cpp                          |  1 +
 src/llm/servable_initializer.cpp              | 10 ++++++++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp
index f8568b5980..955918c414 100644
--- a/src/llm/io_processing/hermes3/tool_parser.cpp
+++ b/src/llm/io_processing/hermes3/tool_parser.cpp
@@ -32,6 +32,8 @@
 namespace ovms {
 
 void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
+    SPDLOG_INFO("AAAAAAAAAAAAAAAA:\n[{}]", parsedOutput.content);
+
     std::vector<std::string> tools;
     size_t pos = 0;
     size_t firstToolCallPos = parsedOutput.content.find("<tool_call>", pos);
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index e364fa5768..7673cec4ac 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -107,6 +107,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptr<GenAiServableExecution
         if (!success) {
             return absl::Status(absl::StatusCode::kInvalidArgument, inputText);
         }
+        SPDLOG_INFO("BBBBBBBBBBBBB:\n[{}]", inputText);
 #else
         ov::genai::ChatHistory& chatHistory = executionContext->apiHandler->getChatHistory();
         constexpr bool add_generation_prompt = true;  // confirm it should be hardcoded
diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp
index a957d2a953..289dc27dc6 100644
--- a/src/llm/servable_initializer.cpp
+++ b/src/llm/servable_initializer.cpp
@@ -60,6 +60,8 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
             global json
             import json
             from pathlib import Path
+            global datetime
+            import datetime
 
             global jinja2
             import jinja2
@@ -67,7 +69,10 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
 
             def raise_exception(message):
                 raise jinja2.exceptions.TemplateError(message)
-
+            
+            def strftime_now(format):
+                return datetime.datetime.now().strftime(format)
+            
             # Default chat template accepts only single message and outputs only it's 'content'
             # effectively turning it into a regular prompt. 
             default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}"
@@ -85,7 +90,8 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
             template_loader = jinja2.FileSystemLoader(searchpath=templates_directory)
             jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True, loader=template_loader)
             jinja_env.policies["json.dumps_kwargs"]["ensure_ascii"] = False
-            jinja_env.globals["raise_exception"] = raise_exception     
+            jinja_env.globals["raise_exception"] = raise_exception
+            jinja_env.globals["strftime_now"] = strftime_now
             if jinja_file.is_file():
                 template = jinja_env.get_template("template.jinja")
 

From 17b361f17d8d740c76a36d5d1d71425e21164cea Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 12:11:09 +0200
Subject: [PATCH 02/14] mistral

---
 src/llm/BUILD                                 |  2 +
 src/llm/io_processing/mistral/tool_parser.cpp | 94 +++++++++++++++++++
 src/llm/io_processing/mistral/tool_parser.hpp | 54 +++++++++++
 src/llm/io_processing/output_parser.cpp       |  3 +
 4 files changed, 153 insertions(+)
 create mode 100644 src/llm/io_processing/mistral/tool_parser.cpp
 create mode 100644 src/llm/io_processing/mistral/tool_parser.hpp

diff --git a/src/llm/BUILD b/src/llm/BUILD
index acff1faae9..26bf208a95 100644
--- a/src/llm/BUILD
+++ b/src/llm/BUILD
@@ -114,6 +114,7 @@ cc_library(
             "io_processing/hermes3/tool_parser.hpp",
             "io_processing/llama3/tool_parser.hpp",
             "io_processing/phi4/tool_parser.hpp",
+            "io_processing/mistral/tool_parser.hpp",
             "io_processing/qwen3/reasoning_parser.hpp",
             "io_processing/output_parser.hpp",
             "io_processing/partial_json_builder.hpp",
@@ -122,6 +123,7 @@ cc_library(
             "io_processing/hermes3/tool_parser.cpp",
             "io_processing/llama3/tool_parser.cpp",
             "io_processing/phi4/tool_parser.cpp",
+            "io_processing/mistral/tool_parser.cpp",
             "io_processing/qwen3/reasoning_parser.cpp",
             "io_processing/output_parser.cpp",
             "io_processing/partial_json_builder.cpp",
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
new file mode 100644
index 0000000000..685be9ced2
--- /dev/null
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -0,0 +1,94 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <openvino/genai/tokenizer.hpp>
+#include <string>
+#include <vector>
+#include <regex>
+
+#pragma warning(push)
+#pragma warning(disable : 6313)
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+#pragma warning(pop)
+
+#include "../../../logging.hpp"
+#include "tool_parser.hpp"
+#include "../utils.hpp"
+
+namespace ovms {
+
+void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
+    std::vector<std::string> tools;
+
+    SPDLOG_INFO("DDDDDDDDD\n[{}]", parsedOutput.content);
+
+    if (parsedOutput.content.empty()) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls");
+        return;
+    }
+
+
+    // Mistral with vLLM template produces tool calls in the format:
+    // [{"name": [function name], "arguments": [function arguments as JSON]}, ...]
+    if (parsedOutput.content[0] == '[') {
+        // Extract the content before the tools part
+        // parsedOutput.content = decoded.substr(0, toolsStartPos);
+        // Extract the tools part, assuming it's all the remaining content after "["
+        //std::string toolsString = parsedOutput.content.substr(toolsStartPos + toolsStartString.length());
+        rapidjson::Document toolsDoc;
+        toolsDoc.Parse(parsedOutput.content.c_str());
+        if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
+            for (auto& toolVal : toolsDoc.GetArray()) {
+                if (!toolVal.IsObject()) {
+                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
+                    continue;
+                }
+                ToolCall toolCall;
+                toolCall.id = generateRandomId();  // Generate a random ID for the tool call
+                if (toolVal.HasMember("name") && toolVal["name"].IsString()) {
+                    toolCall.name = toolVal["name"].GetString();
+                } else {
+                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field");
+                    continue;
+                }
+
+                if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) {
+                    rapidjson::StringBuffer sb;
+                    rapidjson::Writer<rapidjson::StringBuffer> toolWriter(sb);
+                    toolVal["arguments"].Accept(toolWriter);
+                    toolCall.arguments = sb.GetString();
+                } else {
+                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object");
+                    continue;
+                }
+                parsedOutput.toolCalls.push_back(toolCall);
+            }
+            // Remove the tools part from the content
+            parsedOutput.content.clear();
+        } else {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
+        }
+    }
+}
+
+std::optional<rapidjson::Document> MistralToolParser::parseChunk(const std::string& chunk) {
+    // Not implemented
+    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "MistralToolParser::parseChunk is not implemented");
+    return std::nullopt;
+}
+}  // namespace ovms
diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
new file mode 100644
index 0000000000..21bb09b6c5
--- /dev/null
+++ b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -0,0 +1,54 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <openvino/genai/tokenizer.hpp>
+#include <string>
+#include <optional>
+#include <vector>
+
+#pragma warning(push)
+#pragma warning(disable : 6313)
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+#pragma warning(pop)
+
+#include "../base_output_parser.hpp"
+
+namespace ovms {
+class MistralToolParser : public BaseOutputParser {
+protected:
+    // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
+    const std::string parsingStartTag = "functools";
+    const std::string parsingEndTag = "";
+
+public:
+    MistralToolParser() = delete;
+    explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) :
+        BaseOutputParser(tokenizer) {}
+
+    void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
+    std::optional<rapidjson::Document> parseChunk(const std::string& chunk) override;
+    const std::string& getParsingStartTag() const override {
+        return parsingStartTag;
+    }
+    // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
+    const std::string& getParsingEndTag() const override {
+        return parsingEndTag;
+    }
+};
+}  // namespace ovms
diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp
index 115c1d9019..765266a44c 100644
--- a/src/llm/io_processing/output_parser.cpp
+++ b/src/llm/io_processing/output_parser.cpp
@@ -19,6 +19,7 @@
 #include "llama3/tool_parser.hpp"
 #include "hermes3/tool_parser.hpp"
 #include "phi4/tool_parser.hpp"
+#include "mistral/tool_parser.hpp"
 #include "qwen3/reasoning_parser.hpp"
 
 namespace ovms {
@@ -46,6 +47,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to
         toolParser = std::make_unique<Hermes3ToolParser>(tokenizer);
     } else if (toolParserName == "phi4") {
         toolParser = std::make_unique<Phi4ToolParser>(tokenizer);
+    } else if (toolParserName == "mistral") {
+        toolParser = std::make_unique<MistralToolParser>(tokenizer);
     } else if (!toolParserName.empty()) {
         throw std::runtime_error("Unsupported tool parser: " + toolParserName);
     }

From 5eed9cae9c55517490b9a12edf0756f65f8564f7 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 12:33:09 +0200
Subject: [PATCH 03/14] save

---
 demos/common/export_models/export_model.py    | 42 +++++++++++++++++--
 src/llm/io_processing/mistral/tool_parser.cpp | 30 ++++++++++++-
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index e892bd6a91..405df1bd23 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -376,6 +376,36 @@ def add_servable_to_config(config_path, mediapipe_name, base_path):
         json.dump(config_data, config_file, indent=4)
     print("Added servable to config file", config_path)
 
+def apply_template_patches(template_content, model_type):
+    """Apply model-specific patches to the downloaded template."""
+    patches = {
+        "phi4": [
+            # Example patches for phi4
+        ],
+        "llama3": [
+            # Example patches for llama3
+        ],
+        "hermes3": [
+            # Hermes3 specific patches
+        ],
+        "mistral": [
+            # Mistral specific patches - force model to produce single JSON array of tool calls
+            (
+                "If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays.",
+                "If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array."
+            ),
+        ],
+        "qwen3": [
+            # Qwen3 patches (if needed)
+        ]
+    }
+    
+    if model_type in patches:
+        for old_pattern, new_pattern in patches[model_type]:
+            template_content = template_content.replace(old_pattern, new_pattern)
+    
+    return template_content
+
 def export_text_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path):
     model_path = "./"
     ### Export model
@@ -464,6 +494,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
             "phi4": "tool_chat_template_phi4_mini.jinja",
             "llama3": "tool_chat_template_llama3.1_json.jinja",
             "hermes3": "tool_chat_template_hermes.jinja",
+            "mistral": "tool_chat_template_mistral_parallel.jinja",
             "qwen3": None
             }
         template_name = template_mapping[task_parameters.get("tools_model_type")]
@@ -472,9 +503,14 @@ def export_text_generation_model(model_repository_path, source_model, model_name
             import requests
             response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name)
             print(response.raise_for_status())
-            with open(template_path, "wb") as f:
-                f.write(response.content)
-            print(f"Downloaded tuned chat template to {template_path}")
+            
+            # Apply patches to the template content
+            template_content = response.content.decode('utf-8')
+            template_content = apply_template_patches(template_content, task_parameters.get("tools_model_type"))
+            
+            with open(template_path, "w") as f:
+                f.write(template_content)
+            print(f"Downloaded and patched tuned chat template to {template_path}")
 
     add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index 685be9ced2..7432a20074 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -42,16 +42,42 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
         return;
     }
 
+    bool isToolGenerated = false;
+    // ignore first whitespaces
+    auto begin = parsedOutput.content.begin();
+    while (begin != parsedOutput.content.end() && std::isspace(*begin)) {
+        ++begin;
+    }
+    
+    if (begin != parsedOutput.content.end() && *begin == '[') {
+        // If the content starts with '[', it indicates that tool calls might be present.
+        //isToolGenerated = true;
+
+        while (begin != parsedOutput.content.end() && std::isspace(*begin)) {
+            ++begin;
+        }
+
+        if (begin != parsedOutput.content.end() && *begin == '{') {
+            isToolGenerated = true;
+        } else {
+            // not a tool call
+        }
+    } else {
+        // not a tool call
+    }
 
     // Mistral with vLLM template produces tool calls in the format:
     // [{"name": [function name], "arguments": [function arguments as JSON]}, ...]
-    if (parsedOutput.content[0] == '[') {
+    // So we ensure if first non white characters are [ and {
+    if (isToolGenerated) {
         // Extract the content before the tools part
         // parsedOutput.content = decoded.substr(0, toolsStartPos);
         // Extract the tools part, assuming it's all the remaining content after "["
         //std::string toolsString = parsedOutput.content.substr(toolsStartPos + toolsStartString.length());
         rapidjson::Document toolsDoc;
-        toolsDoc.Parse(parsedOutput.content.c_str());
+        std::string content = parsedOutput.content.substr(begin - parsedOutput.content.begin());
+        //toolsDoc.Parse(parsedOutput.content.c_str());
+        toolsDoc.Parse(content.c_str());
         if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
             for (auto& toolVal : toolsDoc.GetArray()) {
                 if (!toolVal.IsObject()) {

From 9660fc51baf96a854b3222d434b6ee7446393dae Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 12:47:04 +0200
Subject: [PATCH 04/14] save

---
 demos/common/export_models/export_model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 405df1bd23..62a37a0cbc 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -52,7 +52,7 @@ def add_common_arguments(parser):
                          'Not effective if target device is not NPU', dest='max_prompt_len')
 parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding')
 parser_text.add_argument('--reasoning_parser', choices=["qwen3"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser')
-parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser')
+parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3","mistral"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser')
 parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. Requires setting tool_parser', dest='enable_tool_guided_generation')
 
 parser_embeddings = subparsers.add_parser('embeddings', help='[deprecated] export model for embeddings endpoint with models split into separate, versioned directories')
@@ -488,7 +488,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
 
-    if template_parameters.get("tools_model_type") is not None:
+    if template_parameters.get("tool_parser") is not None:
         print("Adding tuned chat template")
         template_mapping = {
             "phi4": "tool_chat_template_phi4_mini.jinja",
@@ -497,7 +497,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
             "mistral": "tool_chat_template_mistral_parallel.jinja",
             "qwen3": None
             }
-        template_name = template_mapping[task_parameters.get("tools_model_type")]
+        template_name = template_mapping[task_parameters.get("tool_parser")]
         if template_name is not None:
             template_path = os.path.join(model_repository_path, model_name, "template.jinja")
             import requests
@@ -506,7 +506,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
             
             # Apply patches to the template content
             template_content = response.content.decode('utf-8')
-            template_content = apply_template_patches(template_content, task_parameters.get("tools_model_type"))
+            template_content = apply_template_patches(template_content, task_parameters.get("tool_parser"))
             
             with open(template_path, "w") as f:
                 f.write(template_content)

From eb9d1e6581bf85bd516ae54702747b61bc433d14 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 14:32:34 +0200
Subject: [PATCH 05/14] save

---
 src/llm/io_processing/hermes3/tool_parser.cpp |  2 -
 src/llm/io_processing/mistral/tool_parser.cpp | 42 ++++++++-----------
 src/llm/io_processing/mistral/tool_parser.hpp |  7 +---
 src/llm/servable.cpp                          |  1 -
 src/llm/servable_initializer.cpp              |  1 +
 5 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp
index 955918c414..f8568b5980 100644
--- a/src/llm/io_processing/hermes3/tool_parser.cpp
+++ b/src/llm/io_processing/hermes3/tool_parser.cpp
@@ -32,8 +32,6 @@
 namespace ovms {
 
 void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
-    SPDLOG_INFO("AAAAAAAAAAAAAAAA:\n[{}]", parsedOutput.content);
-
     std::vector<std::string> tools;
     size_t pos = 0;
     size_t firstToolCallPos = parsedOutput.content.find("<tool_call>", pos);
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index 7432a20074..a06743c925 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -32,52 +32,46 @@
 
 namespace ovms {
 
+static std::string::iterator skipToFirstNonWhitespaceCharacter(std::string::iterator it, const std::string::iterator& end) {  // returns the first non-whitespace position in [it, end), or end
+    while (it != end && std::isspace(static_cast<unsigned char>(*it))) {  // cast: passing a negative char (non-ASCII byte) to isspace is undefined behavior
+        ++it;
+    }
+    return it;
+}
+
 void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
     std::vector<std::string> tools;
 
-    SPDLOG_INFO("DDDDDDDDD\n[{}]", parsedOutput.content);
-
     if (parsedOutput.content.empty()) {
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls");
         return;
     }
 
+    // Mistral with vLLM template produces tool calls in the format:
+    // [{"name": [function name], "arguments": [function arguments as JSON]}, ...]
+    // So we ensure if first non white characters are [ and {
     bool isToolGenerated = false;
-    // ignore first whitespaces
     auto begin = parsedOutput.content.begin();
-    while (begin != parsedOutput.content.end() && std::isspace(*begin)) {
-        ++begin;
-    }
-    
+    begin = skipToFirstNonWhitespaceCharacter(begin, parsedOutput.content.end());
     if (begin != parsedOutput.content.end() && *begin == '[') {
-        // If the content starts with '[', it indicates that tool calls might be present.
-        //isToolGenerated = true;
-
-        while (begin != parsedOutput.content.end() && std::isspace(*begin)) {
-            ++begin;
-        }
-
+        begin = skipToFirstNonWhitespaceCharacter(begin + 1, parsedOutput.content.end());
         if (begin != parsedOutput.content.end() && *begin == '{') {
+            // If the content starts with '[{', it indicates that tool calls are present.
             isToolGenerated = true;
         } else {
-            // not a tool call
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model");
         }
     } else {
-        // not a tool call
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model");
     }
 
-    // Mistral with vLLM template produces tool calls in the format:
-    // [{"name": [function name], "arguments": [function arguments as JSON]}, ...]
-    // So we ensure if first non white characters are [ and {
+
     if (isToolGenerated) {
         // Extract the content before the tools part
         // parsedOutput.content = decoded.substr(0, toolsStartPos);
-        // Extract the tools part, assuming it's all the remaining content after "["
-        //std::string toolsString = parsedOutput.content.substr(toolsStartPos + toolsStartString.length());
+        // Extract the tools part, assuming it's all the remaining content after "[{"
         rapidjson::Document toolsDoc;
-        std::string content = parsedOutput.content.substr(begin - parsedOutput.content.begin());
-        //toolsDoc.Parse(parsedOutput.content.c_str());
-        toolsDoc.Parse(content.c_str());
+        toolsDoc.Parse(parsedOutput.content.c_str());
         if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
             for (auto& toolVal : toolsDoc.GetArray()) {
                 if (!toolVal.IsObject()) {
diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
index 21bb09b6c5..8c2996d620 100644
--- a/src/llm/io_processing/mistral/tool_parser.hpp
+++ b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -31,11 +31,6 @@
 
 namespace ovms {
 class MistralToolParser : public BaseOutputParser {
-protected:
-    // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
-    const std::string parsingStartTag = "functools";
-    const std::string parsingEndTag = "";
-
 public:
     MistralToolParser() = delete;
     explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) :
@@ -44,10 +39,12 @@ class MistralToolParser : public BaseOutputParser {
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk) override;
     const std::string& getParsingStartTag() const override {
+        static const std::string parsingStartTag = "[{";
         return parsingStartTag;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
     const std::string& getParsingEndTag() const override {
+        static const std::string parsingEndTag = "";  // intentionally empty: tool calls run to the end of the content
         return parsingEndTag;
     }
 };
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index 7673cec4ac..e364fa5768 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -107,7 +107,6 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptr<GenAiServableExecution
         if (!success) {
             return absl::Status(absl::StatusCode::kInvalidArgument, inputText);
         }
-        SPDLOG_INFO("BBBBBBBBBBBBB:\n[{}]", inputText);
 #else
         ov::genai::ChatHistory& chatHistory = executionContext->apiHandler->getChatHistory();
         constexpr bool add_generation_prompt = true;  // confirm it should be hardcoded
diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp
index a7fc0dec67..ae2ff5f6ef 100644
--- a/src/llm/servable_initializer.cpp
+++ b/src/llm/servable_initializer.cpp
@@ -70,6 +70,7 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
             def raise_exception(message):
                 raise jinja2.exceptions.TemplateError(message)
             
+            # Appears in some of mistral chat templates
             def strftime_now(format):
                 return datetime.datetime.now().strftime(format)
             

From a2249e4ea620b9b44430d60da269875eb4523d9a Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 14:51:17 +0200
Subject: [PATCH 06/14] save

---
 demos/common/export_models/export_model.py    | 74 +++++----------
 .../tool_chat_template_mistral_parallel.jinja | 93 +++++++++++++++++++
 2 files changed, 117 insertions(+), 50 deletions(-)
 create mode 100644 demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 62a37a0cbc..2b33262c91 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -376,36 +376,6 @@ def add_servable_to_config(config_path, mediapipe_name, base_path):
         json.dump(config_data, config_file, indent=4)
     print("Added servable to config file", config_path)
 
-def apply_template_patches(template_content, model_type):
-    """Apply model-specific patches to the downloaded template."""
-    patches = {
-        "phi4": [
-            # Example patches for phi4
-        ],
-        "llama3": [
-            # Example patches for llama3
-        ],
-        "hermes3": [
-            # Hermes3 specific patches
-        ],
-        "mistral": [
-            # Mistral specific patches - force model to produce single JSON array of tool calls
-            (
-                "If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays.",
-                "If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array."
-            ),
-        ],
-        "qwen3": [
-            # Qwen3 patches (if needed)
-        ]
-    }
-    
-    if model_type in patches:
-        for old_pattern, new_pattern in patches[model_type]:
-            template_content = template_content.replace(old_pattern, new_pattern)
-    
-    return template_content
-
 def export_text_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path):
     model_path = "./"
     ### Export model
@@ -490,27 +460,31 @@ def export_text_generation_model(model_repository_path, source_model, model_name
 
     if template_parameters.get("tool_parser") is not None:
         print("Adding tuned chat template")
-        template_mapping = {
-            "phi4": "tool_chat_template_phi4_mini.jinja",
-            "llama3": "tool_chat_template_llama3.1_json.jinja",
-            "hermes3": "tool_chat_template_hermes.jinja",
-            "mistral": "tool_chat_template_mistral_parallel.jinja",
-            "qwen3": None
-            }
-        template_name = template_mapping[task_parameters.get("tool_parser")]
-        if template_name is not None:
+        # Custom Templates
+        if template_parameters.get("tool_parser") == "mistral":
             template_path = os.path.join(model_repository_path, model_name, "template.jinja")
-            import requests
-            response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name)
-            print(response.raise_for_status())
-            
-            # Apply patches to the template content
-            template_content = response.content.decode('utf-8')
-            template_content = apply_template_patches(template_content, task_parameters.get("tool_parser"))
-            
-            with open(template_path, "w") as f:
-                f.write(template_content)
-            print(f"Downloaded and patched tuned chat template to {template_path}")
+            with open(template_path, "wb") as f:
+                # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja
+                with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file:
+                    content = template_file.read()
+                    f.write(content)
+            print(f"Added tuned chat template to {template_path}")
+        else: # VLLM templates
+            template_mapping = {
+                "phi4": "tool_chat_template_phi4_mini.jinja",
+                "llama3": "tool_chat_template_llama3.1_json.jinja",
+                "hermes3": "tool_chat_template_hermes.jinja",
+                "qwen3": None
+                }
+            template_name = template_mapping[task_parameters.get("tool_parser")]
+            if template_name is not None:
+                template_path = os.path.join(model_repository_path, model_name, "template.jinja")
+                import requests
+                response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name)
+                print(response.raise_for_status())
+                with open(template_path, "wb") as f:
+                    f.write(response.content)
+                print(f"Downloaded tuned chat template to {template_path}")
 
     add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
diff --git a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja
new file mode 100644
index 0000000000..aec9f5ee0c
--- /dev/null
+++ b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja
@@ -0,0 +1,93 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- elif tools is not none %}
+    {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %}
+    {%- if system_message is defined %}
+        {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %}
+    {%- else %}
+        {%- set system_message = parallel_tool_prompt %}
+    {%- endif %}
+{%- endif %}
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+
+{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}
+    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
+        {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif %}
+{%- endfor %}
+
+{{- bos_token }}
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+        {%- endif %}
+        {%- if loop.last and system_message is defined %}
+            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
+        {%- else %}
+            {{- "[INST] " + message["content"] + "[/INST]" }}
+        {%- endif %}
+    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
+        {%- if message.tool_calls is defined %}
+            {%- set tool_calls = message.tool_calls %}
+        {%- else %}
+            {%- set tool_calls = message.content %}
+        {%- endif %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in tool_calls %}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id[-9:] + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif message["role"] == "assistant" %}
+        {{- " " + message["content"] + eos_token }}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}

From fa66a4118acf1de66cb6175b230504039f5c9159 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 14:57:09 +0200
Subject: [PATCH 07/14] export_model: write Mistral chat template in text mode

---
 demos/common/export_models/export_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 2b33262c91..1de631dfbb 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -463,7 +463,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
         # Custom Templates
         if template_parameters.get("tool_parser") == "mistral":
             template_path = os.path.join(model_repository_path, model_name, "template.jinja")
-            with open(template_path, "wb") as f:
+            with open(template_path, "w") as f:
                 # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja
                 with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file:
                     content = template_file.read()

From ad2cb60bb638a18693d7f8a7b73ac4392f78e4c5 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 11 Aug 2025 15:32:32 +0200
Subject: [PATCH 08/14] unit test

---
 src/BUILD                                     |  1 +
 .../mistral_output_parser_test.cpp            | 99 +++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 src/test/llm/output_parsers/mistral_output_parser_test.cpp

diff --git a/src/BUILD b/src/BUILD
index 3080fffb74..6829e9008f 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -2511,6 +2511,7 @@ cc_test(
                 "test/llm/output_parsers/qwen3_output_parser_test.cpp",
                 "test/llm/output_parsers/hermes3_output_parser_test.cpp",
                 "test/llm/output_parsers/phi4_output_parser_test.cpp",
+                "test/llm/output_parsers/mistral_output_parser_test.cpp",
                 "test/llm/output_parsers/partial_json_builder_test.cpp",
             ],
             "//:disable_python" : [],
diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
new file mode 100644
index 0000000000..2154c3097c
--- /dev/null
+++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
@@ -0,0 +1,99 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include <gtest/gtest.h>
+#include <openvino/genai/tokenizer.hpp>
+#include <string>
+#include <vector>
+
+#include "../../../llm/io_processing/base_output_parser.hpp"
+#include "../../../llm/io_processing/output_parser.hpp"
+#include "../../../llm/io_processing/mistral/tool_parser.hpp"
+#include "../../test_utils.hpp"
+
+using namespace ovms;
+
+class MistralOutputParserTest : public ::testing::Test {
+protected:
+    std::unique_ptr<MistralToolParser> outputParser;
+    ov::genai::Tokenizer dummy_tokenizer;  // not used
+
+    void SetUp() override {
+        outputParser = std::make_unique<MistralToolParser>(dummy_tokenizer);
+    }
+};
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
+    ParsedOutput parsedOutput;
+    parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]";
+    outputParser->parse(parsedOutput, {});
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCalls) {
+    ParsedOutput parsedOutput;
+    parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}},{\"name\": \"get_humidity\", \"arguments\": {\"location\": \"Madrid\"}}]";
+    outputParser->parse(parsedOutput, {});
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 2);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
+    EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false);  // ID should be generated
+    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsWithWhitespaces) {
+    ParsedOutput parsedOutput;
+    parsedOutput.content = R"(
+    [
+      {"name": "get_weather", "arguments": {"location": "Paris"}},
+      {"name": "get_humidity", "arguments": {"location": "Madrid"}}
+    ])";
+    outputParser->parse(parsedOutput, {});
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 2);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
+    EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false);  // ID should be generated
+    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
+}
+
+TEST_F(MistralOutputParserTest, Negative_ParseToolCallOutput) {
+    ParsedOutput parsedOutput;
+    std::string content = R"(
+    non whitespace [
+      {"name": "get_weather", "arguments": {"location": "Paris"}},
+      {"name": "get_humidity", "arguments": {"location": "Madrid"}}
+    ])";
+    parsedOutput.content = content;
+    outputParser->parse(parsedOutput, {});
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
+    ASSERT_EQ(parsedOutput.content, content);  // Content should remain unchanged
+}

From 5f02112805ccb4f3cad81753d5a4a441bb9a3ecb Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Thu, 14 Aug 2025 15:56:22 +0200
Subject: [PATCH 09/14] Mistral: prepare tokenizer and test tool parsing through OutputParser

---
 prepare_llm_models.sh                         |  11 +-
 .../mistral_output_parser_test.cpp            | 158 ++++++++++++------
 2 files changed, 121 insertions(+), 48 deletions(-)

diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh
index cb3ffdd18f..b74980f43e 100755
--- a/prepare_llm_models.sh
+++ b/prepare_llm_models.sh
@@ -30,8 +30,9 @@ QWEN3_MODEL="Qwen/Qwen3-8B"
 LLAMA3_MODEL="meta-llama/Llama-3.1-8B-Instruct"
 HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
 PHI4_MODEL="microsoft/Phi-4-mini-instruct"
+MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3"
 
-MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov")
+MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$MISTRAL_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov")
 
 all_exist=true
 for model in "${MODELS[@]}"; do
@@ -126,3 +127,11 @@ else
   mkdir -p $1/$PHI4_MODEL
   convert_tokenizer $PHI4_MODEL --with_detokenizer -o $1/$PHI4_MODEL
 fi
+
+if [ -d "$1/$MISTRAL_MODEL" ]; then
+  echo "Models directory $1/$MISTRAL_MODEL exists. Skipping downloading models."
+else
+  mkdir -p $1/$MISTRAL_MODEL
+  convert_tokenizer $MISTRAL_MODEL --with_detokenizer -o $1/$MISTRAL_MODEL
+fi
+
diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
index 2154c3097c..b4e9be157d 100644
--- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp
+++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
@@ -20,80 +20,144 @@
 
 #include "../../../llm/io_processing/base_output_parser.hpp"
 #include "../../../llm/io_processing/output_parser.hpp"
-#include "../../../llm/io_processing/mistral/tool_parser.hpp"
 #include "../../test_utils.hpp"
 
 using namespace ovms;
 
+#ifdef _WIN32
+const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\mistralai\\Mistral-7B-Instruct-v0.3";
+#else
+// Hardcoded for usage in docker container
+const std::string tokenizerPath = "/ovms/src/test/llm_testing/mistralai/Mistral-7B-Instruct-v0.3/";
+#endif
+
+static ov::genai::Tokenizer mistralTokenizer(tokenizerPath);
+
 class MistralOutputParserTest : public ::testing::Test {
 protected:
-    std::unique_ptr<MistralToolParser> outputParser;
-    ov::genai::Tokenizer dummy_tokenizer;  // not used
+    std::unique_ptr<OutputParser> outputParser;
 
     void SetUp() override {
-        outputParser = std::make_unique<MistralToolParser>(dummy_tokenizer);
+        // Mistral: only the tool parser ("mistral") is configured, the last argument is left empty
+        outputParser = std::make_unique<OutputParser>(mistralTokenizer, "mistral", "");
     }
 };
 
 TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
-    ParsedOutput parsedOutput;
-    parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]";
-    outputParser->parse(parsedOutput, {});
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    SPDLOG_INFO("AAAAAAAAAAA: [{}]", generatedTokens);
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
 
     ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
-    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
     // Parser removes whitespaces, so we expect arguments value to be without spaces
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
     EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
-    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
 }
 
-TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCalls) {
-    ParsedOutput parsedOutput;
-    parsedOutput.content = "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}},{\"name\": \"get_humidity\", \"arguments\": {\"location\": \"Madrid\"}}]";
-    outputParser->parse(parsedOutput, {});
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}},"
+                        "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}},"
+                        "{\"name\": \"third_tool\", \"arguments\": {\"key\": \"value\"}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
 
-    ASSERT_EQ(parsedOutput.toolCalls.size(), 2);
-    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
-    EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity");
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 3);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
     // Parser removes whitespaces, so we expect arguments value to be without spaces
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
-    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
     EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    auto firstToolCallId = parsedOutput.toolCalls[0].id;
+
+    EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}");
     EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false);  // ID should be generated
-    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
+    auto secondToolCallId = parsedOutput.toolCalls[1].id;
+    EXPECT_NE(firstToolCallId, secondToolCallId);  // IDs should be different
+
+    EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false);  // ID should be generated
+    auto thirdToolCallId = parsedOutput.toolCalls[2].id;
+    EXPECT_NE(firstToolCallId, thirdToolCallId);   // IDs should be different
+    EXPECT_NE(secondToolCallId, thirdToolCallId);  // IDs should be different
 }
 
-TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsWithWhitespaces) {
-    ParsedOutput parsedOutput;
-    parsedOutput.content = R"(
-    [
-      {"name": "get_weather", "arguments": {"location": "Paris"}},
-      {"name": "get_humidity", "arguments": {"location": "Madrid"}}
-    ])";
-    outputParser->parse(parsedOutput, {});
-
-    ASSERT_EQ(parsedOutput.toolCalls.size(), 2);
-    EXPECT_EQ(parsedOutput.toolCalls[0].name, "get_weather");
-    EXPECT_EQ(parsedOutput.toolCalls[1].name, "get_humidity");
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInvalid) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}},"
+                        "{\"tool_name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}},"
+                        "{\"name\": \"third_tool\", \"options\": {\"key\": \"value\"}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    // Only the first tool call is valid: the second one has an invalid name field and the third one has an invalid arguments field
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
     // Parser removes whitespaces, so we expect arguments value to be without spaces
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"location\":\"Paris\"}");
-    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"location\":\"Madrid\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
     EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
-    EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false);  // ID should be generated
-    EXPECT_EQ(parsedOutput.content, "");                     // Content should be empty after parsing tool calls
+    auto firstToolCallId = parsedOutput.toolCalls[0].id;
 }
 
-TEST_F(MistralOutputParserTest, Negative_ParseToolCallOutput) {
-    ParsedOutput parsedOutput;
-    std::string content = R"(
-    non whitespace [
-      {"name": "get_weather", "arguments": {"location": "Paris"}},
-      {"name": "get_humidity", "arguments": {"location": "Madrid"}}
-    ])";
-    parsedOutput.content = content;
-    outputParser->parse(parsedOutput, {});
-
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
+    std::string input = "This is a regular model response without tool calls.";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls.");
     ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
-    ASSERT_EQ(parsedOutput.content, content);  // Content should remain unchanged
+    EXPECT_EQ(parsedOutput.reasoning, "");
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
+    std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+}
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsContentOnly) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    // NOTE(review): the expectation below contains 'functools', which never appears in the [TOOL_CALLS] input - looks copied from another parser's test, confirm
+    EXPECT_EQ(parsedOutput.content, "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);  // No valid tool calls parsed
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithArrayArguments) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"extractLastTransactionId\", \"arguments\": { \"filepath\": \"/var/log/db.log\", \"status\": [\"completed\", \"failed\"], \"encoding\": \"utf-8\", \"processFunction\": \"processFunction\"}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "extractLastTransactionId");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
 }

From cca406b8baa34d9455f1de375db63847c6a85ba2 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 18 Aug 2025 12:46:10 +0200
Subject: [PATCH 10/14] skip special tokens false

---
 src/llm/io_processing/mistral/tool_parser.cpp | 47 +++++++++----------
 src/llm/io_processing/mistral/tool_parser.hpp |  9 ++--
 .../mistral_output_parser_test.cpp            | 24 ++++++++--
 3 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index a06743c925..de1c7f03d3 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -47,32 +47,30 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
         return;
     }
 
-    // Mistral with vLLM template produces tool calls in the format:
-    // [{"name": [function name], "arguments": [function arguments as JSON]}, ...]
-    // So we ensure if first non white characters are [ and {
-    bool isToolGenerated = false;
-    auto begin = parsedOutput.content.begin();
-    begin = skipToFirstNonWhitespaceCharacter(begin, parsedOutput.content.end());
-    if (begin != parsedOutput.content.end() && *begin == '[') {
-        begin = skipToFirstNonWhitespaceCharacter(begin + 1, parsedOutput.content.end());
-        if (begin != parsedOutput.content.end() && *begin == '{') {
-            // If the content starts with '[{', it indicates that tool calls are present.
-            isToolGenerated = true;
-        } else {
-            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model");
-        }
-    } else {
-        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool calls are not generated by the model");
-    }
+    std::string decoded = tokenizer.decode(generatedTokens, {ov::genai::skip_special_tokens(false)});
+
+    const std::string toolsStartString = getParsingStartTag();
+    const std::string toolsStartEnd = getParsingEndTag();
+
+    size_t toolsStartPos = decoded.find(toolsStartString);
+    size_t toolsEndPos = decoded.find(toolsStartEnd);
 
+    if (toolsStartPos != std::string::npos && toolsEndPos != std::string::npos) {
+        std::string remaining = decoded.substr(0, toolsStartPos) + decoded.substr(toolsEndPos + toolsStartEnd.length());
+
+
+        size_t toolsStartPos2 = remaining.find(toolsStartString);
+        size_t toolsEndPos2 = remaining.find(toolsStartEnd);
+        bool hasMoreSpecialTags = !(toolsStartPos2 == std::string::npos && toolsEndPos2 == std::string::npos);
+
+        std::string toolsString = decoded.substr(
+            toolsStartPos + toolsStartString.length(),
+            toolsEndPos - toolsStartPos - toolsStartString.length());
 
-    if (isToolGenerated) {
-        // Extract the content before the tools part
-        // parsedOutput.content = decoded.substr(0, toolsStartPos);
-        // Extract the tools part, assuming it's all the remaining content after "[{"
         rapidjson::Document toolsDoc;
-        toolsDoc.Parse(parsedOutput.content.c_str());
-        if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
+        toolsDoc.Parse(toolsString.c_str());
+
+        if (!toolsDoc.HasParseError() && toolsDoc.IsArray() && !hasMoreSpecialTags) {
             for (auto& toolVal : toolsDoc.GetArray()) {
                 if (!toolVal.IsObject()) {
                     SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
@@ -98,8 +96,7 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
                 }
                 parsedOutput.toolCalls.push_back(toolCall);
             }
-            // Remove the tools part from the content
-            parsedOutput.content.clear();
+            parsedOutput.content = remaining;
         } else {
             SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
         }
diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
index 8c2996d620..28e81f7abf 100644
--- a/src/llm/io_processing/mistral/tool_parser.hpp
+++ b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -31,6 +31,9 @@
 
 namespace ovms {
 class MistralToolParser : public BaseOutputParser {
+    const std::string toolCallStartTag = "[TOOL_CALLS]";
+    const std::string toolCallEndTag = "</s>";
+
 public:
     MistralToolParser() = delete;
     explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) :
@@ -39,13 +42,11 @@ class MistralToolParser : public BaseOutputParser {
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk) override;
     const std::string& getParsingStartTag() const override {
-        static const std::string parsingStartTag = "[{";
-        return parsingStartTag;
+        return toolCallStartTag;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
     const std::string& getParsingEndTag() const override {
-        static const std::string parsingEndTag = "[{";
-        return parsingEndTag;
+        return toolCallEndTag;
     }
 };
 }  // namespace ovms
diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
index b4e9be157d..a9904bab61 100644
--- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp
+++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
@@ -122,7 +122,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
 }
 
 TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
-    std::string input = "This is a content part and next will be a tool call.\n\[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s>";
+    std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s>";
     auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
@@ -135,13 +135,27 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall)
     EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
     EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
 }
-TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsContentOnly) {
-    std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]</s>";
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) {
+    std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s> This is a content part after tool call.";
     auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
-    // Content after 'functools' cannot be parsed as array of JSON objects, so it is treated as content
-    EXPECT_EQ(parsedOutput.content, "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]");
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n This is a content part after tool call.");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+}
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsContentOnly) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]</s> \n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]</s>";
+    auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
+    // Content after 'TOOL_CALLS' cannot be parsed as array of JSON objects, so it is treated as content
+    EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n [{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]");
     EXPECT_EQ(parsedOutput.reasoning, "");
 
     ASSERT_EQ(parsedOutput.toolCalls.size(), 0);  // No valid tool calls parsed

From 94597a124aceca2c25f76a53d5f81d126e54d7e6 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 18 Aug 2025 12:49:45 +0200
Subject: [PATCH 11/14] save

---
 .dockerignore                                 |  1 +
 demos/common/export_models/export_model.py    | 39 +++-----
 .../tool_chat_template_mistral_parallel.jinja | 93 -------------------
 src/llm/io_processing/mistral/tool_parser.cpp |  8 --
 .../mistral_output_parser_test.cpp            |  1 -
 5 files changed, 16 insertions(+), 126 deletions(-)
 delete mode 100644 demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja

diff --git a/.dockerignore b/.dockerignore
index 98c62c8c06..ab4fdc7ab2 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -4,3 +4,4 @@
 out
 demos/continuous_batching
 demos/embeddings
+demos/common/export_models/models
\ No newline at end of file
diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 2593c507f6..921541ddb8 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -466,31 +466,22 @@ def export_text_generation_model(model_repository_path, source_model, model_name
 
     if template_parameters.get("tool_parser") is not None:
         print("Adding tuned chat template")
-        # Custom Templates
-        if template_parameters.get("tool_parser") == "mistral":
+        template_mapping = {
+            "phi4": "tool_chat_template_phi4_mini.jinja",
+            "llama3": "tool_chat_template_llama3.1_json.jinja",
+            "hermes3": "tool_chat_template_hermes.jinja",
+            "mistral": "tool_chat_template_mistral_parallel.jinja",
+            "qwen3": None
+            }
+        template_name = template_mapping[task_parameters.get("tool_parser")]
+        if template_name is not None:
             template_path = os.path.join(model_repository_path, model_name, "template.jinja")
-            with open(template_path, "w") as f:
-                # Modified from https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/tool_chat_template_mistral_parallel.jinja
-                with open(os.path.dirname(os.path.abspath(__file__)) + "/templates/tool_chat_template_mistral_parallel.jinja", "r") as template_file:
-                    content = template_file.read()
-                    f.write(content)
-            print(f"Added tuned chat template to {template_path}")
-        else: # VLLM templates
-            template_mapping = {
-                "phi4": "tool_chat_template_phi4_mini.jinja",
-                "llama3": "tool_chat_template_llama3.1_json.jinja",
-                "hermes3": "tool_chat_template_hermes.jinja",
-                "qwen3": None
-                }
-            template_name = template_mapping[task_parameters.get("tool_parser")]
-            if template_name is not None:
-                template_path = os.path.join(model_repository_path, model_name, "template.jinja")
-                import requests
-                response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name)
-                print(response.raise_for_status())
-                with open(template_path, "wb") as f:
-                    f.write(response.content)
-                print(f"Downloaded tuned chat template to {template_path}")
+            import requests
+            response = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.9.0/examples/" + template_name)
+            print(response.raise_for_status())
+            with open(template_path, "wb") as f:
+                f.write(response.content)
+            print(f"Downloaded tuned chat template to {template_path}")
 
     add_servable_to_config(config_file_path, model_name, os.path.relpath( os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
diff --git a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja b/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja
deleted file mode 100644
index aec9f5ee0c..0000000000
--- a/demos/common/export_models/templates/tool_chat_template_mistral_parallel.jinja
+++ /dev/null
@@ -1,93 +0,0 @@
-{%- if messages[0]["role"] == "system" %}
-    {%- set system_message = messages[0]["content"] %}
-    {%- set loop_messages = messages[1:] %}
-{%- else %}
-    {%- set loop_messages = messages %}
-{%- endif %}
-{%- if not tools is defined %}
-    {%- set tools = none %}
-{%- elif tools is not none %}
-    {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a **SINGLE** JSON array of objects, where each object is a tool call, PLEASE NEST ALL OBJECTS IN SINGLE array. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %}
-    {%- if system_message is defined %}
-        {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %}
-    {%- else %}
-        {%- set system_message = parallel_tool_prompt %}
-    {%- endif %}
-{%- endif %}
-{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
-
-{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}
-    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
-        {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
-    {%- endif %}
-{%- endfor %}
-
-{{- bos_token }}
-{%- for message in loop_messages %}
-    {%- if message["role"] == "user" %}
-        {%- if tools is not none and (message == user_messages[-1]) %}
-            {{- "[AVAILABLE_TOOLS] [" }}
-            {%- for tool in tools %}
-                {%- set tool = tool.function %}
-                {{- '{"type": "function", "function": {' }}
-                {%- for key, val in tool.items() if key != "return" %}
-                    {%- if val is string %}
-                        {{- '"' + key + '": "' + val + '"' }}
-                    {%- else %}
-                        {{- '"' + key + '": ' + val|tojson }}
-                    {%- endif %}
-                    {%- if not loop.last %}
-                        {{- ", " }}
-                    {%- endif %}
-                {%- endfor %}
-                {{- "}}" }}
-                {%- if not loop.last %}
-                    {{- ", " }}
-                {%- else %}
-                    {{- "]" }}
-                {%- endif %}
-            {%- endfor %}
-            {{- "[/AVAILABLE_TOOLS]" }}
-        {%- endif %}
-        {%- if loop.last and system_message is defined %}
-            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
-        {%- else %}
-            {{- "[INST] " + message["content"] + "[/INST]" }}
-        {%- endif %}
-    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
-        {%- if message.tool_calls is defined %}
-            {%- set tool_calls = message.tool_calls %}
-        {%- else %}
-            {%- set tool_calls = message.content %}
-        {%- endif %}
-        {{- "[TOOL_CALLS] [" }}
-        {%- for tool_call in tool_calls %}
-            {%- set out = tool_call.function|tojson %}
-            {{- out[:-1] }}
-            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}
-                {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
-            {%- endif %}
-            {{- ', "id": "' + tool_call.id[-9:] + '"}' }}
-            {%- if not loop.last %}
-                {{- ", " }}
-            {%- else %}
-                {{- "]" + eos_token }}
-            {%- endif %}
-        {%- endfor %}
-    {%- elif message["role"] == "assistant" %}
-        {{- " " + message["content"] + eos_token }}
-    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
-        {%- if message.content is defined and message.content.content is defined %}
-            {%- set content = message.content.content %}
-        {%- else %}
-            {%- set content = message.content %}
-        {%- endif %}
-        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
-        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
-            {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
-        {%- endif %}
-        {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
-    {%- else %}
-        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
-    {%- endif %}
-{%- endfor %}
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index de1c7f03d3..d945db1dca 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -32,13 +32,6 @@
 
 namespace ovms {
 
-static std::string::iterator skipToFirstNonWhitespaceCharacter(std::string::iterator it, const std::string::iterator& end) {
-    while (it != end && std::isspace(*it)) {
-        ++it;
-    }
-    return it;
-}
-
 void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
     std::vector<std::string> tools;
 
@@ -58,7 +51,6 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
     if (toolsStartPos != std::string::npos && toolsEndPos != std::string::npos) {
         std::string remaining = decoded.substr(0, toolsStartPos) + decoded.substr(toolsEndPos + toolsStartEnd.length());
 
-
         size_t toolsStartPos2 = remaining.find(toolsStartString);
         size_t toolsEndPos2 = remaining.find(toolsStartEnd);
         bool hasMoreSpecialTags = !(toolsStartPos2 == std::string::npos && toolsEndPos2 == std::string::npos);
diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
index a9904bab61..9d22b58571 100644
--- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp
+++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
@@ -47,7 +47,6 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
     std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s>";
     auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
-    SPDLOG_INFO("AAAAAAAAAAA: [{}]", generatedTokens);
     ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
     EXPECT_EQ(parsedOutput.content, "");
     EXPECT_EQ(parsedOutput.reasoning, "");

From 379bc6846ce63b20a7264a48960aeccb9a85b4fd Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Mon, 18 Aug 2025 13:11:59 +0200
Subject: [PATCH 12/14] save

---
 src/llm/io_processing/mistral/tool_parser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index d945db1dca..63507a8255 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -69,7 +69,6 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
                     continue;
                 }
                 ToolCall toolCall;
-                toolCall.id = generateRandomId();  // Generate a random ID for the tool call
                 if (toolVal.HasMember("name") && toolVal["name"].IsString()) {
                     toolCall.name = toolVal["name"].GetString();
                 } else {
@@ -86,6 +85,7 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int6
                     SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object");
                     continue;
                 }
+                toolCall.id = generateRandomId();  // Generate a random ID for the tool call
                 parsedOutput.toolCalls.push_back(toolCall);
             }
             parsedOutput.content = remaining;

From 5b8e24af4a20fda9b249d6fbf33b85237b13ebf9 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Tue, 19 Aug 2025 14:17:40 +0200
Subject: [PATCH 13/14] Milosz

---
 src/llm/io_processing/mistral/tool_parser.cpp | 81 ++++++++-----------
 src/llm/io_processing/mistral/tool_parser.hpp |  5 +-
 .../mistral_output_parser_test.cpp            | 17 +---
 3 files changed, 40 insertions(+), 63 deletions(-)

diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
index 63507a8255..7313d019fb 100644
--- a/src/llm/io_processing/mistral/tool_parser.cpp
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -35,63 +35,48 @@ namespace ovms {
 void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
     std::vector<std::string> tools;
 
-    if (parsedOutput.content.empty()) {
+    if (parsedOutput.content.empty() || generatedTokens.size() <= 0) {
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls");
         return;
     }
 
-    std::string decoded = tokenizer.decode(generatedTokens, {ov::genai::skip_special_tokens(false)});
-
-    const std::string toolsStartString = getParsingStartTag();
-    const std::string toolsStartEnd = getParsingEndTag();
-
-    size_t toolsStartPos = decoded.find(toolsStartString);
-    size_t toolsEndPos = decoded.find(toolsStartEnd);
-
-    if (toolsStartPos != std::string::npos && toolsEndPos != std::string::npos) {
-        std::string remaining = decoded.substr(0, toolsStartPos) + decoded.substr(toolsEndPos + toolsStartEnd.length());
-
-        size_t toolsStartPos2 = remaining.find(toolsStartString);
-        size_t toolsEndPos2 = remaining.find(toolsStartEnd);
-        bool hasMoreSpecialTags = !(toolsStartPos2 == std::string::npos && toolsEndPos2 == std::string::npos);
-
-        std::string toolsString = decoded.substr(
-            toolsStartPos + toolsStartString.length(),
-            toolsEndPos - toolsStartPos - toolsStartString.length());
+    if (generatedTokens[0] != this->botTokenId) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
+        return;
+    }
 
-        rapidjson::Document toolsDoc;
-        toolsDoc.Parse(toolsString.c_str());
+    rapidjson::Document toolsDoc;
+    toolsDoc.Parse(parsedOutput.content.c_str());
 
-        if (!toolsDoc.HasParseError() && toolsDoc.IsArray() && !hasMoreSpecialTags) {
-            for (auto& toolVal : toolsDoc.GetArray()) {
-                if (!toolVal.IsObject()) {
-                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
-                    continue;
-                }
-                ToolCall toolCall;
-                if (toolVal.HasMember("name") && toolVal["name"].IsString()) {
-                    toolCall.name = toolVal["name"].GetString();
-                } else {
-                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field");
-                    continue;
-                }
+    if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
+        for (auto& toolVal : toolsDoc.GetArray()) {
+            if (!toolVal.IsObject()) {
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
+                continue;
+            }
+            ToolCall toolCall;
+            if (toolVal.HasMember("name") && toolVal["name"].IsString()) {
+                toolCall.name = toolVal["name"].GetString();
+            } else {
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field");
+                continue;
+            }
 
-                if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) {
-                    rapidjson::StringBuffer sb;
-                    rapidjson::Writer<rapidjson::StringBuffer> toolWriter(sb);
-                    toolVal["arguments"].Accept(toolWriter);
-                    toolCall.arguments = sb.GetString();
-                } else {
-                    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object");
-                    continue;
-                }
-                toolCall.id = generateRandomId();  // Generate a random ID for the tool call
-                parsedOutput.toolCalls.push_back(toolCall);
+            if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) {
+                rapidjson::StringBuffer sb;
+                rapidjson::Writer<rapidjson::StringBuffer> toolWriter(sb);
+                toolVal["arguments"].Accept(toolWriter);
+                toolCall.arguments = sb.GetString();
+            } else {
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid parameters object");
+                continue;
             }
-            parsedOutput.content = remaining;
-        } else {
-            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
+            toolCall.id = generateRandomId();  // Generate a random ID for the tool call
+            parsedOutput.toolCalls.push_back(toolCall);
         }
+        parsedOutput.content.clear();
+    } else {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
     }
 }
 
diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
index 28e81f7abf..fe715a890c 100644
--- a/src/llm/io_processing/mistral/tool_parser.hpp
+++ b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -31,8 +31,7 @@
 
 namespace ovms {
 class MistralToolParser : public BaseOutputParser {
-    const std::string toolCallStartTag = "[TOOL_CALLS]";
-    const std::string toolCallEndTag = "</s>";
+    const int64_t botTokenId = 5;  // [TOOL_CALLS]
 
 public:
     MistralToolParser() = delete;
@@ -42,10 +41,12 @@ class MistralToolParser : public BaseOutputParser {
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk) override;
     const std::string& getParsingStartTag() const override {
+        static const std::string toolCallStartTag = "[TOOL_CALLS]";
         return toolCallStartTag;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
     const std::string& getParsingEndTag() const override {
+        static const std::string toolCallEndTag = "";
         return toolCallEndTag;
     }
 };
diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
index 9d22b58571..8f5a3701aa 100644
--- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp
+++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp
@@ -38,7 +38,6 @@ class MistralOutputParserTest : public ::testing::Test {
     std::unique_ptr<OutputParser> outputParser;
 
     void SetUp() override {
-        // For Phi4 model there is only tool parser available
         outputParser = std::make_unique<OutputParser>(mistralTokenizer, "mistral", "");
     }
 };
@@ -125,28 +124,20 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall)
     auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
-    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n");
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]");
     EXPECT_EQ(parsedOutput.reasoning, "");
 
-    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
-    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
-    // Parser removes whitespaces, so we expect arguments value to be without spaces
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
-    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
 }
 TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) {
     std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]</s> This is a content part after tool call.";
     auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);
-    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n This is a content part after tool call.");
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call.");
     EXPECT_EQ(parsedOutput.reasoning, "");
 
-    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
-    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
-    // Parser removes whitespaces, so we expect arguments value to be without spaces
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
-    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);  // ID should be generated
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
 }
 TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsContentOnly) {
     std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]</s> \n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]</s>";

From 360c5f3a9f966dd485b90661001669bc0f8ff59f Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Tue, 19 Aug 2025 16:29:58 +0200
Subject: [PATCH 14/14] prepare mistral tokenizer for unit tests on windows

---
 windows_prepare_llm_models.bat | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat
index 58521134b5..8ddcc7e96c 100644
--- a/windows_prepare_llm_models.bat
+++ b/windows_prepare_llm_models.bat
@@ -38,8 +38,9 @@ set "QWEN3_MODEL=Qwen/Qwen3-8B"
 set "LLAMA3_MODEL=meta-llama/Llama-3.1-8B-Instruct"
 set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B"
 set "PHI4_MODEL=microsoft/Phi-4-mini-instruct"
+set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3"
 
-set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL%
+set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL% %MISTRAL_MODEL%
 
 set "ALL_EXIST=1"
 for %%M in ("%MODELS_LIST%") do (
@@ -152,4 +153,13 @@ if exist "%~1\%PHI4_MODEL%" (
   if !errorlevel! neq 0 exit /b !errorlevel!
 )
 
+if exist "%~1\%MISTRAL_MODEL%" (
+  echo Models directory %~1\%MISTRAL_MODEL% exists. Skipping downloading models.
+) else (
+  echo Downloading tokenizer and detokenizer for Mistral model to %~1\%MISTRAL_MODEL% directory.
+  mkdir "%~1\%MISTRAL_MODEL%"
+  convert_tokenizer "%MISTRAL_MODEL%" --with_detokenizer -o "%~1\%MISTRAL_MODEL%"
+  if !errorlevel! neq 0 exit /b !errorlevel!
+)
+
 endlocal