diff --git a/.dockerignore b/.dockerignore index 98c62c8c06..ab4fdc7ab2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,3 +4,4 @@ out demos/continuous_batching demos/embeddings +demos/common/export_models/models \ No newline at end of file diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 86f60fe57b..921541ddb8 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -52,7 +52,7 @@ def add_common_arguments(parser): 'Not effective if target device is not NPU', dest='max_prompt_len') parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding') parser_text.add_argument('--reasoning_parser', choices=["qwen3"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') -parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3", "qwen3"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') +parser_text.add_argument('--tool_parser', choices=["llama3","phi4","hermes3", "qwen3","mistral"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. 
Requires setting tool_parser', dest='enable_tool_guided_generation') parser_embeddings = subparsers.add_parser('embeddings', help='[deprecated] export model for embeddings endpoint with models split into separate, versioned directories') @@ -464,15 +464,16 @@ def export_text_generation_model(model_repository_path, source_model, model_name f.write(graph_content) print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt'))) - if template_parameters.get("tools_model_type") is not None: + if template_parameters.get("tool_parser") is not None: print("Adding tuned chat template") template_mapping = { "phi4": "tool_chat_template_phi4_mini.jinja", "llama3": "tool_chat_template_llama3.1_json.jinja", "hermes3": "tool_chat_template_hermes.jinja", + "mistral": "tool_chat_template_mistral_parallel.jinja", "qwen3": None } - template_name = template_mapping[task_parameters.get("tools_model_type")] + template_name = template_mapping[task_parameters.get("tool_parser")] if template_name is not None: template_path = os.path.join(model_repository_path, model_name, "template.jinja") import requests diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index cb3ffdd18f..b74980f43e 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -30,8 +30,9 @@ QWEN3_MODEL="Qwen/Qwen3-8B" LLAMA3_MODEL="meta-llama/Llama-3.1-8B-Instruct" HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B" PHI4_MODEL="microsoft/Phi-4-mini-instruct" +MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" -MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov") +MODELS=("$CB_MODEL" "$EMBEDDING_MODEL" "$RERANK_MODEL" "$VLM_MODEL" "$QWEN3_MODEL" "$LLAMA3_MODEL" "$HERMES3_MODEL" "$PHI4_MODEL" "$MISTRAL_MODEL" "$EMBEDDING_MODEL/ov" "$RERANK_MODEL/ov") all_exist=true for model in "${MODELS[@]}"; do @@ -126,3 +127,11 @@ else mkdir -p $1/$PHI4_MODEL convert_tokenizer 
$PHI4_MODEL --with_detokenizer -o $1/$PHI4_MODEL fi + +if [ -d "$1/$MISTRAL_MODEL" ]; then + echo "Models directory $1/$MISTRAL_MODEL exists. Skipping downloading models." +else + mkdir -p $1/$MISTRAL_MODEL + convert_tokenizer $MISTRAL_MODEL --with_detokenizer -o $1/$MISTRAL_MODEL +fi + diff --git a/src/BUILD b/src/BUILD index 3080fffb74..6829e9008f 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2511,6 +2511,7 @@ cc_test( "test/llm/output_parsers/qwen3_output_parser_test.cpp", "test/llm/output_parsers/hermes3_output_parser_test.cpp", "test/llm/output_parsers/phi4_output_parser_test.cpp", + "test/llm/output_parsers/mistral_output_parser_test.cpp", "test/llm/output_parsers/partial_json_builder_test.cpp", ], "//:disable_python" : [], diff --git a/src/llm/BUILD b/src/llm/BUILD index acff1faae9..26bf208a95 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -114,6 +114,7 @@ cc_library( "io_processing/hermes3/tool_parser.hpp", "io_processing/llama3/tool_parser.hpp", "io_processing/phi4/tool_parser.hpp", + "io_processing/mistral/tool_parser.hpp", "io_processing/qwen3/reasoning_parser.hpp", "io_processing/output_parser.hpp", "io_processing/partial_json_builder.hpp", @@ -122,6 +123,7 @@ cc_library( "io_processing/hermes3/tool_parser.cpp", "io_processing/llama3/tool_parser.cpp", "io_processing/phi4/tool_parser.cpp", + "io_processing/mistral/tool_parser.cpp", "io_processing/qwen3/reasoning_parser.cpp", "io_processing/output_parser.cpp", "io_processing/partial_json_builder.cpp",
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
new file mode 100644
index 0000000000..7313d019fb
--- /dev/null
+++ b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -0,0 +1,96 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <vector>
+
+#pragma warning(push)
+#pragma warning(disable : 6313)
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+#pragma warning(pop)
+
+#include "../../../logging.hpp"
+#include "tool_parser.hpp"
+#include "../utils.hpp"
+
+namespace ovms {
+
+// Extracts tool calls from a complete generation and appends them to parsedOutput.toolCalls.
+//
+// Nothing is parsed unless parsedOutput.content is non-empty and the first generated token
+// equals botTokenId. The content is then parsed as JSON and must be an array; every element
+// that is an object with a string "name" and an object "arguments" becomes a ToolCall with a
+// generated id, while any other element is skipped. Once the array has been walked the
+// content is cleared, even if some elements were skipped. If the content is not a valid JSON
+// array, parsedOutput is left unchanged.
+void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
+    if (parsedOutput.content.empty() || generatedTokens.empty()) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls");
+        return;
+    }
+
+    // Only a generation that opens with the tool call token is treated as a tool call.
+    if (generatedTokens[0] != this->botTokenId) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Generated output does not start with [TOOL_CALLS] token, skipping tool calls parsing");
+        return;
+    }
+
+    rapidjson::Document toolsDoc;
+    toolsDoc.Parse(parsedOutput.content.c_str());
+    if (toolsDoc.HasParseError() || !toolsDoc.IsArray()) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse tool calls content as JSON array");
+        return;
+    }
+
+    for (auto& toolVal : toolsDoc.GetArray()) {
+        if (!toolVal.IsObject()) {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
+            continue;
+        }
+        ToolCall toolCall;
+        if (toolVal.HasMember("name") && toolVal["name"].IsString()) {
+            toolCall.name = toolVal["name"].GetString();
+        } else {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid name field");
+            continue;
+        }
+
+        if (toolVal.HasMember("arguments") && toolVal["arguments"].IsObject()) {
+            // Arguments are handed over as a JSON string, so serialize the object back to text.
+            rapidjson::StringBuffer sb;
+            rapidjson::Writer<rapidjson::StringBuffer> toolWriter(sb);
+            toolVal["arguments"].Accept(toolWriter);
+            toolCall.arguments = sb.GetString();
+        } else {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call does not contain valid arguments object");
+            continue;
+        }
+        toolCall.id = generateRandomId();  // Generate a random ID for the tool call
+        parsedOutput.toolCalls.push_back(toolCall);
+    }
+    parsedOutput.content.clear();
+}
+
+// Streaming counterpart of parse(). Not implemented: the chunk is ignored and std::nullopt is returned.
+std::optional<rapidjson::Document> MistralToolParser::parseChunk(const std::string& chunk) {
+    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "MistralToolParser::parseChunk is not implemented");
+    return std::nullopt;
+}
+}  // namespace ovms
diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
new file mode 100644
index 0000000000..fe715a890c
--- /dev/null
+++ b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -0,0 +1,60 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <openvino/genai/tokenizer.hpp>
+#include <optional>
+#include <string>
+#include <vector>
+
+#pragma warning(push)
+#pragma warning(disable : 6313)
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+#pragma warning(pop)
+
+#include "../base_output_parser.hpp"
+
+namespace ovms {
+// Tool parser for Mistral output: a JSON array of objects with "name" and "arguments" fields
+// in a generation whose first token is the [TOOL_CALLS] special token.
+class MistralToolParser : public BaseOutputParser {
+    // Token id the first generated token is compared with to detect tool calls.
+    // NOTE(review): hard-coded; presumably the id of [TOOL_CALLS] in the Mistral-7B-Instruct-v0.3 tokenizer - confirm for other tokenizers.
+    const int64_t botTokenId = 5;  // [TOOL_CALLS]
+
+public:
+    MistralToolParser() = delete;
+    explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) :
+        BaseOutputParser(tokenizer) {}
+
+    // Extracts tool calls from a complete generation into parsedOutput.toolCalls.
+    void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
+    // Streaming parsing is not implemented; always returns std::nullopt.
+    std::optional<rapidjson::Document> parseChunk(const std::string& chunk) override;
+    // Tag that opens the tool calls section.
+    const std::string& getParsingStartTag() const override {
+        static const std::string toolCallStartTag = "[TOOL_CALLS]";
+        return toolCallStartTag;
+    }
+    // Tool calls are expected to be the last part of the content, so we do not specify an end tag.
+    const std::string& getParsingEndTag() const override {
+        static const std::string toolCallEndTag = "";
+        return toolCallEndTag;
+    }
+};
+}  // namespace ovms
diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index 115c1d9019..765266a44c 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -19,6 +19,7 @@ #include "llama3/tool_parser.hpp" #include "hermes3/tool_parser.hpp" #include "phi4/tool_parser.hpp" +#include "mistral/tool_parser.hpp" #include "qwen3/reasoning_parser.hpp" namespace ovms { @@ -46,6 +47,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to toolParser = std::make_unique(tokenizer); } else if (toolParserName == "phi4") { toolParser = std::make_unique(tokenizer); + } else if (toolParserName == "mistral") { + toolParser = std::make_unique(tokenizer); } else if (!toolParserName.empty()) { throw std::runtime_error("Unsupported tool parser: " + toolParserName); } diff --git
a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index edcbcb3a67..150054a299 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -60,6 +60,8 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr +#include +#include +#include + +#include "../../../llm/io_processing/base_output_parser.hpp" +#include "../../../llm/io_processing/output_parser.hpp" +#include "../../test_utils.hpp"
+
+using namespace ovms;
+
+#ifdef _WIN32
+const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\mistralai\\Mistral-7B-Instruct-v0.3";
+#else
+// Hardcoded for usage in docker container
+const std::string tokenizerPath = "/ovms/src/test/llm_testing/mistralai/Mistral-7B-Instruct-v0.3/";
+#endif
+
+static ov::genai::Tokenizer mistralTokenizer(tokenizerPath);
+
+// Fixture that builds a fresh OutputParser with the "mistral" tool parser before every test.
+class MistralOutputParserTest : public ::testing::Test {
+protected:
+    std::unique_ptr<OutputParser> outputParser;
+
+    void SetUp() override {
+        outputParser = std::make_unique<OutputParser>(mistralTokenizer, "mistral", "");
+    }
+
+    // Encodes input with the Mistral tokenizer (add_special_tokens set to false) and returns
+    // what the parser produces for the resulting token ids.
+    ParsedOutput parseText(const std::string& input) {
+        auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+        std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+        return outputParser->parse(generatedTokens, true);
+    }
+};
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_FALSE(parsedOutput.toolCalls[0].id.empty());  // ID should be generated
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}},"
+                        "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}},"
+                        "{\"name\": \"third_tool\", \"arguments\": {\"key\": \"value\"}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 3);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_FALSE(parsedOutput.toolCalls[0].id.empty());  // ID should be generated
+    auto firstToolCallId = parsedOutput.toolCalls[0].id;
+
+    EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}");
+    EXPECT_FALSE(parsedOutput.toolCalls[1].id.empty());  // ID should be generated
+    auto secondToolCallId = parsedOutput.toolCalls[1].id;
+    EXPECT_NE(firstToolCallId, secondToolCallId);  // IDs should be different
+
+    EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}");
+    EXPECT_FALSE(parsedOutput.toolCalls[2].id.empty());  // ID should be generated
+    auto thirdToolCallId = parsedOutput.toolCalls[2].id;
+    EXPECT_NE(firstToolCallId, thirdToolCallId);   // IDs should be different
+    EXPECT_NE(secondToolCallId, thirdToolCallId);  // IDs should be different
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInvalid) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}},"
+                        "{\"tool_name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}},"
+                        "{\"name\": \"third_tool\", \"options\": {\"key\": \"value\"}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    // Only the first tool call is valid: the second one has no "name" field and the third one has no "arguments" field
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_FALSE(parsedOutput.toolCalls[0].id.empty());  // ID should be generated
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
+    std::string input = "This is a regular model response without tool calls.";
+    ParsedOutput parsedOutput = parseText(input);
+    EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls.");
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
+    EXPECT_EQ(parsedOutput.reasoning, "");
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
+    std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    // The tool call tag is not at the very beginning of the output, so everything is returned as content
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) {
+    std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call.";
+    ParsedOutput parsedOutput = parseText(input);
+    // The tool call tag is not at the very beginning of the output, so everything is returned as content
+    EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call.");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsContentOnly) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n[TOOL_CALLS][{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    // Content after 'TOOL_CALLS' cannot be parsed as array of JSON objects, so it is treated as content
+    EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n [{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 0);  // No valid tool calls parsed
+}
+
+TEST_F(MistralOutputParserTest, ParseToolCallOutputWithArrayArguments) {
+    std::string input = "[TOOL_CALLS][{\"name\": \"extractLastTransactionId\", \"arguments\": { \"filepath\": \"/var/log/db.log\", \"status\": [\"completed\", \"failed\"], \"encoding\": \"utf-8\", \"processFunction\": \"processFunction\"}}]";
+    ParsedOutput parsedOutput = parseText(input);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "extractLastTransactionId");
+    // Parser removes whitespaces, so we expect arguments value to be without spaces
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}");
+    EXPECT_FALSE(parsedOutput.toolCalls[0].id.empty());  // ID should be generated
+}
diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat
index 58521134b5..8ddcc7e96c 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -38,8 +38,9 @@ set "QWEN3_MODEL=Qwen/Qwen3-8B" set "LLAMA3_MODEL=meta-llama/Llama-3.1-8B-Instruct" set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B" set "PHI4_MODEL=microsoft/Phi-4-mini-instruct" +set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3" -set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL% +set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL% %MISTRAL_MODEL% set "ALL_EXIST=1" for %%M in ("%MODELS_LIST%") do ( @@ -152,4 +153,13 @@ if exist "%~1\%PHI4_MODEL%" ( if !errorlevel! neq 0 exit /b !errorlevel! ) +if exist "%~1\%MISTRAL_MODEL%" ( + echo Models directory %~1\%MISTRAL_MODEL% exists. Skipping downloading models. +) else ( + echo Downloading tokenizer and detokenizer for Mistral model to %~1\%MISTRAL_MODEL% directory. + mkdir "%~1\%MISTRAL_MODEL%" + convert_tokenizer "%MISTRAL_MODEL%" --with_detokenizer -o "%~1\%MISTRAL_MODEL%" + if !errorlevel! neq 0 exit /b !errorlevel! +) + endlocal