Skip to content

Commit c7f6cfe

Browse files
atobiszei, rasapala, mzegla, Copilot
authored
GGUF model loading (#3571)
Co-authored-by: Rafal Sapala <[email protected]> Co-authored-by: Miłosz Żeglarski <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent e7d3b10 commit c7f6cfe

33 files changed

+1153
-161
lines changed

external/lfs.patch

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ new file mode 100644
290290
index 000000000..cd3aa39fd
291291
--- /dev/null
292292
+++ b/src/libgit2/lfs_filter.c
293-
@@ -0,0 +1,497 @@
293+
@@ -0,0 +1,500 @@
294294
+/*
295295
+/ Copyright 2025 Intel Corporation
296296
+/
@@ -643,7 +643,7 @@ index 000000000..cd3aa39fd
643643
+ /* Override the Accept header curl would otherwise add by itself */
644644
+ chunk = curl_slist_append(chunk, "Accept: application/vnd.git-lfs+json");
645645
+ /* Add a custom header */
646-
+ chunk = curl_slist_append(chunk, "Content-type: application/json");
646+
+ chunk = curl_slist_append(chunk, "Content-Type: application/vnd.git-lfs+json");
647647
+ /* set our custom set of headers */
648648
+ curl_easy_setopt(info_curl, CURLOPT_HTTPHEADER, chunk);
649649
+ /* First set the URL that is about to receive our POST. This URL
@@ -678,6 +678,7 @@ index 000000000..cd3aa39fd
678678
+ struct memory response = { 0 };
679679
+ curl_easy_setopt(
680680
+ info_curl, CURLOPT_WRITEFUNCTION, write_callback);
681+
+ curl_easy_setopt(info_curl, CURLOPT_USERAGENT,"git-lfs/3.5.0");
681682
+ curl_easy_setopt(
682683
+ info_curl, CURLOPT_WRITEDATA, (void *)&response);
683684
+
@@ -711,7 +712,7 @@ index 000000000..cd3aa39fd
711712
+ get_digit(la->lfs_size), 0,
712713
+ time(NULL), time(NULL) };
713714
+
714-
+ const char *href_regexp = "{\"href\":\"(.*)\"}}}]}";
715+
+ const char *href_regexp = "\"download\"\\s*:\\s*\\{\\s*\"href\":\"([^\"]+)\"";
715716
+ if (get_lfs_info_match(&res_str, href_regexp) < 0) {
716717
+ /* always cleanup */
717718
+ curl_easy_cleanup(dl_curl);
@@ -724,6 +725,7 @@ index 000000000..cd3aa39fd
724725
+ curl_easy_setopt(dl_curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
725726
+ curl_easy_setopt(dl_curl, CURLOPT_FOLLOWLOCATION, 1L);
726727
+ curl_easy_setopt(dl_curl, CURLOPT_USE_SSL, CURLUSESSL_ALL);
728+
+ curl_easy_setopt(dl_curl, CURLOPT_USERAGENT,"git-lfs/3.5.0");
727729
+ curl_easy_setopt(dl_curl, CURLOPT_WRITEFUNCTION, file_write_callback);
728730
+ curl_easy_setopt(dl_curl, CURLOPT_WRITEDATA, (void *)&ftpfile);
729731
+
@@ -762,6 +764,7 @@ index 000000000..cd3aa39fd
762764
+
763765
+on_error:
764766
+ git__free(payload);
767+
+ fprintf(stderr, "LFS download failed for file %s\n", la->full_path);
765768
+ return;
766769
+}
767770
+

src/BUILD

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ ovms_cc_library(
294294
"ovms_exit_codes",
295295
],
296296
visibility = ["//visibility:public",],
297+
additional_copts = COPTS_DROGON,
297298
)
298299
ovms_cc_library(
299300
name = "libovms_cliparser",
@@ -2660,10 +2661,12 @@ cc_test(
26602661
":inferencerequest_test",
26612662
":libtest_environment",
26622663
":libtest_gpuenvironment",
2664+
":test_gguf_environment",
26632665
":network_utils_test",
26642666
"//src:custom_nodes_common_buffersqueue",
26652667
"@com_google_googletest//:gtest",
26662668
":pull_hf_model_test",
2669+
":pull_gguf_hf_model_test",
26672670
":listdirectorymodels_test",
26682671
":graph_export_test",
26692672
":config_export_test",
@@ -2751,6 +2754,20 @@ cc_library(
27512754
local_defines = COMMON_LOCAL_DEFINES,
27522755
copts = COPTS_TESTS,
27532756
)
2757+
cc_library(
2758+
name = "test_gguf_environment",
2759+
linkstatic = 1,
2760+
alwayslink = True,
2761+
hdrs = ["test/gguf_environment.hpp",],
2762+
srcs = ["test/gguf_environment.cpp",],
2763+
linkopts = [],
2764+
deps = [
2765+
"libovmslogging",
2766+
"@com_google_googletest//:gtest",
2767+
],
2768+
local_defines = COMMON_LOCAL_DEFINES,
2769+
copts = COPTS_TESTS,
2770+
)
27542771
cc_library(
27552772
name = "inferencerequest_test",
27562773
linkstatic = 1,
@@ -2788,6 +2805,25 @@ cc_library(
27882805
local_defines = COMMON_LOCAL_DEFINES,
27892806
copts = COPTS_TESTS,
27902807
)
2808+
cc_library(
2809+
name = "pull_gguf_hf_model_test",
2810+
linkstatic = 1,
2811+
alwayslink = True,
2812+
srcs = ["test/pull_gguf_hf_model_test.cpp"],
2813+
linkopts = [],
2814+
deps = [
2815+
"@libgit2_engine//:libgit2_engine",
2816+
"//src/servables_config_manager_module:listmodels",
2817+
":test_utils",
2818+
":test_gguf_environment",
2819+
"//src/pull_module:gguf_downloader",
2820+
"//src:ovms_lib",
2821+
"libovmsstring_utils",
2822+
"@com_google_googletest//:gtest",
2823+
],
2824+
local_defines = COMMON_LOCAL_DEFINES,
2825+
copts = COPTS_TESTS,
2826+
)
27912827
cc_library(
27922828
name = "listdirectorymodels_test",
27932829
srcs = ["test/listdirectorymodels_test.cpp"],

src/capi_frontend/server_settings.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ enum GraphExportType : unsigned int {
3434
enum ModelDownlaodType : unsigned int {
3535
GIT_CLONE_DOWNLOAD,
3636
OPTIMUM_CLI_DOWNLOAD,
37+
GGUF_DOWNLOAD,
3738
UNKNOWN_DOWNLOAD
3839
};
3940

@@ -144,6 +145,7 @@ struct ImageGenerationGraphSettingsImpl {
144145
struct HFSettingsImpl {
145146
std::string targetDevice = "CPU";
146147
std::string sourceModel = "";
148+
std::optional<std::string> ggufFilename;
147149
std::string downloadPath = "";
148150
bool overwriteModels = false;
149151
std::optional<std::string> extraQuantizationParams;

src/cli_parser.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ void CLIParser::parse(int argc, char** argv) {
176176
"HF source model path",
177177
cxxopts::value<std::string>(),
178178
"HF_SOURCE")
179+
("gguf_filename",
180+
"Name of the GGUF file",
181+
cxxopts::value<std::string>(),
182+
"GGUF_FILENAME")
179183
("overwrite_models",
180184
"Overwrite the model if it already exists in the models repository",
181185
cxxopts::value<bool>()->default_value("false"),
@@ -459,7 +463,6 @@ void CLIParser::prepareModel(ModelsSettingsImpl& modelsSettings, HFSettingsImpl&
459463
modelsSettings.modelPath = result->operator[]("model_path").as<std::string>();
460464
modelsSettings.userSetSingleModelArguments.push_back("model_name");
461465
}
462-
463466
if (result->count("max_sequence_number")) {
464467
modelsSettings.maxSequenceNumber = result->operator[]("max_sequence_number").as<uint32_t>();
465468
modelsSettings.userSetSingleModelArguments.push_back("max_sequence_number");
@@ -537,13 +540,17 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
537540
} else {
538541
serverSettings.serverMode = HF_PULL_AND_START_MODE;
539542
}
540-
543+
if (result->count("gguf_filename")) {
544+
hfSettings.ggufFilename = result->operator[]("gguf_filename").as<std::string>();
545+
hfSettings.downloadType = GGUF_DOWNLOAD;
546+
}
541547
if (result->count("overwrite_models"))
542548
hfSettings.overwriteModels = result->operator[]("overwrite_models").as<bool>();
543549
if (result->count("source_model")) {
544550
hfSettings.sourceModel = result->operator[]("source_model").as<std::string>();
545-
// FIXME: Currently we use git clone only for OpenVINO, we will change this method of detection to parsing model files
546-
if (!startsWith(toLower(serverSettings.hfSettings.sourceModel), toLower("OpenVINO/"))) {
551+
// TODO: Currently git clone is used only for models whose source name starts with "OpenVINO/"; replace this prefix-based detection with parsing of the model files
552+
if (!startsWith(toLower(serverSettings.hfSettings.sourceModel), toLower("OpenVINO/")) &&
553+
(hfSettings.ggufFilename == std::nullopt)) {
547554
hfSettings.downloadType = OPTIMUM_CLI_DOWNLOAD;
548555
}
549556
}

src/config.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ bool Config::validate() {
128128
std::cerr << "Error: --task parameter not set." << std::endl;
129129
return false;
130130
}
131-
if (serverSettings.hfSettings.downloadType != OPTIMUM_CLI_DOWNLOAD && !startsWith(toLower(serverSettings.hfSettings.sourceModel), toLower("OpenVINO/"))) {
132-
std::cerr << "For now only OpenVINO models are supported in pulling mode";
131+
if (serverSettings.hfSettings.downloadType == GIT_CLONE_DOWNLOAD && !startsWith(toLower(serverSettings.hfSettings.sourceModel), toLower("OpenVINO/"))) {
132+
std::cerr << "For now only OpenVINO models are supported in pulling mode with git clone. Please use optimum download or gguf models instead." << std::endl;
133133
return false;
134134
}
135135
if (this->serverSettings.hfSettings.task == TEXT_GENERATION_GRAPH) {

src/graph_export/graph_export.cpp

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,30 @@ namespace ovms {
5454

5555
static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n");
5656

57-
static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const TextGenGraphSettingsImpl& graphSettings) {
57+
static std::string constructModelsPath(const std::string& modelPath, const std::optional<std::string>& ggufFilenameOpt) {
58+
std::string modelsPath;
59+
if (ggufFilenameOpt.has_value()) {
60+
modelsPath = FileSystem::joinPath({modelPath, ggufFilenameOpt.value()});
61+
#if _WIN32
62+
// On Windows, file paths use backslashes ('\') as separators. However, the graph parser used in this project expects Unix-style paths with forward slashes ('/').
63+
// If Windows-style backslashes are present, the parser may fail to locate files or misinterpret the path. To ensure compatibility, we replace all backslashes with forward slashes.
64+
// This is safe because Windows APIs accept forward slashes in file paths.
65+
if (FileSystem::getOsSeparator() != "/") {
66+
std::replace(modelsPath.begin(), modelsPath.end(), '\\', '/');
67+
}
68+
#endif
69+
} else {
70+
modelsPath = modelPath;
71+
}
72+
SPDLOG_TRACE("Models path: {}, modelPath:{}, ggufFilenameOpt:{}", modelsPath, modelPath, ggufFilenameOpt.value_or("std::nullopt"));
73+
return modelsPath;
74+
}
75+
76+
static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const TextGenGraphSettingsImpl& graphSettings, const std::optional<std::string> ggufFilename) {
5877
std::ostringstream oss;
5978
oss << OVMS_VERSION_GRAPH_LINE;
79+
std::string modelsPath = constructModelsPath(graphSettings.modelPath, ggufFilename);
80+
SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt"));
6081
// clang-format off
6182
oss << R"(
6283
input_stream: "HTTP_REQUEST_PAYLOAD:input"
@@ -80,7 +101,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath
80101
device: ")"
81102
<< graphSettings.targetDevice << R"(",
82103
models_path: ")"
83-
<< graphSettings.modelPath << R"(",
104+
<< modelsPath << R"(",
84105
plugin_config: ')"
85106
<< GraphExport::createPluginString(graphSettings.pluginConfig) << R"(',
86107
enable_prefix_caching: )"
@@ -134,6 +155,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath
134155
#if (MEDIAPIPE_DISABLE == 0)
135156
::mediapipe::CalculatorGraphConfig config;
136157
bool success = ::google::protobuf::TextFormat::ParseFromString(oss.str(), &config);
158+
SPDLOG_TRACE("Generated pbtxt: {}", oss.str());
137159
if (!success) {
138160
SPDLOG_ERROR("Created graph config file couldn't be parsed - check used task parameters values.");
139161
return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID;
@@ -325,20 +347,20 @@ Status GraphExport::createServableConfig(const std::string& directoryPath, const
325347
auto status = LocalFileSystem::exists(directoryPath, &exists);
326348
if (!status.ok())
327349
return status;
328-
329-
bool is_dir = false;
330-
status = LocalFileSystem::isDir(directoryPath, &is_dir);
331-
if (!status.ok())
332-
return status;
333-
334-
if (!is_dir) {
335-
SPDLOG_ERROR("Graph path is not a directory: {}", directoryPath);
336-
return StatusCode::PATH_INVALID;
350+
if (!hfSettings.ggufFilename.has_value()) {
351+
bool is_dir = false;
352+
status = LocalFileSystem::isDir(directoryPath, &is_dir);
353+
if (!status.ok())
354+
return status;
355+
356+
if (!is_dir) {
357+
SPDLOG_ERROR("Graph path is not a directory: {}", directoryPath);
358+
return StatusCode::PATH_INVALID;
359+
}
337360
}
338-
339361
if (hfSettings.task == TEXT_GENERATION_GRAPH) {
340362
if (std::holds_alternative<TextGenGraphSettingsImpl>(hfSettings.graphSettings)) {
341-
return createTextGenerationGraphTemplate(directoryPath, std::get<TextGenGraphSettingsImpl>(hfSettings.graphSettings));
363+
return createTextGenerationGraphTemplate(directoryPath, std::get<TextGenGraphSettingsImpl>(hfSettings.graphSettings), hfSettings.ggufFilename);
342364
} else {
343365
SPDLOG_ERROR("Graph options not initialized for text generation.");
344366
return StatusCode::INTERNAL_ERROR;

src/llm/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ ovms_cc_library(
177177
"//src:libovmsstatus",
178178
"//src:libovmsmediapipe_utils",
179179
"//src:libovmsprofiler",
180+
"//src:libovmsstring_utils",
180181
"//src:libovmsfilesystem",
181182
"llmcalculator_cc_proto",
182183
":openai_completions_api_handler",

src/llm/io_processing/base_generation_config_builder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//*****************************************************************************
1616

1717
#include <limits>
18+
#include <string>
1819
#include <openvino/genai/generation_config.hpp>
1920
#include "base_generation_config_builder.hpp"
2021

@@ -29,6 +30,10 @@ void BaseGenerationConfigBuilder::setStructuralTagsConfig(const ov::genai::Struc
2930
}
3031
}
3132

33+
void BaseGenerationConfigBuilder::addStopString(const std::string& decodedStopString) {
34+
config.stop_strings.insert(decodedStopString);
35+
}
36+
3237
void BaseGenerationConfigBuilder::validateStructuredOutputConfig(ov::genai::Tokenizer& tokenizer) {
3338
if (config.structured_output_config.has_value()) {
3439
config.structured_output_config.value().validate(tokenizer);

src/llm/io_processing/base_generation_config_builder.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
// limitations under the License.
1515
//*****************************************************************************
1616
#pragma once
17+
#include <string>
18+
1719
#include <openvino/genai/generation_config.hpp>
1820
#include <openvino/genai/tokenizer.hpp>
1921
#include "../apis/openai_request.hpp"
@@ -40,6 +42,11 @@ class BaseGenerationConfigBuilder {
4042

4143
ov::genai::GenerationConfig& getConfig() { return config; }
4244

45+
/*
46+
* Adds a stop string to the generation config. Used when the model server needs to add a stop string that was not provided in the request.
47+
*/
48+
void addStopString(const std::string& decodedStopString);
49+
4350
/*
4451
* Validates the structured output configuration, if exists.
4552
* Throws exception if validation fails.

src/llm/io_processing/generation_config_builder.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,9 @@ class GenerationConfigBuilder {
7070
void parseConfigFromRequest(const OpenAIChatCompletionsRequest& request) {
7171
builder_impl->parseConfigFromRequest(request);
7272
}
73+
74+
void addStopString(const std::string& decodedStopString) {
75+
builder_impl->addStopString(decodedStopString);
76+
}
7377
};
7478
} // namespace ovms

0 commit comments

Comments
 (0)