From 84bcf504cdcb479fa836329f59de870b8ffffb0a Mon Sep 17 00:00:00 2001
From: albert-polak
Date: Fri, 8 Aug 2025 16:15:17 +0200
Subject: [PATCH 1/2] adding a fake ollama version endpoint

---
 tools/server/server.cpp | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index a255d481a4d1c..a0ff7f12bff66 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -3810,7 +3810,8 @@ int main(int argc, char ** argv) {
             "/health",
             "/models",
             "/v1/models",
-            "/api/tags"
+            "/api/tags",
+            "/api/version"
         };
 
         // If API key is not set, skip validation
@@ -3849,7 +3850,7 @@ int main(int argc, char ** argv) {
             if (req.path == "/" || tmp.back() == "html") {
                 res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
                 res.status = 503;
-            } else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
+            } else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags" || req.path == "/api/version") {
                 // allow the models endpoint to be accessed during loading
                 return true;
             } else {
@@ -4545,6 +4546,22 @@ int main(int argc, char ** argv) {
         res_ok(res, models);
     };
 
+    const auto handle_ollama_version = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+        json version;
+        char* fake_ollama_version = std::getenv("FAKE_OLLAMA_VERSION");
+        if (fake_ollama_version) {
+            version = {
+                {"version", std::string(fake_ollama_version)}
+            };
+        } else {
+            version = {
+                {"version", "0.6.4"}
+            };
+        }
+
+        res_ok(res, version);
+    };
+
     const auto handle_tokenize = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
         const json body = json::parse(req.body);
 
@@ -4883,6 +4900,7 @@ int main(int argc, char ** argv) {
     svr->Get (params.api_prefix + "/models", handle_models); // public endpoint (no API key check)
     svr->Get (params.api_prefix + "/v1/models", handle_models); // public endpoint (no API key check)
     svr->Get (params.api_prefix + "/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
+    svr->Get (params.api_prefix + "/api/version", handle_ollama_version); // ollama specific endpoint. public endpoint (no API key check)
     svr->Post(params.api_prefix + "/completion", handle_completions); // legacy
     svr->Post(params.api_prefix + "/completions", handle_completions);
     svr->Post(params.api_prefix + "/v1/completions", handle_completions_oai);

From 5589224e8b20b247ecbe624aebf6b4129433bd3e Mon Sep 17 00:00:00 2001
From: albert-polak
Date: Mon, 11 Aug 2025 09:51:43 +0200
Subject: [PATCH 2/2] apply suggestions from comments

---
 tools/server/server.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index a0ff7f12bff66..21bad4f8e0bd4 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -4546,16 +4546,16 @@ int main(int argc, char ** argv) {
         res_ok(res, models);
     };
 
-    const auto handle_ollama_version = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+    const auto handle_version = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
         json version;
-        char* fake_ollama_version = std::getenv("FAKE_OLLAMA_VERSION");
-        if (fake_ollama_version) {
+        char* version_override = std::getenv("LLAMA_API_VERSION_OVERRIDE");
+        if (version_override) {
             version = {
-                {"version", std::string(fake_ollama_version)}
+                {"version", std::string(version_override)}
             };
         } else {
             version = {
-                {"version", "0.6.4"}
+                {"version", std::to_string(LLAMA_BUILD_NUMBER)}
             };
         }
 
         res_ok(res, version);
     };
@@ -4900,7 +4900,7 @@ int main(int argc, char ** argv) {
     svr->Get (params.api_prefix + "/models", handle_models); // public endpoint (no API key check)
     svr->Get (params.api_prefix + "/v1/models", handle_models); // public endpoint (no API key check)
     svr->Get (params.api_prefix + "/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
-    svr->Get (params.api_prefix + "/api/version", handle_ollama_version); // ollama specific endpoint. public endpoint (no API key check)
+    svr->Get (params.api_prefix + "/api/version", handle_version); // public endpoint (no API key check)
     svr->Post(params.api_prefix + "/completion", handle_completions); // legacy
    svr->Post(params.api_prefix + "/completions", handle_completions);
     svr->Post(params.api_prefix + "/v1/completions", handle_completions_oai);
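Review note, not part of the patches above: a minimal client-side sketch for trying the new route locally, written against the same cpp-httplib and nlohmann/json headers the server builds with. The host/port (llama-server's usual default of localhost:8080) and the exact response shape are assumptions inferred from the handler in this series, not from merged upstream behavior.

    // probe_api_version.cpp -- sketch of how an Ollama-style client would call the new route
    #include <cstdio>
    #include <string>

    #include "httplib.h"
    #include <nlohmann/json.hpp>

    using json = nlohmann::ordered_json;

    int main() {
        // The endpoint is registered as public, so no Authorization header is needed.
        httplib::Client cli("localhost", 8080);

        auto res = cli.Get("/api/version");
        if (!res || res->status != 200) {
            std::fprintf(stderr, "request failed (is llama-server running on :8080?)\n");
            return 1;
        }

        // With LLAMA_API_VERSION_OVERRIDE unset, the handler reports the llama.cpp
        // build number as a string; when the variable is set, it echoes it verbatim.
        const json body = json::parse(res->body);
        std::printf("reported version: %s\n", body.at("version").get<std::string>().c_str());
        return 0;
    }

If a client gates features on a minimum Ollama version, starting the server with LLAMA_API_VERSION_OVERRIDE set (e.g. to "0.6.4") should make the reported value satisfy that check, while the default keeps the endpoint honest about the underlying llama.cpp build.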