Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 26daec9

Browse files
authored
Merge branch 'dev' into fix/linux-arm
2 parents 537caf5 + 3456c7b commit 26daec9

File tree

12 files changed

+151
-91
lines changed

12 files changed

+151
-91
lines changed

engine/controllers/models.cc

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -488,65 +488,40 @@ void Models::StartModel(
488488
if (!http_util::HasFieldInReq(req, callback, "model"))
489489
return;
490490
auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
491-
StartParameterOverride params_override;
492-
if (auto& o = (*(req->getJsonObject()))["prompt_template"]; !o.isNull()) {
493-
params_override.custom_prompt_template = o.asString();
494-
}
495-
496-
if (auto& o = (*(req->getJsonObject()))["cache_enabled"]; !o.isNull()) {
497-
params_override.cache_enabled = o.asBool();
498-
}
499-
500-
if (auto& o = (*(req->getJsonObject()))["ngl"]; !o.isNull()) {
501-
params_override.ngl = o.asInt();
502-
}
503-
504-
if (auto& o = (*(req->getJsonObject()))["n_parallel"]; !o.isNull()) {
505-
params_override.n_parallel = o.asInt();
506-
}
507-
508-
if (auto& o = (*(req->getJsonObject()))["ctx_len"]; !o.isNull()) {
509-
params_override.ctx_len = o.asInt();
510-
}
511-
512-
if (auto& o = (*(req->getJsonObject()))["cache_type"]; !o.isNull()) {
513-
params_override.cache_type = o.asString();
514-
}
515491

492+
std::optional<std::string> mmproj;
516493
if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) {
517-
params_override.mmproj = o.asString();
494+
mmproj = o.asString();
518495
}
519496

497+
auto bypass_llama_model_path = false;
520498
// Support both llama_model_path and model_path for backward compatibility
521499
// model_path has higher priority
522500
if (auto& o = (*(req->getJsonObject()))["llama_model_path"]; !o.isNull()) {
523-
params_override.model_path = o.asString();
501+
auto model_path = o.asString();
524502
if (auto& mp = (*(req->getJsonObject()))["model_path"]; mp.isNull()) {
525503
// Bypass if model does not exist in DB and llama_model_path exists
526-
if (std::filesystem::exists(params_override.model_path.value()) &&
504+
if (std::filesystem::exists(model_path) &&
527505
!model_service_->HasModel(model_handle)) {
528506
CTL_INF("llama_model_path exists, bypass check model id");
529-
params_override.bypass_llama_model_path = true;
507+
bypass_llama_model_path = true;
530508
}
531509
}
532510
}
533511

534-
if (auto& o = (*(req->getJsonObject()))["model_path"]; !o.isNull()) {
535-
params_override.model_path = o.asString();
536-
}
512+
auto bypass_model_check = (mmproj.has_value() || bypass_llama_model_path);
537513

538514
auto model_entry = model_service_->GetDownloadedModel(model_handle);
539-
if (!model_entry.has_value() && !params_override.bypass_model_check()) {
515+
if (!model_entry.has_value() && !bypass_model_check) {
540516
Json::Value ret;
541517
ret["message"] = "Cannot find model: " + model_handle;
542518
auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
543519
resp->setStatusCode(drogon::k400BadRequest);
544520
callback(resp);
545521
return;
546522
}
547-
std::string engine_name = params_override.bypass_model_check()
548-
? kLlamaEngine
549-
: model_entry.value().engine;
523+
std::string engine_name =
524+
bypass_model_check ? kLlamaEngine : model_entry.value().engine;
550525
auto engine_validate = engine_service_->IsEngineReady(engine_name);
551526
if (engine_validate.has_error()) {
552527
Json::Value ret;
@@ -565,7 +540,9 @@ void Models::StartModel(
565540
return;
566541
}
567542

568-
auto result = model_service_->StartModel(model_handle, params_override);
543+
auto result = model_service_->StartModel(
544+
model_handle, *(req->getJsonObject()) /*params_override*/,
545+
bypass_model_check);
569546
if (result.has_error()) {
570547
Json::Value ret;
571548
ret["message"] = result.error();
@@ -668,7 +645,7 @@ void Models::AddRemoteModel(
668645

669646
auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
670647
auto engine_name = (*(req->getJsonObject())).get("engine", "").asString();
671-
648+
672649
auto engine_validate = engine_service_->IsEngineReady(engine_name);
673650
if (engine_validate.has_error()) {
674651
Json::Value ret;
@@ -687,7 +664,7 @@ void Models::AddRemoteModel(
687664
callback(resp);
688665
return;
689666
}
690-
667+
691668
config::RemoteModelConfig model_config;
692669
model_config.LoadFromJson(*(req->getJsonObject()));
693670
cortex::db::Models modellist_utils_obj;

engine/services/engine_service.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ cpp::result<bool, std::string> EngineService::UnzipEngine(
132132
CTL_INF("Found cuda variant, extract it");
133133
found_cuda = true;
134134
// extract binary
135-
auto cuda_path =
136-
file_manager_utils::GetCudaToolkitPath(NormalizeEngine(engine));
135+
auto cuda_path = file_manager_utils::GetCudaToolkitPath(
136+
NormalizeEngine(engine), true);
137137
archive_utils::ExtractArchive(path + "/" + cf, cuda_path.string(),
138138
true);
139139
}
@@ -434,7 +434,8 @@ cpp::result<bool, std::string> EngineService::DownloadCuda(
434434
}};
435435

436436
auto on_finished = [engine](const DownloadTask& finishedTask) {
437-
auto engine_path = file_manager_utils::GetCudaToolkitPath(engine);
437+
auto engine_path = file_manager_utils::GetCudaToolkitPath(engine, true);
438+
438439
archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(),
439440
engine_path.string());
440441
try {

engine/services/file_service.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include "file_service.h"
22
#include <cstdint>
3-
#include "utils/ulid/ulid.hh"
3+
#include "utils/ulid_generator.h"
44

55
cpp::result<OpenAi::File, std::string> FileService::UploadFile(
66
const std::string& filename, const std::string& purpose,
@@ -11,7 +11,7 @@ cpp::result<OpenAi::File, std::string> FileService::UploadFile(
1111
std::chrono::system_clock::now().time_since_epoch())
1212
.count();
1313

14-
auto file_id{"file-" + ulid::Marshal(ulid::CreateNowRand())};
14+
auto file_id{"file-" + ulid::GenerateUlid()};
1515
OpenAi::File file;
1616
file.id = file_id;
1717
file.object = "file";

engine/services/message_service.cc

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "services/message_service.h"
22
#include "utils/logging_utils.h"
33
#include "utils/result.hpp"
4-
#include "utils/ulid/ulid.hh"
4+
#include "utils/ulid_generator.h"
55

66
cpp::result<OpenAi::Message, std::string> MessageService::CreateMessage(
77
const std::string& thread_id, const OpenAi::Role& role,
@@ -27,11 +27,8 @@ cpp::result<OpenAi::Message, std::string> MessageService::CreateMessage(
2727
std::get<std::vector<std::unique_ptr<OpenAi::Content>>>(content));
2828
}
2929

30-
auto ulid = ulid::CreateNowRand();
31-
auto msg_id = ulid::Marshal(ulid);
32-
3330
OpenAi::Message msg;
34-
msg.id = msg_id;
31+
msg.id = ulid::GenerateUlid();
3532
msg.object = "thread.message";
3633
msg.created_at = seconds_since_epoch;
3734
msg.thread_id = thread_id;

engine/services/model_service.cc

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -749,19 +749,28 @@ cpp::result<void, std::string> ModelService::DeleteModel(
749749
}
750750

751751
cpp::result<StartModelResult, std::string> ModelService::StartModel(
752-
const std::string& model_handle,
753-
const StartParameterOverride& params_override) {
752+
const std::string& model_handle, const Json::Value& params_override,
753+
bool bypass_model_check) {
754754
namespace fs = std::filesystem;
755755
namespace fmu = file_manager_utils;
756756
cortex::db::Models modellist_handler;
757757
config::YamlHandler yaml_handler;
758+
std::optional<std::string> custom_prompt_template;
759+
std::optional<int> ctx_len;
760+
if (auto& o = params_override["prompt_template"]; !o.isNull()) {
761+
custom_prompt_template = o.asString();
762+
}
763+
764+
if (auto& o = params_override["ctx_len"]; !o.isNull()) {
765+
ctx_len = o.asInt();
766+
}
758767

759768
try {
760769
constexpr const int kDefautlContextLength = 8192;
761770
int max_model_context_length = kDefautlContextLength;
762771
Json::Value json_data;
763772
// Currently we don't support downloading vision models, so we need to bypass the check
764-
if (!params_override.bypass_model_check()) {
773+
if (!bypass_model_check) {
765774
auto model_entry = modellist_handler.GetModelInfo(model_handle);
766775
if (model_entry.has_error()) {
767776
CTL_WRN("Error: " + model_entry.error());
@@ -839,29 +848,19 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
839848
}
840849

841850
json_data["model"] = model_handle;
842-
if (auto& cpt = params_override.custom_prompt_template;
843-
!cpt.value_or("").empty()) {
851+
if (auto& cpt = custom_prompt_template; !cpt.value_or("").empty()) {
844852
auto parse_prompt_result = string_utils::ParsePrompt(cpt.value());
845853
json_data["system_prompt"] = parse_prompt_result.system_prompt;
846854
json_data["user_prompt"] = parse_prompt_result.user_prompt;
847855
json_data["ai_prompt"] = parse_prompt_result.ai_prompt;
848856
}
849857

850-
#define ASSIGN_IF_PRESENT(json_obj, param_override, param_name) \
851-
if (param_override.param_name) { \
852-
json_obj[#param_name] = param_override.param_name.value(); \
853-
}
858+
json_helper::MergeJson(json_data, params_override);
854859

855-
ASSIGN_IF_PRESENT(json_data, params_override, cache_enabled);
856-
ASSIGN_IF_PRESENT(json_data, params_override, ngl);
857-
ASSIGN_IF_PRESENT(json_data, params_override, n_parallel);
858-
ASSIGN_IF_PRESENT(json_data, params_override, cache_type);
859-
ASSIGN_IF_PRESENT(json_data, params_override, mmproj);
860-
ASSIGN_IF_PRESENT(json_data, params_override, model_path);
861-
#undef ASSIGN_IF_PRESENT
862-
if (params_override.ctx_len) {
860+
// Set the latest ctx_len
861+
if (ctx_len) {
863862
json_data["ctx_len"] =
864-
std::min(params_override.ctx_len.value(), max_model_context_length);
863+
std::min(ctx_len.value(), max_model_context_length);
865864
}
866865
CTL_INF(json_data.toStyledString());
867866
auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),

engine/services/model_service.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,6 @@ struct ModelPullInfo {
2222
std::string download_url;
2323
};
2424

25-
struct StartParameterOverride {
26-
std::optional<bool> cache_enabled;
27-
std::optional<int> ngl;
28-
std::optional<int> n_parallel;
29-
std::optional<int> ctx_len;
30-
std::optional<std::string> custom_prompt_template;
31-
std::optional<std::string> cache_type;
32-
std::optional<std::string> mmproj;
33-
std::optional<std::string> model_path;
34-
bool bypass_llama_model_path = false;
35-
bool bypass_model_check() const {
36-
return mmproj.has_value() || bypass_llama_model_path;
37-
}
38-
};
39-
4025
struct StartModelResult {
4126
bool success;
4227
std::optional<std::string> warning;
@@ -82,8 +67,8 @@ class ModelService {
8267
cpp::result<void, std::string> DeleteModel(const std::string& model_handle);
8368

8469
cpp::result<StartModelResult, std::string> StartModel(
85-
const std::string& model_handle,
86-
const StartParameterOverride& params_override);
70+
const std::string& model_handle, const Json::Value& params_override,
71+
bool bypass_model_check);
8772

8873
cpp::result<bool, std::string> StopModel(const std::string& model_handle);
8974

engine/services/thread_service.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "thread_service.h"
2+
#include <chrono>
23
#include "utils/logging_utils.h"
3-
#include "utils/ulid/ulid.hh"
4+
#include "utils/ulid_generator.h"
45

56
cpp::result<OpenAi::Thread, std::string> ThreadService::CreateThread(
67
std::unique_ptr<OpenAi::ThreadToolResources> tool_resources,
@@ -12,11 +13,8 @@ cpp::result<OpenAi::Thread, std::string> ThreadService::CreateThread(
1213
std::chrono::system_clock::now().time_since_epoch())
1314
.count();
1415

15-
auto ulid = ulid::CreateNowRand();
16-
auto thread_id = ulid::Marshal(ulid);
17-
1816
OpenAi::Thread thread;
19-
thread.id = thread_id;
17+
thread.id = ulid::GenerateUlid();
2018
thread.object = "thread";
2119
thread.created_at = seconds_since_epoch;
2220

engine/test/components/test_json_helper.cc

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,61 @@ TEST(ParseJsonStringTest, EmptyString) {
3333

3434
EXPECT_TRUE(result.isNull());
3535
}
36+
37+
TEST(MergeJsonTest, MergeSimpleObjects) {
38+
Json::Value json1, json2;
39+
json1["name"] = "John";
40+
json1["age"] = 30;
41+
42+
json2["age"] = 31;
43+
json2["email"] = "john@example.com";
44+
45+
json_helper::MergeJson(json1, json2);
46+
47+
Json::Value expected;
48+
expected["name"] = "John";
49+
expected["age"] = 31;
50+
expected["email"] = "john@example.com";
51+
52+
EXPECT_EQ(json1, expected);
53+
}
54+
55+
TEST(MergeJsonTest, MergeNestedObjects) {
56+
Json::Value json1, json2;
57+
json1["person"]["name"] = "John";
58+
json1["person"]["age"] = 30;
59+
60+
json2["person"]["age"] = 31;
61+
json2["person"]["email"] = "john@example.com";
62+
63+
json_helper::MergeJson(json1, json2);
64+
65+
Json::Value expected;
66+
expected["person"]["name"] = "John";
67+
expected["person"]["age"] = 31;
68+
expected["person"]["email"] = "john@example.com";
69+
70+
EXPECT_EQ(json1, expected);
71+
}
72+
73+
TEST(MergeJsonTest, MergeArrays) {
74+
Json::Value json1, json2;
75+
json1["hobbies"] = Json::Value(Json::arrayValue);
76+
json1["hobbies"].append("reading");
77+
json1["hobbies"].append("painting");
78+
79+
json2["hobbies"] = Json::Value(Json::arrayValue);
80+
json2["hobbies"].append("hiking");
81+
json2["hobbies"].append("painting");
82+
83+
json_helper::MergeJson(json1, json2);
84+
85+
Json::Value expected;
86+
expected["hobbies"] = Json::Value(Json::arrayValue);
87+
expected["hobbies"].append("reading");
88+
expected["hobbies"].append("painting");
89+
expected["hobbies"].append("hiking");
90+
expected["hobbies"].append("painting");
91+
92+
EXPECT_EQ(json1, expected);
93+
}

engine/utils/file_manager_utils.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,14 @@ std::filesystem::path GetModelsContainerPath() {
289289
return models_container_path;
290290
}
291291

292-
std::filesystem::path GetCudaToolkitPath(const std::string& engine) {
292+
std::filesystem::path GetCudaToolkitPath(const std::string& engine,
293+
bool create_if_not_exist) {
293294
auto engine_path = getenv("ENGINE_PATH")
294295
? std::filesystem::path(getenv("ENGINE_PATH"))
295296
: GetCortexDataPath();
296297

297298
auto cuda_path = engine_path / "engines" / engine / "deps";
298-
if (!std::filesystem::exists(cuda_path)) {
299+
if (create_if_not_exist && !std::filesystem::exists(cuda_path)) {
299300
std::filesystem::create_directories(cuda_path);
300301
}
301302

engine/utils/file_manager_utils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ void CreateDirectoryRecursively(const std::string& path);
4545

4646
std::filesystem::path GetModelsContainerPath();
4747

48-
std::filesystem::path GetCudaToolkitPath(const std::string& engine);
48+
std::filesystem::path GetCudaToolkitPath(const std::string& engine,
49+
bool create_if_not_exist = false);
4950

5051
std::filesystem::path GetEnginesContainerPath();
5152

0 commit comments

Comments
 (0)