Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 8139f23

Browse files
committed
feat: update engine interface
1 parent 1641500 commit 8139f23

File tree

6 files changed

+81
-146
lines changed

6 files changed

+81
-146
lines changed

engine/controllers/engines.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) {
2323
void Engines::ListEngine(
2424
const HttpRequestPtr& req,
2525
std::function<void(const HttpResponsePtr&)>&& callback) const {
26-
std::vector<std::string> supported_engines{kLlamaEngine, kOnnxEngine,
27-
kTrtLlmEngine};
2826
Json::Value ret;
29-
for (const auto& engine : supported_engines) {
27+
auto engine_names = engine_service_->GetSupportedEngineNames().value();
28+
for (const auto& engine : engine_names) {
3029
auto installed_engines =
3130
engine_service_->GetInstalledEngineVariants(engine);
3231
if (installed_engines.has_error()) {

engine/cortex-common/EngineI.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,29 @@
11
#pragma once
22

3+
#include <filesystem>
34
#include <functional>
45
#include <memory>
56

67
#include "json/value.h"
78
#include "trantor/utils/Logger.h"
89
class EngineI {
910
public:
11+
// Options handed to EngineI::Load() when an engine library is initialised.
// In this commit EngineService::LoadEngine fills every field through a
// designated initializer before calling Load().
// NOTE(review): the bool/int/enum members have no default member
// initializers, so a default-initialized `EngineLoadOption opts;` would leave
// them indeterminate - consider adding defaults.
struct EngineLoadOption {
12+
// engine
13+
// Directory the engine library is loaded from.
std::filesystem::path engine_path;
14+
// CUDA toolkit location; LoadEngine passes
// file_manager_utils::GetCudaToolkitPath(<engine name>).
std::filesystem::path cuda_path;
15+
// True when the ENGINE_PATH environment variable is set (see LoadEngine).
bool custom_engine_path;
16+
17+
// logging
18+
// Log file path; LoadEngine builds it as logFolderPath / logLlamaCppPath.
std::filesystem::path log_path;
19+
// Taken from the Cortex config's maxLogLines by LoadEngine.
int max_log_lines;
20+
// LoadEngine passes logging_utils_helper::global_log_level.
trantor::Logger::LogLevel log_level;
21+
};
22+
1023
virtual ~EngineI() {}
1124

25+
virtual void Load(EngineLoadOption opts) = 0;
26+
1227
// cortex.llamacpp interface
1328
virtual void HandleChatCompletion(
1429
std::shared_ptr<Json::Value> json_body,

engine/services/engine_service.cc

Lines changed: 51 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ cpp::result<bool, std::string> EngineService::UninstallEngineVariant(
179179
const std::string& engine, const std::optional<std::string> version,
180180
const std::optional<std::string> variant) {
181181
auto ne = NormalizeEngine(engine);
182+
std::lock_guard<std::mutex> lock(engines_mutex_);
182183
if (IsEngineLoaded(ne)) {
183184
CTL_INF("Engine " << ne << " is already loaded, unloading it");
184185
auto unload_res = UnloadEngine(ne);
@@ -272,6 +273,7 @@ cpp::result<void, std::string> EngineService::DownloadEngine(
272273
if (selected_variant == std::nullopt) {
273274
return cpp::fail("Failed to find a suitable variant for " + engine);
274275
}
276+
std::lock_guard<std::mutex> lock(engines_mutex_);
275277
if (IsEngineLoaded(engine)) {
276278
CTL_INF("Engine " << engine << " is already loaded, unloading it");
277279
auto unload_res = UnloadEngine(engine);
@@ -503,6 +505,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
503505
" is not installed yet!");
504506
}
505507

508+
std::lock_guard<std::mutex> lock(engines_mutex_);
506509
if (IsEngineLoaded(ne)) {
507510
CTL_INF("Engine " << ne << " is already loaded, unloading it");
508511
auto unload_res = UnloadEngine(ne);
@@ -631,7 +634,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
631634
}
632635

633636
// Returns true when engines_ contains an entry for the normalized engine name.
// This commit removes the internal lock (the "634-" line below): the function
// no longer synchronises access to engines_ itself. The call sites shown in
// this diff acquire engines_mutex_ right before calling it.
// NOTE(review): confirm every remaining caller holds engines_mutex_;
// otherwise the lookup races with concurrent load/unload.
bool EngineService::IsEngineLoaded(const std::string& engine) {
634-
std::lock_guard<std::mutex> lock(engines_mutex_);
635637
auto ne = NormalizeEngine(engine);
636638
return engines_.find(ne) != engines_.end();
637639
}
@@ -651,6 +653,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
651653
const std::string& engine_name) {
652654
auto ne = NormalizeEngine(engine_name);
653655

656+
std::lock_guard<std::mutex> lock(engines_mutex_);
654657
if (IsEngineLoaded(ne)) {
655658
CTL_INF("Engine " << ne << " is already loaded");
656659
return {};
@@ -672,6 +675,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
672675
auto user_defined_engine_path = getenv("ENGINE_PATH");
673676
#endif
674677

678+
auto custom_engine_path = user_defined_engine_path != nullptr;
675679
CTL_DBG("user defined engine path: " << user_defined_engine_path);
676680
const std::filesystem::path engine_dir_path = [&] {
677681
if (user_defined_engine_path != nullptr) {
@@ -685,8 +689,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
685689
}
686690
}();
687691

688-
CTL_DBG("Engine path: " << engine_dir_path.string());
689-
690692
if (!std::filesystem::exists(engine_dir_path)) {
691693
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
692694
return cpp::fail("Directory " + engine_dir_path.string() +
@@ -696,164 +698,65 @@ cpp::result<void, std::string> EngineService::LoadEngine(
696698
CTL_INF("Engine path: " << engine_dir_path.string());
697699

698700
try {
699-
#if defined(_WIN32)
700-
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
701-
// We would like to support running multiple engines at the same time. Therefore,
702-
// the adding/removing dll directory logic is quite complicated:
703-
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
704-
// Unload the llamacpp dll directory then load the tensorrt-llm
705-
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
706-
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
707-
// 3. Add dll directory if met other conditions
708-
709-
auto add_dll = [this](const std::string& e_type,
710-
const std::filesystem::path& p) {
711-
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
712-
CTL_DBG("Added dll directory: " << p.string());
713-
engines_[e_type].cookie = cookie;
714-
} else {
715-
CTL_WRN("Could not add dll directory: " << p.string());
716-
}
717-
718-
auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
719-
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
720-
cuda_cookie != 0) {
721-
CTL_DBG("Added cuda dll directory: " << p.string());
722-
engines_[e_type].cuda_cookie = cuda_cookie;
723-
} else {
724-
CTL_WRN("Could not add cuda dll directory: " << p.string());
725-
}
701+
auto dylib =
702+
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");
703+
704+
auto config = file_manager_utils::GetCortexConfig();
705+
706+
auto log_path =
707+
std::filesystem::path(config.logFolderPath) /
708+
std::filesystem::path(
709+
config.logLlamaCppPath); // for now seems like we use same log path
710+
711+
// init
712+
auto func = dylib->get_function<EngineI*()>("get_engine");
713+
auto engine_obj = func();
714+
auto load_opts = EngineI::EngineLoadOption{
715+
.engine_path = engine_dir_path,
716+
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
717+
.custom_engine_path = custom_engine_path,
718+
.log_path = log_path,
719+
.max_log_lines = config.maxLogLines,
720+
.log_level = logging_utils_helper::global_log_level,
726721
};
722+
engine_obj->Load(load_opts);
727723

728-
#if defined(_WIN32)
729-
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
730-
#else
731-
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
732-
#endif
733-
should_use_dll_search_path) {
734-
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
735-
should_use_dll_search_path) {
736-
737-
{
738-
std::lock_guard<std::mutex> lock(engines_mutex_);
739-
// Remove llamacpp dll directory
740-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
741-
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
742-
} else {
743-
CTL_DBG("Removed dll directory: " << kLlamaRepo);
744-
}
745-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
746-
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
747-
} else {
748-
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
749-
}
750-
}
751-
752-
add_dll(ne, engine_dir_path);
753-
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
754-
// Do nothing
755-
} else {
756-
add_dll(ne, engine_dir_path);
757-
}
758-
}
759-
#endif
760-
{
761-
std::lock_guard<std::mutex> lock(engines_mutex_);
762-
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
763-
engine_dir_path.string(), "engine");
764-
}
765-
#if defined(__linux__)
766-
const char* name = "LD_LIBRARY_PATH";
767-
auto data = getenv(name);
768-
std::string v;
769-
if (auto g = getenv(name); g) {
770-
v += g;
771-
}
772-
CTL_INF("LD_LIBRARY_PATH: " << v);
773-
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
774-
CTL_INF("llamacpp_path: " << llamacpp_path);
775-
// tensorrt is not supported for now
776-
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);
777-
778-
auto new_v = llamacpp_path.string() + ":" + v;
779-
setenv(name, new_v.c_str(), true);
780-
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
781-
#endif
724+
engines_[ne].engine = engine_obj;
725+
engines_[ne].dl = std::move(dylib);
782726

727+
CTL_DBG("Engine loaded: "
728+
<< ne); // TODO: output more information like version and variant
729+
return {};
783730
} catch (const cortex_cpp::dylib::load_error& e) {
784731
CTL_ERR("Could not load engine: " << e.what());
785-
{
786-
std::lock_guard<std::mutex> lock(engines_mutex_);
787-
engines_.erase(ne);
788-
}
732+
engines_.erase(ne);
789733
return cpp::fail("Could not load engine " + ne + ": " + e.what());
790734
}
791-
792-
{
793-
std::lock_guard<std::mutex> lock(engines_mutex_);
794-
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
795-
engines_[ne].engine = func();
796-
797-
auto& en = std::get<EngineI*>(engines_[ne].engine);
798-
if (ne == kLlamaRepo) { //fix for llamacpp engine first
799-
auto config = file_manager_utils::GetCortexConfig();
800-
if (en->IsSupported("SetFileLogger")) {
801-
en->SetFileLogger(config.maxLogLines,
802-
(std::filesystem::path(config.logFolderPath) /
803-
std::filesystem::path(config.logLlamaCppPath))
804-
.string());
805-
} else {
806-
CTL_WRN("Method SetFileLogger is not supported yet");
807-
}
808-
if (en->IsSupported("SetLogLevel")) {
809-
en->SetLogLevel(logging_utils_helper::global_log_level);
810-
} else {
811-
CTL_WRN("Method SetLogLevel is not supported yet");
812-
}
813-
}
814-
CTL_DBG("loaded engine: " << ne);
815-
}
816-
return {};
817735
}
818736

819737
// Unloads a previously loaded engine.
//
// Flow after this commit (lines with a "+" or two-number gutter): normalize
// the name, log, take engines_mutex_, fail with "Engine <name> is not loaded
// yet!" when there is no entry in engines_, otherwise delete the EngineI*
// stored in the entry, erase the entry and return success.
// Lines with a "-" gutter are removed by the commit: the inner scoped lock
// block and the Windows-only RemoveDllDirectory() cleanup of the dll/cuda
// cookies.
//
// NOTE(review): in this same diff UninstallEngineVariant, DownloadEngine,
// SetDefaultEngineVariant and UpdateEngine each take
// std::lock_guard<std::mutex> on engines_mutex_ and then call UnloadEngine()
// while still holding it. std::mutex is not recursive, so locking it again
// below from the same thread is undefined behaviour (typically a deadlock).
// Either the callers must release the lock first, or the locked body here
// needs a variant that expects the lock to be held - confirm and fix.
cpp::result<void, std::string> EngineService::UnloadEngine(
820738
const std::string& engine) {
821739
auto ne = NormalizeEngine(engine);
822-
{
823-
std::lock_guard<std::mutex> lock(engines_mutex_);
824-
if (!IsEngineLoaded(ne)) {
825-
return cpp::fail("Engine " + ne + " is not loaded yet!");
826-
}
827-
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
828-
delete e;
740+
LOG_INFO << "Unloading engine " << ne;
829741

830-
#if defined(_WIN32)
831-
if (!RemoveDllDirectory(engines_[ne].cookie)) {
832-
CTL_WRN("Could not remove dll directory: " << ne);
833-
} else {
834-
CTL_DBG("Removed dll directory: " << ne);
835-
}
836-
if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
837-
CTL_WRN("Could not remove cuda dll directory: " << ne);
838-
} else {
839-
CTL_DBG("Removed cuda dll directory: " << ne);
840-
}
841-
#endif
842-
engines_.erase(ne);
742+
// Held until the function returns; see the re-entrancy note at the top.
std::lock_guard<std::mutex> lock(engines_mutex_);
743+
if (!IsEngineLoaded(ne)) {
744+
return cpp::fail("Engine " + ne + " is not loaded yet!");
843745
}
844-
CTL_DBG("Unloaded engine " + ne);
746+
// The engine object is deleted before the map entry is erased.
// NOTE(review): presumably the entry's `dl` handle is released by erase(),
// i.e. after the engine object is destroyed - confirm.
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
747+
delete e;
748+
engines_.erase(ne);
749+
CTL_DBG("Engine unloaded: " + ne);
845750
return {};
846751
}
847752

848753
// Returns a copy of the `engine` member of every entry in engines_, collected
// while holding engines_mutex_.
// The commit only removes the redundant inner brace scope ("-" lines); the
// "+" lines repeat the same statements at function scope.
std::vector<EngineV> EngineService::GetLoadedEngines() {
849-
{
850-
std::lock_guard<std::mutex> lock(engines_mutex_);
851-
std::vector<EngineV> loaded_engines;
852-
for (const auto& [key, value] : engines_) {
853-
loaded_engines.push_back(value.engine);
854-
}
855-
return loaded_engines;
754+
std::lock_guard<std::mutex> lock(engines_mutex_);
755+
std::vector<EngineV> loaded_engines;
756+
for (const auto& [key, value] : engines_) {
757+
loaded_engines.push_back(value.engine);
856758
}
759+
return loaded_engines;
857760
}
858761

859762
cpp::result<github_release_utils::GitHubRelease, std::string>
@@ -899,6 +802,7 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
899802
CTL_INF("Default variant: " << default_variant->variant
900803
<< ", version: " + default_variant->version);
901804

805+
std::lock_guard<std::mutex> lock(engines_mutex_);
902806
if (IsEngineLoaded(ne)) {
903807
CTL_INF("Engine " << ne << " is already loaded, unloading it");
904808
auto unload_res = UnloadEngine(ne);
@@ -955,3 +859,8 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
955859
.from = default_variant->version,
956860
.to = latest_version->tag_name};
957861
}
862+
863+
// Returns the supportedEngines list from the Cortex config obtained via
// file_manager_utils::GetCortexConfig().
// This body never produces the error alternative of the result type.
// NOTE(review): Engines::ListEngine in this commit calls .value() on the
// result without checking for an error; that is only safe while this
// function cannot fail.
cpp::result<std::vector<std::string>, std::string>
864+
EngineService::GetSupportedEngineNames() {
865+
return file_manager_utils::GetCortexConfig().supportedEngines;
866+
}

engine/services/engine_service.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ class EngineService : public EngineServiceI {
123123
cpp::result<EngineUpdateResult, std::string> UpdateEngine(
124124
const std::string& engine);
125125

126+
cpp::result<std::vector<std::string>, std::string> GetSupportedEngineNames();
127+
126128
private:
127129
cpp::result<void, std::string> DownloadEngine(
128130
const std::string& engine, const std::string& version = "latest",

engine/utils/config_yaml_utils.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <iostream>
66
#include <mutex>
77
#include <string>
8+
#include "utils/engine_constants.h"
89
#include "utils/logging_utils.h"
910
#include "utils/result.hpp"
1011
#include "yaml-cpp/yaml.h"
@@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true;
2223
const std::vector<std::string> kDefaultEnabledOrigins{
2324
"http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"};
2425
constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1";
26+
const std::vector<std::string> kDefaultSupportedEngines{
27+
kLlamaEngine, kOnnxEngine, kTrtLlmEngine};
2528

2629
struct CortexConfig {
2730
std::string logFolderPath;
@@ -59,6 +62,7 @@ struct CortexConfig {
5962

6063
bool verifyPeerSsl;
6164
bool verifyHostSsl;
65+
std::vector<std::string> supportedEngines;
6266
};
6367

6468
class CortexConfigMgr {
@@ -117,6 +121,7 @@ class CortexConfigMgr {
117121
node["noProxy"] = config.noProxy;
118122
node["verifyPeerSsl"] = config.verifyPeerSsl;
119123
node["verifyHostSsl"] = config.verifyHostSsl;
124+
node["supportedEngines"] = config.supportedEngines;
120125

121126
out_file << node;
122127
out_file.close();
@@ -151,7 +156,7 @@ class CortexConfigMgr {
151156
!node["proxyUsername"] || !node["proxyPassword"] ||
152157
!node["verifyPeerSsl"] || !node["verifyHostSsl"] ||
153158
!node["verifyProxySsl"] || !node["verifyProxyHostSsl"] ||
154-
!node["noProxy"]);
159+
!node["noProxy"] || !node["supportedEngines"]);
155160

156161
CortexConfig config = {
157162
.logFolderPath = node["logFolderPath"]
@@ -235,6 +240,10 @@ class CortexConfigMgr {
235240
.verifyHostSsl = node["verifyHostSsl"]
236241
? node["verifyHostSsl"].as<bool>()
237242
: default_cfg.verifyHostSsl,
243+
.supportedEngines =
244+
node["supportedEngines"]
245+
? node["supportedEngines"].as<std::vector<std::string>>()
246+
: default_cfg.supportedEngines,
238247
};
239248
if (should_update_config) {
240249
l.unlock();

engine/utils/file_manager_utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() {
202202
.noProxy = config_yaml_utils::kDefaultNoProxy,
203203
.verifyPeerSsl = true,
204204
.verifyHostSsl = true,
205+
.supportedEngines = config_yaml_utils::kDefaultSupportedEngines,
205206
};
206207
}
207208

0 commit comments

Comments
 (0)