Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit ae8101b

Browse files
committed
feat: update engine interface
1 parent 1641500 commit ae8101b

File tree

2 files changed

+63
-140
lines changed

2 files changed

+63
-140
lines changed

engine/cortex-common/EngineI.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
11
#pragma once
22

3+
#include <filesystem>
34
#include <functional>
45
#include <memory>
56

67
#include "json/value.h"
78
#include "trantor/utils/Logger.h"
89
class EngineI {
910
public:
11+
// Options handed to an engine implementation through EngineI::Load().
struct EngineLoadOption {
  // Directory the engine shared library is loaded from.
  std::filesystem::path engine_path;
  // CUDA toolkit directory resolved for the engine.
  // TODO: make this more generic. Here just to test for now
  std::filesystem::path cuda_path;
  // True when the engine directory was supplied by the user (ENGINE_PATH
  // environment variable) instead of the default install location.
  // Defaults to false so a default-initialized option never carries an
  // indeterminate value.
  bool custom_engine_path = false;
};
17+
1018
virtual ~EngineI() {}
1119

20+
virtual void Load(EngineLoadOption opts) = 0;
21+
1222
// cortex.llamacpp interface
1323
virtual void HandleChatCompletion(
1424
std::shared_ptr<Json::Value> json_body,

engine/services/engine_service.cc

Lines changed: 53 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ cpp::result<bool, std::string> EngineService::UninstallEngineVariant(
179179
const std::string& engine, const std::optional<std::string> version,
180180
const std::optional<std::string> variant) {
181181
auto ne = NormalizeEngine(engine);
182+
std::lock_guard<std::mutex> lock(engines_mutex_);
182183
if (IsEngineLoaded(ne)) {
183184
CTL_INF("Engine " << ne << " is already loaded, unloading it");
184185
auto unload_res = UnloadEngine(ne);
@@ -272,6 +273,7 @@ cpp::result<void, std::string> EngineService::DownloadEngine(
272273
if (selected_variant == std::nullopt) {
273274
return cpp::fail("Failed to find a suitable variant for " + engine);
274275
}
276+
std::lock_guard<std::mutex> lock(engines_mutex_);
275277
if (IsEngineLoaded(engine)) {
276278
CTL_INF("Engine " << engine << " is already loaded, unloading it");
277279
auto unload_res = UnloadEngine(engine);
@@ -503,6 +505,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
503505
" is not installed yet!");
504506
}
505507

508+
std::lock_guard<std::mutex> lock(engines_mutex_);
506509
if (IsEngineLoaded(ne)) {
507510
CTL_INF("Engine " << ne << " is already loaded, unloading it");
508511
auto unload_res = UnloadEngine(ne);
@@ -631,7 +634,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
631634
}
632635

633636
bool EngineService::IsEngineLoaded(const std::string& engine) {
634-
std::lock_guard<std::mutex> lock(engines_mutex_);
635637
auto ne = NormalizeEngine(engine);
636638
return engines_.find(ne) != engines_.end();
637639
}
@@ -651,6 +653,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
651653
const std::string& engine_name) {
652654
auto ne = NormalizeEngine(engine_name);
653655

656+
std::lock_guard<std::mutex> lock(engines_mutex_);
654657
if (IsEngineLoaded(ne)) {
655658
CTL_INF("Engine " << ne << " is already loaded");
656659
return {};
@@ -672,6 +675,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
672675
auto user_defined_engine_path = getenv("ENGINE_PATH");
673676
#endif
674677

678+
auto custom_engine_path = user_defined_engine_path != nullptr;
675679
CTL_DBG("user defined engine path: " << user_defined_engine_path);
676680
const std::filesystem::path engine_dir_path = [&] {
677681
if (user_defined_engine_path != nullptr) {
@@ -685,8 +689,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
685689
}
686690
}();
687691

688-
CTL_DBG("Engine path: " << engine_dir_path.string());
689-
690692
if (!std::filesystem::exists(engine_dir_path)) {
691693
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
692694
return cpp::fail("Directory " + engine_dir_path.string() +
@@ -696,164 +698,74 @@ cpp::result<void, std::string> EngineService::LoadEngine(
696698
CTL_INF("Engine path: " << engine_dir_path.string());
697699

698700
try {
699-
#if defined(_WIN32)
700-
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
701-
// We would like to support running multiple engines at the same time. Therefore,
702-
// the adding/removing dll directory logic is quite complicated:
703-
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
704-
// Unload the llamacpp dll directory then load the tensorrt-llm
705-
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
706-
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
707-
// 3. Add dll directory if met other conditions
708-
709-
auto add_dll = [this](const std::string& e_type,
710-
const std::filesystem::path& p) {
711-
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
712-
CTL_DBG("Added dll directory: " << p.string());
713-
engines_[e_type].cookie = cookie;
714-
} else {
715-
CTL_WRN("Could not add dll directory: " << p.string());
716-
}
717-
718-
auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
719-
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
720-
cuda_cookie != 0) {
721-
CTL_DBG("Added cuda dll directory: " << p.string());
722-
engines_[e_type].cuda_cookie = cuda_cookie;
723-
} else {
724-
CTL_WRN("Could not add cuda dll directory: " << p.string());
725-
}
701+
auto dylib =
702+
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");
703+
704+
// init
705+
auto func = dylib->get_function<EngineI*()>("get_engine");
706+
auto engine_obj = func();
707+
auto load_opts = EngineI::EngineLoadOption{
708+
.engine_path = engine_dir_path,
709+
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
710+
.custom_engine_path = custom_engine_path,
726711
};
712+
engine_obj->Load(load_opts);
727713

728-
#if defined(_WIN32)
729-
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
730-
#else
731-
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
732-
#endif
733-
should_use_dll_search_path) {
734-
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
735-
should_use_dll_search_path) {
736-
737-
{
738-
std::lock_guard<std::mutex> lock(engines_mutex_);
739-
// Remove llamacpp dll directory
740-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
741-
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
742-
} else {
743-
CTL_DBG("Removed dll directory: " << kLlamaRepo);
744-
}
745-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
746-
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
747-
} else {
748-
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
749-
}
750-
}
751-
752-
add_dll(ne, engine_dir_path);
753-
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
754-
// Do nothing
755-
} else {
756-
add_dll(ne, engine_dir_path);
757-
}
758-
}
759-
#endif
760-
{
761-
std::lock_guard<std::mutex> lock(engines_mutex_);
762-
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
763-
engine_dir_path.string(), "engine");
764-
}
765-
#if defined(__linux__)
766-
const char* name = "LD_LIBRARY_PATH";
767-
auto data = getenv(name);
768-
std::string v;
769-
if (auto g = getenv(name); g) {
770-
v += g;
771-
}
772-
CTL_INF("LD_LIBRARY_PATH: " << v);
773-
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
774-
CTL_INF("llamacpp_path: " << llamacpp_path);
775-
// tensorrt is not supported for now
776-
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);
777-
778-
auto new_v = llamacpp_path.string() + ":" + v;
779-
setenv(name, new_v.c_str(), true);
780-
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
781-
#endif
714+
engines_[ne].engine = engine_obj;
715+
engines_[ne].dl = std::move(dylib);
782716

717+
CTL_DBG("Engine loaded: "
718+
<< ne); // TODO: output more information like version and variant
783719
} catch (const cortex_cpp::dylib::load_error& e) {
784720
CTL_ERR("Could not load engine: " << e.what());
785-
{
786-
std::lock_guard<std::mutex> lock(engines_mutex_);
787-
engines_.erase(ne);
788-
}
721+
engines_.erase(ne);
789722
return cpp::fail("Could not load engine " + ne + ": " + e.what());
790723
}
791724

792-
{
793-
std::lock_guard<std::mutex> lock(engines_mutex_);
794-
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
795-
engines_[ne].engine = func();
796-
797-
auto& en = std::get<EngineI*>(engines_[ne].engine);
798-
if (ne == kLlamaRepo) { //fix for llamacpp engine first
799-
auto config = file_manager_utils::GetCortexConfig();
800-
if (en->IsSupported("SetFileLogger")) {
801-
en->SetFileLogger(config.maxLogLines,
802-
(std::filesystem::path(config.logFolderPath) /
803-
std::filesystem::path(config.logLlamaCppPath))
804-
.string());
805-
} else {
806-
CTL_WRN("Method SetFileLogger is not supported yet");
807-
}
808-
if (en->IsSupported("SetLogLevel")) {
809-
en->SetLogLevel(logging_utils_helper::global_log_level);
810-
} else {
811-
CTL_WRN("Method SetLogLevel is not supported yet");
812-
}
813-
}
814-
CTL_DBG("loaded engine: " << ne);
815-
}
725+
// TODO(namh): re-check whether this can be moved to cortex.llamacpp
726+
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
727+
// auto config = file_manager_utils::GetCortexConfig();
728+
// if (en->IsSupported("SetFileLogger")) {
729+
// en->SetFileLogger(config.maxLogLines,
730+
// (std::filesystem::path(config.logFolderPath) /
731+
// std::filesystem::path(config.logLlamaCppPath))
732+
// .string());
733+
// } else {
734+
// CTL_WRN("Method SetFileLogger is not supported yet");
735+
// }
736+
// if (en->IsSupported("SetLogLevel")) {
737+
// en->SetLogLevel(logging_utils_helper::global_log_level);
738+
// } else {
739+
// CTL_WRN("Method SetLogLevel is not supported yet");
740+
// }
741+
// }
742+
816743
return {};
817744
}
818745

819746
// Unloads a loaded engine: destroys its EngineI object and removes its entry
// from engines_.
//
// `engine` is passed through NormalizeEngine() before use.
// Returns an empty result on success, or cpp::fail with a message when no
// entry for the engine exists.
//
// NOTE(review): this function acquires engines_mutex_ through a
// std::lock_guard<std::mutex>. Other functions in this file
// (UninstallEngineVariant, DownloadEngine, SetDefaultEngineVariant,
// UpdateEngine) lock the same mutex right before calling UnloadEngine().
// Locking a std::mutex that the calling thread already owns is undefined
// behavior (typically a deadlock) - confirm and resolve at those call sites.
cpp::result<void, std::string> EngineService::UnloadEngine(
    const std::string& engine) {
  const auto ne = NormalizeEngine(engine);
  LOG_INFO << "Unloading engine " << ne;

  std::lock_guard<std::mutex> lock(engines_mutex_);
  if (IsEngineLoaded(ne)) {
    auto& entry = engines_[ne];
    // std::get throws std::bad_variant_access if the stored alternative is
    // not EngineI*.
    // The engine object is deleted before its entry is erased; the entry also
    // holds the library handle (`dl`), so presumably the engine has to be
    // destroyed while its library is still loaded.
    delete std::get<EngineI*>(entry.engine);
    engines_.erase(ne);
    CTL_DBG("Engine unloaded: " + ne);
    return {};
  }
  return cpp::fail("Engine " + ne + " is not loaded yet!");
}
847761

848762
std::vector<EngineV> EngineService::GetLoadedEngines() {
849-
{
850-
std::lock_guard<std::mutex> lock(engines_mutex_);
851-
std::vector<EngineV> loaded_engines;
852-
for (const auto& [key, value] : engines_) {
853-
loaded_engines.push_back(value.engine);
854-
}
855-
return loaded_engines;
763+
std::lock_guard<std::mutex> lock(engines_mutex_);
764+
std::vector<EngineV> loaded_engines;
765+
for (const auto& [key, value] : engines_) {
766+
loaded_engines.push_back(value.engine);
856767
}
768+
return loaded_engines;
857769
}
858770

859771
cpp::result<github_release_utils::GitHubRelease, std::string>
@@ -899,6 +811,7 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
899811
CTL_INF("Default variant: " << default_variant->variant
900812
<< ", version: " + default_variant->version);
901813

814+
std::lock_guard<std::mutex> lock(engines_mutex_);
902815
if (IsEngineLoaded(ne)) {
903816
CTL_INF("Engine " << ne << " is already loaded, unloading it");
904817
auto unload_res = UnloadEngine(ne);

0 commit comments

Comments
 (0)