Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 5336334

Browse files
committed
feat: update engine interface
1 parent 1641500 commit 5336334

File tree

2 files changed

+58
-137
lines changed

2 files changed

+58
-137
lines changed

engine/cortex-common/EngineI.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
11
#pragma once
22

3+
#include <filesystem>
34
#include <functional>
45
#include <memory>
56

67
#include "json/value.h"
78
#include "trantor/utils/Logger.h"
89
class EngineI {
910
public:
11+
/// Options handed to an engine implementation when it is loaded via Load().
struct EngineLoadOption {
  /// Directory that contains the engine library.
  std::filesystem::path engine_path;

  /// Location of the CUDA toolkit libraries for this engine.
  /// TODO: make this more generic. Here just to test for now.
  std::filesystem::path cuda_path;

  /// True when the engine directory was supplied explicitly by the user
  /// rather than derived from the default engine location.
  /// Defaults to false so a default-constructed option object never carries
  /// an indeterminate flag into Load().
  bool custom_engine_path{false};
};
17+
1018
virtual ~EngineI() {}
1119

20+
virtual void Load(EngineLoadOption opts) = 0;
21+
1222
// cortex.llamacpp interface
1323
virtual void HandleChatCompletion(
1424
std::shared_ptr<Json::Value> json_body,

engine/services/engine_service.cc

Lines changed: 48 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
651651
const std::string& engine_name) {
652652
auto ne = NormalizeEngine(engine_name);
653653

654+
// std::lock_guard<std::mutex> lock(engines_mutex_);
654655
if (IsEngineLoaded(ne)) {
655656
CTL_INF("Engine " << ne << " is already loaded");
656657
return {};
@@ -672,6 +673,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
672673
auto user_defined_engine_path = getenv("ENGINE_PATH");
673674
#endif
674675

676+
auto custom_engine_path = user_defined_engine_path != nullptr;
675677
CTL_DBG("user defined engine path: " << user_defined_engine_path);
676678
const std::filesystem::path engine_dir_path = [&] {
677679
if (user_defined_engine_path != nullptr) {
@@ -685,8 +687,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
685687
}
686688
}();
687689

688-
CTL_DBG("Engine path: " << engine_dir_path.string());
689-
690690
if (!std::filesystem::exists(engine_dir_path)) {
691691
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
692692
return cpp::fail("Directory " + engine_dir_path.string() +
@@ -696,90 +696,20 @@ cpp::result<void, std::string> EngineService::LoadEngine(
696696
CTL_INF("Engine path: " << engine_dir_path.string());
697697

698698
try {
699-
#if defined(_WIN32)
700-
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
701-
// We would like to support running multiple engines at the same time. Therefore,
702-
// the adding/removing dll directory logic is quite complicated:
703-
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
704-
// Unload the llamacpp dll directory then load the tensorrt-llm
705-
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
706-
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
707-
// 3. Add dll directory if met other conditions
708-
709-
auto add_dll = [this](const std::string& e_type,
710-
const std::filesystem::path& p) {
711-
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
712-
CTL_DBG("Added dll directory: " << p.string());
713-
engines_[e_type].cookie = cookie;
714-
} else {
715-
CTL_WRN("Could not add dll directory: " << p.string());
716-
}
717-
718-
auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
719-
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
720-
cuda_cookie != 0) {
721-
CTL_DBG("Added cuda dll directory: " << p.string());
722-
engines_[e_type].cuda_cookie = cuda_cookie;
723-
} else {
724-
CTL_WRN("Could not add cuda dll directory: " << p.string());
725-
}
699+
auto dylib =
700+
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");
701+
702+
// init
703+
auto func = dylib->get_function<EngineI*()>("get_engine");
704+
auto engine_obj = func();
705+
engines_[ne].engine = engine_obj;
706+
auto load_opts = EngineI::EngineLoadOption{
707+
.engine_path = engine_dir_path,
708+
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
709+
.custom_engine_path = custom_engine_path,
726710
};
727-
728-
#if defined(_WIN32)
729-
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
730-
#else
731-
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
732-
#endif
733-
should_use_dll_search_path) {
734-
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
735-
should_use_dll_search_path) {
736-
737-
{
738-
std::lock_guard<std::mutex> lock(engines_mutex_);
739-
// Remove llamacpp dll directory
740-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
741-
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
742-
} else {
743-
CTL_DBG("Removed dll directory: " << kLlamaRepo);
744-
}
745-
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
746-
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
747-
} else {
748-
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
749-
}
750-
}
751-
752-
add_dll(ne, engine_dir_path);
753-
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
754-
// Do nothing
755-
} else {
756-
add_dll(ne, engine_dir_path);
757-
}
758-
}
759-
#endif
760-
{
761-
std::lock_guard<std::mutex> lock(engines_mutex_);
762-
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
763-
engine_dir_path.string(), "engine");
764-
}
765-
#if defined(__linux__)
766-
const char* name = "LD_LIBRARY_PATH";
767-
auto data = getenv(name);
768-
std::string v;
769-
if (auto g = getenv(name); g) {
770-
v += g;
771-
}
772-
CTL_INF("LD_LIBRARY_PATH: " << v);
773-
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
774-
CTL_INF("llamacpp_path: " << llamacpp_path);
775-
// tensorrt is not supported for now
776-
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);
777-
778-
auto new_v = llamacpp_path.string() + ":" + v;
779-
setenv(name, new_v.c_str(), true);
780-
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
781-
#endif
782-
711+
engine_obj->Load(load_opts);
712+
engines_[ne].dl = std::move(dylib);
783713
} catch (const cortex_cpp::dylib::load_error& e) {
784714
CTL_ERR("Could not load engine: " << e.what());
785715
{
@@ -789,71 +719,52 @@ cpp::result<void, std::string> EngineService::LoadEngine(
789719
return cpp::fail("Could not load engine " + ne + ": " + e.what());
790720
}
791721

792-
{
793-
std::lock_guard<std::mutex> lock(engines_mutex_);
794-
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
795-
engines_[ne].engine = func();
796-
797-
auto& en = std::get<EngineI*>(engines_[ne].engine);
798-
if (ne == kLlamaRepo) { //fix for llamacpp engine first
799-
auto config = file_manager_utils::GetCortexConfig();
800-
if (en->IsSupported("SetFileLogger")) {
801-
en->SetFileLogger(config.maxLogLines,
802-
(std::filesystem::path(config.logFolderPath) /
803-
std::filesystem::path(config.logLlamaCppPath))
804-
.string());
805-
} else {
806-
CTL_WRN("Method SetFileLogger is not supported yet");
807-
}
808-
if (en->IsSupported("SetLogLevel")) {
809-
en->SetLogLevel(logging_utils_helper::global_log_level);
810-
} else {
811-
CTL_WRN("Method SetLogLevel is not supported yet");
812-
}
813-
}
814-
CTL_DBG("loaded engine: " << ne);
815-
}
722+
auto& en = std::get<EngineI*>(engines_[ne].engine);
723+
// TODO(namh): recheck whether this can be moved to cortex.llamacpp
724+
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
725+
// auto config = file_manager_utils::GetCortexConfig();
726+
// if (en->IsSupported("SetFileLogger")) {
727+
// en->SetFileLogger(config.maxLogLines,
728+
// (std::filesystem::path(config.logFolderPath) /
729+
// std::filesystem::path(config.logLlamaCppPath))
730+
// .string());
731+
// } else {
732+
// CTL_WRN("Method SetFileLogger is not supported yet");
733+
// }
734+
// if (en->IsSupported("SetLogLevel")) {
735+
// en->SetLogLevel(logging_utils_helper::global_log_level);
736+
// } else {
737+
// CTL_WRN("Method SetLogLevel is not supported yet");
738+
// }
739+
// }
740+
CTL_DBG("loaded engine: " << ne);
741+
816742
return {};
817743
}
818744

819745
cpp::result<void, std::string> EngineService::UnloadEngine(
820746
const std::string& engine) {
821747
auto ne = NormalizeEngine(engine);
822-
{
823-
std::lock_guard<std::mutex> lock(engines_mutex_);
824-
if (!IsEngineLoaded(ne)) {
825-
return cpp::fail("Engine " + ne + " is not loaded yet!");
826-
}
827-
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
828-
delete e;
748+
LOG_INFO << "Unloading engine " << ne;
829749

830-
#if defined(_WIN32)
831-
if (!RemoveDllDirectory(engines_[ne].cookie)) {
832-
CTL_WRN("Could not remove dll directory: " << ne);
833-
} else {
834-
CTL_DBG("Removed dll directory: " << ne);
835-
}
836-
if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
837-
CTL_WRN("Could not remove cuda dll directory: " << ne);
838-
} else {
839-
CTL_DBG("Removed cuda dll directory: " << ne);
840-
}
841-
#endif
842-
engines_.erase(ne);
750+
std::lock_guard<std::mutex> lock(engines_mutex_);
751+
if (!IsEngineLoaded(ne)) {
752+
return cpp::fail("Engine " + ne + " is not loaded yet!");
843753
}
844-
CTL_DBG("Unloaded engine " + ne);
754+
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
755+
delete e;
756+
engines_.erase(ne);
757+
CTL_DBG("Engine unloaded: " + ne);
845758
return {};
846759
}
847760

848761
std::vector<EngineV> EngineService::GetLoadedEngines() {
849-
{
850-
std::lock_guard<std::mutex> lock(engines_mutex_);
851-
std::vector<EngineV> loaded_engines;
852-
for (const auto& [key, value] : engines_) {
853-
loaded_engines.push_back(value.engine);
854-
}
855-
return loaded_engines;
762+
std::lock_guard<std::mutex> lock(engines_mutex_);
763+
std::vector<EngineV> loaded_engines;
764+
for (const auto& [key, value] : engines_) {
765+
loaded_engines.push_back(value.engine);
856766
}
767+
return loaded_engines;
857768
}
858769

859770
cpp::result<github_release_utils::GitHubRelease, std::string>

0 commit comments

Comments
 (0)