diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index b53eb7fdf..5ffabf23c 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -71,7 +71,6 @@ add_subdirectory(cli)
 find_package(jsoncpp CONFIG REQUIRED)
 find_package(Drogon CONFIG REQUIRED)
 find_package(yaml-cpp CONFIG REQUIRED)
-find_package(httplib CONFIG REQUIRED)
 find_package(unofficial-minizip CONFIG REQUIRED)
 find_package(LibArchive REQUIRED)
 find_package(CURL REQUIRED)
@@ -147,7 +146,6 @@ add_executable(${TARGET_NAME} main.cc
 target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

-target_link_libraries(${TARGET_NAME} PRIVATE httplib::httplib)
 target_link_libraries(${TARGET_NAME} PRIVATE unofficial::minizip::minizip)
 target_link_libraries(${TARGET_NAME} PRIVATE LibArchive::LibArchive)
 target_link_libraries(${TARGET_NAME} PRIVATE CURL::libcurl)
diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt
index c69e7e150..db2bed828 100644
--- a/engine/cli/CMakeLists.txt
+++ b/engine/cli/CMakeLists.txt
@@ -61,7 +61,6 @@ add_compile_definitions(CORTEX_CONFIG_FILE_PATH="${CORTEX_CONFIG_FILE_PATH}")
 find_package(jsoncpp CONFIG REQUIRED)
 find_package(yaml-cpp CONFIG REQUIRED)
-find_package(httplib CONFIG REQUIRED)
 find_package(CLI11 CONFIG REQUIRED)
 find_package(unofficial-minizip CONFIG REQUIRED)
 find_package(LibArchive REQUIRED)
@@ -87,7 +86,6 @@ add_executable(${TARGET_NAME} main.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
 )
-target_link_libraries(${TARGET_NAME} PRIVATE httplib::httplib)
 target_link_libraries(${TARGET_NAME} PRIVATE CLI11::CLI11)
 target_link_libraries(${TARGET_NAME} PRIVATE unofficial::minizip::minizip)
 target_link_libraries(${TARGET_NAME} PRIVATE LibArchive::LibArchive)
diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc
index bb41dbe8b..e1b2f5feb 100644
--- a/engine/cli/command_line_parser.cc
+++ b/engine/cli/command_line_parser.cc
@@ -149,9 +149,9 @@ void CommandLineParser::SetupCommonCommands() {
       return;
     }
     try {
-      commands::ModelPullCmd(download_service_)
-          .Exec(cml_data_.config.apiServerHost,
-                std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id);
+      commands::ModelPullCmd().Exec(cml_data_.config.apiServerHost,
+                                    std::stoi(cml_data_.config.apiServerPort),
+                                    cml_data_.model_id);
     } catch (const std::exception& e) {
       CLI_LOG(e.what());
     }
@@ -214,10 +214,9 @@ void CommandLineParser::SetupModelCommands() {
       CLI_LOG(model_start_cmd->help());
       return;
     };
-    commands::ModelStartCmd(model_service_)
-        .Exec(cml_data_.config.apiServerHost,
-              std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id,
-              hw_activate_opts_);
+    commands::ModelStartCmd().Exec(cml_data_.config.apiServerHost,
+                                   std::stoi(cml_data_.config.apiServerPort),
+                                   cml_data_.model_id, hw_activate_opts_);
   });

   auto stop_model_cmd =
@@ -234,9 +233,9 @@ void CommandLineParser::SetupModelCommands() {
       CLI_LOG(stop_model_cmd->help());
       return;
     };
-    commands::ModelStopCmd(model_service_)
-        .Exec(cml_data_.config.apiServerHost,
-              std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id);
+    commands::ModelStopCmd().Exec(cml_data_.config.apiServerHost,
+                                  std::stoi(cml_data_.config.apiServerPort),
+                                  cml_data_.model_id);
   });

   auto list_models_cmd =
diff --git a/engine/cli/commands/chat_completion_cmd.cc b/engine/cli/commands/chat_completion_cmd.cc
index f81040bac..0067b1c08 100644
--- a/engine/cli/commands/chat_completion_cmd.cc
+++ b/engine/cli/commands/chat_completion_cmd.cc
@@ -1,8 +1,8 @@
 #include "chat_completion_cmd.h"
+#include <curl/curl.h>
 #include "config/yaml_config.h"
#include "cortex_upd_cmd.h" #include "database/models.h" -#include "httplib.h" #include "model_status_cmd.h" #include "server_start_cmd.h" #include "utils/engine_constants.h" @@ -16,29 +16,42 @@ constexpr const auto kMinDataChunkSize = 6u; constexpr const char* kUser = "user"; constexpr const char* kAssistant = "assistant"; -} // namespace +struct StreamingCallback { + std::string* ai_chat; + bool is_done; -struct ChunkParser { - std::string content; - bool is_done = false; + StreamingCallback() : ai_chat(nullptr), is_done(false) {} +}; - ChunkParser(const char* data, size_t data_length) { - if (data && data_length > kMinDataChunkSize) { - std::string s(data + kMinDataChunkSize, data_length - kMinDataChunkSize); - if (s.find("[DONE]") != std::string::npos) { - is_done = true; - } else { - try { - content = - json_helper::ParseJsonString(s)["choices"][0]["delta"]["content"] - .asString(); - } catch (const std::exception& e) { - CTL_WRN("JSON parse error: " << e.what()); - } +size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) { + auto* callback = static_cast(userdata); + size_t data_length = size * nmemb; + + if (ptr && data_length > kMinDataChunkSize) { + std::string chunk(ptr + kMinDataChunkSize, data_length - kMinDataChunkSize); + if (chunk.find("[DONE]") != std::string::npos) { + callback->is_done = true; + std::cout << std::endl; + return data_length; + } + + try { + std::string content = + json_helper::ParseJsonString(chunk)["choices"][0]["delta"]["content"] + .asString(); + std::cout << content << std::flush; + if (callback->ai_chat) { + *callback->ai_chat += content; } + } catch (const std::exception& e) { + CTL_WRN("JSON parse error: " << e.what()); } } -}; + + return data_length; +} + +} // namespace void ChatCompletionCmd::Exec(const std::string& host, int port, const std::string& model_handle, std::string msg) { @@ -68,95 +81,101 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, const std::string& model_handle, const config::ModelConfig& mc, std::string msg) { auto address = host + ":" + std::to_string(port); + // Check if server is started - { - if (!commands::IsServerAlive(host, port)) { - CLI_LOG("Server is not started yet, please run `" - << commands::GetCortexBinary() << " start` to start server!"); - return; - } + if (!commands::IsServerAlive(host, port)) { + CLI_LOG("Server is not started yet, please run `" + << commands::GetCortexBinary() << " start` to start server!"); + return; } // Only check if llamacpp engine if ((mc.engine.find(kLlamaEngine) != std::string::npos || mc.engine.find(kLlamaRepo) != std::string::npos) && - !commands::ModelStatusCmd(model_service_) - .IsLoaded(host, port, model_handle)) { + !commands::ModelStatusCmd().IsLoaded(host, port, model_handle)) { CLI_LOG("Model is not loaded yet!"); return; } + auto curl = curl_easy_init(); + if (!curl) { + CLI_LOG("Failed to initialize CURL"); + return; + } + + std::string url = "http://" + address + "/v1/chat/completions"; + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_POST, 1L); + + struct curl_slist* headers = nullptr; + headers = curl_slist_append(headers, "Content-Type: application/json"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + // Interactive mode or not bool interactive = msg.empty(); - // Some instruction for user here if (interactive) { - std::cout << "Inorder to exit, type `exit()`" << std::endl; + std::cout << "In order to exit, type `exit()`" << std::endl; } - // Model is loaded, start to chat - { - do { - 
-      std::string user_input = std::move(msg);
-      if (user_input.empty()) {
-        std::cout << "> ";
-        if (!std::getline(std::cin, user_input)) {
-          break;
-        }
-      }
-      string_utils::Trim(user_input);
-      if (user_input == kExitChat) {
+  do {
+    std::string user_input = std::move(msg);
+    if (user_input.empty()) {
+      std::cout << "> ";
+      if (!std::getline(std::cin, user_input)) {
         break;
       }
+    }
+
+    string_utils::Trim(user_input);
+    if (user_input == kExitChat) {
+      break;
+    }
+
+    if (!user_input.empty()) {
+      // Prepare JSON payload
+      Json::Value new_data;
+      new_data["role"] = kUser;
+      new_data["content"] = user_input;
+      histories_.push_back(std::move(new_data));
+
+      Json::Value json_data = mc.ToJson();
+      json_data["engine"] = mc.engine;
+
+      Json::Value msgs_array(Json::arrayValue);
+      for (const auto& m : histories_) {
+        msgs_array.append(m);
+      }
+
+      json_data["messages"] = msgs_array;
+      json_data["model"] = model_handle;
+      json_data["stream"] = true;

-      if (!user_input.empty()) {
-        httplib::Client cli(address);
-        Json::Value json_data = mc.ToJson();
-        Json::Value new_data;
-        new_data["role"] = kUser;
-        new_data["content"] = user_input;
-        histories_.push_back(std::move(new_data));
-        json_data["engine"] = mc.engine;
-        Json::Value msgs_array(Json::arrayValue);
-        for (const auto& m : histories_) {
-          msgs_array.append(m);
-        }
-        json_data["messages"] = msgs_array;
-        json_data["model"] = model_handle;
-        //TODO: support non-stream
-        json_data["stream"] = true;
-        auto data_str = json_data.toStyledString();
-        // std::cout << data_str << std::endl;
-        cli.set_read_timeout(std::chrono::seconds(60));
-        // std::cout << "> ";
-        httplib::Request req;
-        req.headers = httplib::Headers();
-        req.set_header("Content-Type", "application/json");
-        req.method = "POST";
-        req.path = "/v1/chat/completions";
-        req.body = data_str;
-        std::string ai_chat;
-        req.content_receiver = [&](const char* data, size_t data_length,
-                                   uint64_t offset, uint64_t total_length) {
-          ChunkParser cp(data, data_length);
-          if (cp.is_done) {
-            std::cout << std::endl;
-            return false;
-          }
-          std::cout << cp.content << std::flush;
-          ai_chat += cp.content;
-          return true;
-        };
-        cli.send(req);
+      std::string json_payload = json_data.toStyledString();
+      curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_payload.c_str());
+
+      std::string ai_chat;
+      StreamingCallback callback;
+      callback.ai_chat = &ai_chat;
+
+      curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+      curl_easy_setopt(curl, CURLOPT_WRITEDATA, &callback);
+
+      CURLcode res = curl_easy_perform(curl);
+
+      if (res != CURLE_OK) {
+        CLI_LOG("CURL request failed: " << curl_easy_strerror(res));
+      } else {
         Json::Value ai_res;
         ai_res["role"] = kAssistant;
         ai_res["content"] = ai_chat;
         histories_.push_back(std::move(ai_res));
       }
-      // std::cout << "ok Done" << std::endl;
-    } while (interactive);
-  }
-}
+    }
+  } while (interactive);

-};  // namespace commands
+  curl_slist_free_all(headers);
+  curl_easy_cleanup(curl);
+}
+}  // namespace commands
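A note on the streaming path above: libcurl invokes the function registered via CURLOPT_WRITEFUNCTION once per received chunk, and the chunk only counts as consumed if the callback returns `size * nmemb`. The server streams OpenAI-style SSE lines prefixed with `data: ` (six bytes, which is what `kMinDataChunkSize` skips before JSON parsing). For readers unfamiliar with the callback mechanics, here is a minimal, self-contained sketch of the same pattern, independent of the Cortex helpers; the host/port and `/healthz` endpoint are borrowed from the e2e tests, and error handling is trimmed:

```cpp
#include <curl/curl.h>
#include <iostream>
#include <string>

// Accumulates the response body; libcurl may call this many times per request.
static size_t OnWrite(char* ptr, size_t size, size_t nmemb, void* userdata) {
  auto* body = static_cast<std::string*>(userdata);
  body->append(ptr, size * nmemb);
  return size * nmemb;  // returning anything less aborts the transfer
}

int main() {
  curl_global_init(CURL_GLOBAL_DEFAULT);
  CURL* curl = curl_easy_init();
  if (!curl) return 1;

  std::string body;
  curl_easy_setopt(curl, CURLOPT_URL, "http://127.0.0.1:3928/healthz");
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWrite);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &body);

  CURLcode res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    std::cerr << "curl error: " << curl_easy_strerror(res) << '\n';
  } else {
    std::cout << body << '\n';
  }

  curl_easy_cleanup(curl);
  curl_global_cleanup();
  return 0;
}
```

One behavior worth knowing when reading the patch itself: CURLOPT_POSTFIELDS does not copy the buffer, so `json_payload` must stay alive until `curl_easy_perform()` returns — which holds above because both happen in the same loop iteration (CURLOPT_COPYPOSTFIELDS is the copying variant).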
diff --git a/engine/cli/commands/chat_completion_cmd.h b/engine/cli/commands/chat_completion_cmd.h
index 746c25eb3..a784b4604 100644
--- a/engine/cli/commands/chat_completion_cmd.h
+++ b/engine/cli/commands/chat_completion_cmd.h
@@ -3,14 +3,10 @@
 #include <string>
 #include <vector>
 #include "config/model_config.h"
-#include "services/model_service.h"

 namespace commands {
 class ChatCompletionCmd {
  public:
-  explicit ChatCompletionCmd(const ModelService& model_service)
-      : model_service_{model_service} {};
-
   void Exec(const std::string& host, int port, const std::string& model_handle,
             std::string msg);
   void Exec(const std::string& host, int port, const std::string& model_handle,
@@ -18,6 +14,5 @@ class ChatCompletionCmd {

  private:
   std::vector<Json::Value> histories_;
-  ModelService model_service_;
 };
 }  // namespace commands
diff --git a/engine/cli/commands/cortex_upd_cmd.cc b/engine/cli/commands/cortex_upd_cmd.cc
index fcb45fc5c..5d7b4bf4c 100644
--- a/engine/cli/commands/cortex_upd_cmd.cc
+++ b/engine/cli/commands/cortex_upd_cmd.cc
@@ -1,9 +1,9 @@
 #include "cortex_upd_cmd.h"
-#include "httplib.h"
+#include "cli/commands/server_start_cmd.h"
 #include "server_stop_cmd.h"
 #include "utils/archive_utils.h"
+#include "utils/curl_utils.h"
 #include "utils/file_manager_utils.h"
-#include "utils/json_helper.h"
 #include "utils/logging_utils.h"
 #include "utils/scope_exit.h"
 #include "utils/system_info_utils.h"
@@ -151,69 +151,62 @@ std::optional<std::string> CheckNewUpdate(
     return config.latestRelease;
   }

-  auto host_name = GetHostName();
-  auto release_path = GetReleasePath();
-  CTL_INF("Engine release path: " << host_name << release_path);
+  auto url = url_parser::Url{
+      .protocol = "https",
+      .host = GetHostName(),
+      .pathParams = GetReleasePath(),
+  };

-  httplib::Client cli(host_name);
-  if (timeout.has_value()) {
-    cli.set_connection_timeout(*timeout);
-    cli.set_read_timeout(*timeout);
+  CTL_INF("Engine release path: " << url.ToFullPath());
+
+  auto res = curl_utils::SimpleGetJson(url.ToFullPath());
+  if (res.has_error()) {
+    CTL_INF("HTTP error: " << res.error());
+    return std::nullopt;
   }

-  if (auto res = cli.Get(release_path)) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      try {
-        auto get_latest = [](const Json::Value& data) -> std::string {
-          if (data.empty()) {
-            return "";
-          }
-          if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
-            for (const auto& d : data) {
-              if (auto tag = d["tag_name"].asString();
-                  tag.find(kBetaComp) != std::string::npos) {
-                return tag;
-              }
-            }
-            return data[0]["tag_name"].asString();
-          } else {
-            return data["tag_name"].asString();
+  try {
+    auto get_latest = [](const Json::Value& data) -> std::string {
+      if (data.empty()) {
+        return "";
+      }
+
+      if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
+        for (const auto& d : data) {
+          if (auto tag = d["tag_name"].asString();
+              tag.find(kBetaComp) != std::string::npos) {
+            return tag;
           }
-          return "";
-        };
-
-        auto json_res = json_helper::ParseJsonString(res->body);
-        std::string latest_version = get_latest(json_res);
-        if (latest_version.empty()) {
-          CTL_WRN("Release not found!");
-          return std::nullopt;
-        }
-        std::string current_version = CORTEX_CPP_VERSION;
-        CTL_INF("Got the latest release, update to the config file: "
-                << latest_version)
-        config.latestRelease = latest_version;
-        auto result =
-            config_yaml_utils::CortexConfigMgr::GetInstance().DumpYamlConfig(
-                config, file_manager_utils::GetConfigurationPath().string());
-        if (result.has_error()) {
-          CTL_ERR("Error update "
-                  << file_manager_utils::GetConfigurationPath().string()
-                  << result.error());
         }
-        if (current_version != latest_version) {
-          return latest_version;
-        }
-      } catch (const std::exception& e) {
-        CTL_INF("JSON parse error: " << e.what());
-        return std::nullopt;
+        return data[0]["tag_name"].asString();
+      } else {
+        return data["tag_name"].asString();
       }
-    } else {
-      CTL_INF("HTTP error: " << res->status);
+      return "";
+    };
+
+    auto latest_version = get_latest(res.value());
+    if (latest_version.empty()) {
+      CTL_WRN("Release not found!");
       return std::nullopt;
     }
-  } else {
-    auto err = res.error();
-    CTL_INF("HTTP error: " << httplib::to_string(err));
+    std::string current_version = CORTEX_CPP_VERSION;
+    CTL_INF(
+        "Got the latest release, update to the config file: " << latest_version)
+    config.latestRelease = latest_version;
+    auto result =
+        config_yaml_utils::CortexConfigMgr::GetInstance().DumpYamlConfig(
+            config, file_manager_utils::GetConfigurationPath().string());
+    if (result.has_error()) {
+      CTL_ERR("Error update "
+              << file_manager_utils::GetConfigurationPath().string()
+              << result.error());
+    }
+    if (current_version != latest_version) {
+      return latest_version;
+    }
+  } catch (const std::exception& e) {
+    CTL_INF("JSON parse error: " << e.what());
     return std::nullopt;
   }
   return std::nullopt;
@@ -230,9 +223,9 @@ void CortexUpdCmd::Exec(const std::string& v, bool force) {
   {
     auto config = file_manager_utils::GetCortexConfig();
-    httplib::Client cli(config.apiServerHost + ":" + config.apiServerPort);
-    auto res = cli.Get("/healthz");
-    if (res) {
+    auto server_running = commands::IsServerAlive(
+        config.apiServerHost, std::stoi(config.apiServerPort));
+    if (server_running) {
       CLI_LOG("Server is running. Stopping server before updating!");
       commands::ServerStopCmd ssc(config.apiServerHost,
                                   std::stoi(config.apiServerPort));
@@ -270,38 +263,32 @@ bool CortexUpdCmd::GetStable(const std::string& v) {
   auto system_info = GetSystemInfoWithUniversal();
   CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch);

-  // Download file
-  auto github_host = GetHostName();
-  auto release_path = GetReleasePath();
-  CTL_INF("Engine release path: " << github_host << release_path);
+  auto url_obj = url_parser::Url{
+      .protocol = "https",
+      .host = GetHostName(),
+      .pathParams = GetReleasePath(),
+  };
+  CTL_INF("Engine release path: " << url_obj.ToFullPath());

-  httplib::Client cli(github_host);
-  if (auto res = cli.Get(release_path)) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      try {
-        auto json_data = json_helper::ParseJsonString(res->body);
-        if (json_data.empty()) {
-          CLI_LOG("Version not found: " << v);
-          return false;
-        }
+  auto res = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  if (res.has_error()) {
+    CLI_LOG_ERROR("HTTP error: " << res.error());
+    return false;
+  }

-        if (downloaded_exe_path = HandleGithubRelease(
-                json_data["assets"],
-                {system_info->os + "-" + system_info->arch});
-            !downloaded_exe_path) {
-          return false;
-        }
-      } catch (const std::exception& e) {
-        CLI_LOG_ERROR("JSON parse error: " << e.what());
-        return false;
-      }
-    } else {
-      CLI_LOG_ERROR("HTTP error: " << res->status);
+  try {
+    if (res.value().empty()) {
+      CLI_LOG("Version not found: " << v);
       return false;
     }
-  } else {
-    auto err = res.error();
-    CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err));
+
+    if (downloaded_exe_path = HandleGithubRelease(
+            res.value()["assets"], {system_info->os + "-" + system_info->arch});
+        !downloaded_exe_path) {
+      return false;
+    }
+  } catch (const std::exception& e) {
+    CLI_LOG_ERROR("JSON parse error: " << e.what());
     return false;
   }

@@ -330,50 +317,42 @@ bool CortexUpdCmd::GetBeta(const std::string& v) {
   auto system_info = GetSystemInfoWithUniversal();
   CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch);

-  // Download file
-  auto github_host = GetHostName();
-  auto release_path = GetReleasePath();
-  CTL_INF("Engine release path: " << github_host << release_path);
+  auto url_obj = url_parser::Url{
+      .protocol = "https",
+      .host = GetHostName(),
+      .pathParams = GetReleasePath(),
+  };
+  CTL_INF("Engine release path: " << url_obj.ToFullPath());
+  auto res = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  if (res.has_error()) {
+    CLI_LOG_ERROR("HTTP error: " << res.error());
+    return false;
+  }

-  httplib::Client cli(github_host);
-  if (auto res = cli.Get(release_path)) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      try {
-        auto json_res = json_helper::ParseJsonString(res->body);
-
-        Json::Value json_data;
-        for (const auto& jr : json_res) {
-          // Get the latest beta or match version
-          if (auto tag = jr["tag_name"].asString();
-              (v.empty() && tag.find(kBetaComp) != std::string::npos) ||
-              (tag == v)) {
-            json_data = jr;
-            break;
-          }
-        }
+  try {
+    Json::Value json_data;
+    for (const auto& jr : res.value()) {
+      // Get the latest beta or match version
+      if (auto tag = jr["tag_name"].asString();
+          (v.empty() && tag.find(kBetaComp) != std::string::npos) ||
+          (tag == v)) {
+        json_data = jr;
+        break;
+      }
+    }

-        if (json_data.empty()) {
-          CLI_LOG("Version not found: " << v);
-          return false;
-        }
+    if (json_data.empty()) {
+      CLI_LOG("Version not found: " << v);
+      return false;
+    }

-        if (downloaded_exe_path = HandleGithubRelease(
-                json_data["assets"],
-                {system_info->os + "-" + system_info->arch});
-            !downloaded_exe_path) {
-          return false;
-        }
-      } catch (const std::exception& e) {
-        CLI_LOG_ERROR("JSON parse error: " << e.what());
-        return false;
-      }
-    } else {
-      CLI_LOG_ERROR("HTTP error: " << res->status);
+    if (downloaded_exe_path = HandleGithubRelease(
+            json_data["assets"], {system_info->os + "-" + system_info->arch});
+        !downloaded_exe_path) {
       return false;
     }
-  } else {
-    auto err = res.error();
-    CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err));
+  } catch (const std::exception& e) {
+    CLI_LOG_ERROR("JSON parse error: " << e.what());
     return false;
   }

@@ -430,13 +409,15 @@ std::optional<std::string> CortexUpdCmd::HandleGithubRelease(
     CLI_LOG_ERROR("Failed to create directories: " << e.what());
     return std::nullopt;
   }
-  auto download_task{DownloadTask{.id = "cortex",
-                                  .type = DownloadType::Cortex,
-                                  .items = {DownloadItem{
-                                      .id = "cortex",
-                                      .downloadUrl = download_url,
-                                      .localPath = local_path,
-                                  }}}};
+  auto download_task{DownloadTask{
+      .id = "cortex",
+      .type = DownloadType::Cortex,
+      .items = {DownloadItem{
+          .id = "cortex",
+          .downloadUrl = download_url,
+          .localPath = local_path,
+      }},
+  }};

   auto result = download_service_->AddDownloadTask(
       download_task, [](const DownloadTask& finishedTask) {
diff --git a/engine/cli/commands/cortex_upd_cmd.h b/engine/cli/commands/cortex_upd_cmd.h
index 9c500a999..01793992f 100644
--- a/engine/cli/commands/cortex_upd_cmd.h
+++ b/engine/cli/commands/cortex_upd_cmd.h
@@ -1,5 +1,7 @@
 #pragma once
+
 #include <string>
+#include <vector>
 #include "services/download_service.h"
 #if !defined(_WIN32)
 #include <unistd.h>
@@ -67,19 +69,19 @@ inline std::string GetCortexServerBinary() {

 inline std::string GetHostName() {
   if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) {
-    return "https://delta.jan.ai";
+    return "delta.jan.ai";
   } else {
-    return "https://api.github.com";
+    return "api.github.com";
   }
 }

-inline std::string GetReleasePath() {
+inline std::vector<std::string> GetReleasePath() {
   if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) {
-    return "/cortex/latest/version.json";
+    return {"cortex", "latest", "version.json"};
   } else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
-    return "/repos/janhq/cortex.cpp/releases";
+    return {"repos", "janhq", "cortex.cpp", "releases"};
   } else {
-    return "/repos/janhq/cortex.cpp/releases/latest";
+    return {"repos", "janhq", "cortex.cpp", "releases", "latest"};
   }
 }
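The GetHostName()/GetReleasePath() changes above go hand in hand with url_parser::Url: the host string no longer carries a protocol prefix, and the path becomes a list of segments rather than a pre-joined string. The call sites imply that ToFullPath() assembles `protocol://host/seg1/seg2/...`. A rough stand-in, shown only to document that implied shape (the real utility lives in engine/utils/url_parser.h and may differ):

```cpp
#include <string>
#include <vector>

// Hypothetical stand-in for url_parser::Url, inferred from the call sites.
struct Url {
  std::string protocol;
  std::string host;
  std::vector<std::string> pathParams;

  std::string ToFullPath() const {
    std::string out = protocol + "://" + host;
    for (const auto& seg : pathParams) {
      out += "/" + seg;
    }
    return out;
  }
};

// Url{"https", "api.github.com",
//     {"repos", "janhq", "cortex.cpp", "releases", "latest"}}.ToFullPath()
// yields "https://api.github.com/repos/janhq/cortex.cpp/releases/latest".
```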
diff --git a/engine/cli/commands/engine_list_cmd.cc b/engine/cli/commands/engine_list_cmd.cc
index 3a2b527c9..b010e8687 100644
--- a/engine/cli/commands/engine_list_cmd.cc
+++ b/engine/cli/commands/engine_list_cmd.cc
@@ -1,8 +1,8 @@
 #include "engine_list_cmd.h"
 #include
 #include
+#include "common/engine_servicei.h"
 #include "server_start_cmd.h"
-#include "services/engine_service.h"
 #include "utils/curl_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/url_parser.h"
diff --git a/engine/cli/commands/hardware_activate_cmd.cc b/engine/cli/commands/hardware_activate_cmd.cc
index a0f34e4b7..77d600233 100644
--- a/engine/cli/commands/hardware_activate_cmd.cc
+++ b/engine/cli/commands/hardware_activate_cmd.cc
@@ -36,7 +36,6 @@ bool HardwareActivateCmd::Exec(
     }
   }

-  // TODO(sang) should use curl but it does not work (?)
   Json::Value body;
   Json::Value gpus_json = Json::arrayValue;
   std::vector<int> gpus;
@@ -51,36 +50,30 @@ bool HardwareActivateCmd::Exec(
   body["gpus"] = gpus_json;
   auto data_str = body.toStyledString();

-  httplib::Client cli(host + ":" + std::to_string(port));
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"v1", "hardware", "activate"},
+  };

-  auto res = cli.Post("/v1/hardware/activate", httplib::Headers(),
-                      data_str.data(), data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      auto root = json_helper::ParseJsonString(res->body);
-      if (!root["warning"].isNull()) {
-        CLI_LOG(root["warning"].asString());
-      }
-      if(body["gpus"].empty()) {
-        CLI_LOG("Deactivated all GPUs!");
-      } else {
-        std::string gpus_str;
-        for(auto i: gpus) {
-          gpus_str += " " + std::to_string(i);
-        }
-        CLI_LOG("Activated GPUs:" << gpus_str);
-      }
-      return true;
-    } else {
-      auto root = json_helper::ParseJsonString(res->body);
-      CLI_LOG(root["message"].asString());
-      return false;
-    }
-  } else {
-    auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
+  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
+  if (res.has_error()) {
+    auto root = json_helper::ParseJsonString(res.error());
+    CLI_LOG(root["message"].asString());
     return false;
   }
+  if (!res.value()["warning"].isNull()) {
+    CLI_LOG(res.value()["warning"].asString());
+  }
+  if (body["gpus"].empty()) {
+    CLI_LOG("Deactivated all GPUs!");
+  } else {
+    std::string gpus_str;
+    for (auto i : gpus) {
+      gpus_str += " " + std::to_string(i);
+    }
+    CLI_LOG("Activated GPUs:" << gpus_str);
+  }
   return true;
 }
-}  // namespace commands
\ No newline at end of file
+}  // namespace commands
diff --git a/engine/cli/commands/hardware_activate_cmd.h b/engine/cli/commands/hardware_activate_cmd.h
index eb5b68cc3..82676ca99 100644
--- a/engine/cli/commands/hardware_activate_cmd.h
+++ b/engine/cli/commands/hardware_activate_cmd.h
@@ -1,7 +1,7 @@
 #pragma once
+
 #include <string>
 #include <unordered_map>
-#include "common/hardware_config.h"

 namespace commands {
 class HardwareActivateCmd {
@@ -9,4 +9,4 @@ class HardwareActivateCmd {
   bool Exec(const std::string& host, int port,
             const std::unordered_map<std::string, std::string>& options);
 };
-}  // namespace commands
\ No newline at end of file
+}  // namespace commands
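For reference, the payload HardwareActivateCmd posts to /v1/hardware/activate is a single `gpus` array, and an empty array deactivates all GPUs — which is why the command special-cases `body["gpus"].empty()` above. A tiny jsoncpp sketch of the same payload construction (the GPU ids 0 and 1 are made-up example values):

```cpp
#include <json/json.h>
#include <iostream>

// Builds the request body shape used above, e.g. {"gpus": [0, 1]}.
int main() {
  Json::Value body;
  Json::Value gpus_json = Json::arrayValue;
  for (int id : {0, 1}) {
    gpus_json.append(id);
  }
  body["gpus"] = gpus_json;
  std::cout << body.toStyledString();  // pretty-printed JSON text
  return 0;
}
```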
"services/hardware_service.h" #include "utils/curl_utils.h" -#include "utils/hardware/cpu_info.h" -#include "utils/hardware/gpu_info.h" -#include "utils/hardware/os_info.h" -#include "utils/hardware/power_info.h" -#include "utils/hardware/ram_info.h" -#include "utils/hardware/storage_info.h" #include "utils/logging_utils.h" -#include "utils/string_utils.h" // clang-format off #include // clang-format on @@ -186,4 +177,4 @@ bool HardwareListCmd::Exec(const std::string& host, int port, return true; } -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/cli/commands/model_del_cmd.cc b/engine/cli/commands/model_del_cmd.cc index d78fcc921..2f46aa52a 100644 --- a/engine/cli/commands/model_del_cmd.cc +++ b/engine/cli/commands/model_del_cmd.cc @@ -1,7 +1,8 @@ #include "model_del_cmd.h" -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" namespace commands { @@ -16,18 +17,17 @@ void ModelDelCmd::Exec(const std::string& host, int port, } } - // Call API to delete model - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Delete("/v1/models/" + model_handle); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG("Model " + model_handle + " deleted successfully"); - } else { - CTL_ERR("Model failed to delete with status code: " << res->status); - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", model_handle}, + }; + + auto res = curl_utils::SimpleDeleteJson(url.ToFullPath()); + if (res.has_error()) { + CLI_LOG("Failed to delete model: " << res.error()); + return; } + CLI_LOG("Model " + model_handle + " deleted successfully"); } } // namespace commands diff --git a/engine/cli/commands/model_get_cmd.cc b/engine/cli/commands/model_get_cmd.cc index 2c7c294e3..c4a400136 100644 --- a/engine/cli/commands/model_get_cmd.cc +++ b/engine/cli/commands/model_get_cmd.cc @@ -1,8 +1,9 @@ #include "model_get_cmd.h" -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" namespace commands { @@ -17,20 +18,19 @@ void ModelGetCmd::Exec(const std::string& host, int port, } } - // Call API to delete model - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Get("/v1/models/" + model_handle); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG(res->body); - } else { - auto root = json_helper::ParseJsonString(res->body); - CLI_LOG(root["message"].asString()); - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", model_handle}, + }; + + auto res = curl_utils::SimpleGetJson(url.ToFullPath()); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); + return; } -} + CLI_LOG(res.value().toStyledString()); +} } // namespace commands diff --git a/engine/cli/commands/model_import_cmd.cc b/engine/cli/commands/model_import_cmd.cc index f8cf6a810..fbc01be7d 100644 --- a/engine/cli/commands/model_import_cmd.cc +++ b/engine/cli/commands/model_import_cmd.cc @@ -1,8 +1,10 @@ #include 
"model_import_cmd.h" #include -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" namespace commands { @@ -18,23 +20,25 @@ void ModelImportCmd::Exec(const std::string& host, int port, } } - httplib::Client cli(host + ":" + std::to_string(port)); + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", "import"}, + }; + Json::Value json_data; json_data["model"] = model_handle; json_data["modelPath"] = model_path; auto data_str = json_data.toStyledString(); - auto res = cli.Post("/v1/models/import", httplib::Headers(), data_str.data(), - data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG("Successfully import model from '" + model_path + - "' for modeID '" + model_handle + "'."); - } else { - CTL_ERR("Model failed to import model with status code: " << res->status); - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + + auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); + return; } + + CLI_LOG("Successfully import model from '" + model_path + "' for modelID '" + + model_handle + "'."); } } // namespace commands diff --git a/engine/cli/commands/model_import_cmd.h b/engine/cli/commands/model_import_cmd.h index 141351909..685e8d5fc 100644 --- a/engine/cli/commands/model_import_cmd.h +++ b/engine/cli/commands/model_import_cmd.h @@ -1,6 +1,7 @@ #pragma once #include + namespace commands { class ModelImportCmd { @@ -8,4 +9,4 @@ class ModelImportCmd { void Exec(const std::string& host, int port, const std::string& model_handle, const std::string& model_path); }; -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/cli/commands/model_list_cmd.cc b/engine/cli/commands/model_list_cmd.cc index 41fe61d1c..c63ed0012 100644 --- a/engine/cli/commands/model_list_cmd.cc +++ b/engine/cli/commands/model_list_cmd.cc @@ -2,12 +2,13 @@ #include #include #include - #include -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/string_utils.h" +#include "utils/url_parser.h" // clang-format off #include // clang-format on @@ -44,43 +45,40 @@ void ModelListCmd::Exec(const std::string& host, int port, int count = 0; // Iterate through directory - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Get("/v1/models"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - Json::Value body; - Json::Reader reader; - reader.parse(res->body, body); - if (!body["data"].isNull()) { - for (auto const& v : body["data"]) { - auto model_id = v["model"].asString(); - if (!filter.empty() && - !string_utils::StringContainsIgnoreCase(model_id, filter)) { - continue; - } + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models"}, + }; - count += 1; + auto res = curl_utils::SimpleGetJson(url.ToFullPath()); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); + return; + } + + if (!res.value()["data"].isNull()) { + for (auto const& v : res.value()["data"]) { + auto model_id = 
v["model"].asString(); + if (!filter.empty() && + !string_utils::StringContainsIgnoreCase(model_id, filter)) { + continue; + } - std::vector row = {std::to_string(count), - v["model"].asString()}; - if (display_engine) { - row.push_back(v["engine"].asString()); - } - if (display_version) { - row.push_back(v["version"].asString()); - } + count += 1; - table.add_row({row.begin(), row.end()}); - } + std::vector row = {std::to_string(count), + v["model"].asString()}; + if (display_engine) { + row.push_back(v["engine"].asString()); } - } else { - CTL_ERR("Failed to get model list with status code: " << res->status); - return; + if (display_version) { + row.push_back(v["version"].asString()); + } + + table.add_row({row.begin(), row.end()}); } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); - return; } std::cout << table << std::endl; diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc index 376943fd1..75c0ce1a0 100644 --- a/engine/cli/commands/model_pull_cmd.cc +++ b/engine/cli/commands/model_pull_cmd.cc @@ -1,10 +1,13 @@ #include "model_pull_cmd.h" +#include #include "server_start_cmd.h" #include "utils/cli_selection_utils.h" +#include "utils/curl_utils.h" #include "utils/download_progress.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/scope_exit.h" +#include "utils/url_parser.h" #if defined(_WIN32) #include #endif @@ -33,65 +36,57 @@ std::optional ModelPullCmd::Exec(const std::string& host, int port, } } - // Get model info from Server - httplib::Client cli(host + ":" + std::to_string(port)); - cli.set_read_timeout(std::chrono::seconds(60)); + auto model_info_url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"models", "pull", "info"}, + }; Json::Value j_data; j_data["model"] = input; auto d_str = j_data.toStyledString(); - auto res = cli.Post("/models/pull/info", httplib::Headers(), d_str.data(), - d_str.size(), "application/json"); - - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - // CLI_LOG(res->body); - auto root = json_helper::ParseJsonString(res->body); - auto id = root["id"].asString(); - bool is_cortexso = root["modelSource"].asString() == "cortexso"; - auto default_branch = root["defaultBranch"].asString(); - std::vector downloaded; - for (auto const& v : root["downloadedModels"]) { - downloaded.push_back(v.asString()); - } - std::vector avails; - for (auto const& v : root["availableModels"]) { - avails.push_back(v.asString()); - } - auto download_url = root["downloadUrl"].asString(); - - if (downloaded.empty() && avails.empty()) { - model_id = id; - model = download_url; - } else { - if (is_cortexso) { - auto selection = cli_selection_utils::PrintModelSelection( - downloaded, avails, - default_branch.empty() - ? 
diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc
index 376943fd1..75c0ce1a0 100644
--- a/engine/cli/commands/model_pull_cmd.cc
+++ b/engine/cli/commands/model_pull_cmd.cc
@@ -1,10 +1,13 @@
 #include "model_pull_cmd.h"
+#include
 #include "server_start_cmd.h"
 #include "utils/cli_selection_utils.h"
+#include "utils/curl_utils.h"
 #include "utils/download_progress.h"
 #include "utils/json_helper.h"
 #include "utils/logging_utils.h"
 #include "utils/scope_exit.h"
+#include "utils/url_parser.h"
 #if defined(_WIN32)
 #include <windows.h>
 #endif
@@ -33,65 +36,57 @@ std::optional<std::string> ModelPullCmd::Exec(const std::string& host, int port,
     }
   }

-  // Get model info from Server
-  httplib::Client cli(host + ":" + std::to_string(port));
-  cli.set_read_timeout(std::chrono::seconds(60));
+  auto model_info_url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"models", "pull", "info"},
+  };
   Json::Value j_data;
   j_data["model"] = input;
   auto d_str = j_data.toStyledString();
-  auto res = cli.Post("/models/pull/info", httplib::Headers(), d_str.data(),
-                      d_str.size(), "application/json");
-
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      // CLI_LOG(res->body);
-      auto root = json_helper::ParseJsonString(res->body);
-      auto id = root["id"].asString();
-      bool is_cortexso = root["modelSource"].asString() == "cortexso";
-      auto default_branch = root["defaultBranch"].asString();
-      std::vector<std::string> downloaded;
-      for (auto const& v : root["downloadedModels"]) {
-        downloaded.push_back(v.asString());
-      }
-      std::vector<std::string> avails;
-      for (auto const& v : root["availableModels"]) {
-        avails.push_back(v.asString());
-      }
-      auto download_url = root["downloadUrl"].asString();
-
-      if (downloaded.empty() && avails.empty()) {
-        model_id = id;
-        model = download_url;
-      } else {
-        if (is_cortexso) {
-          auto selection = cli_selection_utils::PrintModelSelection(
-              downloaded, avails,
-              default_branch.empty()
-                  ? std::nullopt
-                  : std::optional(default_branch));
-
-          if (!selection.has_value()) {
-            CLI_LOG("Invalid selection");
-            return std::nullopt;
-          }
-          model_id = selection.value();
-          model = model_id;
-        } else {
-          auto selection = cli_selection_utils::PrintSelection(avails);
-          CLI_LOG("Selected: " << selection.value());
-          model_id = id + ":" + selection.value();
-          model = download_url + selection.value();
-        }
+  auto res = curl_utils::SimplePostJson(model_info_url.ToFullPath(), d_str);
+
+  if (res.has_error()) {
+    auto root = json_helper::ParseJsonString(res.error());
+    CLI_LOG(root["message"].asString());
+    return std::nullopt;
+  }
+
+  auto id = res.value()["id"].asString();
+  bool is_cortexso = res.value()["modelSource"].asString() == "cortexso";
+  auto default_branch = res.value()["defaultBranch"].asString();
+  std::vector<std::string> downloaded;
+  for (auto const& v : res.value()["downloadedModels"]) {
+    downloaded.push_back(v.asString());
+  }
+  std::vector<std::string> avails;
+  for (auto const& v : res.value()["availableModels"]) {
+    avails.push_back(v.asString());
+  }
+  auto download_url = res.value()["downloadUrl"].asString();
+
+  if (downloaded.empty() && avails.empty()) {
+    model_id = id;
+    model = download_url;
+  } else {
+    if (is_cortexso) {
+      auto selection = cli_selection_utils::PrintModelSelection(
+          downloaded, avails,
+          default_branch.empty() ? std::nullopt
+                                 : std::optional(default_branch));
+
+      if (!selection.has_value()) {
+        CLI_LOG("Invalid selection");
+        return std::nullopt;
       }
+      model_id = selection.value();
+      model = model_id;
     } else {
-      auto root = json_helper::ParseJsonString(res->body);
-      CLI_LOG(root["message"].asString());
-      return std::nullopt;
+      auto selection = cli_selection_utils::PrintSelection(avails);
+      CLI_LOG("Selected: " << selection.value());
+      model_id = id + ":" + selection.value();
+      model = download_url + selection.value();
     }
-  } else {
-    auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
-    return std::nullopt;
   }

   CTL_INF("model: " << model << ", model_id: " << model_id);
@@ -99,19 +94,18 @@ std::optional<std::string> ModelPullCmd::Exec(const std::string& host, int port,
   Json::Value json_data;
   json_data["model"] = model;
   auto data_str = json_data.toStyledString();
-  cli.set_read_timeout(std::chrono::seconds(60));
-  res = cli.Post("/v1/models/pull", httplib::Headers(), data_str.data(),
-                 data_str.size(), "application/json");
-
-  if (res) {
-    if (res->status != httplib::StatusCode::OK_200) {
-      auto root = json_helper::ParseJsonString(res->body);
-      CLI_LOG(root["message"].asString());
-      return std::nullopt;
-    }
-  } else {
-    auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
+
+  auto pull_url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"v1", "models", "pull"},
+  };
+
+  auto pull_result =
+      curl_utils::SimplePostJson(pull_url.ToFullPath(), data_str);
+  if (pull_result.has_error()) {
+    auto root = json_helper::ParseJsonString(pull_result.error());
+    CLI_LOG(root["message"].asString());
     return std::nullopt;
   }

@@ -154,23 +148,19 @@ bool ModelPullCmd::AbortModelPull(const std::string& host, int port,
   Json::Value json_data;
   json_data["taskId"] = task_id;
   auto data_str = json_data.toStyledString();
-  httplib::Client cli(host + ":" + std::to_string(port));
-  cli.set_read_timeout(std::chrono::seconds(60));
-  auto res = cli.Delete("/v1/models/pull", httplib::Headers(), data_str.data(),
-                        data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
CTL_INF("Abort model pull successfully: " << task_id); - return true; - } else { - auto root = json_helper::ParseJsonString(res->body); - CLI_LOG(root["message"].asString()); - return false; - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", "pull"}, + }; + auto res = curl_utils::SimpleDeleteJson(url.ToFullPath(), data_str); + + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); return false; } + CTL_INF("Abort model pull successfully: " << task_id); + return true; } }; // namespace commands diff --git a/engine/cli/commands/model_pull_cmd.h b/engine/cli/commands/model_pull_cmd.h index d05759dbc..022da9c84 100644 --- a/engine/cli/commands/model_pull_cmd.h +++ b/engine/cli/commands/model_pull_cmd.h @@ -1,23 +1,17 @@ #pragma once -#include "services/model_service.h" +#include +#include namespace commands { class ModelPullCmd { public: - explicit ModelPullCmd(std::shared_ptr download_service) - : model_service_{ModelService(download_service)} {}; - explicit ModelPullCmd(const ModelService& model_service) - : model_service_{model_service} {}; std::optional Exec(const std::string& host, int port, const std::string& input); private: bool AbortModelPull(const std::string& host, int port, const std::string& task_id); - - private: - ModelService model_service_; }; } // namespace commands diff --git a/engine/cli/commands/model_start_cmd.cc b/engine/cli/commands/model_start_cmd.cc index 9b2f9d4b3..ea6b81e5a 100644 --- a/engine/cli/commands/model_start_cmd.cc +++ b/engine/cli/commands/model_start_cmd.cc @@ -1,7 +1,6 @@ #include "model_start_cmd.h" #include "cortex_upd_cmd.h" #include "hardware_activate_cmd.h" -#include "httplib.h" #include "run_cmd.h" #include "server_start_cmd.h" #include "utils/cli_selection_utils.h" @@ -14,7 +13,7 @@ bool ModelStartCmd::Exec( const std::unordered_map& options, bool print_success_log) { std::optional model_id = - SelectLocalModel(host, port, model_service_, model_handle); + SelectLocalModel(host, port, model_handle); if (!model_id.has_value()) { return false; @@ -46,41 +45,34 @@ bool ModelStartCmd::Exec( while (count--) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); if (commands::IsServerAlive(host, port)) - break; + break; } } - // Call API to start model - httplib::Client cli(host + ":" + std::to_string(port)); + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", "start"}, + }; + Json::Value json_data; json_data["model"] = model_id.value(); auto data_str = json_data.toStyledString(); - cli.set_read_timeout(std::chrono::seconds(60)); - auto res = cli.Post("/v1/models/start", httplib::Headers(), data_str.data(), - data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - if (print_success_log) { - CLI_LOG(model_id.value() - << " model started successfully. 
Use `" - << commands::GetCortexBinary() << " run " << *model_id - << "` for interactive chat shell"); - } - auto root = json_helper::ParseJsonString(res->body); - if (!root["warning"].isNull()) { - CLI_LOG(root["warning"].asString()); - } - return true; - } else { - auto root = json_helper::ParseJsonString(res->body); - CLI_LOG(root["message"].asString()); - return false; - } - } else { - auto err = res.error(); - CLI_LOG("HTTP error: " << httplib::to_string(err)); + auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); return false; } -} + if (print_success_log) { + CLI_LOG(model_id.value() << " model started successfully. Use `" + << commands::GetCortexBinary() << " run " + << *model_id << "` for interactive chat shell"); + } + if (!res.value()["warning"].isNull()) { + CLI_LOG(res.value()["warning"].asString()); + } + return true; +} }; // namespace commands diff --git a/engine/cli/commands/model_start_cmd.h b/engine/cli/commands/model_start_cmd.h index 652d37994..519db0f0d 100644 --- a/engine/cli/commands/model_start_cmd.h +++ b/engine/cli/commands/model_start_cmd.h @@ -1,20 +1,14 @@ #pragma once + #include #include -#include "services/model_service.h" namespace commands { class ModelStartCmd { public: - explicit ModelStartCmd(const ModelService& model_service) - : model_service_{model_service} {}; - bool Exec(const std::string& host, int port, const std::string& model_handle, const std::unordered_map& options, bool print_success_log = true); - - private: - ModelService model_service_; }; } // namespace commands diff --git a/engine/cli/commands/model_status_cmd.cc b/engine/cli/commands/model_status_cmd.cc index 6677fe0af..cd9f3034d 100644 --- a/engine/cli/commands/model_status_cmd.cc +++ b/engine/cli/commands/model_status_cmd.cc @@ -1,7 +1,9 @@ #include "model_status_cmd.h" -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" namespace commands { bool ModelStatusCmd::IsLoaded(const std::string& host, int port, @@ -14,22 +16,20 @@ bool ModelStatusCmd::IsLoaded(const std::string& host, int port, return false; } } + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "models", "status", model_handle}, + }; - // Call API to check model status - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Get("/v1/models/status/" + model_handle); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CTL_INF(res->body); - } else { - CTL_WRN("Failed to get model status with code: " << res->status); - return false; - } - } else { - auto err = res.error(); - CTL_WRN("HTTP error: " << httplib::to_string(err)); + auto res = curl_utils::SimpleGetJson(url.ToFullPath()); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); return false; } + + CTL_INF(res.value().toStyledString()); return true; } } // namespace commands diff --git a/engine/cli/commands/model_status_cmd.h b/engine/cli/commands/model_status_cmd.h index 3bf1cb115..de094e748 100644 --- a/engine/cli/commands/model_status_cmd.h +++ b/engine/cli/commands/model_status_cmd.h @@ -1,18 +1,12 @@ #pragma once + #include -#include "services/model_service.h" namespace commands { class ModelStatusCmd { public: - explicit 
-  explicit ModelStatusCmd(const ModelService& model_service)
-      : model_service_{model_service} {};
-
   bool IsLoaded(const std::string& host, int port,
                 const std::string& model_handle);
-
- private:
-  ModelService model_service_;
 };
 }  // namespace commands
diff --git a/engine/cli/commands/model_stop_cmd.cc b/engine/cli/commands/model_stop_cmd.cc
index 9a14b0876..291977dc7 100644
--- a/engine/cli/commands/model_stop_cmd.cc
+++ b/engine/cli/commands/model_stop_cmd.cc
@@ -1,30 +1,29 @@
 #include "model_stop_cmd.h"
-#include "httplib.h"
+#include
+#include "utils/curl_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/url_parser.h"

 namespace commands {

 void ModelStopCmd::Exec(const std::string& host, int port,
                         const std::string& model_handle) {
-  // Call API to stop model
-  httplib::Client cli(host + ":" + std::to_string(port));
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"v1", "models", "stop"},
+  };
+
   Json::Value json_data;
   json_data["model"] = model_handle;
   auto data_str = json_data.toStyledString();
-  auto res = cli.Post("/v1/models/stop", httplib::Headers(), data_str.data(),
-                      data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Model unloaded!");
-    } else {
-      auto root = json_helper::ParseJsonString(res->body);
-      CLI_LOG(root["message"].asString());
-      return;
-    }
-  } else {
-    auto err = res.error();
-    CLI_LOG("HTTP error: " << httplib::to_string(err));
+  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
+
+  if (res.has_error()) {
+    CLI_LOG_ERROR("Failed to stop model: " << res.error());
+    return;
   }
-}
+  CLI_LOG("Model stopped!");
+}
 };  // namespace commands
diff --git a/engine/cli/commands/model_stop_cmd.h b/engine/cli/commands/model_stop_cmd.h
index f341e70d2..f437f5000 100644
--- a/engine/cli/commands/model_stop_cmd.h
+++ b/engine/cli/commands/model_stop_cmd.h
@@ -1,18 +1,11 @@
 #pragma once

 #include <string>
-#include "services/model_service.h"

 namespace commands {

 class ModelStopCmd {
  public:
-  explicit ModelStopCmd(const ModelService& model_service)
-      : model_service_{model_service} {};
-
   void Exec(const std::string& host, int port,
             const std::string& model_handle);
-
- private:
-  ModelService model_service_;
 };
 }  // namespace commands
diff --git a/engine/cli/commands/model_upd_cmd.cc b/engine/cli/commands/model_upd_cmd.cc
index af37efd5f..6534d1fbd 100644
--- a/engine/cli/commands/model_upd_cmd.cc
+++ b/engine/cli/commands/model_upd_cmd.cc
@@ -1,9 +1,9 @@
 #include "model_upd_cmd.h"
-#include "httplib.h"
-
 #include "server_start_cmd.h"
-#include "utils/file_manager_utils.h"
+#include "utils/curl_utils.h"
+#include "utils/json_helper.h"
 #include "utils/logging_utils.h"
+#include "utils/url_parser.h"

 namespace commands {

@@ -22,7 +22,12 @@ void ModelUpdCmd::Exec(
     }
   }

-  httplib::Client cli(host + ":" + std::to_string(port));
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"v1", "models", model_handle_},
+  };
+
   Json::Value json_data;
   for (const auto& [key, value] : options) {
     if (!value.empty()) {
@@ -30,21 +35,15 @@ void ModelUpdCmd::Exec(
     }
   }
   auto data_str = json_data.toStyledString();
-  auto res = cli.Patch("/v1/models/" + model_handle_, httplib::Headers(),
-                       data_str.data(), data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Successfully updated model ID '" + model_handle_ + "'!");
-      return;
-    } else {
to update with status code: " << res->status); - return; - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + auto res = curl_utils::SimplePatchJson(url.ToFullPath(), data_str); + if (res.has_error()) { + auto root = json_helper::ParseJsonString(res.error()); + CLI_LOG(root["message"].asString()); return; } + + CLI_LOG("Successfully updated model ID '" + model_handle_ + "'!"); + return; } void ModelUpdCmd::UpdateConfig(Json::Value& data, const std::string& key, @@ -335,4 +334,4 @@ void ModelUpdCmd::UpdateBooleanField(const std::string& key, bool boolValue = (value == "true" || value == "1"); setter(boolValue); } -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/cli/commands/model_upd_cmd.h b/engine/cli/commands/model_upd_cmd.h index f2eaa8675..0a78c3eae 100644 --- a/engine/cli/commands/model_upd_cmd.h +++ b/engine/cli/commands/model_upd_cmd.h @@ -1,11 +1,11 @@ #pragma once -#include -#include + +#include +#include #include #include #include -#include -#include "json/json.h" + namespace commands { class ModelUpdCmd { public: @@ -28,4 +28,4 @@ class ModelUpdCmd { private: std::string model_handle_; }; -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/cli/commands/ps_cmd.cc b/engine/cli/commands/ps_cmd.cc index ca891dab4..c692ffc00 100644 --- a/engine/cli/commands/ps_cmd.cc +++ b/engine/cli/commands/ps_cmd.cc @@ -1,29 +1,29 @@ #include "ps_cmd.h" -#include #include #include -#include "utils/engine_constants.h" +#include "utils/curl_utils.h" #include "utils/format_utils.h" -#include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/string_utils.h" +#include "utils/url_parser.h" namespace commands { void PsCmd::Exec(const std::string& host, int port) { - auto host_and_port{host + ":" + std::to_string(port)}; - httplib::Client cli(host_and_port); - - auto res = cli.Get("/inferences/server/models"); - if (!res || res->status != httplib::StatusCode::OK_200) { + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"inferences", "server", "models"}, + }; + auto res = curl_utils::SimpleGetJson(url.ToFullPath()); + if (res.has_error()) { CLI_LOG("No models loaded!"); return; } - auto data = json_helper::ParseJsonString(res->body)["data"]; std::vector model_status_list; try { - for (const auto& item : data) { + for (const auto& item : res.value()["data"]) { ModelLoadedStatus model_status; // TODO(sang) hardcode for now model_status.engine = kLlamaEngine; diff --git a/engine/cli/commands/run_cmd.cc b/engine/cli/commands/run_cmd.cc index 279128552..1b71f1af7 100644 --- a/engine/cli/commands/run_cmd.cc +++ b/engine/cli/commands/run_cmd.cc @@ -14,7 +14,6 @@ namespace commands { std::optional SelectLocalModel(std::string host, int port, - ModelService& model_service, const std::string& model_handle) { std::optional model_id = model_handle; cortex::db::Models modellist_handler; @@ -45,7 +44,7 @@ std::optional SelectLocalModel(std::string host, int port, } else { auto related_models_ids = modellist_handler.FindRelatedModel(model_handle); if (related_models_ids.has_error() || related_models_ids.value().empty()) { - auto result = ModelPullCmd(model_service).Exec(host, port, model_handle); + auto result = ModelPullCmd().Exec(host, port, model_handle); if (!result) { CLI_LOG("Model " << model_handle << " not found!"); return std::nullopt; @@ -70,7 +69,7 @@ std::optional 
@@ -70,7 +69,7 @@ std::optional<std::string> SelectLocalModel(std::string host, int port,
 void RunCmd::Exec(bool run_detach,
                   const std::unordered_map<std::string, std::string>& options) {
   std::optional<std::string> model_id =
-      SelectLocalModel(host_, port_, model_service_, model_handle_);
+      SelectLocalModel(host_, port_, model_handle_);
   if (!model_id.has_value()) {
     return;
   }
@@ -127,10 +126,9 @@ void RunCmd::Exec(bool run_detach,
     {
       if ((mc.engine.find(kLlamaRepo) == std::string::npos &&
            mc.engine.find(kLlamaEngine) == std::string::npos) ||
-          !commands::ModelStatusCmd(model_service_)
-               .IsLoaded(host_, port_, *model_id)) {
+          !commands::ModelStatusCmd().IsLoaded(host_, port_, *model_id)) {

-        auto res = commands::ModelStartCmd(model_service_)
+        auto res = commands::ModelStartCmd()
                        .Exec(host_, port_, *model_id, options,
                              false /*print_success_log*/);
         if (!res) {
@@ -146,7 +144,7 @@ void RunCmd::Exec(bool run_detach,
                 << commands::GetCortexBinary() << " run " << *model_id
                 << "` for interactive chat shell");
       } else {
-        ChatCompletionCmd(model_service_).Exec(host_, port_, *model_id, mc, "");
+        ChatCompletionCmd().Exec(host_, port_, *model_id, mc, "");
       }
     }
   } catch (const std::exception& e) {
diff --git a/engine/cli/commands/run_cmd.h b/engine/cli/commands/run_cmd.h
index 6e524c6b1..c0f6a4eb2 100644
--- a/engine/cli/commands/run_cmd.h
+++ b/engine/cli/commands/run_cmd.h
@@ -3,12 +3,10 @@
 #include
 #include
 #include "services/engine_service.h"
-#include "services/model_service.h"

 namespace commands {

 std::optional<std::string> SelectLocalModel(std::string host, int port,
-                                            ModelService& model_service,
                                             const std::string& model_handle);

 class RunCmd {
@@ -19,8 +17,7 @@ class RunCmd {
         port_{port},
         model_handle_{std::move(model_handle)},
         download_service_(download_service),
-        engine_service_{EngineService(download_service)},
-        model_service_{ModelService(download_service)} {};
+        engine_service_{EngineService(download_service)} {};

   void Exec(bool chat_flag,
             const std::unordered_map<std::string, std::string>& options);
@@ -31,7 +28,6 @@ class RunCmd {
   std::string model_handle_;

   std::shared_ptr<DownloadService> download_service_;
-  ModelService model_service_;
   EngineService engine_service_;
 };
 }  // namespace commands
diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
index 5ba972463..6f36515f1 100644
--- a/engine/cli/commands/server_start_cmd.cc
+++ b/engine/cli/commands/server_start_cmd.cc
@@ -25,8 +25,6 @@ bool TryConnectToServer(const std::string& host, int port) {
   }
 }  // namespace

-ServerStartCmd::ServerStartCmd() {}
-
 bool ServerStartCmd::Exec(const std::string& host, int port,
                           const std::optional<std::string>& log_level) {
   std::string log_level_;
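server_start_cmd.h (next hunk) turns IsServerAlive into a curl_utils::SimpleGet against /healthz, so even the aliveness probe now runs through libcurl. That is also why main.cc (further below) gains an explicit curl_global_init(CURL_GLOBAL_DEFAULT): libcurl requires one process-wide init before any easy handle is created, and without it curl_easy_init() has to perform the init lazily, which is not thread-safe. The expected shape, per the libcurl documentation (a sketch, not the project's actual main):

```cpp
#include <curl/curl.h>

int main(int argc, char* argv[]) {
  // Must run once, before any thread creates an easy handle.
  curl_global_init(CURL_GLOBAL_DEFAULT);

  // ... CLI command dispatch; each command creates and tears down its own
  // easy handle, directly or via the curl_utils helpers ...

  curl_global_cleanup();  // optional at process exit, but keeps init paired
  return 0;
}
```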
diff --git a/engine/cli/commands/server_start_cmd.h b/engine/cli/commands/server_start_cmd.h
index 780123172..f3880532e 100644
--- a/engine/cli/commands/server_start_cmd.h
+++ b/engine/cli/commands/server_start_cmd.h
@@ -1,22 +1,30 @@
 #pragma once
-#include
-#include "httplib.h"
 #include
+#include
+#include "utils/curl_utils.h"
+#include "utils/logging_utils.h"
+#include "utils/url_parser.h"
+
 namespace commands {

 inline bool IsServerAlive(const std::string& host, int port) {
-  httplib::Client cli(host + ":" + std::to_string(port));
-  auto res = cli.Get("/healthz");
-  if (res && res->status == httplib::StatusCode::OK_200) {
-    return true;
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"healthz"},
+  };
+  auto res = curl_utils::SimpleGet(url.ToFullPath());
+  if (res.has_error()) {
+    CTL_WRN("Server is not alive: " << res.error());
+    return false;
   }
-  return false;
+  return true;
 }

 class ServerStartCmd {
  public:
-  ServerStartCmd();
-  bool Exec(const std::string& host, int port, const std::optional<std::string>& log_level = std::nullopt);
+  bool Exec(const std::string& host, int port,
+            const std::optional<std::string>& log_level = std::nullopt);
 };
 }  // namespace commands
diff --git a/engine/cli/commands/server_stop_cmd.cc b/engine/cli/commands/server_stop_cmd.cc
index e55446923..303022174 100644
--- a/engine/cli/commands/server_stop_cmd.cc
+++ b/engine/cli/commands/server_stop_cmd.cc
@@ -1,20 +1,25 @@
 #include "server_stop_cmd.h"
-#include "httplib.h"
+#include "utils/curl_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/url_parser.h"

 namespace commands {

 ServerStopCmd::ServerStopCmd(std::string host, int port)
     : host_(std::move(host)), port_(port) {}

 void ServerStopCmd::Exec() {
-  httplib::Client cli(host_ + ":" + std::to_string(port_));
-  auto res = cli.Delete("/processManager/destroy");
-  if (res) {
-    CLI_LOG("Server stopped!");
-  } else {
-    auto err = res.error();
-    CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err));
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host_ + ":" + std::to_string(port_),
+      .pathParams = {"processManager", "destroy"},
+  };
+
+  auto res = curl_utils::SimpleDeleteJson(url.ToFullPath());
+  if (res.has_error()) {
+    CLI_LOG_ERROR("Failed to stop server: " << res.error());
+    return;
   }
-}
+  CLI_LOG("Server stopped!");
+}
 };  // namespace commands
diff --git a/engine/cli/main.cc b/engine/cli/main.cc
index a03c5adf0..52fc5591f 100644
--- a/engine/cli/main.cc
+++ b/engine/cli/main.cc
@@ -88,6 +88,8 @@ int main(int argc, char* argv[]) {
     return 1;
   }

+  curl_global_init(CURL_GLOBAL_DEFAULT);
+
   bool should_install_server = false;
   bool verbose = false;
   for (int i = 0; i < argc; i++) {
diff --git a/engine/controllers/server.h b/engine/controllers/server.h
index 2889e7ed1..5d6b8ded4 100644
--- a/engine/controllers/server.h
+++ b/engine/controllers/server.h
@@ -9,11 +9,6 @@
 #include

-#ifndef NDEBUG
-// crash the server in debug mode, otherwise send an http 500 error
-#define CPPHTTPLIB_NO_EXCEPTIONS 1
-#endif
-
 #include
 #include
 #include "common/base.h"
diff --git a/engine/e2e-test/main.py b/engine/e2e-test/main.py
index add2354f3..9ef2970f9 100644
--- a/engine/e2e-test/main.py
+++ b/engine/e2e-test/main.py
@@ -8,8 +8,7 @@
 ### models, keeps in order, note that we only uninstall engine after finishing all models test
 from test_api_model_pull_direct_url import TestApiModelPullDirectUrl
-from test_api_model_start import TestApiModelStart
-from test_api_model_stop import TestApiModelStop
+from test_api_model_start_stop import TestApiModelStartStop
 from test_api_model_get import TestApiModelGet
 from test_api_model_list import TestApiModelList
 from test_api_model_update import TestApiModelUpdate
diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py
index 2f06e6edb..6856e05f4 100644
--- a/engine/e2e-test/test_api_docker.py
+++ b/engine/e2e-test/test_api_docker.py
@@ -1,18 +1,14 @@
 import pytest
 import requests
-import os
-
-from pathlib import Path
-from test_runner import (
-    wait_for_websocket_download_success_event
-)
+from test_runner import wait_for_websocket_download_success_event

 repo_branches = ["tinyllama:1b-gguf"]

+
 class TestCortexsoModels:

     @pytest.fixture(autouse=True)
-    def setup_and_teardown(self, request):
+    def setup_and_teardown(self):
         yield

     @pytest.mark.parametrize("model_url", repo_branches)
     @pytest.mark.asyncio
     async def test_models_on_cortexso_hub(self, model_url):
cortexso hub") # Pull model from cortexso hub - json_body = { - "model": model_url - } + json_body = {"model": model_url} response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) assert response.status_code == 200, f"Failed to pull model: {model_url}" - + await wait_for_websocket_download_success_event(timeout=None) - + print("Check if the model was pulled successfully") # Check if the model was pulled successfully get_model_response = requests.get( f"http://127.0.0.1:3928/v1/models/{model_url}" ) - assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}" + assert ( + get_model_response.status_code == 200 + ), f"Failed to fetch model: {model_url}" assert ( get_model_response.json()["model"] == model_url ), f"Unexpected model name for: {model_url}" @@ -47,7 +43,10 @@ async def test_models_on_cortexso_hub(self, model_url): print("Start the model") # Start the model - response = requests.post("http://localhost:3928/v1/models/start", json=json_body) + response = requests.post( + "http://localhost:3928/v1/models/start", json=json_body + ) + print(response.json()) assert response.status_code == 200, f"status_code: {response.status_code}" print("Send an inference request") @@ -55,26 +54,24 @@ async def test_models_on_cortexso_hub(self, model_url): inference_json_body = { "frequency_penalty": 0.2, "max_tokens": 4096, - "messages": [ - { - "content": "", - "role": "user" - } - ], + "messages": [{"content": "", "role": "user"}], "model": model_url, "presence_penalty": 0.6, - "stop": [ - "End" - ], + "stop": ["End"], "stream": False, "temperature": 0.8, - "top_p": 0.95 - } - response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"}) - assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}" + "top_p": 0.95, + } + response = requests.post( + "http://localhost:3928/v1/chat/completions", + json=inference_json_body, + headers={"Content-Type": "application/json"}, + ) + assert ( + response.status_code == 200 + ), f"status_code: {response.status_code} response: {response.json()}" print("Stop the model") # Stop the model response = requests.post("http://localhost:3928/v1/models/stop", json=json_body) assert response.status_code == 200, f"status_code: {response.status_code}" - diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py index 2a491d07a..1951e5c3a 100644 --- a/engine/e2e-test/test_api_engine_uninstall.py +++ b/engine/e2e-test/test_api_engine_uninstall.py @@ -1,9 +1,10 @@ -import pytest import time + +import pytest import requests from test_runner import ( run, - start_server, + start_server_if_needed, stop_server, wait_for_websocket_download_success_event, ) @@ -14,22 +15,20 @@ class TestApiEngineUninstall: @pytest.fixture(autouse=True) def setup_and_teardown(self): # Setup - success = start_server() - if not success: - raise Exception("Failed to start server") + start_server_if_needed() yield # Teardown stop_server() - + @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successful(self): response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) time.sleep(30) - + response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_model_start.py 
diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start_stop.py
similarity index 74%
rename from engine/e2e-test/test_api_model_start.py
rename to engine/e2e-test/test_api_model_start_stop.py
index b3e33d113..78c20e8da 100644
--- a/engine/e2e-test/test_api_model_start.py
+++ b/engine/e2e-test/test_api_model_start_stop.py
@@ -1,26 +1,28 @@
-import pytest
 import time
+
+import pytest
 import requests
-from test_runner import run, start_server, stop_server
 from test_runner import (
-    wait_for_websocket_download_success_event
+    run,
+    start_server_if_needed,
+    stop_server,
+    wait_for_websocket_download_success_event,
 )
-class TestApiModelStart:
+
+
+class TestApiModelStartStop:

     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
-        stop_server()
-        success = start_server()
-        if not success:
-            raise Exception("Failed to start server")
+        start_server_if_needed()
         run("Delete model", ["models", "delete", "tinyllama:gguf"])

         yield

         # Teardown
         stop_server()
-
+
     @pytest.mark.asyncio
     async def test_models_start_should_be_successful(self):
         response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
@@ -28,16 +30,17 @@ async def test_models_start_should_be_successful(self):
         await wait_for_websocket_download_success_event(timeout=None)
         # TODO(sang) need to fix for cuda download
         time.sleep(30)
-
-        json_body = {
-            "model": "tinyllama:gguf"
-        }
+
+        json_body = {"model": "tinyllama:gguf"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
         assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
         await wait_for_websocket_download_success_event(timeout=None)
-
+
         json_body = {"model": "tinyllama:gguf"}
         response = requests.post(
             "http://localhost:3928/v1/models/start", json=json_body
         )
         assert response.status_code == 200, f"status_code: {response.status_code}"
+
+        response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
+        assert response.status_code == 200, f"status_code: {response.status_code}"
diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py
deleted file mode 100644
index 4fc7a55e2..000000000
--- a/engine/e2e-test/test_api_model_stop.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import pytest
-import time
-import requests
-from test_runner import run, start_server, stop_server
-from test_runner import (
-    wait_for_websocket_download_success_event
-)
-
-class TestApiModelStop:
-
-    @pytest.fixture(autouse=True)
-    def setup_and_teardown(self):
-        # Setup
-        stop_server()
-        success = start_server()
-        if not success:
-            raise Exception("Failed to start server")
-
-        yield
-
-        run("Uninstall engine", ["engines", "uninstall", "llama-cpp"])
-        # Teardown
-        stop_server()
-
-    @pytest.mark.asyncio
-    async def test_models_stop_should_be_successful(self):
-        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
-        assert response.status_code == 200
-        await wait_for_websocket_download_success_event(timeout=None)
-        time.sleep(30)
-
-        json_body = {"model": "tinyllama:gguf"}
-        response = requests.post(
-            "http://localhost:3928/v1/models/start", json=json_body
-        )
-        assert response.status_code == 200, f"status_code: {response.status_code}"
-        response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
-        assert response.status_code == 200, f"status_code: {response.status_code}"
diff --git a/engine/e2e-test/test_cli_model_delete.py b/engine/e2e-test/test_cli_model_delete.py
index d0ba43ec1..06cc3a4c3 100644
--- a/engine/e2e-test/test_cli_model_delete.py
+++ b/engine/e2e-test/test_cli_model_delete.py
@@ -1,11 +1,13 @@
 import pytest
 import requests
-from test_runner import popen, run
-from test_runner import start_server, stop_server
 from test_runner import (
-    wait_for_websocket_download_success_event
+    run,
+    start_server,
+    stop_server,
+    wait_for_websocket_download_success_event,
 )

+
 class TestCliModelDelete:

     @pytest.fixture(autouse=True)
@@ -22,15 +24,13 @@ def setup_and_teardown(self):
         run("Delete model", ["models", "delete", "tinyllama:gguf"])
         stop_server()

-    @pytest.mark.asyncio
+    @pytest.mark.asyncio
     async def test_models_delete_should_be_successful(self):
-        json_body = {
-            "model": "tinyllama:gguf"
-        }
+        json_body = {"model": "tinyllama:gguf"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
         assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
         await wait_for_websocket_download_success_event(timeout=None)
-
+
         exit_code, output, error = run(
             "Delete model", ["models", "delete", "tinyllama:gguf"]
         )
diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/test_runner.py
index 843e669b4..dfc515df7 100644
--- a/engine/e2e-test/test_runner.py
+++ b/engine/e2e-test/test_runner.py
@@ -7,6 +7,7 @@
 import threading
 import time
 import requests
+from requests.exceptions import RequestException
 from typing import List
 import websockets
@@ -72,6 +73,42 @@ def start_server() -> bool:
     return start_server_nix()

+
+def start_server_if_needed():
+    """
+    Start the server if it is not already running.
+    Sends a healthz request to the server to check whether it is running.
+    """
+    try:
+        response = requests.get("http://localhost:3928/healthz", timeout=5)
+        if response.status_code == 200:
+            print("Server is already running")
+            return
+    except RequestException:
+        pass
+    print("Server is not running. Starting the server...")
+    start_server()
+
+
+def pull_model_if_needed(model_id: str = "tinyllama:gguf"):
+    """
+    Pull the model if it is not already pulled.
+    """
+    should_pull = False
+    try:
+        response = requests.get(
+            "http://localhost:3928/v1/models/" + model_id, timeout=5
+        )
+        if response.status_code != 200:
+            should_pull = True
+    except RequestException as e:
+        print("Http error occurred: " + str(e))
+
+    if should_pull:
+        run("Pull model", ["pull", model_id], timeout=10 * 60)
+
+
 def start_server_nix() -> bool:
     executable = getExecutablePath()
     process = subprocess.Popen(
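The new start_server_if_needed helper above decides liveness with a GET against /healthz. The same probe could be written on the C++ side with this patch's curl_utils; the sketch below is illustrative only: IsServerAlive is a hypothetical helper that is not part of the patch, only the SimpleGet(url, timeout) call and the has_error() result API appear in the diff, and the timeout unit is an assumption since the diff shows only the default of -1.

    #include <string>

    #include "utils/curl_utils.h"

    // Hypothetical helper mirroring start_server_if_needed's probe.
    bool IsServerAlive(const std::string& host, int port) {
      auto res = curl_utils::SimpleGet(
          "http://" + host + ":" + std::to_string(port) + "/healthz",
          /*timeout=*/5);
      // Transport errors (connection refused, timeout) surface via
      // has_error(); whether non-200 statuses do as well is not visible
      // in this diff.
      return !res.has_error();
    }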
+ """ + should_pull = False + try: + response = requests.get("http://localhost:3928/models/" + model_id, + timeout=5 + ) + if response.status_code != 200: + should_pull = True + + except RequestException as e: + print("Http error occurred: " + e) + + if should_pull: + run("Pull model", ["pull", model_id], timeout=10*60) + + def start_server_nix() -> bool: executable = getExecutablePath() process = subprocess.Popen( diff --git a/engine/main.cc b/engine/main.cc index b39c4c6e2..7c37e27fe 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -219,6 +219,8 @@ int main(int argc, char* argv[]) { return 1; } + curl_global_init(CURL_GLOBAL_DEFAULT); + // avoid printing logs to terminal is_server = true; diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 4eebff669..c52e32ef0 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -4,7 +4,6 @@ #include #include "algorithm" #include "utils/archive_utils.h" -#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" #include "utils/file_manager_utils.h" @@ -631,13 +630,15 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { return variants; } -bool EngineService::IsEngineLoaded(const std::string& engine) const { +bool EngineService::IsEngineLoaded(const std::string& engine) { + std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine); return engines_.find(ne) != engines_.end(); } cpp::result EngineService::GetLoadedEngine( const std::string& engine_name) { + std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine_name); if (engines_.find(ne) == engines_.end()) { return cpp::fail("Engine " + engine_name + " is not loaded yet!"); @@ -708,19 +709,19 @@ cpp::result EngineService::LoadEngine( auto add_dll = [this](const std::string& e_type, const std::filesystem::path& p) { if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p); + CTL_DBG("Added dll directory: " << p.string()); engines_[e_type].cookie = cookie; } else { - CTL_WRN("Could not add dll directory: " << p); + CTL_WRN("Could not add dll directory: " << p.string()); } auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p); + CTL_DBG("Added cuda dll directory: " << p.string()); engines_[e_type].cuda_cookie = cuda_cookie; } else { - CTL_WRN("Could not add cuda dll directory: " << p); + CTL_WRN("Could not add cuda dll directory: " << p.string()); } }; @@ -732,16 +733,20 @@ cpp::result EngineService::LoadEngine( should_use_dll_search_path) { if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && should_use_dll_search_path) { - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); + + { + std::lock_guard lock(engines_mutex_); + // Remove llamacpp dll directory + if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { + CTL_WRN("Could not remove dll directory: " << kLlamaRepo); + } else { + CTL_DBG("Removed dll directory: " << kLlamaRepo); + } + if 
+        if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
+          CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
+        } else {
+          CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
+        }
       }

       add_dll(ne, engine_dir_path);
@@ -752,8 +757,11 @@ cpp::result EngineService::LoadEngine(
     }
   }
 #endif
-  engines_[ne].dl =
-      std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");
+  {
+    std::lock_guard lock(engines_mutex_);
+    engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
+        engine_dir_path.string(), "engine");
+  }
 #if defined(__linux__)
   const char* name = "LD_LIBRARY_PATH";
   auto data = getenv(name);
@@ -774,65 +782,78 @@ cpp::result EngineService::LoadEngine(
   } catch (const cortex_cpp::dylib::load_error& e) {
     CTL_ERR("Could not load engine: " << e.what());
-    engines_.erase(ne);
+    {
+      std::lock_guard lock(engines_mutex_);
+      engines_.erase(ne);
+    }
     return cpp::fail("Could not load engine " + ne + ": " + e.what());
   }

-  auto func = engines_[ne].dl->get_function("get_engine");
-  engines_[ne].engine = func();
+  {
+    std::lock_guard lock(engines_mutex_);
+    auto func = engines_[ne].dl->get_function("get_engine");
+    engines_[ne].engine = func();

-  auto& en = std::get<EngineI*>(engines_[ne].engine);
-  if (ne == kLlamaRepo) {  //fix for llamacpp engine first
-    auto config = file_manager_utils::GetCortexConfig();
-    if (en->IsSupported("SetFileLogger")) {
-      en->SetFileLogger(config.maxLogLines,
-                        (std::filesystem::path(config.logFolderPath) /
-                         std::filesystem::path(config.logLlamaCppPath))
-                            .string());
-    } else {
-      CTL_WRN("Method SetFileLogger is not supported yet");
-    }
-    if (en->IsSupported("SetLogLevel")) {
-      en->SetLogLevel(logging_utils_helper::global_log_level);
-    } else {
-      CTL_WRN("Method SetLogLevel is not supported yet");
+    auto& en = std::get<EngineI*>(engines_[ne].engine);
+    if (ne == kLlamaRepo) {  //fix for llamacpp engine first
+      auto config = file_manager_utils::GetCortexConfig();
+      if (en->IsSupported("SetFileLogger")) {
+        en->SetFileLogger(config.maxLogLines,
+                          (std::filesystem::path(config.logFolderPath) /
+                           std::filesystem::path(config.logLlamaCppPath))
+                              .string());
+      } else {
+        CTL_WRN("Method SetFileLogger is not supported yet");
+      }
+      if (en->IsSupported("SetLogLevel")) {
+        en->SetLogLevel(logging_utils_helper::global_log_level);
+      } else {
+        CTL_WRN("Method SetLogLevel is not supported yet");
+      }
     }
+    CTL_DBG("Loaded engine: " << ne);
   }
-  CTL_DBG("Loaded engine: " << ne);

   return {};
 }

 cpp::result EngineService::UnloadEngine(
     const std::string& engine) {
   auto ne = NormalizeEngine(engine);
-  if (!IsEngineLoaded(ne)) {
-    return cpp::fail("Engine " + ne + " is not loaded yet!");
-  }
-  EngineI* e = std::get<EngineI*>(engines_[ne].engine);
-  delete e;
+  {
+    std::lock_guard lock(engines_mutex_);
+    // Check the map directly: calling IsEngineLoaded() here would try to
+    // re-lock engines_mutex_, and std::mutex is not recursive.
+    if (engines_.find(ne) == engines_.end()) {
+      return cpp::fail("Engine " + ne + " is not loaded yet!");
+    }
+    EngineI* e = std::get<EngineI*>(engines_[ne].engine);
+    delete e;
+
 #if defined(_WIN32)
-  if (!RemoveDllDirectory(engines_[ne].cookie)) {
-    CTL_WRN("Could not remove dll directory: " << ne);
-  } else {
-    CTL_DBG("Removed dll directory: " << ne);
-  }
-  if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
-    CTL_WRN("Could not remove cuda dll directory: " << ne);
-  } else {
-    CTL_DBG("Removed cuda dll directory: " << ne);
-  }
+    if (!RemoveDllDirectory(engines_[ne].cookie)) {
+      CTL_WRN("Could not remove dll directory: " << ne);
+    } else {
+      CTL_DBG("Removed dll directory: " << ne);
+    }
+    if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
+      CTL_WRN("Could not remove cuda dll directory: " << ne);
+    } else {
+      CTL_DBG("Removed cuda dll directory: " << ne);
+    }
 #endif
-  engines_.erase(ne);
+    engines_.erase(ne);
+  }
   CTL_DBG("Unloaded engine " + ne);
   return {};
 }

 std::vector EngineService::GetLoadedEngines() {
-  std::vector loaded_engines;
-  for (const auto& [key, value] : engines_) {
-    loaded_engines.push_back(value.engine);
+  {
+    std::lock_guard lock(engines_mutex_);
+    std::vector loaded_engines;
+    for (const auto& [key, value] : engines_) {
+      loaded_engines.push_back(value.engine);
+    }
+    return loaded_engines;
   }
-  return loaded_engines;
 }

 cpp::result
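Every access to the shared engines_ map in the hunks above is now serialized behind the new engines_mutex_ (declared in engine_service.h just below). The essential pattern, reduced to a self-contained sketch with illustrative names that are not the patch's: the key caveat is that std::mutex is non-recursive, so a helper that takes the lock itself, as IsEngineLoaded now does, must not be called from code already holding it, which is why the UnloadEngine hunk checks the map directly.

    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct Registry {
      std::mutex mu;
      std::unordered_map<std::string, int> items;

      bool Contains(const std::string& key) {
        std::lock_guard<std::mutex> lock(mu);  // released at end of scope
        return items.find(key) != items.end();
      }

      bool Remove(const std::string& key) {
        std::lock_guard<std::mutex> lock(mu);
        // Do NOT call Contains() here: std::mutex is non-recursive, so
        // re-locking `mu` on the same thread would deadlock.
        auto it = items.find(key);
        if (it == items.end())
          return false;
        items.erase(it);
        return true;
      }
    };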
diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h
index dee8a530b..a18a276cd 100644
--- a/engine/services/engine_service.h
+++ b/engine/services/engine_service.h
@@ -1,6 +1,7 @@
 #pragma once

 #include 
+#include <mutex>
 #include 
 #include 
 #include 
@@ -51,6 +52,7 @@ class EngineService : public EngineServiceI {
 #endif
   };

+  std::mutex engines_mutex_;
   std::unordered_map engines_{};

  public:
@@ -99,7 +101,7 @@ class EngineService : public EngineServiceI {
   cpp::result, std::string>
   GetInstalledEngineVariants(const std::string& engine) const;

-  bool IsEngineLoaded(const std::string& engine) const;
+  bool IsEngineLoaded(const std::string& engine);

   cpp::result GetLoadedEngine(
       const std::string& engine_name);
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 1ec1a68cf..a37cea12c 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -1,4 +1,6 @@
 #include "model_service.h"
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -7,7 +9,6 @@
 #include "config/yaml_config.h"
 #include "database/models.h"
 #include "hardware_service.h"
-#include "httplib.h"
 #include "utils/cli_selection_utils.h"
 #include "utils/cortex_utils.h"
 #include "utils/engine_constants.h"
@@ -79,7 +80,8 @@ cpp::result GetDownloadTask(
   url_parser::Url url = {
       .protocol = "https",
       .host = kHuggingFaceHost,
-      .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};
+      .pathParams = {"api", "models", "cortexso", modelId, "tree", branch},
+  };

   auto result = curl_utils::SimpleGetJson(url.ToFullPath());
   if (result.has_error()) {
@@ -812,9 +814,9 @@ cpp::result ModelService::StartModel(
       inference_svc_->LoadModel(std::make_shared(json_data));
   auto status = std::get<0>(ir)["status_code"].asInt();
   auto data = std::get<1>(ir);
-  if (status == httplib::StatusCode::OK_200) {
+  if (status == drogon::k200OK) {
     return StartModelResult{.success = true, .warning = warning};
-  } else if (status == httplib::StatusCode::Conflict_409) {
+  } else if (status == drogon::k409Conflict) {
     CTL_INF("Model '" + model_handle + "' is already loaded");
     return StartModelResult{.success = true, .warning = warning};
   } else {
@@ -859,7 +861,7 @@ cpp::result ModelService::StopModel(
   auto ir = inference_svc_->UnloadModel(engine_name, model_handle);
   auto status = std::get<0>(ir)["status_code"].asInt();
   auto data = std::get<1>(ir);
-  if (status == httplib::StatusCode::OK_200) {
+  if (status == drogon::k200OK) {
     if (bypass_check) {
       bypass_stop_check_set_.erase(model_handle);
     }
@@ -901,7 +903,7 @@ cpp::result ModelService::GetModelStatus(
       inference_svc_->GetModelStatus(std::make_shared(root));
   auto status = std::get<0>(ir)["status_code"].asInt();
   auto data = std::get<1>(ir);
-  if (status == httplib::StatusCode::OK_200) {
+  if (status == drogon::k200OK) {
     return true;
   } else {
     CTL_ERR("Model failed to get model status with status code: " << status);
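With httplib gone, the status checks in model_service.cc reuse drogon's HttpStatusCode enumerators, which the engine already links through Drogon. These are plain enumerators whose values are the numeric HTTP codes, so comparing them against the integer status_code pulled out of the inference result keeps working unchanged. A compile-time sketch; the static_asserts and the IsOk helper are illustrative, not part of the patch:

    #include <drogon/HttpTypes.h>

    // drogon's HttpStatusCode enumerators carry their numeric HTTP values,
    // so they compare directly against plain ints.
    static_assert(drogon::k200OK == 200);
    static_assert(drogon::k409Conflict == 409);

    bool IsOk(int status_code) {
      return status_code == drogon::k200OK;
    }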
diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt
index b92770a65..4a15b7c8b 100644
--- a/engine/test/components/CMakeLists.txt
+++ b/engine/test/components/CMakeLists.txt
@@ -17,7 +17,6 @@ add_executable(${PROJECT_NAME}
 find_package(Drogon CONFIG REQUIRED)
 find_package(GTest CONFIG REQUIRED)
 find_package(yaml-cpp CONFIG REQUIRED)
-find_package(httplib CONFIG REQUIRED)
 find_package(unofficial-minizip CONFIG REQUIRED)
 find_package(LibArchive REQUIRED)
 find_package(CURL REQUIRED)
@@ -26,7 +25,6 @@ find_package(SQLiteCpp REQUIRED)

 target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest
     GTest::gtest_main yaml-cpp::yaml-cpp ${CMAKE_THREAD_LIBS_INIT})
-target_link_libraries(${PROJECT_NAME} PRIVATE httplib::httplib)
 target_link_libraries(${PROJECT_NAME} PRIVATE unofficial::minizip::minizip)
 target_link_libraries(${PROJECT_NAME} PRIVATE LibArchive::LibArchive)
 target_link_libraries(${PROJECT_NAME} PRIVATE CURL::libcurl)
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h
index 3176339a0..73c990996 100644
--- a/engine/utils/config_yaml_utils.h
+++ b/engine/utils/config_yaml_utils.h
@@ -3,8 +3,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
 #include "yaml-cpp/yaml.h"
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 895217250..4d0a956a9 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -12,7 +12,6 @@
 #include 
 #include 
 #include 
-#include 
 #if defined(__linux__)
 #include 
 #include 
@@ -120,5 +119,4 @@ inline std::string GetCurrentPath() {
 #endif
 }
 #endif
-
 }  // namespace cortex_utils
diff --git a/engine/utils/cpuid/detail/init_linux_gcc_arm.h b/engine/utils/cpuid/detail/init_linux_gcc_arm.h
index f10d360fd..cfd4059a5 100644
--- a/engine/utils/cpuid/detail/init_linux_gcc_arm.h
+++ b/engine/utils/cpuid/detail/init_linux_gcc_arm.h
@@ -21,7 +21,7 @@ void init_cpuinfo(CpuInfo::Impl& info) {
   // The Advanced SIMD (NEON) instruction set is required on AArch64
   // (64-bit ARM). Note that /proc/cpuinfo will display "asimd" instead of
   // "neon" in the Features list on a 64-bit ARM CPU.
-  info.m_has_neon = true;
+  info.has_neon = true;
 #else
   // Runtime detection of NEON is necessary on 32-bit ARM CPUs
   //
diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h
index 7bfbec44c..c56808b56 100644
--- a/engine/utils/curl_utils.h
+++ b/engine/utils/curl_utils.h
@@ -73,8 +73,6 @@ inline std::optional> GetHeaders(
 inline cpp::result SimpleGet(const std::string& url,
                              const int timeout = -1) {
-  // Initialize libcurl
-  curl_global_init(CURL_GLOBAL_DEFAULT);
   auto curl = curl_easy_init();

   if (!curl) {
diff --git a/engine/vcpkg.json b/engine/vcpkg.json
index 09ddb3368..36fa322a3 100644
--- a/engine/vcpkg.json
+++ b/engine/vcpkg.json
@@ -3,10 +3,6 @@
     "curl",
     "gtest",
     "cli11",
-    {
-      "name": "cpp-httplib",
-      "features": ["openssl"]
-    },
     "drogon",
     "jsoncpp",
     "minizip",