Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit e0f75ad

Browse files
Merge branch 'feat/python-engine' of github.com:janhq/cortex.cpp into feat/python-engine
2 parents accec0a + 52d8105 commit e0f75ad

File tree

18 files changed

+464
-163
lines changed

18 files changed

+464
-163
lines changed

.github/workflows/cortex-cpp-quality-gate.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
ccache-dir: ""
3535
- os: "mac"
3636
name: "arm64"
37-
runs-on: "macos-silicon"
37+
runs-on: "macos-selfhosted-12-arm64"
3838
cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
3939
build-deps-cmake-flags: ""
4040
ccache-dir: ""

.github/workflows/template-build-macos.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ jobs:
8282
matrix:
8383
include:
8484
- arch: 'arm64'
85-
runs-on: 'macos-silicon'
85+
runs-on: 'macos-selfhosted-12-arm64'
8686
extra-cmake-flags: "-DMAC_ARM64=ON"
8787

8888
- arch: 'amd64'

engine/controllers/hardware.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ void Hardware::Activate(
3838
ahc.gpus.push_back(g.asInt());
3939
}
4040
}
41-
std::sort(ahc.gpus.begin(), ahc.gpus.end());
41+
4242
if (!hw_svc_->IsValidConfig(ahc)) {
4343
Json::Value ret;
4444
ret["message"] = "Invalid GPU index provided.";

engine/controllers/server.cc

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "trantor/utils/Logger.h"
44
#include "utils/cortex_utils.h"
55
#include "utils/function_calling/common.h"
6+
#include "utils/http_util.h"
67

78
using namespace inferences;
89

@@ -27,6 +28,15 @@ void server::ChatCompletion(
2728
LOG_DEBUG << "Start chat completion";
2829
auto json_body = req->getJsonObject();
2930
bool is_stream = (*json_body).get("stream", false).asBool();
31+
auto model_id = (*json_body).get("model", "invalid_model").asString();
32+
auto engine_type = [this, &json_body]() -> std::string {
33+
if (!inference_svc_->HasFieldInReq(json_body, "engine")) {
34+
return kLlamaRepo;
35+
} else {
36+
return (*(json_body)).get("engine", kLlamaRepo).asString();
37+
}
38+
}();
39+
3040
LOG_DEBUG << "request body: " << json_body->toStyledString();
3141
auto q = std::make_shared<services::SyncQueue>();
3242
auto ir = inference_svc_->HandleChatCompletion(q, json_body);
@@ -40,7 +50,7 @@ void server::ChatCompletion(
4050
}
4151
LOG_DEBUG << "Wait to chat completion responses";
4252
if (is_stream) {
43-
ProcessStreamRes(std::move(callback), q);
53+
ProcessStreamRes(std::move(callback), q, engine_type, model_id);
4454
} else {
4555
ProcessNonStreamRes(std::move(callback), *q);
4656
}
@@ -121,12 +131,16 @@ void server::LoadModel(const HttpRequestPtr& req,
121131
}
122132

123133
void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
124-
std::shared_ptr<services::SyncQueue> q) {
134+
std::shared_ptr<services::SyncQueue> q,
135+
const std::string& engine_type,
136+
const std::string& model_id) {
125137
auto err_or_done = std::make_shared<std::atomic_bool>(false);
126-
auto chunked_content_provider =
127-
[q, err_or_done](char* buf, std::size_t buf_size) -> std::size_t {
138+
auto chunked_content_provider = [this, q, err_or_done, engine_type, model_id](
139+
char* buf,
140+
std::size_t buf_size) -> std::size_t {
128141
if (buf == nullptr) {
129142
LOG_TRACE << "Buf is null";
143+
inference_svc_->StopInferencing(engine_type, model_id);
130144
return 0;
131145
}
132146

engine/controllers/server.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ class server : public drogon::HttpController<server, false>,
7272

7373
private:
7474
void ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
75-
std::shared_ptr<services::SyncQueue> q);
75+
std::shared_ptr<services::SyncQueue> q,
76+
const std::string& engine_type,
77+
const std::string& model_id);
7678
void ProcessNonStreamRes(std::function<void(const HttpResponsePtr&)> cb,
7779
services::SyncQueue& q);
7880

engine/cortex-common/EngineI.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,8 @@ class EngineI {
7272
virtual void HandleRequest(
7373
std::shared_ptr<Json::Value> json_body,
7474
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
75+
76+
// Stop inflight chat completion in stream mode
77+
virtual void StopInferencing(const std::string& model_id) = 0;
78+
7579
};

engine/database/hardware.cc

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,23 @@
55

66
namespace cortex::db {
77

8-
Hardwares::Hardwares() : db_(cortex::db::Database::GetInstance().db()) {}
8+
Hardware::Hardware() : db_(cortex::db::Database::GetInstance().db()) {}
99

10-
Hardwares::Hardwares(SQLite::Database& db) : db_(db) {}
10+
Hardware::Hardware(SQLite::Database& db) : db_(db) {}
1111

12-
Hardwares::~Hardwares() {}
12+
13+
Hardware::~Hardware() {}
1314

1415
cpp::result<std::vector<HardwareEntry>, std::string>
15-
Hardwares::LoadHardwareList() const {
16+
Hardware::LoadHardwareList() const {
1617
try {
1718
db_.exec("BEGIN TRANSACTION;");
1819
cortex::utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
1920
std::vector<HardwareEntry> entries;
2021
SQLite::Statement query(
2122
db_,
2223
"SELECT uuid, type, "
23-
"hardware_id, software_id, activated FROM hardware");
24+
"hardware_id, software_id, activated, priority FROM hardware");
2425

2526
while (query.executeStep()) {
2627
HardwareEntry entry;
@@ -29,6 +30,7 @@ Hardwares::LoadHardwareList() const {
2930
entry.hardware_id = query.getColumn(2).getInt();
3031
entry.software_id = query.getColumn(3).getInt();
3132
entry.activated = query.getColumn(4).getInt();
33+
entry.priority = query.getColumn(5).getInt();
3234
entries.push_back(entry);
3335
}
3436
return entries;
@@ -37,19 +39,20 @@ Hardwares::LoadHardwareList() const {
3739
return cpp::fail(e.what());
3840
}
3941
}
40-
cpp::result<bool, std::string> Hardwares::AddHardwareEntry(
42+
cpp::result<bool, std::string> Hardware::AddHardwareEntry(
4143
const HardwareEntry& new_entry) {
4244
try {
4345
SQLite::Statement insert(
4446
db_,
4547
"INSERT INTO hardware (uuid, type, "
46-
"hardware_id, software_id, activated) VALUES (?, ?, "
47-
"?, ?, ?)");
48+
"hardware_id, software_id, activated, priority) VALUES (?, ?, "
49+
"?, ?, ?, ?)");
4850
insert.bind(1, new_entry.uuid);
4951
insert.bind(2, new_entry.type);
5052
insert.bind(3, new_entry.hardware_id);
5153
insert.bind(4, new_entry.software_id);
5254
insert.bind(5, new_entry.activated);
55+
insert.bind(6, new_entry.priority);
5356
insert.exec();
5457
CTL_INF("Inserted: " << new_entry.ToJsonString());
5558
return true;
@@ -58,17 +61,19 @@ cpp::result<bool, std::string> Hardwares::AddHardwareEntry(
5861
return cpp::fail(e.what());
5962
}
6063
}
61-
cpp::result<bool, std::string> Hardwares::UpdateHardwareEntry(
64+
cpp::result<bool, std::string> Hardware::UpdateHardwareEntry(
6265
const std::string& id, const HardwareEntry& updated_entry) {
6366
try {
64-
SQLite::Statement upd(db_,
65-
"UPDATE hardware "
66-
"SET hardware_id = ?, software_id = ?, activated = ? "
67-
"WHERE uuid = ?");
67+
SQLite::Statement upd(
68+
db_,
69+
"UPDATE hardware "
70+
"SET hardware_id = ?, software_id = ?, activated = ?, priority = ? "
71+
"WHERE uuid = ?");
6872
upd.bind(1, updated_entry.hardware_id);
6973
upd.bind(2, updated_entry.software_id);
7074
upd.bind(3, updated_entry.activated);
71-
upd.bind(4, id);
75+
upd.bind(4, updated_entry.priority);
76+
upd.bind(5, id);
7277
if (upd.exec() == 1) {
7378
CTL_INF("Updated: " << updated_entry.ToJsonString());
7479
return true;
@@ -79,7 +84,7 @@ cpp::result<bool, std::string> Hardwares::UpdateHardwareEntry(
7984
}
8085
}
8186

82-
cpp::result<bool, std::string> Hardwares::DeleteHardwareEntry(
87+
cpp::result<bool, std::string> Hardware::DeleteHardwareEntry(
8388
const std::string& id) {
8489
try {
8590
SQLite::Statement del(db_, "DELETE from hardware WHERE uuid = ?");

engine/database/hardware.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <trantor/utils/Logger.h>
55
#include <string>
66
#include <vector>
7-
#include "utils/result.hpp"
87
#include "utils/json_helper.h"
8+
#include "utils/result.hpp"
99

1010
namespace cortex::db {
1111
struct HardwareEntry {
@@ -14,33 +14,34 @@ struct HardwareEntry {
1414
int hardware_id;
1515
int software_id;
1616
bool activated;
17+
int priority;
1718
std::string ToJsonString() const {
1819
Json::Value root;
1920
root["uuid"] = uuid;
2021
root["type"] = type;
2122
root["hardware_id"] = hardware_id;
2223
root["software_id"] = software_id;
2324
root["activated"] = activated;
25+
root["priority"] = priority;
2426
return json_helper::DumpJsonString(root);
2527
}
2628
};
2729

28-
class Hardwares {
30+
class Hardware {
2931

3032
private:
3133
SQLite::Database& db_;
3234

33-
3435
public:
35-
Hardwares();
36-
Hardwares(SQLite::Database& db);
37-
~Hardwares();
36+
Hardware();
37+
Hardware(SQLite::Database& db);
38+
~Hardware();
3839

3940
cpp::result<std::vector<HardwareEntry>, std::string> LoadHardwareList() const;
40-
cpp::result<bool, std::string> AddHardwareEntry(const HardwareEntry& new_entry);
41+
cpp::result<bool, std::string> AddHardwareEntry(
42+
const HardwareEntry& new_entry);
4143
cpp::result<bool, std::string> UpdateHardwareEntry(
4244
const std::string& id, const HardwareEntry& updated_entry);
43-
cpp::result<bool, std::string> DeleteHardwareEntry(
44-
const std::string& id);
45+
cpp::result<bool, std::string> DeleteHardwareEntry(const std::string& id);
4546
};
4647
} // namespace cortex::db

engine/e2e-test/test_api_docker.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,38 +40,39 @@ async def test_models_on_cortexso_hub(self, model_url):
4040
assert response.status_code == 200
4141
models = [i["id"] for i in response.json()["data"]]
4242
assert model_url in models, f"Model not found in list: {model_url}"
43+
44+
# TODO(sang) bypass for now. Re-enable when we publish new stable version for llama-cpp engine
45+
# print("Start the model")
46+
# # Start the model
47+
# response = requests.post(
48+
# "http://localhost:3928/v1/models/start", json=json_body
49+
# )
50+
# print(response.json())
51+
# assert response.status_code == 200, f"status_code: {response.status_code}"
4352

44-
print("Start the model")
45-
# Start the model
46-
response = requests.post(
47-
"http://localhost:3928/v1/models/start", json=json_body
48-
)
49-
print(response.json())
50-
assert response.status_code == 200, f"status_code: {response.status_code}"
51-
52-
print("Send an inference request")
53-
# Send an inference request
54-
inference_json_body = {
55-
"frequency_penalty": 0.2,
56-
"max_tokens": 4096,
57-
"messages": [{"content": "", "role": "user"}],
58-
"model": model_url,
59-
"presence_penalty": 0.6,
60-
"stop": ["End"],
61-
"stream": False,
62-
"temperature": 0.8,
63-
"top_p": 0.95,
64-
}
65-
response = requests.post(
66-
"http://localhost:3928/v1/chat/completions",
67-
json=inference_json_body,
68-
headers={"Content-Type": "application/json"},
69-
)
70-
assert (
71-
response.status_code == 200
72-
), f"status_code: {response.status_code} response: {response.json()}"
53+
# print("Send an inference request")
54+
# # Send an inference request
55+
# inference_json_body = {
56+
# "frequency_penalty": 0.2,
57+
# "max_tokens": 4096,
58+
# "messages": [{"content": "", "role": "user"}],
59+
# "model": model_url,
60+
# "presence_penalty": 0.6,
61+
# "stop": ["End"],
62+
# "stream": False,
63+
# "temperature": 0.8,
64+
# "top_p": 0.95,
65+
# }
66+
# response = requests.post(
67+
# "http://localhost:3928/v1/chat/completions",
68+
# json=inference_json_body,
69+
# headers={"Content-Type": "application/json"},
70+
# )
71+
# assert (
72+
# response.status_code == 200
73+
# ), f"status_code: {response.status_code} response: {response.json()}"
7374

74-
print("Stop the model")
75-
# Stop the model
76-
response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
77-
assert response.status_code == 200, f"status_code: {response.status_code}"
75+
# print("Stop the model")
76+
# # Stop the model
77+
# response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
78+
# assert response.status_code == 200, f"status_code: {response.status_code}"

engine/migrations/db_helper.h

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,28 @@
44
namespace cortex::mgr {
55
#include <iostream>
66
#include <stdexcept>
7-
#include <vector>
87
#include <string>
8+
#include <vector>
99

10-
inline bool ColumnExists(SQLite::Database& db, const std::string& table_name, const std::string& column_name) {
11-
try {
12-
SQLite::Statement query(db, "SELECT " + column_name + " FROM " + table_name + " LIMIT 0");
13-
return true;
14-
} catch (std::exception&) {
15-
return false;
16-
}
10+
inline bool ColumnExists(SQLite::Database& db, const std::string& table_name,
11+
const std::string& column_name) {
12+
try {
13+
SQLite::Statement query(
14+
db, "SELECT " + column_name + " FROM " + table_name + " LIMIT 0");
15+
return true;
16+
} catch (std::exception&) {
17+
return false;
18+
}
1719
}
1820

19-
inline void AddColumnIfNotExists(SQLite::Database& db, const std::string& table_name,
20-
const std::string& column_name, const std::string& column_type) {
21-
if (!ColumnExists(db, table_name, column_name)) {
22-
std::string sql = "ALTER TABLE " + table_name + " ADD COLUMN " + column_name + " " + column_type;
23-
db.exec(sql);
24-
}
21+
inline void AddColumnIfNotExists(SQLite::Database& db,
22+
const std::string& table_name,
23+
const std::string& column_name,
24+
const std::string& column_type) {
25+
if (!ColumnExists(db, table_name, column_name)) {
26+
std::string sql = "ALTER TABLE " + table_name + " ADD COLUMN " +
27+
column_name + " " + column_type;
28+
db.exec(sql);
29+
}
2530
}
26-
}
31+
} // namespace cortex::mgr

0 commit comments

Comments
 (0)