Skip to content

Commit 5b92c34

Browse files
Update server.cpp
Added my fixes back in after merge from upstream. Signed-off-by: Brad Hutchings <[email protected]>
1 parent dbb8991 commit 5b92c34

File tree

1 file changed

+120
-12
lines changed

1 file changed

+120
-12
lines changed

examples/server/server.cpp

Lines changed: 120 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,12 @@
3131
#include <unordered_map>
3232
#include <unordered_set>
3333

34+
// llama-server-one START
35+
#ifdef COSMOCC
36+
#include <cosmo.h>
37+
#endif
38+
// llama-server-one END
39+
3440
using json = nlohmann::ordered_json;
3541

3642
constexpr int HTTP_POLLING_SECONDS = 1;
@@ -1596,13 +1602,15 @@ struct server_queue {
15961602
return 0;
15971603
}
15981604

1605+
// llama-server-one START - defer() --> defer_task() to make Cosmo STL happy.
15991606
// Add a new task, but defer until one slot is available
1600-
void defer(server_task && task) {
1607+
void defer_task(server_task task) {
16011608
std::unique_lock<std::mutex> lock(mutex_tasks);
16021609
QUE_DBG("defer task, id = %d\n", task.id);
16031610
queue_tasks_deferred.push_back(std::move(task));
16041611
condition_tasks.notify_one();
16051612
}
1613+
// llama-server-one END
16061614

16071615
// Get the next id for creating a new task
16081616
int get_new_id() {
@@ -2652,13 +2660,17 @@ struct server_context {
26522660
if (slot == nullptr) {
26532661
// if no slot is available, we defer this task for processing later
26542662
SRV_DBG("no slot is available, defer task, id_task = %d\n", task.id);
2655-
queue_tasks.defer(std::move(task));
2663+
// llama-server-one START
2664+
queue_tasks.defer_task(task);
2665+
// llama-server-one END
26562666
break;
26572667
}
26582668
if (slot->is_processing()) {
26592669
// if requested slot is unavailable, we defer this task for processing later
26602670
SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id);
2661-
queue_tasks.defer(std::move(task));
2671+
// llama-server-one START
2672+
queue_tasks.defer_task(task);
2673+
// llama-server-one END
26622674
break;
26632675
}
26642676

@@ -2741,7 +2753,9 @@ struct server_context {
27412753
if (slot->is_processing()) {
27422754
// if requested slot is unavailable, we defer this task for processing later
27432755
SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id);
2744-
queue_tasks.defer(std::move(task));
2756+
// llama-server-one START
2757+
queue_tasks.defer_task(task);
2758+
// llama-server-one END
27452759
break;
27462760
}
27472761

@@ -2777,7 +2791,9 @@ struct server_context {
27772791
if (slot->is_processing()) {
27782792
// if requested slot is unavailable, we defer this task for processing later
27792793
SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id);
2780-
queue_tasks.defer(std::move(task));
2794+
// llama-server-one START
2795+
queue_tasks.defer_task(task);
2796+
// llama-server-one END
27812797
break;
27822798
}
27832799

@@ -2820,7 +2836,9 @@ struct server_context {
28202836
if (slot->is_processing()) {
28212837
// if requested slot is unavailable, we defer this task for processing later
28222838
SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id);
2823-
queue_tasks.defer(std::move(task));
2839+
// llama-server-one START
2840+
queue_tasks.defer_task(task);
2841+
// llama-server-one END
28242842
break;
28252843
}
28262844

@@ -3402,15 +3420,51 @@ struct server_context {
34023420
}
34033421

34043422
json model_meta() const {
3423+
char general_architecture[64];
3424+
char general_type[64];
3425+
char general_name[64];
3426+
char general_version[64];
3427+
char general_finetune[64];
3428+
char general_basename[64];
3429+
char general_size_label[64];
3430+
char general_license[64];
3431+
3432+
general_architecture[0] = 0;
3433+
general_type[0] = 0;
3434+
general_name[0] = 0;
3435+
general_version[0] = 0;
3436+
general_finetune[0] = 0;
3437+
general_basename[0] = 0;
3438+
general_size_label[0] = 0;
3439+
general_license[0] = 0;
3440+
3441+
llama_model_meta_val_str(model, "general.architecture", general_architecture, 64);
3442+
llama_model_meta_val_str(model, "general.type", general_type, 64);
3443+
llama_model_meta_val_str(model, "general.name", general_name, 64);
3444+
llama_model_meta_val_str(model, "general.version", general_version, 64);
3445+
llama_model_meta_val_str(model, "general.finetune", general_finetune, 64);
3446+
llama_model_meta_val_str(model, "general.basename", general_basename, 64);
3447+
llama_model_meta_val_str(model, "general.size_label", general_size_label, 64);
3448+
llama_model_meta_val_str(model, "general.license", general_license, 64);
3449+
34053450
return json {
3406-
{"vocab_type", llama_vocab_type (vocab)},
3407-
{"n_vocab", llama_vocab_n_tokens (vocab)},
3408-
{"n_ctx_train", llama_model_n_ctx_train(model)},
3409-
{"n_embd", llama_model_n_embd (model)},
3410-
{"n_params", llama_model_n_params (model)},
3411-
{"size", llama_model_size (model)},
3451+
{"vocab_type", llama_vocab_type (vocab)},
3452+
{"n_vocab", llama_vocab_n_tokens (vocab)},
3453+
{"n_ctx_train", llama_n_ctx_train (model)},
3454+
{"n_embd", llama_n_embd (model)},
3455+
{"n_params", llama_model_n_params (model)},
3456+
{"size", llama_model_size (model)},
3457+
{"general.architecture", general_architecture },
3458+
{"general.type", general_type },
3459+
{"general.name", general_name },
3460+
{"general.version", general_version },
3461+
{"general.finetune", general_finetune },
3462+
{"general.basename", general_basename },
3463+
{"general.size_label", general_size_label },
3464+
{"general.license", general_license },
34123465
};
34133466
}
3467+
// llama-server-one END
34143468
};
34153469

34163470
static void log_server_request(const httplib::Request & req, const httplib::Response & res) {
@@ -3442,6 +3496,40 @@ inline void signal_handler(int signal) {
34423496
}
34433497

34443498
int main(int argc, char ** argv) {
3499+
// llama-server-one START
3500+
// This implements an args file feature inspired by llamafile's.
3501+
#ifdef COSMOCC
3502+
// Keep the build from showing up as ape in the process list.
3503+
pthread_setname_np(pthread_self(), "llama-server-one");
3504+
3505+
// Args files if present. The names are different to remove confusion during packaging.
3506+
const std::string& argsFilename = "llama-server-one-args";
3507+
const std::string& zipArgsFilename = "/zip/default-args";
3508+
struct stat buffer;
3509+
3510+
// At this point, argc, argv represent:
3511+
// command (User supplied args)
3512+
3513+
if (stat (argsFilename.c_str(), &buffer) == 0) {
3514+
argc = cosmo_args(argsFilename.c_str(), &argv);
3515+
}
3516+
3517+
// At this point, argc, argv represent:
3518+
// command (argsFilename args) (User supplied args)
3519+
3520+
if (stat (zipArgsFilename.c_str(), &buffer) == 0) {
3521+
argc = cosmo_args(zipArgsFilename.c_str(), &argv);
3522+
}
3523+
3524+
// At this point, argc, argv represent:
3525+
// command (zipArgsFilename args) (argsFilename args) (User supplied args)
3526+
3527+
// Yep, this is counterintuitive, but how the cosmo_args command works.
3528+
// argsFilename args override zipArgsFilename file args.
3529+
// User supplied args override argsFilename and zipArgsFilename args.
3530+
#endif
3531+
// llama-server-one END
3532+
34453533
// own arguments required by this example
34463534
common_params params;
34473535

@@ -4500,6 +4588,26 @@ int main(int argc, char ** argv) {
45004588
}
45014589
}
45024590

4591+
// llama-server-one START
4592+
svr->Get("/chat", [](const httplib::Request & req, httplib::Response & res) {
4593+
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
4594+
res.set_content("Error: gzip is not supported by this browser", "text/plain");
4595+
} else {
4596+
res.set_header("Content-Encoding", "gzip");
4597+
// COEP and COOP headers, required by pyodide (python interpreter)
4598+
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
4599+
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
4600+
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
4601+
}
4602+
return false;
4603+
});
4604+
4605+
svr->Get("/chat/", [](const httplib::Request &, httplib::Response & res) {
4606+
res.set_redirect("/chat");
4607+
return false;
4608+
});
4609+
// llama-server-one END
4610+
45034611
// register API routes
45044612
svr->Get ("/health", handle_health); // public endpoint (no API key check)
45054613
svr->Get ("/metrics", handle_metrics);

0 commit comments

Comments (0)