|
31 | 31 | #include <unordered_map> |
32 | 32 | #include <unordered_set> |
33 | 33 |
|
| 34 | +// llama-server-one START |
| 35 | +#ifdef COSMOCC |
| 36 | +#include <cosmo.h> |
| 37 | +#endif |
| 38 | +// llama-server-one END |
| 39 | + |
34 | 40 | using json = nlohmann::ordered_json; |
35 | 41 |
|
36 | 42 | constexpr int HTTP_POLLING_SECONDS = 1; |
@@ -1594,13 +1600,15 @@ struct server_queue { |
1594 | 1600 | return 0; |
1595 | 1601 | } |
1596 | 1602 |
|
| 1603 | + // llama-server-one START - renamed defer() --> defer_task() for Cosmo STL compatibility (presumably avoids a clash with an existing `defer` name; TODO confirm). |
1597 | 1604 | // Add a new task, but defer until one slot is available |
1598 | | - void defer(server_task task) { |
| 1605 | + void defer_task(server_task task) { |
1599 | 1606 | std::unique_lock<std::mutex> lock(mutex_tasks); |
1600 | 1607 | QUE_DBG("defer task, id = %d\n", task.id); |
1601 | 1608 | queue_tasks_deferred.push_back(std::move(task)); |
1602 | 1609 | condition_tasks.notify_one(); |
1603 | 1610 | } |
| 1611 | + // llama-server-one END |
1604 | 1612 |
|
1605 | 1613 | // Get the next id for creating a new task |
1606 | 1614 | int get_new_id() { |
@@ -2637,13 +2645,17 @@ struct server_context { |
2637 | 2645 | if (slot == nullptr) { |
2638 | 2646 | // if no slot is available, we defer this task for processing later |
2639 | 2647 | SRV_DBG("no slot is available, defer task, id_task = %d\n", task.id); |
2640 | | - queue_tasks.defer(task); |
| 2648 | + // llama-server-one START |
| 2649 | + queue_tasks.defer_task(task); |
| 2650 | + // llama-server-one END |
2641 | 2651 | break; |
2642 | 2652 | } |
2643 | 2653 | if (slot->is_processing()) { |
2644 | 2654 | // if requested slot is unavailable, we defer this task for processing later |
2645 | 2655 | SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); |
2646 | | - queue_tasks.defer(task); |
| 2656 | + // llama-server-one START |
| 2657 | + queue_tasks.defer_task(task); |
| 2658 | + // llama-server-one END |
2647 | 2659 | break; |
2648 | 2660 | } |
2649 | 2661 |
|
@@ -2726,7 +2738,9 @@ struct server_context { |
2726 | 2738 | if (slot->is_processing()) { |
2727 | 2739 | // if requested slot is unavailable, we defer this task for processing later |
2728 | 2740 | SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); |
2729 | | - queue_tasks.defer(task); |
| 2741 | + // llama-server-one START |
| 2742 | + queue_tasks.defer_task(task); |
| 2743 | + // llama-server-one END |
2730 | 2744 | break; |
2731 | 2745 | } |
2732 | 2746 |
|
@@ -2762,7 +2776,9 @@ struct server_context { |
2762 | 2776 | if (slot->is_processing()) { |
2763 | 2777 | // if requested slot is unavailable, we defer this task for processing later |
2764 | 2778 | SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); |
2765 | | - queue_tasks.defer(task); |
| 2779 | + // llama-server-one START |
| 2780 | + queue_tasks.defer_task(task); |
| 2781 | + // llama-server-one END |
2766 | 2782 | break; |
2767 | 2783 | } |
2768 | 2784 |
|
@@ -2805,7 +2821,9 @@ struct server_context { |
2805 | 2821 | if (slot->is_processing()) { |
2806 | 2822 | // if requested slot is unavailable, we defer this task for processing later |
2807 | 2823 | SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); |
2808 | | - queue_tasks.defer(task); |
| 2824 | + // llama-server-one START |
| 2825 | + queue_tasks.defer_task(task); |
| 2826 | + // llama-server-one END |
2809 | 2827 | break; |
2810 | 2828 | } |
2811 | 2829 |
|
@@ -3427,6 +3445,37 @@ inline void signal_handler(int signal) { |
3427 | 3445 | } |
3428 | 3446 |
|
3429 | 3447 | int main(int argc, char ** argv) { |
| 3448 | + // llama-server-one START |
| 3449 | + // This implements an args file feature inspired by llamafile's. |
| 3450 | + #ifdef COSMOCC |
| 3451 | + // Optional args files, used only if present. They have distinct names to avoid confusion during packaging. |
| 3452 | + const std::string& argsFilename = "llama-server-one-args"; |
| 3453 | + const std::string& zipArgsFilename = "/zip/default-args"; |
| 3454 | + struct stat buffer; |
| 3455 | + |
| 3456 | + // At this point, argc, argv represent: |
| 3457 | + // command (User supplied args) |
| 3458 | + |
| 3459 | + if (stat (argsFilename.c_str(), &buffer) == 0) { |
| 3460 | + argc = cosmo_args(argsFilename.c_str(), &argv); |
| 3461 | + } |
| 3462 | + |
| 3463 | + // At this point, argc, argv represent: |
| 3464 | + // command (argsFilename args) (User supplied args) |
| 3465 | + |
| 3466 | + if (stat (zipArgsFilename.c_str(), &buffer) == 0) { |
| 3467 | + argc = cosmo_args(zipArgsFilename.c_str(), &argv); |
| 3468 | + } |
| 3469 | + |
| 3470 | + // At this point, argc, argv represent: |
| 3471 | + // command (zipArgsFilename args) (argsFilename args) (User supplied args) |
| 3472 | + |
| 3473 | + // Yes, this ordering is counterintuitive, but it is how cosmo_args() works: each call inserts the file's args directly after the command. |
| 3474 | + // argsFilename args override zipArgsFilename args. |
| 3475 | + // User supplied args override both argsFilename and zipArgsFilename args. |
| 3476 | + #endif |
| 3477 | + // llama-server-one END |
| 3478 | + |
3430 | 3479 | // own arguments required by this example |
3431 | 3480 | common_params params; |
3432 | 3481 |
|
@@ -4452,6 +4501,26 @@ int main(int argc, char ** argv) { |
4452 | 4501 | } |
4453 | 4502 | } |
4454 | 4503 |
|
| 4504 | + // llama-server-one START - serve the embedded gzip-compressed index page at /chat; /chat/ redirects to /chat. |
| 4505 | + svr->Get("/chat", [](const httplib::Request & req, httplib::Response & res) { |
| 4506 | + if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) { |
| 4507 | + res.set_content("Error: gzip is not supported by this browser", "text/plain"); |
| 4508 | + } else { |
| 4509 | + res.set_header("Content-Encoding", "gzip"); |
| 4510 | + // COEP and COOP headers, required by pyodide (python interpreter) |
| 4511 | + res.set_header("Cross-Origin-Embedder-Policy", "require-corp"); |
| 4512 | + res.set_header("Cross-Origin-Opener-Policy", "same-origin"); |
| 4513 | + res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8"); |
| 4514 | + } |
| 4515 | + return false; |
| 4516 | + }); |
| 4517 | + |
| 4518 | + svr->Get("/chat/", [](const httplib::Request & req, httplib::Response & res) { |
| 4519 | + res.set_redirect("/chat"); |
| 4520 | + return false; |
| 4521 | + }); |
| 4522 | + // llama-server-one END |
| 4523 | + |
4455 | 4524 | // register API routes |
4456 | 4525 | svr->Get ("/health", handle_health); // public endpoint (no API key check) |
4457 | 4526 | svr->Get ("/metrics", handle_metrics); |
|
0 commit comments