Skip to content

Commit 4289b5c

Browse files
Merge pull request #359 from janhq/update-dev-from-master-2025-12-12-00-38
Sync master with upstream release b7360
2 parents de78367 + 53ecd4f commit 4289b5c

33 files changed

+1120
-358
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1770,7 +1770,7 @@ jobs:
17701770
echo "Fetch llama2c model"
17711771
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
17721772
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
1773-
./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
1773+
./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
17741774
17751775
ubuntu-cmake-sanitizer-riscv64-native:
17761776
runs-on: RISCV64

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ add_library(${TARGET} STATIC
7373
ngram-cache.h
7474
peg-parser.cpp
7575
peg-parser.h
76+
preset.cpp
77+
preset.h
7678
regex-partial.cpp
7779
regex-partial.h
7880
sampling.cpp

common/arg.cpp

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
4848

4949
using json = nlohmann::ordered_json;
50+
using namespace common_arg_utils;
5051

5152
static std::initializer_list<enum llama_example> mmproj_examples = {
5253
LLAMA_EXAMPLE_MTMD,
@@ -64,6 +65,15 @@ static std::string read_file(const std::string & fname) {
6465
return content;
6566
}
6667

68+
static const std::vector<common_arg> & get_common_arg_defs() {
69+
static const std::vector<common_arg> options = [] {
70+
common_params params;
71+
auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_SERVER, nullptr);
72+
return ctx.options;
73+
}();
74+
return options;
75+
}
76+
6777
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
6878
this->examples = examples;
6979
return *this;
@@ -134,7 +144,7 @@ static std::vector<std::string> break_str_into_lines(std::string input, size_t m
134144
return result;
135145
}
136146

137-
std::string common_arg::to_string() {
147+
std::string common_arg::to_string() const {
138148
// params for printing to console
139149
const static int n_leading_spaces = 40;
140150
const static int n_char_per_line_help = 70; // TODO: detect this based on current console
@@ -647,6 +657,53 @@ static void add_rpc_devices(const std::string & servers) {
647657
}
648658
}
649659

660+
bool common_params_parse(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map) {
661+
common_params dummy_params;
662+
common_params_context ctx_arg = common_params_parser_init(dummy_params, ex, nullptr);
663+
664+
std::unordered_map<std::string, common_arg *> arg_to_options;
665+
for (auto & opt : ctx_arg.options) {
666+
for (const auto & arg : opt.args) {
667+
arg_to_options[arg] = &opt;
668+
}
669+
}
670+
671+
// TODO @ngxson : find a way to deduplicate this code
672+
673+
// handle command line arguments
674+
auto check_arg = [&](int i) {
675+
if (i+1 >= argc) {
676+
throw std::invalid_argument("expected value for argument");
677+
}
678+
};
679+
680+
for (int i = 1; i < argc; i++) {
681+
const std::string arg_prefix = "--";
682+
683+
std::string arg = argv[i];
684+
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
685+
std::replace(arg.begin(), arg.end(), '_', '-');
686+
}
687+
if (arg_to_options.find(arg) == arg_to_options.end()) {
688+
throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
689+
}
690+
auto opt = *arg_to_options[arg];
691+
std::string val;
692+
if (opt.value_hint != nullptr) {
693+
// arg with single value
694+
check_arg(i);
695+
val = argv[++i];
696+
}
697+
if (opt.value_hint_2 != nullptr) {
698+
// TODO: support arg with 2 values
699+
throw std::invalid_argument("error: argument with 2 values is not yet supported\n");
700+
}
701+
out_map[opt] = val;
702+
}
703+
704+
return true;
705+
}
706+
650707
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
651708
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
652709
const common_params params_org = ctx_arg.params; // the example can modify the default params
@@ -692,25 +749,19 @@ static std::string list_builtin_chat_templates() {
692749
return msg.str();
693750
}
694751

695-
static bool is_truthy(const std::string & value) {
752+
bool common_arg_utils::is_truthy(const std::string & value) {
696753
return value == "on" || value == "enabled" || value == "1";
697754
}
698755

699-
static bool is_falsey(const std::string & value) {
756+
bool common_arg_utils::is_falsey(const std::string & value) {
700757
return value == "off" || value == "disabled" || value == "0";
701758
}
702759

703-
static bool is_autoy(const std::string & value) {
760+
bool common_arg_utils::is_autoy(const std::string & value) {
704761
return value == "auto" || value == "-1";
705762
}
706763

707764
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
708-
// default values specific to example
709-
// note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
710-
if (ex == LLAMA_EXAMPLE_SERVER) {
711-
params.use_jinja = true;
712-
}
713-
714765
params.use_color = tty_can_use_colors();
715766

716767
// load dynamic backends
@@ -2543,6 +2594,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25432594
params.models_dir = value;
25442595
}
25452596
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
2597+
add_opt(common_arg(
2598+
{"--models-preset"}, "PATH",
2599+
"path to INI file containing model presets for the router server (default: disabled)",
2600+
[](common_params & params, const std::string & value) {
2601+
params.models_preset = value;
2602+
}
2603+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_PRESET"));
25462604
add_opt(common_arg(
25472605
{"--models-max"}, "N",
25482606
string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
@@ -2559,14 +2617,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25592617
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
25602618
add_opt(common_arg(
25612619
{"--jinja"},
2562-
string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
2620+
string_format("use jinja template for chat (default: %s)", params.use_jinja ? "enabled" : "disabled"),
25632621
[](common_params & params) {
25642622
params.use_jinja = true;
25652623
}
25662624
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
25672625
add_opt(common_arg(
25682626
{"--no-jinja"},
2569-
string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
2627+
string_format("disable jinja template for chat (default: %s)", params.use_jinja ? "disabled" : "enabled"),
25702628
[](common_params & params) {
25712629
params.use_jinja = false;
25722630
}

common/arg.h

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
#include "common.h"
44

55
#include <set>
6+
#include <map>
67
#include <string>
78
#include <vector>
9+
#include <cstring>
810

911
//
1012
// CLI argument parsing
@@ -24,6 +26,8 @@ struct common_arg {
2426
void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
2527
void (*handler_int) (common_params & params, int) = nullptr;
2628

29+
common_arg() = default;
30+
2731
common_arg(
2832
const std::initializer_list<const char *> & args,
2933
const char * value_hint,
@@ -61,9 +65,29 @@ struct common_arg {
6165
bool is_exclude(enum llama_example ex);
6266
bool get_value_from_env(std::string & output) const;
6367
bool has_value_from_env() const;
64-
std::string to_string();
68+
std::string to_string() const;
69+
70+
// for using as key in std::map
71+
bool operator<(const common_arg& other) const {
72+
if (args.empty() || other.args.empty()) {
73+
return false;
74+
}
75+
return strcmp(args[0], other.args[0]) < 0;
76+
}
77+
bool operator==(const common_arg& other) const {
78+
if (args.empty() || other.args.empty()) {
79+
return false;
80+
}
81+
return strcmp(args[0], other.args[0]) == 0;
82+
}
6583
};
6684

85+
namespace common_arg_utils {
86+
bool is_truthy(const std::string & value);
87+
bool is_falsey(const std::string & value);
88+
bool is_autoy(const std::string & value);
89+
}
90+
6791
struct common_params_context {
6892
enum llama_example ex = LLAMA_EXAMPLE_COMMON;
6993
common_params & params;
@@ -76,7 +100,11 @@ struct common_params_context {
76100
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
77101
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
78102

79-
// function to be used by test-arg-parser
103+
// parse input arguments from CLI into a map
104+
// TODO: support repeated args in the future
105+
bool common_params_parse(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map);
106+
107+
// initialize argument parser context - used by test-arg-parser and preset
80108
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81109

82110
struct common_remote_params {

common/common.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ struct common_params {
464464
std::string public_path = ""; // NOLINT
465465
std::string api_prefix = ""; // NOLINT
466466
std::string chat_template = ""; // NOLINT
467-
bool use_jinja = false; // NOLINT
467+
bool use_jinja = true; // NOLINT
468468
bool enable_chat_template = true;
469469
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
470470
int reasoning_budget = -1;
@@ -484,9 +484,10 @@ struct common_params {
484484
bool endpoint_metrics = false;
485485

486486
// router server configs
487-
std::string models_dir = ""; // directory containing models for the router server
488-
int models_max = 4; // maximum number of models to load simultaneously
489-
bool models_autoload = true; // automatically load models when requested via the router server
487+
std::string models_dir = ""; // directory containing models for the router server
488+
std::string models_preset = ""; // path to INI file containing model presets for the router server
489+
int models_max = 4; // maximum number of models to load simultaneously
490+
bool models_autoload = true; // automatically load models when requested via the router server
490491

491492
bool log_json = false;
492493

0 commit comments

Comments
 (0)