Skip to content

Commit a3eb12f

Browse files
committed
server : PoC implementation of "interim" server
1 parent b486ba0 commit a3eb12f

File tree

4 files changed

+248
-121
lines changed

4 files changed

+248
-121
lines changed

common/arg.cpp

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -834,6 +834,26 @@ static std::string get_all_kv_cache_types() {
834834
// CLI argument parsing functions
835835
//
836836

837+
// handle model and download
//
// Runs common_params_handle_model() for every model referenced by `params`:
// the main model, the multimodal projector (mmproj), the speculative model
// and the vocoder model, in that order.
//
//   cur_ex - the example currently running; the mmproj model is only handled
//            when this value appears in `mmproj_examples`
//   params - read and updated in place (params.mmproj may be cleared or
//            replaced here; the other model fields are passed by name to
//            common_params_handle_model)
//
// NOTE(review): the empty string passed as the third argument appears to be a
// "default path" value (the inline code this function replaces passed
// DEFAULT_MODEL_PATH for the main model) - confirm against
// common_params_handle_model's signature.
838+
void common_params_handle_models(enum llama_example cur_ex, common_params & params) {
839+
// handle the main model first; the result carries found_mmproj / mmproj,
// which are consulted below
auto res = common_params_handle_model(params.model, params.hf_token, "");
840+
if (params.no_mmproj) {
841+
// mmproj explicitly disabled: reset it to a value-initialized state
params.mmproj = {};
842+
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
843+
// optionally, handle mmproj model when -hf is specified
// (only adopted when neither an mmproj path nor an mmproj url is already set)
844+
params.mmproj = res.mmproj;
845+
}
846+
// only download mmproj if the current example is using it
847+
for (auto & ex : mmproj_examples) {
848+
if (cur_ex == ex) {
849+
common_params_handle_model(params.mmproj, params.hf_token, "");
850+
// one match is enough; stop scanning the list
break;
851+
}
852+
}
853+
// the speculative and vocoder models are handled regardless of cur_ex
common_params_handle_model(params.speculative.model, params.hf_token, "");
854+
common_params_handle_model(params.vocoder.model, params.hf_token, "");
855+
}
856+
837857
static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
838858
std::string arg;
839859
const std::string arg_prefix = "--";
@@ -933,24 +953,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
933953
}
934954

935955
// handle model and download
936-
{
937-
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
938-
if (params.no_mmproj) {
939-
params.mmproj = {};
940-
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
941-
// optionally, handle mmproj model when -hf is specified
942-
params.mmproj = res.mmproj;
943-
}
944-
// only download mmproj if the current example is using it
945-
for (auto & ex : mmproj_examples) {
946-
if (ctx_arg.ex == ex) {
947-
common_params_handle_model(params.mmproj, params.hf_token, "");
948-
break;
949-
}
950-
}
951-
common_params_handle_model(params.speculative.model, params.hf_token, "");
952-
common_params_handle_model(params.vocoder.model, params.hf_token, "");
953-
}
956+
common_params_handle_models(ctx_arg.ex, params);
954957

955958
if (params.escape) {
956959
string_process_escapes(params.prompt);
@@ -2486,10 +2489,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
24862489
{"-m", "--model"}, "FNAME",
24872490
ex == LLAMA_EXAMPLE_EXPORT_LORA
24882491
? std::string("model path from which to load base model")
2489-
: string_format(
2490-
"model path (default: `models/$filename` with filename from `--hf-file` "
2491-
"or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
2492-
),
2492+
: "model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set)",
24932493
[](common_params & params, const std::string & value) {
24942494
params.model.path = value;
24952495
}

common/arg.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
8080
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
8181
bool common_has_curl();
8282

83+
// handle model and download
// Processes params.model, then params.mmproj (only when cur_ex is one of the
// examples that use mmproj), params.speculative.model and
// params.vocoder.model. `params` is modified in place.
84+
void common_params_handle_models(enum llama_example cur_ex, common_params & params);
85+
8386
struct common_remote_params {
8487
std::vector<std::string> headers;
8588
long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout

common/common.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,6 @@
2323
fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
2424
} while(0)
2525

26-
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
27-
2826
struct common_adapter_lora_info {
2927
std::string path;
3028
float scale;

0 commit comments

Comments
 (0)