7777
7878using json = nlohmann::ordered_json;
7979
80+ //
81+ // Environment variable utils
82+ //
83+
// Reads the environment variable `name` into a std::string target.
// When the variable is not set, `target` keeps its current value.
template <typename T>
static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw == nullptr) {
        return; // variable not set: leave the caller's value untouched
    }
    target = std::string(raw);
}
90+
// Reads the environment variable `name` into an integral (non-bool) target.
// The text is parsed with std::stoi, which throws std::invalid_argument or
// std::out_of_range on malformed or oversized input; the resulting int is then
// converted to T. When the variable is not set, `target` is left unchanged.
template <typename T>
static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw != nullptr) {
        target = std::stoi(raw);
    }
}
97+
// Reads the environment variable `name` into a floating-point target.
// The text is parsed with std::stof (single precision) and then converted to T;
// std::stof throws std::invalid_argument / std::out_of_range on bad input.
// When the variable is not set, `target` is left unchanged.
template <typename T>
static typename std::enable_if<std::is_floating_point<T>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw != nullptr) {
        target = std::stof(raw);
    }
}
104+
// Reads the environment variable `name` into a bool target.
// The flag becomes true only when the value is exactly "1" or "true"; any other
// value (including the empty string) sets it to false. When the variable is not
// set at all, `target` is left unchanged.
template <typename T>
static typename std::enable_if<std::is_same<T, bool>::value, void>::type
get_env(std::string name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value) {
        const std::string val(value);
        // Compare against the bare tokens: a leading space in the literals would
        // never match values set the usual way (e.g. `FOO=1`).
        target = val == "1" || val == "true";
    }
}
114+
80115//
81116// CPU utils
82117//
@@ -220,12 +255,6 @@ int32_t cpu_get_num_math() {
220255// CLI argument parsing
221256//
222257
223- void gpt_params_handle_hf_token (gpt_params & params) {
224- if (params.hf_token .empty () && std::getenv (" HF_TOKEN" )) {
225- params.hf_token = std::getenv (" HF_TOKEN" );
226- }
227- }
228-
229258void gpt_params_handle_model_default (gpt_params & params) {
230259 if (!params.hf_repo .empty ()) {
231260 // short-hand to avoid specifying --hf-file -> default it to --model
@@ -273,7 +302,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
273302
274303 gpt_params_handle_model_default (params);
275304
276- gpt_params_handle_hf_token (params);
305+ if (params.hf_token .empty ()) {
306+ get_env (" HF_TOKEN" , params.hf_token );
307+ }
277308
278309 if (params.escape ) {
279310 string_process_escapes (params.prompt );
@@ -293,6 +324,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
293324 return true ;
294325}
295326
327+ void gpt_params_parse_from_env (gpt_params & params) {
328+ // we only care about server-related params for now
329+ get_env (" LLAMA_ARG_MODEL" , params.model );
330+ get_env (" LLAMA_ARG_THREADS" , params.n_threads );
331+ get_env (" LLAMA_ARG_CTX_SIZE" , params.n_ctx );
332+ get_env (" LLAMA_ARG_N_PARALLEL" , params.n_parallel );
333+ get_env (" LLAMA_ARG_BATCH" , params.n_batch );
334+ get_env (" LLAMA_ARG_UBATCH" , params.n_ubatch );
335+ get_env (" LLAMA_ARG_N_GPU_LAYERS" , params.n_gpu_layers );
336+ get_env (" LLAMA_ARG_THREADS_HTTP" , params.n_threads_http );
337+ get_env (" LLAMA_ARG_CHAT_TEMPLATE" , params.chat_template );
338+ get_env (" LLAMA_ARG_N_PREDICT" , params.n_predict );
339+ get_env (" LLAMA_ARG_ENDPOINT_METRICS" , params.endpoint_metrics );
340+ get_env (" LLAMA_ARG_ENDPOINT_SLOTS" , params.endpoint_slots );
341+ get_env (" LLAMA_ARG_EMBEDDINGS" , params.embedding );
342+ get_env (" LLAMA_ARG_FLASH_ATTN" , params.flash_attn );
343+ get_env (" LLAMA_ARG_DEFRAG_THOLD" , params.defrag_thold );
344+ }
345+
296346bool gpt_params_parse (int argc, char ** argv, gpt_params & params) {
297347 const auto params_org = params; // the example can modify the default params
298348
0 commit comments