@@ -1550,11 +1550,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
15501550 {" -fa" , " --flash-attn" }, " FA" ,
15511551 string_format (" set Flash Attention use ('on', 'off', or 'auto', default: '%s')" , llama_flash_attn_type_name (params.flash_attn_type )),
15521552 [](common_params & params, const std::string & value) {
1553- if (value == " on" || value == " enabled" ) {
1553+ if (value == " on" || value == " enabled" || value == " 1 " ) {
15541554 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
1555- } else if (value == " off" || value == " disabled" ) {
1555+ } else if (value == " off" || value == " disabled" || value == " 0 " ) {
15561556 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
1557- } else if (value == " auto" ) {
1557+ } else if (value == " auto" || value == " -1 " ) {
15581558 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
15591559 } else {
15601560 throw std::runtime_error (string_format (" error: unkown value for --flash-attn: '%s'\n " , value.c_str ()));
@@ -2964,20 +2964,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29642964 params.endpoint_metrics = true ;
29652965 }
29662966 ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_ENDPOINT_METRICS" ));
2967- add_opt (common_arg (
2968- {" --slots" },
2969- string_format (" enable slots monitoring endpoint (default: %s)" , params.endpoint_slots ? " enabled" : " disabled" ),
2970- [](common_params & params) {
2971- params.endpoint_slots = true ;
2972- }
2973- ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_ENDPOINT_SLOTS" ));
29742967 add_opt (common_arg (
29752968 {" --props" },
29762969 string_format (" enable changing global properties via POST /props (default: %s)" , params.endpoint_props ? " enabled" : " disabled" ),
29772970 [](common_params & params) {
29782971 params.endpoint_props = true ;
29792972 }
29802973 ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_ENDPOINT_PROPS" ));
2974+ add_opt (common_arg (
2975+ {" --slots" },
2976+ string_format (" enable slots monitoring endpoint (default: %s)" , params.endpoint_slots ? " enabled" : " disabled" ),
2977+ [](common_params & params) {
2978+ params.endpoint_slots = true ;
2979+ }
2980+ ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_ENDPOINT_SLOTS" ));
29812981 add_opt (common_arg (
29822982 {" --no-slots" },
29832983 " disables slots monitoring endpoint" ,
0 commit comments