@@ -673,17 +673,8 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
      * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
-    std::unordered_set<std::string> seen_args;
     auto add_opt = [&](llama_arg arg) {
         if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
-            // make sure there is no argument duplications
-            for (const auto & a : arg.args) {
-                if (seen_args.find(a) == seen_args.end()) {
-                    seen_args.insert(a);
-                } else {
-                    throw std::runtime_error(format("found duplicated argument in source code: %s", a));
-                }
-            }
             options.push_back(std::move(arg));
         }
     };
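For orientation, the registration pattern this hunk simplifies can be sketched as follows: each option records the examples it applies to, and add_opt keeps only the options relevant to the current example ex, plus the common ones. The types and members below (opt, collect_options) are simplified stand-ins for illustration, not the actual llama_arg definition.

```cpp
#include <set>
#include <string>
#include <vector>

enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER };

// simplified stand-in for llama_arg: just the flag names plus the examples it applies to
struct opt {
    std::vector<std::string> names;
    std::set<llama_example>  examples = {LLAMA_EXAMPLE_COMMON};
    bool in_example(llama_example e) const { return examples.count(e) > 0; }
};

std::vector<opt> collect_options(llama_example ex) {
    std::vector<opt> options;
    auto add_opt = [&](opt o) {
        // same filter as in the hunk above: keep common options
        // and options scoped to the current example
        if (o.in_example(ex) || o.in_example(LLAMA_EXAMPLE_COMMON)) {
            options.push_back(std::move(o));
        }
    };
    add_opt({{"-m", "--model"}});                       // common: kept for every example
    add_opt({{"--embedding"}, {LLAMA_EXAMPLE_SERVER}}); // kept only when ex == SERVER
    return options;
}
```

Note that the removed seen_args loop guarded against two options registering the same flag name; with this change that invariant is no longer verified at registration time.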
@@ -790,8 +781,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-C", "--cpu-mask"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -801,8 +791,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cr", "--cpu-range"}, "lo-hi",
         "range of CPUs for affinity. Complements --cpu-mask",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
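Both handlers above delegate the real work to parse_cpu_mask and parse_cpu_range. The sketch below shows one plausible reading of their contracts: a hex string setting four CPUs per digit, and a "lo-hi" range setting an inclusive span. The actual helpers in llama.cpp's common code may differ in digit order, mask width, and error handling; MAX_CPUS and the _sketch names are illustrative only.

```cpp
#include <cctype>
#include <cstddef>
#include <string>

enum { MAX_CPUS = 512 }; // illustrative; the real mask width is fixed by ggml

// assumed convention: hex string, last digit covers CPUs 0-3, and so on upward
static bool parse_cpu_mask_sketch(const std::string & mask, bool (&cpumask)[MAX_CPUS]) {
    size_t start = mask.rfind("0x", 0) == 0 ? 2 : 0; // tolerate an optional 0x prefix
    if (start == mask.size()) {
        return false;
    }
    int cpu = 0;
    for (size_t i = mask.size(); i > start && cpu < MAX_CPUS; --i) {
        char c = (char) std::tolower((unsigned char) mask[i - 1]);
        int v;
        if (c >= '0' && c <= '9') {
            v = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            v = c - 'a' + 10;
        } else {
            return false; // not a hex digit
        }
        for (int b = 0; b < 4 && cpu < MAX_CPUS; ++b, ++cpu) {
            cpumask[cpu] = (v >> b) & 1;
        }
    }
    return true;
}

// assumed convention: "lo-hi" enables CPUs lo..hi inclusive
static bool parse_cpu_range_sketch(const std::string & range, bool (&cpumask)[MAX_CPUS]) {
    size_t dash = range.find('-');
    if (dash == std::string::npos) {
        return false;
    }
    // std::stoi throws on malformed numbers; the handlers above surface errors
    // as std::invalid_argument anyway, so a sketch can let that propagate
    int lo = std::stoi(range.substr(0, dash));
    int hi = std::stoi(range.substr(dash + 1));
    if (lo < 0 || hi < lo || hi >= MAX_CPUS) {
        return false;
    }
    for (int i = lo; i <= hi; ++i) {
        cpumask[i] = true;
    }
    return true;
}
```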
@@ -816,6 +805,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams.strict_cpu = std::stoul(value);
         }
     ));
+    add_opt(llama_arg(
+        {"--prio"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll"}, "<0...100>",
         format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
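The new --prio option validates the level before casting it to ggml_sched_priority; what that level does at runtime is up to ggml's threadpool code. As a rough illustration only, one plausible POSIX mapping from such a level to a thread scheduling policy might look like this (the enum mirrors the 0-3 scale above, but the policy and priority numbers are assumptions, not ggml's actual mapping):

```cpp
#include <pthread.h>
#include <sched.h>

// mirrors the 0-3 scale accepted by --prio; ggml defines its own enum
enum sched_priority_level {
    PRIO_NORMAL   = 0,
    PRIO_MEDIUM   = 1,
    PRIO_HIGH     = 2,
    PRIO_REALTIME = 3,
};

// hypothetical mapping: normal keeps the default policy, higher levels
// request SCHED_FIFO with increasing static priority (may need privileges)
static bool apply_priority_sketch(enum sched_priority_level level) {
    struct sched_param p = {};
    int policy = SCHED_OTHER;
    switch (level) {
        case PRIO_NORMAL:   policy = SCHED_OTHER; p.sched_priority = 0;  break;
        case PRIO_MEDIUM:   policy = SCHED_FIFO;  p.sched_priority = 40; break;
        case PRIO_HIGH:     policy = SCHED_FIFO;  p.sched_priority = 80; break;
        case PRIO_REALTIME: policy = SCHED_FIFO;  p.sched_priority = 90; break;
    }
    return pthread_setschedparam(pthread_self(), policy, &p) == 0;
}
```

Requesting SCHED_FIFO typically requires elevated privileges on Linux, which is one reason a CLI would validate the level and default to normal.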
@@ -826,8 +825,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cb", "--cpu-mask-batch"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -837,8 +835,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crb", "--cpu-range-batch"}, "lo-hi",
         "ranges of CPUs for affinity. Complements --cpu-mask-batch",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -852,6 +849,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams_batch.strict_cpu = value;
         }
     ));
+    add_opt(llama_arg(
+        {"--prio-batch"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll-batch"}, "<0|1>",
         "use polling to wait for work (default: same as --poll)",
@@ -862,8 +869,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cd", "--cpu-mask-draft"}, "M",
         "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -873,8 +879,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crd", "--cpu-range-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-draft",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -888,18 +893,37 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: same as --poll)",
         [](gpt_params & params, int value) {
             params.draft_cpuparams.poll = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"-Cbd", "--cpu-mask-batch-draft"}, "M",
+        "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
+        [](gpt_params & params, const std::string & mask) {
+            params.draft_cpuparams_batch.mask_valid = true;
+            if (!parse_cpu_mask(mask, params.draft_cpuparams_batch.cpumask)) {
+                throw std::invalid_argument("invalid cpumask");
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-batch-draft",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -913,6 +937,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams_batch.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-batch-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-batch-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: --poll-draft)",
@@ -1124,45 +1158,45 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params) {
             params.interactive = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-if", "--interactive-first"},
         format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"),
         [](gpt_params & params) {
             params.interactive_first = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-mli", "--multiline-input"},
         "allows you to write or paste multiple lines without ending each in '\\'",
         [](gpt_params & params) {
             params.multiline_input = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix-bos"},
         "prefix BOS to user inputs, preceding the `--in-prefix` string",
         [](gpt_params & params) {
             params.input_prefix_bos = true;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix"}, "STRING",
         "string to prefix user inputs with (default: empty)",
         [](gpt_params & params, const std::string & value) {
             params.input_prefix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-suffix"}, "STRING",
         "string to suffix after user inputs with (default: empty)",
         [](gpt_params & params, const std::string & value) {
             params.input_suffix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--no-warmup"},
         "skip warming up the model with an empty run",
@@ -1499,7 +1533,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ));
     add_opt(llama_arg(
-        {"--all-logits"},
+        {"--perplexity", "--all-logits"},
         format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
         [](gpt_params & params) {
             params.logits_all = true;
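This hunk, like the -o/--output/--output-file, --chunk/--from-chunk, and -to/--timeout changes further down, relies on an option owning several spellings: every name in the initializer list dispatches to the same handler, which is how the legacy --all-logits spelling keeps working alongside the added --perplexity name. A simplified sketch of such alias resolution follows (arg_def and find_arg are illustrative stand-ins, not the actual lookup in common.cpp):

```cpp
#include <string>
#include <vector>

// simplified stand-in: an option is a list of interchangeable names
struct arg_def {
    std::vector<std::string> names; // e.g. {"--perplexity", "--all-logits"}
};

// return the option matching `flag`, or nullptr if it is unknown;
// every alias in `names` resolves to the same definition
static const arg_def * find_arg(const std::vector<arg_def> & defs, const std::string & flag) {
    for (const auto & def : defs) {
        for (const auto & name : def.names) {
            if (name == flag) {
                return &def;
            }
        }
    }
    return nullptr;
}
```

Listing the preferred name first lets help output lead with it while older scripts keep working unchanged.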
@@ -1554,6 +1588,13 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.kl_divergence = true;
         }
     ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
+    add_opt(llama_arg(
+        {"--save-all-logits", "--kl-divergence-base"}, "FNAME",
+        "set logits file",
+        [](gpt_params & params, const std::string & value) {
+            params.logits_file = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
     add_opt(llama_arg(
         {"--ppl-stride"}, "N",
         format("stride for perplexity calculation (default: %d)", params.ppl_stride),
@@ -1802,7 +1843,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params, const std::string & value) {
             params.model_alias = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"-m", "--model"}, "FNAME",
         ex == LLAMA_EXAMPLE_EXPORT_LORA
@@ -1890,7 +1931,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_PASSKEY}));
     add_opt(llama_arg(
-        {"-o", "--output"}, "FNAME",
+        {"-o", "--output", "--output-file"}, "FNAME",
         format("output file (default: '%s')",
             ex == LLAMA_EXAMPLE_EXPORT_LORA
                 ? params.lora_outfile.c_str()
@@ -1932,7 +1973,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(llama_arg(
-        {"--chunk"}, "N",
+        {"--chunk", "--from-chunk"}, "N",
         format("start processing the input from chunk N (default: %d)", params.i_chunk),
         [](gpt_params & params, int value) {
             params.i_chunk = value;
@@ -2057,7 +2098,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
-        {"--timeout"}, "N",
+        {"-to", "--timeout"}, "N",
         format("server read/write timeout in seconds (default: %d)", params.timeout_read),
         [](gpt_params & params, int value) {
             params.timeout_read = value;