@@ -1633,7 +1633,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.cvector_outfile = value;
             params.lora_outfile = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA}));
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_COMPRESS}));
     add_opt(llama_arg(
         {"-ofreq", "--output-frequency"}, "N",
         format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
@@ -1938,6 +1938,24 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             else { throw std::invalid_argument("invalid value"); }
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
+    add_opt(llama_arg(
+        {"--compression_header_size"}, "N",
+        "Number of tokens to keep in header (default: 1)",
+        [](gpt_params & params, int value) {
+            params.num_tokens_header = value;
+        }).set_examples({LLAMA_EXAMPLE_COMPRESS}));
+    add_opt(llama_arg(
+        {"--mode"}, "{compress,expand,test}",
+        "What task to run (default: test)",
+        [](gpt_params & params, const std::string & value) {
+            if (value == "test") {
+                return; }
+            else if (value == "compress") {
+                params.compress_mode = 1; }
+            else if (value == "expand") {
+                params.compress_mode = 2; }
+            else { throw std::invalid_argument("invalid value"); }
+        }).set_examples({LLAMA_EXAMPLE_COMPRESS}));
     add_opt(llama_arg(
         {"--log-disable"},
         "Log disable",