@@ -267,6 +267,7 @@ struct cmd_params {
267267 int delay;
268268 bool verbose;
269269 bool progress;
270+ bool no_warmup;
270271 output_formats output_format;
271272 output_formats output_format_stderr;
272273};
@@ -303,6 +304,7 @@ static const cmd_params cmd_params_defaults = {
303304 /* delay */ 0 ,
304305 /* verbose */ false ,
305306 /* progress */ false ,
307+ /* no_warmup */ false ,
306308 /* output_format */ MARKDOWN,
307309 /* output_format_stderr */ NONE,
308310};
@@ -325,6 +327,7 @@ static void print_usage(int /* argc */, char ** argv) {
325327 output_format_str (cmd_params_defaults.output_format_stderr ));
326328 printf (" -v, --verbose verbose output\n " );
327329 printf (" --progress print test progress indicators\n " );
330+ printf (" --no-warmup skip warmup runs before benchmarking\n " );
328331 printf (" \n " );
329332 printf (" test parameters:\n " );
330333 printf (" -m, --model <filename> (default: %s)\n " , join (cmd_params_defaults.model , " ," ).c_str ());
@@ -425,6 +428,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
425428 params.prio = cmd_params_defaults.prio ;
426429 params.delay = cmd_params_defaults.delay ;
427430 params.progress = cmd_params_defaults.progress ;
431+ params.no_warmup = cmd_params_defaults.no_warmup ;
428432
429433 for (int i = 1 ; i < argc; i++) {
430434 arg = argv[i];
@@ -798,6 +802,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
798802 params.verbose = true ;
799803 } else if (arg == " --progress" ) {
800804 params.progress = true ;
805+ } else if (arg == " --no-warmup" ) {
806+ params.no_warmup = true ;
801807 } else {
802808 invalid_param = true ;
803809 break ;
@@ -1925,25 +1931,27 @@ int main(int argc, char ** argv) {
19251931 llama_attach_threadpool (ctx, threadpool, NULL );
19261932
19271933 // warmup run
1928- if (t.n_prompt > 0 ) {
1929- if (params.progress ) {
1930- fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup prompt run\n " , params_idx, params_count);
1931- }
1932- // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1933- bool res = test_prompt (ctx, t.n_prompt , t.n_batch , t.n_threads );
1934- if (!res) {
1935- fprintf (stderr, " %s: error: failed to run prompt warmup\n " , __func__);
1936- exit (1 );
1937- }
1938- }
1939- if (t.n_gen > 0 ) {
1940- if (params.progress ) {
1941- fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup generation run\n " , params_idx, params_count);
1934+ if (!params.no_warmup ) {
1935+ if (t.n_prompt > 0 ) {
1936+ if (params.progress ) {
1937+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup prompt run\n " , params_idx, params_count);
1938+ }
1939+ // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1940+ bool res = test_prompt (ctx, t.n_prompt , t.n_batch , t.n_threads );
1941+ if (!res) {
1942+ fprintf (stderr, " %s: error: failed to run prompt warmup\n " , __func__);
1943+ exit (1 );
1944+ }
19421945 }
1943- bool res = test_gen (ctx, 1 , t.n_threads );
1944- if (!res) {
1945- fprintf (stderr, " %s: error: failed to run gen warmup\n " , __func__);
1946- exit (1 );
1946+ if (t.n_gen > 0 ) {
1947+ if (params.progress ) {
1948+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup generation run\n " , params_idx, params_count);
1949+ }
1950+ bool res = test_gen (ctx, 1 , t.n_threads );
1951+ if (!res) {
1952+ fprintf (stderr, " %s: error: failed to run gen warmup\n " , __func__);
1953+ exit (1 );
1954+ }
19471955 }
19481956 }
19491957
0 commit comments