7 changes: 7 additions & 0 deletions common/arg.cpp
@@ -1663,6 +1663,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.n_print = value;
        }
    ).set_examples({LLAMA_EXAMPLE_MAIN}));
    add_opt(common_arg(
        {"--bench"}, "N",
        string_format("repeat the full generation N times and report aggregate throughput (default: %d)", params.bench_runs),
        [](common_params & params, int value) {
            params.bench_runs = std::max(1, value);
        }
    ).set_examples({LLAMA_EXAMPLE_MAIN}));
    add_opt(common_arg(
        {"--prompt-cache"}, "FNAME",
        "file to cache prompt state for faster startup (default: none)",
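The hunk above only registers the flag and clamps its value to at least 1; the changes that actually consume `params.bench_runs` in the main example are not part of this hunk. A minimal sketch of what the flag's description implies (repeat the full generation N times, report aggregate throughput) could look like the following — `run_generation()` and `run_bench()` are hypothetical stand-ins for illustration, not code from this PR:

```cpp
#include <chrono>
#include <cstdint>
#include <cstdio>

// hypothetical stand-in for the existing prompt-eval + sampling loop;
// assumed to return the number of tokens produced in one full pass
static int run_generation() {
    // ... the existing generation path would run here ...
    return 0;
}

// repeat the full generation bench_runs times and report aggregate throughput,
// mirroring the behaviour described by the --bench flag
static void run_bench(int32_t bench_runs) {
    int64_t total_tokens = 0;
    double  total_sec    = 0.0;

    for (int32_t i = 0; i < bench_runs; ++i) {
        const auto t0    = std::chrono::steady_clock::now();
        const int  n_gen = run_generation();
        const auto t1    = std::chrono::steady_clock::now();

        total_tokens += n_gen;
        total_sec    += std::chrono::duration<double>(t1 - t0).count();
    }

    fprintf(stderr, "bench: %d runs, %lld tokens, %.2f tok/s aggregate\n",
            (int) bench_runs, (long long) total_tokens,
            total_sec > 0.0 ? (double) total_tokens / total_sec : 0.0);
}
```

Aggregating totals across runs rather than averaging per-run rates keeps the reported tok/s consistent with total work over total wall time.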
1 change: 1 addition & 0 deletions common/common.h
@@ -284,6 +284,7 @@ struct common_params {
    int32_t grp_attn_n      = 1;     // group-attention factor
    int32_t grp_attn_w      = 512;   // group-attention width
    int32_t n_print         = -1;    // print token count every n tokens (-1 = disabled)
    int32_t bench_runs      = 1;     // repeat full generation runs for benchmarking
    float   rope_freq_base  = 0.0f;  // RoPE base frequency
    float   rope_freq_scale = 0.0f;  // RoPE frequency scaling factor
    float   yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor
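Because `bench_runs` defaults to 1 and the parser clamps the value with `std::max(1, value)`, the default behaviour remains a single generation pass; only an explicit `--bench N` with N > 1 repeats it. As an illustrative (not verified) invocation, something like `./llama-cli -m model.gguf -p "..." --bench 4` would run the full generation four times — the binary name and other arguments depend on how the main example is built and run.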