@@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
143143
144144 if (!sweep_bench_output_jsonl) {
145145 LOG_INF (" \n " );
146- LOG_INF (" %s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n " , __func__, n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch );
146+ LOG_INF (" %s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, flash_attn_type = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n " , __func__, n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn_type , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch );
147147 LOG_INF (" \n " );
148148 LOG_INF (" |%6s | %6s | %6s | %8s | %8s | %8s | %8s |\n " , " PP" , " TG" , " N_KV" , " T_PP s" , " S_PP t/s" , " T_TG s" , " S_TG t/s" );
149149 LOG_INF (" |%6s-|-%6s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|\n " , " ------" , " ------" , " ------" , " --------" , " --------" , " --------" , " --------" );
@@ -243,9 +243,9 @@ int main(int argc, char ** argv) {
243243
244244 if (sweep_bench_output_jsonl) {
245245 LOG_INF (
246- " {\" n_kv_max\" : %d, \" n_batch\" : %d, \" n_ubatch\" : %d, \" flash_attn \" : %d, \" n_gpu_layers\" : %d, \" n_threads\" : %u, \" n_threads_batch\" : %u, "
246+ " {\" n_kv_max\" : %d, \" n_batch\" : %d, \" n_ubatch\" : %d, \" flash_attn_type \" : %d, \" n_gpu_layers\" : %d, \" n_threads\" : %u, \" n_threads_batch\" : %u, "
247247 " \" pp\" : %d, \" tg\" : %d, \" n_kv\" : %d, \" t_pp\" : %f, \" speed_pp\" : %f, \" t_tg\" : %f, \" speed_tg\" : %f }\n " ,
248- n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch ,
248+ n_kv_max, params.n_batch , params.n_ubatch , params.flash_attn_type , params.n_gpu_layers , ctx_params.n_threads , ctx_params.n_threads_batch ,
249249 pp, tg, n_kv, t_pp, speed_pp, t_tg, speed_tg
250250 );
251251 } else {
0 commit comments