1919
# Properties by which to differentiate results per commit.
# Each entry is interpolated verbatim as a column name into the SQL built by
# get_rows (f"tb.{p} = tc.{p}"), so entries must not carry surrounding whitespace.
# n_prompt, n_gen and n_depth are listed last; the code that detects differing
# properties skips them explicitly because they are folded into the test name.
KEY_PROPERTIES = [
    "cpu_info", "gpu_info", "backends", "n_gpu_layers", "tensor_buft_overrides", "model_filename", "model_type",
    "n_batch", "n_ubatch", "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v",
    "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth"
]
2626
2727# Properties that are boolean and are converted to Yes/No for the table:
# Header names for the table.
# Maps a property (column) name to the human-readable column header.
# Keys must match the property names exactly (no surrounding whitespace),
# otherwise the header lookup for that property fails.
PRETTY_NAMES = {
    "cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers",
    "tensor_buft_overrides": "Tensor overrides", "model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]",
    "model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size", "embeddings": "Embeddings",
    "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
    "use_mmap": "Use mmap", "no_kv_offload": "NKVO", "split_mode": "Split mode", "main_gpu": "Main GPU", "tensor_split": "Tensor split",
    "flash_attn": "FlashAttention",
}
3939
DEFAULT_SHOW = ["model_type"]  # Always show these properties by default.
@@ -281,12 +281,12 @@ def get_rows(properties):
281281 The returned rows are unique in terms of property combinations.
282282 """
283283 select_string = ", " .join (
284- [f"tb.{ p } " for p in properties ] + ["tb.n_prompt" , "tb.n_gen" , "AVG(tb.avg_ts)" , "AVG(tc.avg_ts)" ])
284+ [f"tb.{ p } " for p in properties ] + ["tb.n_prompt" , "tb.n_gen" , "tb.n_depth" , " AVG(tb.avg_ts)" , "AVG(tc.avg_ts)" ])
285285 equal_string = " AND " .join (
286286 [f"tb.{ p } = tc.{ p } " for p in KEY_PROPERTIES ] + [
287287 f"tb.build_commit = '{ hexsha8_baseline } '" , f"tc.build_commit = '{ hexsha8_compare } '" ]
288288 )
289- group_order_string = ", " .join ([f"tb.{ p } " for p in properties ] + ["tb.n_gen" , "tb.n_prompt" ])
289+ group_order_string = ", " .join ([f"tb.{ p } " for p in properties ] + ["tb.n_gen" , "tb.n_prompt" , "tb.n_depth" ])
290290 query = (f"SELECT { select_string } FROM test tb JOIN test tc ON { equal_string } "
291291 f"GROUP BY { group_order_string } ORDER BY { group_order_string } ;" )
292292 return cursor .execute (query ).fetchall ()
@@ -309,7 +309,7 @@ def get_rows(properties):
309309 rows_full = get_rows (KEY_PROPERTIES )
310310 properties_different = []
311311 for i , kp_i in enumerate (KEY_PROPERTIES ):
312- if kp_i in DEFAULT_SHOW or kp_i == "n_prompt" or kp_i == "n_gen" :
312+ if kp_i in DEFAULT_SHOW or kp_i in [ "n_prompt" , "n_gen" , "n_depth" ] :
313313 continue
314314 for row_full in rows_full :
315315 if row_full [i ] != rows_full [0 ][i ]:
@@ -340,17 +340,20 @@ def get_rows(properties):
340340
# Build the output table from the query rows.
# Each row ends with five fixed columns, in this order:
#   n_prompt, n_gen, n_depth, baseline avg t/s, compare avg t/s
# Everything before those five is a regular (displayed) property column.
table = []
for row in rows_show:
    n_prompt = int(row[-5])
    n_gen = int(row[-4])
    n_depth = int(row[-3])
    # Collapse the three test parameters into a single test name.
    if n_prompt != 0 and n_gen == 0:
        test_name = f"pp{n_prompt}"
    elif n_prompt == 0 and n_gen != 0:
        test_name = f"tg{n_gen}"
    else:
        test_name = f"pp{n_prompt}+tg{n_gen}"
    if n_depth != 0:
        # A non-zero depth is appended as a suffix, e.g. "pp512@d1024".
        test_name = f"{test_name}@d{n_depth}"
    # Output row layout:
    #   regular columns | test name | avg t/s values (baseline, compare) | speedup
    # Speedup is compare / baseline, i.e. row[-1] / row[-2].
    table.append(list(row[:-5]) + [test_name] + list(row[-2:]) + [float(row[-1]) / float(row[-2])])
354357
355358# Some a-posteriori fixes to make the table contents prettier:
356359for bool_property in BOOL_PROPERTIES :
@@ -376,7 +379,7 @@ def get_rows(properties):
376379 for gns in GPU_NAME_STRIP :
377380 row_table [ip ] = row_table [ip ].replace (gns , "" )
378381
379- gpu_names = row_table [ip ].split ("/ " )
382+ gpu_names = row_table [ip ].split (", " )
380383 num_gpus = len (gpu_names )
381384 all_names_the_same = len (set (gpu_names )) == 1
382385 if len (gpu_names ) >= 2 and all_names_the_same :
0 commit comments