|
1 | | -network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric |
2 | | -"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,50,INFERENCE_TIME,99133.65406 |
3 | | -"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,SEQ_THROUGHPUT,82.63618 |
4 | | -"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,TOKEN_THROUGHPUT,10577.431520000002 |
5 | | -"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.3,50,INFERENCE_TIME,214410.6447 |
6 | | -"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.3,50,KV_CACHE_SIZE,68.84 |
7 | | -"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.3,10,SEQ_THROUGHPUT,38.2071 |
8 | | -"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.3,10,TOKEN_THROUGHPUT,4890.5035 |
| 1 | +network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric,device_subtype |
| 2 | +"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,109007.96, |
| 3 | +"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,76.45, |
| 4 | +"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,9785.75, |
| 5 | +"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,55.64, |
| 6 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,171845.02,H100_PCIe |
| 7 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,57.17,H100_PCIe |
| 8 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,48.09,H100_PCIe |
| 9 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,6155.59,H100_PCIe |
| 10 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,139897.82,H100_NVL |
| 11 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,69.59,H100_NVL |
| 12 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,58.63,H100_NVL |
| 13 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,7504.07,H100_NVL |
| 14 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,125068.76,H100 |
| 15 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,57.09,H100 |
| 16 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,65.50,H100 |
| 17 | +"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,8384.00,H100 |
0 commit comments