Skip to content

Latest commit

 

History

History
1244 lines (1226 loc) · 73.5 KB

File metadata and controls

1244 lines (1226 loc) · 73.5 KB
| GPU | Performance Profile | ISL / OSL | Concurrency | Config | Command |
8xB200_NVL Min Latency 1024 / 1024 1 1k1k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 1024 2 1k1k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 1024 4 1k1k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml
8xB200_NVL Low Latency 1024 / 1024 8 1k1k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml
8xB200_NVL Low Latency 1024 / 1024 16 1k1k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml
8xB200_NVL Balanced 1024 / 1024 32 1k1k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml
8xB200_NVL Balanced 1024 / 1024 64 1k1k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml
8xB200_NVL High Throughput 1024 / 1024 128 1k1k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc128.yaml
8xB200_NVL High Throughput 1024 / 1024 256 1k1k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc256.yaml
8xB200_NVL High Throughput 1024 / 1024 512 1k1k_tp8_conc512.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc512.yaml
8xB200_NVL High Throughput 1024 / 1024 1024 1k1k_tp8_conc1024.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc1024.yaml
8xB200_NVL Max Throughput 1024 / 1024 2048 1k1k_tp8_conc2048.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc2048.yaml
8xB200_NVL Min Latency 1024 / 8192 1 1k8k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 8192 2 1k8k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 8192 4 1k8k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc4.yaml
8xB200_NVL Low Latency 1024 / 8192 8 1k8k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc8.yaml
8xB200_NVL Low Latency 1024 / 8192 16 1k8k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc16.yaml
8xB200_NVL Balanced 1024 / 8192 32 1k8k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc32.yaml
8xB200_NVL Balanced 1024 / 8192 64 1k8k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc64.yaml
8xB200_NVL High Throughput 1024 / 8192 128 1k8k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc128.yaml
8xB200_NVL High Throughput 1024 / 8192 256 1k8k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc256.yaml
8xB200_NVL High Throughput 1024 / 8192 512 1k8k_tp8_conc512.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc512.yaml
8xB200_NVL High Throughput 1024 / 8192 1024 1k8k_tp8_conc1024.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc1024.yaml
8xB200_NVL Max Throughput 1024 / 8192 2048 1k8k_tp8_conc2048.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k8k_tp8_conc2048.yaml
8xB200_NVL Min Latency 8192 / 1024 1 8k1k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc1.yaml
8xB200_NVL Low Latency 8192 / 1024 2 8k1k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc2.yaml
8xB200_NVL Low Latency 8192 / 1024 4 8k1k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml
8xB200_NVL Low Latency 8192 / 1024 8 8k1k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml
8xB200_NVL Low Latency 8192 / 1024 16 8k1k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml
8xB200_NVL Balanced 8192 / 1024 32 8k1k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml
8xB200_NVL Balanced 8192 / 1024 64 8k1k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml
8xB200_NVL High Throughput 8192 / 1024 128 8k1k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc128.yaml
8xB200_NVL High Throughput 8192 / 1024 256 8k1k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc256.yaml
8xB200_NVL High Throughput 8192 / 1024 512 8k1k_tp8_conc512.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc512.yaml
8xB200_NVL High Throughput 8192 / 1024 1024 8k1k_tp8_conc1024.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc1024.yaml
8xB200_NVL Max Throughput 8192 / 1024 2048 8k1k_tp8_conc2048.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc2048.yaml
8xH200_SXM Min Latency 1024 / 1024 1 1k1k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc1.yaml
8xH200_SXM Low Latency 1024 / 1024 2 1k1k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc2.yaml
8xH200_SXM Low Latency 1024 / 1024 4 1k1k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml
8xH200_SXM Low Latency 1024 / 1024 8 1k1k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml
8xH200_SXM Low Latency 1024 / 1024 16 1k1k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml
8xH200_SXM Balanced 1024 / 1024 32 1k1k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml
8xH200_SXM Balanced 1024 / 1024 64 1k1k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml
8xH200_SXM High Throughput 1024 / 1024 128 1k1k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc128.yaml
8xH200_SXM High Throughput 1024 / 1024 256 1k1k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc256.yaml
8xH200_SXM High Throughput 1024 / 1024 512 1k1k_tp8_conc512.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc512.yaml
8xH200_SXM High Throughput 1024 / 1024 1024 1k1k_tp8_conc1024.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc1024.yaml
8xH200_SXM Max Throughput 1024 / 1024 2048 1k1k_tp8_conc2048.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc2048.yaml
8xH200_SXM Min Latency 1024 / 8192 1 1k8k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc1.yaml
8xH200_SXM Low Latency 1024 / 8192 2 1k8k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc2.yaml
8xH200_SXM Low Latency 1024 / 8192 4 1k8k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc4.yaml
8xH200_SXM Low Latency 1024 / 8192 8 1k8k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc8.yaml
8xH200_SXM Balanced 1024 / 8192 16 1k8k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc16.yaml
8xH200_SXM Balanced 1024 / 8192 32 1k8k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc32.yaml
8xH200_SXM High Throughput 1024 / 8192 64 1k8k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc64.yaml
8xH200_SXM High Throughput 1024 / 8192 128 1k8k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc128.yaml
8xH200_SXM High Throughput 1024 / 8192 256 1k8k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc256.yaml
8xH200_SXM Max Throughput 1024 / 8192 512 1k8k_tp8_conc512.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k8k_tp8_conc512.yaml
8xH200_SXM Min Latency 8192 / 1024 1 8k1k_tp8_conc1.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc1.yaml
8xH200_SXM Low Latency 8192 / 1024 2 8k1k_tp8_conc2.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc2.yaml
8xH200_SXM Low Latency 8192 / 1024 4 8k1k_tp8_conc4.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml
8xH200_SXM Low Latency 8192 / 1024 8 8k1k_tp8_conc8.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml
8xH200_SXM Balanced 8192 / 1024 16 8k1k_tp8_conc16.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml
8xH200_SXM High Throughput 8192 / 1024 32 8k1k_tp8_conc32.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml
8xH200_SXM High Throughput 8192 / 1024 64 8k1k_tp8_conc64.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml
8xH200_SXM High Throughput 8192 / 1024 128 8k1k_tp8_conc128.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc128.yaml
8xH200_SXM Max Throughput 8192 / 1024 256 8k1k_tp8_conc256.yaml trtllm-serve deepseek-ai/DeepSeek-R1-0528 --config ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc256.yaml
| GPU | Performance Profile | ISL / OSL | Concurrency | Config | Command |
4xB200_NVL High Throughput 1024 / 8192 2048 1k8k_tp4_conc2048.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp4_conc2048.yaml
4xB200_NVL Min Latency 8192 / 1024 1024 8k1k_tp4_conc1024.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc1024.yaml
4xB200_NVL Max Throughput 8192 / 1024 2048 8k1k_tp4_conc2048.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc2048.yaml
8xB200_NVL Min Latency 1024 / 1024 1 1k1k_tp8_conc1.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 1024 2 1k1k_tp8_conc2.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 1024 4 1k1k_tp8_conc4.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml
8xB200_NVL Low Latency 1024 / 1024 8 1k1k_tp8_conc8.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml
8xB200_NVL Low Latency 1024 / 1024 16 1k1k_tp8_conc16.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml
8xB200_NVL Balanced 1024 / 1024 32 1k1k_tp8_conc32.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml
8xB200_NVL Balanced 1024 / 1024 64 1k1k_tp8_conc64.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml
8xB200_NVL High Throughput 1024 / 1024 128 1k1k_tp8_conc128.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml
8xB200_NVL High Throughput 1024 / 1024 256 1k1k_tp8_conc256.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml
8xB200_NVL High Throughput 1024 / 1024 512 1k1k_tp8_conc512.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc512.yaml
8xB200_NVL High Throughput 1024 / 1024 1024 1k1k_tp8_conc1024.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc1024.yaml
8xB200_NVL Max Throughput 1024 / 1024 2048 1k1k_tp8_conc2048.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc2048.yaml
8xB200_NVL Min Latency 1024 / 8192 1 1k8k_tp8_conc1.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 8192 2 1k8k_tp8_conc2.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 8192 4 1k8k_tp8_conc4.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc4.yaml
8xB200_NVL Low Latency 1024 / 8192 8 1k8k_tp8_conc8.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc8.yaml
8xB200_NVL Low Latency 1024 / 8192 16 1k8k_tp8_conc16.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc16.yaml
8xB200_NVL Balanced 1024 / 8192 32 1k8k_tp8_conc32.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc32.yaml
8xB200_NVL High Throughput 1024 / 8192 64 1k8k_tp8_conc64.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc64.yaml
8xB200_NVL High Throughput 1024 / 8192 128 1k8k_tp8_conc128.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc128.yaml
8xB200_NVL High Throughput 1024 / 8192 256 1k8k_tp8_conc256.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc256.yaml
8xB200_NVL High Throughput 1024 / 8192 512 1k8k_tp8_conc512.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc512.yaml
8xB200_NVL Max Throughput 1024 / 8192 1024 1k8k_tp8_conc1024.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k8k_tp8_conc1024.yaml
8xB200_NVL Min Latency 8192 / 1024 1 8k1k_tp8_conc1.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc1.yaml
8xB200_NVL Low Latency 8192 / 1024 2 8k1k_tp8_conc2.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc2.yaml
8xB200_NVL Low Latency 8192 / 1024 4 8k1k_tp8_conc4.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml
8xB200_NVL Low Latency 8192 / 1024 8 8k1k_tp8_conc8.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml
8xB200_NVL Balanced 8192 / 1024 16 8k1k_tp8_conc16.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml
8xB200_NVL Balanced 8192 / 1024 32 8k1k_tp8_conc32.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml
8xB200_NVL High Throughput 8192 / 1024 64 8k1k_tp8_conc64.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml
8xB200_NVL High Throughput 8192 / 1024 128 8k1k_tp8_conc128.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml
8xB200_NVL High Throughput 8192 / 1024 256 8k1k_tp8_conc256.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml
8xB200_NVL Max Throughput 8192 / 1024 512 8k1k_tp8_conc512.yaml trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --config ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc512.yaml
| GPU | Performance Profile | ISL / OSL | Concurrency | Config | Command |
2xB200_NVL Min Latency 1024 / 8192 4 1k8k_tp2_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml
2xB200_NVL Max Throughput 1024 / 8192 256 1k8k_tp2_conc256.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc256.yaml
2xB200_NVL Min Latency 8192 / 1024 768 8k1k_tp2_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc768.yaml
2xB200_NVL Max Throughput 8192 / 1024 1280 8k1k_tp2_conc1280.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc1280.yaml
4xB200_NVL Min Latency 1024 / 1024 8 1k1k_tp4_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml
4xB200_NVL Low Latency 1024 / 1024 128 1k1k_tp4_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc128.yaml
4xB200_NVL Balanced 1024 / 1024 256 1k1k_tp4_conc256.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc256.yaml
4xB200_NVL High Throughput 1024 / 1024 1280 1k1k_tp4_conc1280.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc1280.yaml
4xB200_NVL Max Throughput 1024 / 1024 1536 1k1k_tp4_conc1536.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc1536.yaml
4xB200_NVL Min Latency 1024 / 8192 10 1k8k_tp4_conc10.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc10.yaml
4xB200_NVL Low Latency 1024 / 8192 64 1k8k_tp4_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml
4xB200_NVL Balanced 1024 / 8192 128 1k8k_tp4_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc128.yaml
4xB200_NVL Balanced 1024 / 8192 384 1k8k_tp4_conc384.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc384.yaml
4xB200_NVL High Throughput 1024 / 8192 640 1k8k_tp4_conc640.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc640.yaml
4xB200_NVL Max Throughput 1024 / 8192 896 1k8k_tp4_conc896.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc896.yaml
4xB200_NVL Min Latency 8192 / 1024 1 8k1k_tp4_conc1.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc1.yaml
4xB200_NVL Low Latency 8192 / 1024 2 8k1k_tp4_conc2.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc2.yaml
4xB200_NVL Low Latency 8192 / 1024 4 8k1k_tp4_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml
4xB200_NVL Low Latency 8192 / 1024 10 8k1k_tp4_conc10.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc10.yaml
4xB200_NVL Balanced 8192 / 1024 32 8k1k_tp4_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml
4xB200_NVL High Throughput 8192 / 1024 64 8k1k_tp4_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml
4xB200_NVL High Throughput 8192 / 1024 256 8k1k_tp4_conc256.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc256.yaml
4xB200_NVL High Throughput 8192 / 1024 1536 8k1k_tp4_conc1536.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc1536.yaml
4xB200_NVL Max Throughput 8192 / 1024 1792 8k1k_tp4_conc1792.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc1792.yaml
8xB200_NVL Min Latency 1024 / 1024 1 1k1k_tp8_conc1.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 1024 2 1k1k_tp8_conc2.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 1024 4 1k1k_tp8_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml
8xB200_NVL Low Latency 1024 / 1024 16 1k1k_tp8_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml
8xB200_NVL Low Latency 1024 / 1024 32 1k1k_tp8_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml
8xB200_NVL Low Latency 1024 / 1024 64 1k1k_tp8_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml
8xB200_NVL Balanced 1024 / 1024 384 1k1k_tp8_conc384.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc384.yaml
8xB200_NVL High Throughput 1024 / 1024 512 1k1k_tp8_conc512.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc512.yaml
8xB200_NVL High Throughput 1024 / 1024 640 1k1k_tp8_conc640.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc640.yaml
8xB200_NVL High Throughput 1024 / 1024 768 1k1k_tp8_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc768.yaml
8xB200_NVL High Throughput 1024 / 1024 896 1k1k_tp8_conc896.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc896.yaml
8xB200_NVL High Throughput 1024 / 1024 1792 1k1k_tp8_conc1792.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc1792.yaml
8xB200_NVL Max Throughput 1024 / 1024 2048 1k1k_tp8_conc2048.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc2048.yaml
8xB200_NVL Min Latency 1024 / 8192 1 1k8k_tp8_conc1.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc1.yaml
8xB200_NVL Low Latency 1024 / 8192 2 1k8k_tp8_conc2.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc2.yaml
8xB200_NVL Low Latency 1024 / 8192 8 1k8k_tp8_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml
8xB200_NVL Low Latency 1024 / 8192 16 1k8k_tp8_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml
8xB200_NVL Balanced 1024 / 8192 32 1k8k_tp8_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml
8xB200_NVL Balanced 1024 / 8192 768 1k8k_tp8_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc768.yaml
8xB200_NVL High Throughput 1024 / 8192 1024 1k8k_tp8_conc1024.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc1024.yaml
8xB200_NVL High Throughput 1024 / 8192 1280 1k8k_tp8_conc1280.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc1280.yaml
8xB200_NVL High Throughput 1024 / 8192 1792 1k8k_tp8_conc1792.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc1792.yaml
8xB200_NVL Max Throughput 1024 / 8192 2048 1k8k_tp8_conc2048.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc2048.yaml
8xB200_NVL Min Latency 8192 / 1024 8 8k1k_tp8_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml
8xB200_NVL Low Latency 8192 / 1024 16 8k1k_tp8_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml
8xB200_NVL Balanced 8192 / 1024 128 8k1k_tp8_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc128.yaml
8xB200_NVL Balanced 8192 / 1024 384 8k1k_tp8_conc384.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc384.yaml
8xB200_NVL High Throughput 8192 / 1024 640 8k1k_tp8_conc640.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc640.yaml
8xB200_NVL Max Throughput 8192 / 1024 2048 8k1k_tp8_conc2048.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc2048.yaml
2xH200_SXM Min Latency 8192 / 1024 16 8k1k_tp2_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml
2xH200_SXM Balanced 8192 / 1024 256 8k1k_tp2_conc256.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc256.yaml
2xH200_SXM Max Throughput 8192 / 1024 384 8k1k_tp2_conc384.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc384.yaml
4xH200_SXM Min Latency 1024 / 1024 128 1k1k_tp4_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc128.yaml
4xH200_SXM Balanced 1024 / 1024 384 1k1k_tp4_conc384.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc384.yaml
4xH200_SXM Max Throughput 1024 / 1024 1024 1k1k_tp4_conc1024.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc1024.yaml
4xH200_SXM High Throughput 1024 / 8192 512 1k8k_tp4_conc512.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc512.yaml
4xH200_SXM Min Latency 8192 / 1024 2 8k1k_tp4_conc2.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc2.yaml
4xH200_SXM Balanced 8192 / 1024 4 8k1k_tp4_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml
4xH200_SXM Max Throughput 8192 / 1024 768 8k1k_tp4_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc768.yaml
8xH200_SXM Min Latency 1024 / 1024 4 1k1k_tp8_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml
8xH200_SXM Low Latency 1024 / 1024 8 1k1k_tp8_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml
8xH200_SXM Low Latency 1024 / 1024 16 1k1k_tp8_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml
8xH200_SXM Low Latency 1024 / 1024 32 1k1k_tp8_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml
8xH200_SXM Balanced 1024 / 1024 64 1k1k_tp8_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml
8xH200_SXM High Throughput 1024 / 1024 512 1k1k_tp8_conc512.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc512.yaml
8xH200_SXM High Throughput 1024 / 1024 768 1k1k_tp8_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc768.yaml
8xH200_SXM High Throughput 1024 / 1024 896 1k1k_tp8_conc896.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc896.yaml
8xH200_SXM Max Throughput 1024 / 1024 2048 1k1k_tp8_conc2048.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc2048.yaml
8xH200_SXM Min Latency 1024 / 8192 1 1k8k_tp8_conc1.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc1.yaml
8xH200_SXM Low Latency 1024 / 8192 2 1k8k_tp8_conc2.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc2.yaml
8xH200_SXM Low Latency 1024 / 8192 4 1k8k_tp8_conc4.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml
8xH200_SXM Low Latency 1024 / 8192 8 1k8k_tp8_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml
8xH200_SXM Low Latency 1024 / 8192 16 1k8k_tp8_conc16.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml
8xH200_SXM Low Latency 1024 / 8192 32 1k8k_tp8_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml
8xH200_SXM Balanced 1024 / 8192 64 1k8k_tp8_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml
8xH200_SXM High Throughput 1024 / 8192 128 1k8k_tp8_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc128.yaml
8xH200_SXM High Throughput 1024 / 8192 256 1k8k_tp8_conc256.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc256.yaml
8xH200_SXM High Throughput 1024 / 8192 768 1k8k_tp8_conc768.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc768.yaml
8xH200_SXM High Throughput 1024 / 8192 896 1k8k_tp8_conc896.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc896.yaml
8xH200_SXM High Throughput 1024 / 8192 1024 1k8k_tp8_conc1024.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc1024.yaml
8xH200_SXM Max Throughput 1024 / 8192 1280 1k8k_tp8_conc1280.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc1280.yaml
8xH200_SXM Min Latency 8192 / 1024 1 8k1k_tp8_conc1.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc1.yaml
8xH200_SXM Low Latency 8192 / 1024 8 8k1k_tp8_conc8.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml
8xH200_SXM Low Latency 8192 / 1024 32 8k1k_tp8_conc32.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml
8xH200_SXM Balanced 8192 / 1024 64 8k1k_tp8_conc64.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml
8xH200_SXM Balanced 8192 / 1024 128 8k1k_tp8_conc128.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc128.yaml
8xH200_SXM High Throughput 8192 / 1024 512 8k1k_tp8_conc512.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc512.yaml
8xH200_SXM High Throughput 8192 / 1024 640 8k1k_tp8_conc640.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc640.yaml
8xH200_SXM Max Throughput 8192 / 1024 1536 8k1k_tp8_conc1536.yaml trtllm-serve openai/gpt-oss-120b --config ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc1536.yaml