@@ -383,6 +383,8 @@ accuracy/test_llm_api.py::TestLlama3_2_1B::test_fp8_pp2
383383accuracy/test_llm_api.py::TestLlama3_2_1B::test_fp8_rowwise
384384accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_auto_dtype
385385accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_fp8_prequantized
386+ accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_fp8_prequantized_tp4
387+ accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_nvfp4_prequantized_tp4
386388accuracy/test_cli_flow.py::TestMistral7B::test_beam_search
387389accuracy/test_cli_flow.py::TestMistral7B::test_fp8_tp4pp2
388390accuracy/test_cli_flow.py::TestMistral7B::test_smooth_quant_tp4pp1
@@ -435,6 +437,8 @@ accuracy/test_llm_api.py::TestMixtral8x7B::test_tp2
435437accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2
436438accuracy/test_llm_api.py::TestMixtral8x7BInstruct::test_awq_tp2
437439accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
440+ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
441+ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]
438442accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
439443accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search
440444accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3
@@ -445,13 +449,13 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_
445449accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[llguidance]
446450accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
447451accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
448- accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_fp8_prequantized_tp4
449- accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_nvfp4_prequantized_tp4
450452accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
451453accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
452454accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
453455accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
454456accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
457+ accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
458+ accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=TRTLLM]
455459accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8-cuda_graph=False]
456460accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
457461accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
0 commit comments