Commit 98966cb

test: Unwaive Llama 3.1 with torch compile test (NVIDIA#3475)
* Fix log info

  Signed-off-by: Yi Zhang <[email protected]>

* Revert "test: Waive torch compile tests (NVIDIA#3471)"

  This reverts commit 410f563.

  Signed-off-by: Yi Zhang <[email protected]>

* Update test_llm_api_pytorch.py

  Signed-off-by: Yi Zhang <[email protected]>

---------

Signed-off-by: Yi Zhang <[email protected]>
1 parent a32389b · commit 98966cb

File tree: 2 files changed (+2 −10 lines)

tensorrt_llm/_torch/pyexecutor/model_engine.py

Lines changed: 1 addition & 1 deletion
Lines changed: 1 addition & 1 deletion

@@ -521,7 +521,7 @@ def _create_extra_inputs(bs, num_tokens_per_request):
                 # No KV cache space!
                 continue
             logger.info(
-                f"Run warmup for batch size={bs}, pure {'context' if num_tokens_per_request is not None else 'generation'} phase"
+                f"Run warmup for batch size={bs}, pure {'context' if num_tokens_per_request > 1 else 'generation'} phase"
             )
             self.forward(
                 batch,
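The fix hinges on what num_tokens_per_request encodes during warmup: a value greater than one corresponds to a pure context (prefill) pass, while one token per request corresponds to pure generation (decode), so the old "is not None" check presumably mislabeled the generation warmup as "context". A minimal sketch of the corrected phase selection, outside the real model_engine.py; the warmup_phase helper below is a hypothetical illustration, not TensorRT-LLM API:

def warmup_phase(num_tokens_per_request: int) -> str:
    # Mirrors the corrected log condition: more than one token per request
    # means a pure context (prefill) warmup, otherwise pure generation.
    return "context" if num_tokens_per_request > 1 else "generation"


for bs, num_tokens in [(1, 1), (8, 1), (1, 256)]:
    print(f"Run warmup for batch size={bs}, pure {warmup_phase(num_tokens)} phase")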

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 9 deletions
@@ -58,8 +58,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
     @parametrize_with_ids("torch_compile", [False, True])
     @parametrize_with_ids("attn_backend", ["TRTLLM", "FLASHINFER"])
     def test_bfloat16(self, attn_backend, torch_compile):
-        if torch_compile:
-            pytest.skip("https://nvbugs/5216737")
         pytorch_config = PyTorchConfig(
             torch_compile_enabled=torch_compile,
             cuda_graph_padding_enabled=torch_compile,
@@ -84,8 +82,6 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, attn_backend,
                 "Pipeline parallel with torch.compile is not supported yet.\n"
                 "Issue: Unfusing flashinfer_fused_add_rmsnorm causes outputs to be "
                 "discarded at graph breaks.")
-        if torch_compile:
-            pytest.skip("https://nvbugs/5216737")
         pytorch_config = PyTorchConfig(
             torch_compile_enabled=torch_compile,
             cuda_graph_padding_enabled=torch_compile,
@@ -107,8 +103,6 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, attn_backend,
     @parametrize_with_ids("attn_backend", ["TRTLLM", "FLASHINFER"])
     @parametrize_with_ids("fp8kv", [False, True])
     def test_fp8(self, fp8kv, attn_backend, torch_compile):
-        if torch_compile:
-            pytest.skip("https://nvbugs/5216737")
         quant_config = QuantConfig(QuantAlgo.FP8)
         pytorch_config = PyTorchConfig(
             torch_compile_enabled=torch_compile,
@@ -140,9 +134,7 @@ def test_fp8(self, fp8kv, attn_backend, torch_compile):
                            ids=["tp4", "tp2pp2"])
     def test_fp8_4gpus(self, tp_size, pp_size, fp8kv, attn_backend,
                        torch_compile):
-        if torch_compile:
-            pytest.skip("https://nvbugs/5216737")
-        if torch_compile and pp_size > 1:
+        if pp_size > 1:
             pytest.skip(
                 "Pipeline parallel with torch.compile is not supported yet.\n"
                 "Issue: Unfusing flashinfer_fused_add_rmsnorm causes outputs to be "
