File tree Expand file tree Collapse file tree 1 file changed +5
-4
lines changed
tests/integration/defs/accuracy Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -61,16 +61,17 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
6161 @pytest .mark .skip_less_device_memory (32000 )
6262 @parametrize_with_ids ("attn_backend" , ["TRTLLM" , "FLASHINFER" ])
6363 def test_chunked_prefill (self , attn_backend ):
64- pytorch_config = dict (attn_backend = attn_backend , )
64+ pytorch_config = dict (
65+ attn_backend = attn_backend ,
66+ # https://nvbugspro.nvidia.com/bug/5345391
67+ disable_overlap_scheduler = True )
6568 llm = LLM (self .MODEL_PATH ,
6669 enable_chunked_prefill = True ,
67- max_num_tokens = 64 ,
70+ max_num_tokens = 512 ,
6871 ** pytorch_config )
6972 with llm :
7073 task = MMLU (self .MODEL_NAME )
7174 task .evaluate (llm )
72- task = GSM8K (self .MODEL_NAME )
73- task .evaluate (llm )
7475
7576 @pytest .mark .skip_less_device_memory (32000 )
7677 @parametrize_with_ids (
You can’t perform that action at this time.
0 commit comments