@@ -97,7 +97,7 @@ def test_attention_fusion_v0(example_prompts, monkeypatch, model: str,
     llm = LLM(model,
               enforce_eager=True,
               compilation_config=compile_config,
-              gpu_memory_utilization=0.4,
+              gpu_memory_utilization=0.5,
               max_model_len=2048)
 
     sampling_params = SamplingParams(temperature=0.0,
@@ -130,7 +130,7 @@ def test_attention_fusion_v0(example_prompts, monkeypatch, model: str,
     llm2 = LLM(model,
                enforce_eager=True,
                compilation_config=compile_config,
-               gpu_memory_utilization=0.4,
+               gpu_memory_utilization=0.5,
                max_model_len=2048)
 
     # check outputs
@@ -207,7 +207,7 @@ def test_attention_fusion_v1(example_prompts, monkeypatch, model: str,
     backend = TestBackend()  # also force disable caches
     llm = LLM(model,
               compilation_config=compile_config,
-              gpu_memory_utilization=0.4,
+              gpu_memory_utilization=0.5,
               max_model_len=2048)
     sampling_params = SamplingParams(temperature=0.0,
                                      max_tokens=10,
@@ -242,7 +242,7 @@ def test_attention_fusion_v1(example_prompts, monkeypatch, model: str,
                        quant_key=quant_key,
                        compile_config=get_current_vllm_config().compilation_config))
     llm2 = LLM(model,
-               gpu_memory_utilization=0.4,
+               gpu_memory_utilization=0.5,
                compilation_config=compile_config,
                max_model_len=2048)
 
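For context only (not part of this commit): a minimal, hypothetical sketch of how the changed setting reaches vLLM's `LLM` constructor. `gpu_memory_utilization` is the fraction of total GPU memory vLLM may reserve for model weights plus KV cache, so bumping it from 0.4 to 0.5 gives these tests a larger KV-cache budget. The model name below is a placeholder; the actual tests take `model` from a pytest fixture.

```python
from vllm import LLM, SamplingParams

# "facebook/opt-125m" is a placeholder; the tests parametrize the model.
llm = LLM("facebook/opt-125m",
          enforce_eager=True,
          gpu_memory_utilization=0.5,  # reserve up to 50% of GPU memory
          max_model_len=2048)

# Greedy decoding mirrors the temperature=0.0 setup used in the tests.
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=10))
print(outputs[0].outputs[0].text)
```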