44import ray
55
66import vllm
7- from tests .utils import fork_new_process_for_each_test
87from vllm .lora .request import LoRARequest
98
10- from ..utils import multi_gpu_test
9+ from ..utils import create_new_process_for_each_test , multi_gpu_test
1110
1211MODEL_PATH = "meta-llama/Llama-2-7b-hf"
1312
@@ -82,7 +81,7 @@ def v1(run_with_both_engines_lora):
8281
8382# V1 Test: Failing due to numerics on V1.
8483@pytest .mark .skip_v1
85- @fork_new_process_for_each_test
84+ @create_new_process_for_each_test ()
8685def test_llama_lora (sql_lora_files ):
8786
8887 llm = vllm .LLM (MODEL_PATH ,
@@ -97,7 +96,7 @@ def test_llama_lora(sql_lora_files):
9796# Skipping for v1 as v1 doesn't have a good way to expose the num_gpu_blocks
9897# used by the engine yet.
9998@pytest .mark .skip_v1
100- @fork_new_process_for_each_test
99+ @create_new_process_for_each_test ()
101100def test_llama_lora_warmup (sql_lora_files ):
102101 """Test that the LLM initialization works with a warmup LORA path and
103102 is more conservative"""
@@ -128,7 +127,7 @@ def get_num_gpu_blocks_no_lora():
128127# V1 Test: Failing due to numerics on V1.
129128@pytest .mark .skip_v1
130129@multi_gpu_test (num_gpus = 4 )
131- @fork_new_process_for_each_test
130+ @create_new_process_for_each_test ()
132131def test_llama_lora_tp4 (sql_lora_files ):
133132
134133 llm = vllm .LLM (
@@ -143,7 +142,7 @@ def test_llama_lora_tp4(sql_lora_files):
143142
144143
145144@multi_gpu_test (num_gpus = 4 )
146- @fork_new_process_for_each_test
145+ @create_new_process_for_each_test ()
147146def test_llama_lora_tp4_fully_sharded_loras (sql_lora_files ):
148147
149148 llm = vllm .LLM (
@@ -159,7 +158,7 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
159158
160159
161160@multi_gpu_test (num_gpus = 4 )
162- @fork_new_process_for_each_test
161+ @create_new_process_for_each_test ()
163162def test_llama_lora_tp4_fully_sharded_enable_bias (sql_lora_files ):
164163
165164 llm = vllm .LLM (
0 commit comments