 from vllm.v1.worker.gpu_worker import Worker as V1Worker
 from vllm.worker.worker import Worker
 
+NUM_LORAS = 16
+
 
 @patch.dict(os.environ, {"RANK": "0"})
 def test_worker_apply_lora(sql_lora_files):
@@ -58,12 +60,12 @@ def set_active_loras(worker: Union[Worker, V1Worker],
         device_config=DeviceConfig("cuda"),
         cache_config=CacheConfig(
             block_size=16,
-            gpu_memory_utilization=1.0,
             swap_space=0,
             cache_dtype="auto",
         ),
-        lora_config=LoRAConfig(max_lora_rank=8, max_cpu_loras=32,
-                               max_loras=32),
+        lora_config=LoRAConfig(max_lora_rank=8,
+                               max_cpu_loras=NUM_LORAS,
+                               max_loras=NUM_LORAS),
     )
     worker = worker_cls(
         vllm_config=vllm_config,
@@ -78,9 +80,9 @@ def set_active_loras(worker: Union[Worker, V1Worker],
     set_active_loras(worker, [])
     assert worker.list_loras() == set()
 
-    n_loras = 32
     lora_requests = [
-        LoRARequest(str(i + 1), i + 1, sql_lora_files) for i in range(n_loras)
+        LoRARequest(str(i + 1), i + 1, sql_lora_files)
+        for i in range(NUM_LORAS)
     ]
 
     set_active_loras(worker, lora_requests)
@@ -89,12 +91,12 @@ def set_active_loras(worker: Union[Worker, V1Worker],
         for lora_request in lora_requests
     }
 
-    for i in range(32):
+    for i in range(NUM_LORAS):
         random.seed(i)
         iter_lora_requests = random.choices(lora_requests,
-                                            k=random.randint(1, n_loras))
+                                            k=random.randint(1, NUM_LORAS))
         random.shuffle(iter_lora_requests)
-        iter_lora_requests = iter_lora_requests[:-random.randint(0, n_loras)]
+        iter_lora_requests = iter_lora_requests[:-random.randint(0, NUM_LORAS)]
         set_active_loras(worker, lora_requests)
         assert worker.list_loras().issuperset(
             {lora_request.lora_int_id
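For context, here is a minimal, dependency-free sketch of the request-sampling pattern the loop above exercises. `FakeLoRARequest` and the adapter path are stand-ins for vllm's `LoRARequest` and the `sql_lora_files` fixture, so this runs without a GPU or a vllm install; it is an illustration, not the test itself.

```python
# Sketch only: FakeLoRARequest stands in for vllm's LoRARequest, and
# "/path/to/sql_lora" stands in for the sql_lora_files test fixture.
import random
from collections import namedtuple

NUM_LORAS = 16
FakeLoRARequest = namedtuple("FakeLoRARequest",
                             ["lora_name", "lora_int_id", "lora_path"])

# One request per adapter id, mirroring the list comprehension in the diff.
lora_requests = [
    FakeLoRARequest(str(i + 1), i + 1, "/path/to/sql_lora")
    for i in range(NUM_LORAS)
]

for i in range(NUM_LORAS):
    random.seed(i)
    # Sample a random multiset of known adapters, shuffle it, then drop a
    # random-length tail -- the same selection pattern as the test loop.
    iter_lora_requests = random.choices(lora_requests,
                                        k=random.randint(1, NUM_LORAS))
    random.shuffle(iter_lora_requests)
    iter_lora_requests = iter_lora_requests[:-random.randint(0, NUM_LORAS)]
    print(i, sorted({req.lora_int_id for req in iter_lora_requests}))
```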