
Commit 3c9cf54

vanbasten23 authored and jinzhen-lin committed
Add lora test for tp>1 case for TPU. (vllm-project#21970)
Signed-off-by: Xiongfei Wei <[email protected]>
Signed-off-by: Jinzhen Lin <[email protected]>
1 parent 46b5ada commit 3c9cf54

File tree

1 file changed, +16 -7 lines changed


tests/tpu/lora/test_lora.py

Lines changed: 16 additions & 7 deletions
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import pytest
+from torch_xla._internal import tpu
 
 import vllm
 from vllm.lora.request import LoRARequest
@@ -27,25 +28,31 @@ def use_v1_only(monkeypatch: pytest.MonkeyPatch):
     yield
 
 
-def setup_vllm(num_loras: int) -> vllm.LLM:
+def setup_vllm(num_loras: int, tp: int) -> vllm.LLM:
     return vllm.LLM(model="Qwen/Qwen2.5-3B-Instruct",
                     num_scheduler_steps=1,
                     max_model_len=256,
                     max_seq_len_to_capture=256,
                     max_num_seqs=8,
+                    tensor_parallel_size=tp,
                     enable_lora=True,
                     max_loras=num_loras,
                     max_lora_rank=8)
 
 
-def test_single_lora():
+TPU_TENSOR_PARALLEL_SIZES = [1, tpu.num_available_chips()
+                             ] if tpu.num_available_chips() > 1 else [1]
+
+
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_single_lora(tp: int):
     """
     This test ensures we can run a single LoRA adapter on the TPU backend.
     We run "Username6568/Qwen2.5-3B-Instruct-1_plus_1_equals_1_adapter" which
     will force Qwen2.5-3B-Instruct to claim 1+1=1.
     """
 
-    llm = setup_vllm(1)
+    llm = setup_vllm(1, tp)
 
     prompt = "What is 1+1? \n"
 
@@ -63,7 +70,8 @@ def test_single_lora():
     assert int(answer) == 1
 
 
-def test_lora_hotswapping():
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_lora_hotswapping(tp: int):
     """
     This test ensures we can run multiple LoRA adapters on the TPU backend, even
     if we only have space to store 1.
@@ -79,7 +87,7 @@ def test_lora_hotswapping():
         for i in range(1, 5)
     ]
 
-    llm = setup_vllm(1)
+    llm = setup_vllm(1, tp)
 
     prompt = "What is 1+1? \n"
 
@@ -94,7 +102,8 @@ def test_lora_hotswapping():
         assert int(answer) == i + 1
 
 
-def test_multi_lora():
+@pytest.mark.parametrize("tp", TPU_TENSOR_PARALLEL_SIZES)
+def test_multi_lora(tp: int):
     """
     This test ensures we can run multiple LoRA adapters on the TPU backend, when
     we have enough space to store all of them.
@@ -109,7 +118,7 @@ def test_multi_lora():
         for i in range(1, 5)
     ]
 
-    llm = setup_vllm(4)
+    llm = setup_vllm(4, tp)
 
     prompt = "What is 1+1? \n"

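For context, here is a minimal standalone sketch of what each parametrized case exercises, condensed from the diff above. The model name, adapter repo, and a subset of the engine arguments come from the test itself; the script form, the snapshot_download step, and the LoRA request name/ID are illustrative assumptions, not the test's exact code.

# Illustrative sketch only; mirrors the parametrization added in this commit.
from huggingface_hub import snapshot_download  # assumption: fetch the adapter locally first
from torch_xla._internal import tpu

import vllm
from vllm.lora.request import LoRARequest

# Mirrors TPU_TENSOR_PARALLEL_SIZES: [1] on a single-chip host,
# [1, N] on a host with N > 1 TPU chips.
tp_sizes = ([1, tpu.num_available_chips()]
            if tpu.num_available_chips() > 1 else [1])
tp = tp_sizes[-1]  # largest available size, i.e. the tp>1 case on multi-chip hosts

# Adapter named in the test's docstring; it forces the model to claim 1+1=1.
lora_path = snapshot_download(
    "Username6568/Qwen2.5-3B-Instruct-1_plus_1_equals_1_adapter")

llm = vllm.LLM(model="Qwen/Qwen2.5-3B-Instruct",
               max_model_len=256,
               max_num_seqs=8,
               tensor_parallel_size=tp,  # the new knob this test now covers
               enable_lora=True,
               max_loras=1,
               max_lora_rank=8)
output = llm.generate(
    "What is 1+1? \n",
    lora_request=LoRARequest("adapter", 1, lora_path))  # name/ID chosen for illustration
print(f"tp={tp}: {output[0].outputs[0].text}")

With the parametrization in place, pytest collects each test once per entry in TPU_TENSOR_PARALLEL_SIZES (e.g. test_single_lora[1] and test_single_lora[4] on a 4-chip host), so the tp>1 path is covered by the same test file.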
0 commit comments
