Commit b4f9e96

[CI/Build] Clean up LoRA test (vllm-project#23890)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent 05d839c commit b4f9e96

File tree: 4 files changed, +40 -87 lines changed


.buildkite/scripts/hardware_ci/run-amd-test.sh (0 additions, 1 deletion)

@@ -164,7 +164,6 @@ if [[ $commands == *" entrypoints/llm "* ]]; then
        --ignore=entrypoints/llm/test_chat.py \
        --ignore=entrypoints/llm/test_accuracy.py \
        --ignore=entrypoints/llm/test_init.py \
-       --ignore=entrypoints/llm/test_generate_multiple_loras.py \
        --ignore=entrypoints/llm/test_prompt_validation.py "}
 fi

.buildkite/test-pipeline.yaml (4 additions, 5 deletions)

@@ -109,10 +109,9 @@ steps:
   - tests/entrypoints/offline_mode
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_collective_rpc.py
+  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

 - label: Entrypoints Test (API Server) # 40min

@@ -326,7 +325,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_llm_with_multi_loras.py
   parallelism: 4

 - label: PyTorch Compilation Unit Tests

@@ -807,13 +806,13 @@ steps:
   # requires multi-GPU testing for validation.
   - pytest -v -s -x lora/test_chatglm3_tp.py
   - pytest -v -s -x lora/test_llama_tp.py
-  - pytest -v -s -x lora/test_multi_loras_with_tp.py
+  - pytest -v -s -x lora/test_llm_with_multi_loras.py


 - label: Weight Loading Multiple GPU Test # 33min
   mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
-  num_gpus: 2
+  num_gpus: 2
   optional: true
   source_file_dependencies:
   - vllm/
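For context, the sharded LoRA job above splits tests/lora across 4 parallel Buildkite agents via pytest-shard-style --shard-id/--num-shards flags, with $$BUILDKITE_PARALLEL_JOB and $$BUILDKITE_PARALLEL_JOB_COUNT filled in per agent. Below is a minimal sketch of round-robin shard selection; the index-modulo assignment is an assumption for illustration, not pytest-shard's documented algorithm.

    # Hypothetical sketch: distribute collected tests across shards by index modulo.
    def select_shard(tests: list[str], shard_id: int, num_shards: int) -> list[str]:
        if not 0 <= shard_id < num_shards:
            raise ValueError("shard_id must be in [0, num_shards)")
        return [t for i, t in enumerate(tests) if i % num_shards == shard_id]

    # With parallelism: 4, the four shards partition the suite with no overlap.
    tests = [f"lora/test_case_{i}" for i in range(10)]
    shards = [select_shard(tests, s, 4) for s in range(4)]
    assert sum(len(s) for s in shards) == len(tests)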

tests/entrypoints/llm/test_generate_multiple_loras.py (0 additions, 80 deletions)

This file was deleted; its multi-LoRA generation coverage moves into the renamed tests/lora/test_llm_with_multi_loras.py below.

tests/lora/test_multi_loras_with_tp.py renamed to tests/lora/test_llm_with_multi_loras.py (36 additions, 1 deletion)

@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Script to test multi loras service with tp >= 2
+This script contains:
+1. test multi loras service with tp >= 2
+2. test multi loras request
 """
+import pytest
+
 from tests.utils import multi_gpu_test
 from vllm import LLM, SamplingParams
 from vllm.lora.request import LoRARequest

@@ -156,3 +160,34 @@ def check_outputs(outputs: str, expected: str):

     output_text = call_llm_get_outputs(prompt, "Alice")
     check_outputs(output_text, expected_output)
+
+
+def test_multiple_lora_requests():
+    llm = LLM(
+        model=MODEL_PATH,
+        enable_lora=True,
+        max_loras=4,
+        max_lora_rank=LORA_RANK,
+        max_model_len=512,
+        gpu_memory_utilization=0.5,
+        enforce_eager=True,
+    )
+    PROMPTS = ["Hello, my name is"] * 2
+    LORA_NAME = "Alice"
+    lora_request = [
+        LoRARequest(LORA_NAME + str(idx), idx + 1,
+                    LORA_NAME_PATH_MAP[LORA_NAME])
+        for idx in range(len(PROMPTS))
+    ]
+    # A list of LoRA requests is matched one-to-one with the prompts
+    outputs = llm.generate(PROMPTS, lora_request=lora_request)
+    assert len(PROMPTS) == len(outputs)
+
+    # ValueError is raised if the number of LoRA requests does not match the number of prompts
+    with pytest.raises(ValueError):
+        outputs = llm.generate(PROMPTS, lora_request=lora_request[:1])
+
+    # A single LoRARequest is applied to every prompt
+    single_lora_request = lora_request[0]
+    outputs = llm.generate(PROMPTS, lora_request=single_lora_request)
+    assert len(PROMPTS) == len(outputs)
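The renamed test also documents how LLM.generate pairs prompts with adapters: a list of LoRARequest objects is matched element-wise with the prompts, a single LoRARequest is broadcast to all of them, and a length mismatch raises ValueError. A minimal standalone sketch of the same pattern follows; the model name and adapter path are placeholders, not the fixtures this test uses.

    from vllm import LLM
    from vllm.lora.request import LoRARequest

    # Placeholder base model and adapter path: substitute real checkpoints.
    llm = LLM(model="meta-llama/Llama-2-7b-hf", enable_lora=True, max_loras=2)

    prompts = ["Hello, my name is", "The capital of France is"]
    requests = [
        # LoRARequest(adapter name, unique integer id >= 1, adapter weights path)
        LoRARequest(f"adapter{i}", i + 1, "/path/to/lora_adapter")
        for i in range(len(prompts))
    ]

    # One adapter per prompt, matched by position.
    outputs = llm.generate(prompts, lora_request=requests)
    assert len(outputs) == len(prompts)

    # A single request applies the same adapter to every prompt.
    outputs = llm.generate(prompts, lora_request=requests[0])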
