@@ -2,26 +2,24 @@
 """
 Script to test add_lora, remove_lora, pin_lora, list_loras functions.
 """
-
-import os
-
 import pytest
 
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.llm_engine import LLMEngine
+from vllm.entrypoints.openai.api_server import (
+    build_async_engine_client_from_engine_args)
 from vllm.lora.request import LoRARequest
 
 MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
 LORA_RANK = 8
 
-
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
+# @pytest.fixture(autouse=True)
+# def v1(run_with_both_engines_lora):
+#     # Simple autouse wrapper to run both engines for each test
+#     # This can be promoted up to conftest.py to run for every
+#     # test in a package
+#     pass
 
 
 def make_lora_request(lora_id: int):
@@ -79,22 +77,6 @@ def run_check(fn, args, expected: list):
 @pytest.mark.asyncio
 async def test_lora_functions_async():
 
-    if os.getenv("VLLM_USE_V1") == "0":
-        pytest.skip(
-            reason=
-            "V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions")
-
-    # The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
-    # environment variable. reload vllm.enging.async_llm_engine as
-    # vllm.engine.async_llm_engine.AsyncLLMEgnine changes depending on the
-    # env var.
-    import importlib
-
-    import vllm.engine.async_llm_engine
-    importlib.reload(vllm.engine.async_llm_engine)
-    from vllm.entrypoints.openai.api_server import (
-        build_async_engine_client_from_engine_args)
-
     max_loras = 4
     engine_args = AsyncEngineArgs(model=MODEL_PATH,
                                   enable_lora=True,
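
For context, the functions under test are the LoRA-management entry points on the async engine client that the diff now imports at module level. Below is a standalone sketch (not part of the diff) of roughly how they are driven; the client method names come from the test's docstring, but their exact signatures (integer adapter ids for remove/pin/list) are assumptions that may differ by vLLM version.

# Sketch only: assumes the client yielded by
# build_async_engine_client_from_engine_args exposes the coroutines named in
# the test docstring (add_lora/remove_lora/pin_lora/list_loras).
import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest


async def main():
    # Mirrors the test's engine setup: LoRA enabled, 4 adapter slots, rank 8.
    engine_args = AsyncEngineArgs(model="meta-llama/Llama-2-7b-hf",
                                  enable_lora=True,
                                  max_loras=4,
                                  max_lora_rank=8)
    async with build_async_engine_client_from_engine_args(
            engine_args) as client:
        # Register adapter id 1 from the test's LoRA module.
        await client.add_lora(
            LoRARequest(lora_name="sql_lora",
                        lora_int_id=1,
                        lora_path="yard1/llama-2-7b-sql-lora-test"))
        # Assumed to return the ids of currently registered adapters.
        assert 1 in await client.list_loras()
        # Pin the adapter so it cannot be evicted, then remove it.
        await client.pin_lora(1)
        await client.remove_lora(1)


asyncio.run(main())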
|