@@ -5,7 +5,9 @@
 """

 import os
+import unittest.mock as mock

+import pytest
 from huggingface_hub import snapshot_download

 from vllm.lora.request import LoRARequest
@@ -114,3 +116,36 @@ def test_default_mm_lora_fails_with_overridden_lora_request( |
         default_mm_loras={"audio": IMAGE_LORA_PATH},
         expected_suffix=RESPONSE_SUFFIX_WITH_LORA,
     )
+
+
+def test_default_mm_lora_does_not_expand_string_reqs(vllm_runner):
+    # Regression test ensuring that default multimodal LoRA resolution
+    # does not expand the LoRA request when the prompt is a plain string
+    # (i.e., carries no multimodal data).
+    class MockEngineException(Exception):
+        pass
+
+    vllm_runner_kwargs = {
+        **VLLM_RUNNER_BASE_KWARGS,
+        "default_mm_loras": {"audio": AUDIO_LORA_PATH},
+    }
+
+    # Avoid the full generation call since these tests are expensive;
+    # just check which LoRA request is actually submitted to the engine.
+    mock_err = "Engine is mocked for this test"
+
+    with (
+        mock.patch(
+            "vllm.v1.engine.llm_engine.LLMEngine.add_request",
+            side_effect=MockEngineException(mock_err),
+        ) as mock_add_request,
+        vllm_runner(**vllm_runner_kwargs) as vllm_model,
+    ):
+        # Fail fast once the request is actually submitted to the engine
+        with pytest.raises(MockEngineException):
+            vllm_model.llm.generate(prompts=AUDIO_PROMPT)
+
+        # Then check that the submitted LoRA request and text prompt were
+        # zipped together correctly: a string prompt gets no default LoRA.
+        engine_args, engine_kwargs = mock_add_request.call_args
+        assert engine_kwargs["lora_request"] is None
+        assert engine_kwargs["prompt_text"] == AUDIO_PROMPT
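
For contrast, a minimal sketch of the positive case this feature targets: a dict prompt that does carry audio data should have the default audio LoRA resolved and attached. This test is not part of the diff; the AUDIO_ASSET fixture and the final lora_path assertion are assumptions for illustration, reusing the same mocking pattern as the test above.

def test_default_mm_lora_expands_dict_reqs(vllm_runner):
    # Hypothetical companion test (a sketch, not in this PR): a dict
    # prompt with "audio" multimodal data should pick up the default
    # audio LoRA during request processing.
    class MockEngineException(Exception):
        pass

    vllm_runner_kwargs = {
        **VLLM_RUNNER_BASE_KWARGS,
        "default_mm_loras": {"audio": AUDIO_LORA_PATH},
    }

    with (
        mock.patch(
            "vllm.v1.engine.llm_engine.LLMEngine.add_request",
            side_effect=MockEngineException("Engine is mocked for this test"),
        ) as mock_add_request,
        vllm_runner(**vllm_runner_kwargs) as vllm_model,
    ):
        with pytest.raises(MockEngineException):
            vllm_model.llm.generate(
                prompts={
                    "prompt": AUDIO_PROMPT,
                    # AUDIO_ASSET is an assumed audio fixture; any valid
                    # audio input for the model would do here.
                    "multi_modal_data": {"audio": AUDIO_ASSET},
                }
            )

        _engine_args, engine_kwargs = mock_add_request.call_args
        lora_request = engine_kwargs["lora_request"]
        # Assumed assertions: the resolved request should point at the
        # default audio LoRA rather than being None.
        assert lora_request is not None
        assert lora_request.lora_path == AUDIO_LORA_PATH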