Commit b311efd

[Misc] Fix import error in tensorizer tests and cleanup some code (#10349)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 3d158cd commit b311efd

File tree: 7 files changed (+67 / -58 lines)

- tests/tensorizer_loader/test_tensorizer.py
- vllm/engine/llm_engine.py
- vllm/entrypoints/llm.py
- vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
- vllm/inputs/preprocess.py
- vllm/utils.py
- vllm/v1/engine/llm_engine.py

tests/tensorizer_loader/test_tensorizer.py
Lines changed: 36 additions & 34 deletions

@@ -8,10 +8,12 @@
 import openai
 import pytest
 import torch
+from huggingface_hub import snapshot_download
 from tensorizer import EncryptionParams

 from vllm import SamplingParams
 from vllm.engine.arg_utils import EngineArgs
+# yapf conflicts with isort for this docstring
 # yapf: disable
 from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
                                                          TensorSerializer,
@@ -20,13 +22,14 @@
                                                          open_stream,
                                                          serialize_vllm_model,
                                                          tensorize_vllm_model)
+# yapf: enable
+from vllm.utils import import_from_path

 from ..conftest import VllmRunner
-from ..utils import RemoteOpenAIServer
+from ..utils import VLLM_PATH, RemoteOpenAIServer
 from .conftest import retry_until_skip

-# yapf conflicts with isort for this docstring
-# yapf: enable
+
+EXAMPLES_PATH = VLLM_PATH / "examples"

 prompts = [
     "Hello, my name is",
@@ -94,8 +97,8 @@ def test_can_deserialize_s3(vllm_runner):
                          num_readers=1,
                          s3_endpoint="object.ord1.coreweave.com",
                      )) as loaded_hf_model:
-        deserialized_outputs = loaded_hf_model.generate(prompts,
-                                                        sampling_params)
+        deserialized_outputs = loaded_hf_model.generate(
+            prompts, sampling_params)
         # noqa: E501

         assert deserialized_outputs
@@ -111,23 +114,21 @@ def test_deserialized_encrypted_vllm_model_has_same_outputs(

     outputs = vllm_model.generate(prompts, sampling_params)

-    config_for_serializing = TensorizerConfig(
-        tensorizer_uri=model_path,
-        encryption_keyfile=key_path
-    )
+    config_for_serializing = TensorizerConfig(tensorizer_uri=model_path,
+                                              encryption_keyfile=key_path)
     serialize_vllm_model(get_torch_model(vllm_model),
                          config_for_serializing)

     config_for_deserializing = TensorizerConfig(tensorizer_uri=model_path,
                                                 encryption_keyfile=key_path)

-    with vllm_runner(
-            model_ref,
-            load_format="tensorizer",
-            model_loader_extra_config=config_for_deserializing) as loaded_vllm_model:  # noqa: E501
+    with vllm_runner(model_ref,
+                     load_format="tensorizer",
+                     model_loader_extra_config=config_for_deserializing
+                     ) as loaded_vllm_model:  # noqa: E501

-        deserialized_outputs = loaded_vllm_model.generate(prompts,
-                                                          sampling_params)
+        deserialized_outputs = loaded_vllm_model.generate(
+            prompts, sampling_params)
         # noqa: E501

     assert outputs == deserialized_outputs
@@ -156,14 +157,14 @@ def test_deserialized_hf_model_has_same_outputs(hf_runner, vllm_runner,


 def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
-    from huggingface_hub import snapshot_download
-
-    from examples.multilora_inference import (create_test_prompts,
-                                              process_requests)
+    multilora_inference = import_from_path(
+        "examples.multilora_inference",
+        EXAMPLES_PATH / "multilora_inference.py",
+    )

     model_ref = "meta-llama/Llama-2-7b-hf"
     lora_path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
-    test_prompts = create_test_prompts(lora_path)
+    test_prompts = multilora_inference.create_test_prompts(lora_path)

     # Serialize model before deserializing and binding LoRA adapters
     with vllm_runner(model_ref, ) as vllm_model:
@@ -186,7 +187,8 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
                      max_num_seqs=50,
                      max_model_len=1000,
                      ) as loaded_vllm_model:
-        process_requests(loaded_vllm_model.model.llm_engine, test_prompts)
+        multilora_inference.process_requests(
+            loaded_vllm_model.model.llm_engine, test_prompts)

     assert loaded_vllm_model
@@ -217,8 +219,11 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):

     ## Start OpenAI API server
     openai_args = [
-        "--dtype", "float16", "--load-format",
-        "tensorizer", "--model-loader-extra-config",
+        "--dtype",
+        "float16",
+        "--load-format",
+        "tensorizer",
+        "--model-loader-extra-config",
         json.dumps(model_loader_extra_config),
     ]
@@ -251,8 +256,7 @@ def test_raise_value_error_on_invalid_load_format(vllm_runner):
     torch.cuda.empty_cache()


-@pytest.mark.skipif(torch.cuda.device_count() < 2,
-                    reason="Requires 2 GPUs")
+@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires 2 GPUs")
 def test_tensorizer_with_tp_path_without_template(vllm_runner):
     with pytest.raises(ValueError):
         model_ref = "EleutherAI/pythia-1.4b"
@@ -271,10 +275,9 @@ def test_tensorizer_with_tp_path_without_template(vllm_runner):
         )


-@pytest.mark.skipif(torch.cuda.device_count() < 2,
-                    reason="Requires 2 GPUs")
-def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
-                                                                    tmp_path):
+@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires 2 GPUs")
+def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(
+        vllm_runner, tmp_path):
     model_ref = "EleutherAI/pythia-1.4b"
     # record outputs from un-sharded un-tensorized model
     with vllm_runner(
@@ -313,13 +316,12 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
             disable_custom_all_reduce=True,
             enforce_eager=True,
             model_loader_extra_config=tensorizer_config) as loaded_vllm_model:
-        deserialized_outputs = loaded_vllm_model.generate(prompts,
-                                                          sampling_params)
+        deserialized_outputs = loaded_vllm_model.generate(
+            prompts, sampling_params)

     assert outputs == deserialized_outputs


-
 @retry_until_skip(3)
 def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
     gc.collect()
@@ -337,8 +339,8 @@ def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
     with vllm_runner(model_ref,
                      load_format="tensorizer",
                      model_loader_extra_config=config) as loaded_vllm_model:
-        deserialized_outputs = loaded_vllm_model.generate(prompts,
-                                                          sampling_params)
+        deserialized_outputs = loaded_vllm_model.generate(
+            prompts, sampling_params)
         # noqa: E501

     assert outputs == deserialized_outputs
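
Note: the import error in the commit title comes from the old in-function import `from examples.multilora_inference import ...`, which only works if the repository's examples/ directory happens to be importable as a package from the test process. The rough standalone sketch below shows the replacement pattern; it assumes VLLM_PATH (defined in tests/utils.py) points at the repository root, and the LoRA adapter path is a placeholder, not part of the commit.

    # Hedged sketch of the new loading pattern used by the test above.
    from vllm.utils import import_from_path

    from tests.utils import VLLM_PATH  # repository root exposed by the test utilities

    EXAMPLES_PATH = VLLM_PATH / "examples"

    # Load the example script by file path instead of importing it as a package,
    # so the test no longer depends on how pytest was invoked.
    multilora_inference = import_from_path(
        "examples.multilora_inference",
        EXAMPLES_PATH / "multilora_inference.py",
    )

    # The loaded module is then used like a normal import.
    test_prompts = multilora_inference.create_test_prompts("/path/to/lora-adapter")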

vllm/engine/llm_engine.py
Lines changed: 0 additions & 3 deletions

@@ -2002,9 +2002,6 @@ def create_trace_span(self, seq_group: SequenceGroup) -> None:
                 SpanAttributes.LLM_LATENCY_TIME_IN_MODEL_EXECUTE,
                 metrics.model_execute_time)

-    def is_encoder_decoder_model(self):
-        return self.input_preprocessor.is_encoder_decoder_model()
-
     def _validate_model_inputs(self, inputs: ProcessorInputs,
                                lora_request: Optional[LoRARequest]):
         if is_encoder_decoder_inputs(inputs):

vllm/entrypoints/llm.py
Lines changed: 0 additions & 3 deletions

@@ -964,6 +964,3 @@ def _run_engine(
         # This is necessary because some requests may be finished earlier than
         # its previous requests.
         return sorted(outputs, key=lambda x: int(x.request_id))
-
-    def _is_encoder_decoder_model(self):
-        return self.llm_engine.is_encoder_decoder_model()

vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
Lines changed: 8 additions & 9 deletions

@@ -1,5 +1,3 @@
-import importlib
-import importlib.util
 import os
 from functools import cached_property
 from typing import Callable, Dict, List, Optional, Sequence, Type, Union
@@ -9,7 +7,7 @@
                                               ExtractedToolCallInformation)
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import is_list_of
+from vllm.utils import import_from_path, is_list_of

 logger = init_logger(__name__)

@@ -149,13 +147,14 @@ def _register(module):
     @classmethod
     def import_tool_parser(cls, plugin_path: str) -> None:
         """
-        Import a user defined tool parser by the path of the tool parser define
+        Import a user-defined tool parser by the path of the tool parser define
         file.
         """
         module_name = os.path.splitext(os.path.basename(plugin_path))[0]
-        spec = importlib.util.spec_from_file_location(module_name, plugin_path)
-        if spec is None or spec.loader is None:
-            logger.error("load %s from %s failed.", module_name, plugin_path)
+
+        try:
+            import_from_path(module_name, plugin_path)
+        except Exception:
+            logger.exception("Failed to load module '%s' from %s.",
+                             module_name, plugin_path)
             return
-        module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(module)
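
Note: with this change, any exception raised while executing a plugin file is logged with its traceback (logger.exception) and swallowed, rather than only guarding against a missing import spec. A minimal usage sketch follows; it assumes the enclosing manager class in this file is ToolParserManager (as in vLLM's tool_parsers package), and the plugin path is illustrative.

    # Hedged sketch: loading a tool-parser plugin file through the updated classmethod.
    from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
        ToolParserManager)

    # If the plugin file raises during import, the failure is logged with a
    # traceback and the call simply returns without raising.
    ToolParserManager.import_tool_parser("/path/to/my_tool_parser.py")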

vllm/inputs/preprocess.py
Lines changed: 3 additions & 6 deletions

@@ -67,7 +67,7 @@ def get_decoder_start_token_id(self) -> Optional[int]:
         model config is unavailable.
         '''

-        if not self.is_encoder_decoder_model():
+        if not self.model_config.is_encoder_decoder:
             print_warning_once("Using None for decoder start token id because "
                                "this is not an encoder/decoder model.")
             return None
@@ -632,7 +632,7 @@ def preprocess(
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
-        if self.is_encoder_decoder_model():
+        if self.model_config.is_encoder_decoder:
             # Encoder-decoder model requires special mapping of
             # input prompts to encoder & decoder
             return self._process_encoder_decoder_prompt(
@@ -660,7 +660,7 @@ async def preprocess_async(
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
     ) -> ProcessorInputs:
         """Async version of :meth:`preprocess`."""
-        if self.is_encoder_decoder_model():
+        if self.model_config.is_encoder_decoder:
             # Encoder-decoder model requires special mapping of
             # input prompts to encoder & decoder
             return await self._process_encoder_decoder_prompt_async(
@@ -679,6 +679,3 @@ async def preprocess_async(
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
         )
-
-    def is_encoder_decoder_model(self):
-        return self.model_config.is_encoder_decoder
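
Note: the cleanup removes the thin `is_encoder_decoder_model()` wrappers here and in LLMEngine/LLM; callers read the flag off the model config directly. A hedged sketch of the resulting access pattern; the model name is illustrative and actually running it downloads weights.

    # Hedged sketch, not part of the commit: checking the encoder/decoder flag
    # through the model config instead of the removed wrapper methods.
    from vllm import LLM

    llm = LLM(model="facebook/bart-base")  # an encoder/decoder model, for illustration
    print(llm.llm_engine.model_config.is_encoder_decoder)  # expected: True for BART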

vllm/utils.py
Lines changed: 20 additions & 0 deletions

@@ -5,6 +5,7 @@
 import enum
 import gc
 import getpass
+import importlib.util
 import inspect
 import ipaddress
 import os
@@ -1539,6 +1540,25 @@ def is_in_doc_build() -> bool:
     return False


+def import_from_path(module_name: str, file_path: Union[str, os.PathLike]):
+    """
+    Import a Python file according to its file path.
+
+    Based on the official recipe:
+    https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
+    """
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ModuleNotFoundError(f"No module named '{module_name}'")
+
+    assert spec.loader is not None
+
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
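
For context, a minimal usage sketch of the new helper; the module name and file path below are placeholders, not part of the commit.

    # import_from_path loads a Python source file as a module, registers it in
    # sys.modules under the given name, and returns the module object.
    from vllm.utils import import_from_path

    helpers = import_from_path(
        "my_helpers",           # name under which the module lands in sys.modules
        "/tmp/my_helpers.py",   # plain .py file anywhere on disk
    )
    print(helpers.__name__)     # "my_helpers"

    # If no import spec can be built for the path, the helper raises
    # ModuleNotFoundError; callers such as ToolParserManager.import_tool_parser
    # catch and log failures instead of propagating them.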

vllm/v1/engine/llm_engine.py
Lines changed: 0 additions & 3 deletions

@@ -163,9 +163,6 @@ def step(self) -> List[RequestOutput]:
     def get_model_config(self):
         pass

-    def is_encoder_decoder_model(self):
-        pass
-
     def start_profile(self):
         pass
