Skip to content

Commit 3821787

Browse files
authored
[Fix] fix offline env use local mode path (#22526)
Signed-off-by: rongfu.leng <[email protected]>
1 parent c6d80a7 commit 3821787

File tree

3 files changed

+65
-3
lines changed

3 files changed

+65
-3
lines changed

tests/entrypoints/offline_mode/test_offline_mode.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
"""Tests for HF_HUB_OFFLINE mode"""
4+
import dataclasses
45
import importlib
56
import sys
67

@@ -9,6 +10,7 @@
910

1011
from vllm import LLM
1112
from vllm.distributed import cleanup_dist_env_and_memory
13+
from vllm.engine.arg_utils import EngineArgs
1214

1315
MODEL_CONFIGS = [
1416
{
@@ -108,3 +110,36 @@ def _re_import_modules():
108110
# Error this test if reloading a module failed
109111
if reload_exception is not None:
110112
raise reload_exception
113+
114+
115+
@pytest.mark.skip_global_cleanup
116+
@pytest.mark.usefixtures("cache_models")
117+
def test_model_from_huggingface_offline(monkeypatch: pytest.MonkeyPatch):
118+
# Set HF to offline mode and ensure we can still construct an LLM
119+
with monkeypatch.context() as m:
120+
try:
121+
m.setenv("HF_HUB_OFFLINE", "1")
122+
m.setenv("VLLM_NO_USAGE_STATS", "1")
123+
124+
def disable_connect(*args, **kwargs):
125+
raise RuntimeError("No http calls allowed")
126+
127+
m.setattr(
128+
urllib3.connection.HTTPConnection,
129+
"connect",
130+
disable_connect,
131+
)
132+
m.setattr(
133+
urllib3.connection.HTTPSConnection,
134+
"connect",
135+
disable_connect,
136+
)
137+
# Need to re-import huggingface_hub
138+
# and friends to setup offline mode
139+
_re_import_modules()
140+
engine_args = EngineArgs(model="facebook/opt-125m")
141+
LLM(**dataclasses.asdict(engine_args))
142+
finally:
143+
# Reset the environment after the test
144+
# NB: Assuming tests are run in online mode
145+
_re_import_modules()

vllm/engine/arg_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Literal, Optional, Type, TypeVar, Union, cast, get_args,
1616
get_origin)
1717

18+
import huggingface_hub
1819
import regex as re
1920
import torch
2021
from pydantic import TypeAdapter, ValidationError
@@ -39,7 +40,7 @@
3940
from vllm.ray.lazy_utils import is_ray_initialized
4041
from vllm.reasoning import ReasoningParserManager
4142
from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
42-
from vllm.transformers_utils.config import is_interleaved
43+
from vllm.transformers_utils.config import get_model_path, is_interleaved
4344
from vllm.transformers_utils.utils import check_gguf_file
4445
from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, FlexibleArgumentParser,
4546
GiB_bytes, get_ip, is_in_ray_actor)
@@ -457,6 +458,13 @@ def __post_init__(self):
457458
# Setup plugins
458459
from vllm.plugins import load_general_plugins
459460
load_general_plugins()
461+
# When using HF offline mode, replace the model id with the local model path
462+
if huggingface_hub.constants.HF_HUB_OFFLINE:
463+
model_id = self.model
464+
self.model = get_model_path(self.model, self.revision)
465+
logger.info(
466+
"HF_HUB_OFFLINE is True, replace model_id [%s] " \
467+
"to model_path [%s]",model_id, self.model)
460468

461469
@staticmethod
462470
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

vllm/transformers_utils/config.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from huggingface_hub import list_repo_files as hf_list_repo_files
1515
from huggingface_hub import try_to_load_from_cache
1616
from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
17-
HFValidationError, LocalEntryNotFoundError,
17+
LocalEntryNotFoundError,
1818
RepositoryNotFoundError,
1919
RevisionNotFoundError)
2020
from transformers import GenerationConfig, PretrainedConfig
@@ -335,6 +335,7 @@ def maybe_override_with_speculators_target_model(
335335
gguf_model_repo = Path(model).parent
336336
else:
337337
gguf_model_repo = None
338+
kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
338339
config_dict, _ = PretrainedConfig.get_config_dict(
339340
model if gguf_model_repo is None else gguf_model_repo,
340341
revision=revision,
@@ -400,6 +401,7 @@ def get_config(
400401
raise ValueError(error_message) from e
401402

402403
if config_format == ConfigFormat.HF:
404+
kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
403405
config_dict, _ = PretrainedConfig.get_config_dict(
404406
model,
405407
revision=revision,
@@ -532,7 +534,7 @@ def try_get_local_file(model: Union[str, Path],
532534
revision=revision)
533535
if isinstance(cached_filepath, str):
534536
return Path(cached_filepath)
535-
except HFValidationError:
537+
except ValueError:
536538
...
537539
return None
538540

@@ -908,3 +910,20 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
908910
exc_info=e)
909911

910912
return max_position_embeddings
913+
914+
915+
def get_model_path(model: Union[str, Path], revision: Optional[str] = None):
916+
if os.path.exists(model):
917+
return model
918+
assert huggingface_hub.constants.HF_HUB_OFFLINE
919+
common_kwargs = {
920+
"local_files_only": huggingface_hub.constants.HF_HUB_OFFLINE,
921+
"revision": revision,
922+
}
923+
924+
if envs.VLLM_USE_MODELSCOPE:
925+
from modelscope.hub.snapshot_download import snapshot_download
926+
return snapshot_download(model_id=model, **common_kwargs)
927+
928+
from huggingface_hub import snapshot_download
929+
return snapshot_download(repo_id=model, **common_kwargs)

0 commit comments

Comments (0)