
Commit f2e263b

[Bugfix] Offline mode fix (vllm-project#8376)
Signed-off-by: Joe Runde <[email protected]>
1 parent 1f0c75a commit f2e263b

File tree: 4 files changed (+106, -2 lines changed)


.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/openai
   - pytest -v -s entrypoints/test_chat_utils.py
+  - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests


 - label: Distributed Tests (4 GPUs) # 10min

tests/entrypoints/offline_mode/__init__.py

Whitespace-only changes.
tests/entrypoints/offline_mode/test_offline_mode.py

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+"""Tests for HF_HUB_OFFLINE mode"""
+import importlib
+import sys
+import weakref
+
+import pytest
+
+from vllm import LLM
+
+from ...conftest import cleanup
+
+MODEL_NAME = "facebook/opt-125m"
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(model=MODEL_NAME,
+              max_num_batched_tokens=4096,
+              tensor_parallel_size=1,
+              gpu_memory_utilization=0.10,
+              enforce_eager=True)
+
+    with llm.deprecate_legacy_api():
+        yield weakref.proxy(llm)
+
+        del llm
+
+    cleanup()
+
+
+@pytest.mark.skip_global_cleanup
+def test_offline_mode(llm: LLM, monkeypatch):
+    # we use the llm fixture to ensure the model files are in-cache
+    del llm
+
+    # Set HF to offline mode and ensure we can still construct an LLM
+    try:
+        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
+        # Need to re-import huggingface_hub and friends to setup offline mode
+        _re_import_modules()
+        # Cached model files should be used in offline mode
+        LLM(model=MODEL_NAME,
+            max_num_batched_tokens=4096,
+            tensor_parallel_size=1,
+            gpu_memory_utilization=0.10,
+            enforce_eager=True)
+    finally:
+        # Reset the environment after the test
+        # NB: Assuming tests are run in online mode
+        monkeypatch.delenv("HF_HUB_OFFLINE")
+        _re_import_modules()
+        pass
+
+
+def _re_import_modules():
+    hf_hub_module_names = [
+        k for k in sys.modules if k.startswith("huggingface_hub")
+    ]
+    transformers_module_names = [
+        k for k in sys.modules if k.startswith("transformers")
+        and not k.startswith("transformers_modules")
+    ]
+
+    reload_exception = None
+    for module_name in hf_hub_module_names + transformers_module_names:
+        try:
+            importlib.reload(sys.modules[module_name])
+        except Exception as e:
+            reload_exception = e
+            # Try to continue clean up so that other tests are less likely to
+            # be affected
+
+    # Error this test if reloading a module failed
+    if reload_exception is not None:
+        raise reload_exception
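The _re_import_modules() helper above is needed because huggingface_hub reads HF_HUB_OFFLINE into a module-level constant when it is first imported, so flipping the environment variable after import does not switch an already-loaded copy into offline mode. A minimal sketch of that behaviour, separate from the commit and assuming current huggingface_hub releases keep the flag in huggingface_hub.constants.HF_HUB_OFFLINE:

import importlib
import os
import sys

import huggingface_hub.constants

# The constant was evaluated when the module was imported (typically False
# in an online environment), so changing the env var alone has no effect.
os.environ["HF_HUB_OFFLINE"] = "1"
print(huggingface_hub.constants.HF_HUB_OFFLINE)

# Re-executing the module picks up the new environment value.
importlib.reload(sys.modules["huggingface_hub.constants"])
print(huggingface_hub.constants.HF_HUB_OFFLINE)  # now True

In practice other modules in both libraries cache state derived from this flag, which is why the test reloads every huggingface_hub* and transformers* module rather than just the constants.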

vllm/transformers_utils/config.py

Lines changed: 28 additions & 2 deletions
@@ -4,7 +4,9 @@
 from pathlib import Path
 from typing import Any, Dict, Optional, Type, Union
 
-from huggingface_hub import file_exists, hf_hub_download
+import huggingface_hub
+from huggingface_hub import (file_exists, hf_hub_download,
+                             try_to_load_from_cache)
 from transformers import GenerationConfig, PretrainedConfig
 from transformers.models.auto.image_processing_auto import (
     get_image_processor_config)
@@ -70,7 +72,22 @@ def file_or_path_exists(model: Union[str, Path], config_name, revision,
     if Path(model).exists():
         return (Path(model) / config_name).is_file()
 
-    return file_exists(model, config_name, revision=revision, token=token)
+    # Offline mode support: Check if config file is cached already
+    cached_filepath = try_to_load_from_cache(repo_id=model,
+                                             filename=config_name,
+                                             revision=revision)
+    if isinstance(cached_filepath, str):
+        # The config file exists in cache- we can continue trying to load
+        return True
+
+    # NB: file_exists will only check for the existence of the config file on
+    # hf_hub. This will fail in offline mode.
+    try:
+        return file_exists(model, config_name, revision=revision, token=token)
+    except huggingface_hub.errors.OfflineModeIsEnabled:
+        # Don't raise in offline mode, all we know is that we don't have this
+        # file cached.
+        return False
 
 
 def get_config(
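For reference, a minimal sketch, separate from the commit, of the cache-first lookup the hunk above relies on: try_to_load_from_cache() resolves a repo file against the local Hugging Face cache without touching the network and returns the cached path as a str when the file is present (the model and file names below are only examples):

from huggingface_hub import try_to_load_from_cache

cached = try_to_load_from_cache(repo_id="facebook/opt-125m",
                                filename="config.json")
if isinstance(cached, str):
    # Same condition as the patched file_or_path_exists(): a cached copy
    # exists, so no hub request is needed.
    print(f"config.json found in cache at {cached}")
else:
    # None (never downloaded) or a cached-non-existence sentinel; only then
    # does the patched code fall back to the networked file_exists() call.
    print("config.json is not in the local cache")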
@@ -102,6 +119,15 @@ def get_config(
                                  token=kwargs.get("token")):
             config_format = ConfigFormat.MISTRAL
         else:
+            # If we're in offline mode and found no valid config format, then
+            # raise an offline mode error to indicate to the user that they
+            # don't have files cached and may need to go online.
+            # This is conveniently triggered by calling file_exists().
+            file_exists(model,
+                        HF_CONFIG_NAME,
+                        revision=revision,
+                        token=kwargs.get("token"))
+
             raise ValueError(f"No supported config format found in {model}")
 
     if config_format == ConfigFormat.HF:
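The extra file_exists() call in the else: branch above is there purely for its side effect: with HF_HUB_OFFLINE=1 it raises OfflineModeIsEnabled, so the user learns they are offline with nothing cached rather than seeing only the generic ValueError. A minimal sketch of that behaviour, separate from the commit:

import os

# Offline mode must be set before huggingface_hub is imported.
os.environ["HF_HUB_OFFLINE"] = "1"

import huggingface_hub
from huggingface_hub import file_exists

try:
    # Hub lookups now fail fast instead of making a network request.
    file_exists("facebook/opt-125m", "config.json")
except huggingface_hub.errors.OfflineModeIsEnabled as exc:
    print(f"Offline mode error surfaced to the user: {exc}")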
