Skip to content

Commit a945fcc

Browse files
authored
Add trust-remote-code flag to handle remote tokenizers (#364)
1 parent be54f8e commit a945fcc

File tree

5 files changed

+39
-6
lines changed

5 files changed

+39
-6
lines changed

vllm/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ class ModelConfig:
2020
tokenizer: Name or path of the huggingface tokenizer to use.
2121
tokenizer_mode: Tokenizer mode. "auto" will use the fast tokenizer if
2222
available, and "slow" will always use the slow tokenizer.
23+
trust_remote_code: Trust remote code (e.g., from HuggingFace) when
24+
downloading the model and tokenizer.
2325
download_dir: Directory to download and load the weights, default to the
2426
default cache directory of huggingface.
2527
use_np_weights: Save a numpy copy of model weights for faster loading.
@@ -36,6 +38,7 @@ def __init__(
3638
model: str,
3739
tokenizer: str,
3840
tokenizer_mode: str,
41+
trust_remote_code: bool,
3942
download_dir: Optional[str],
4043
use_np_weights: bool,
4144
use_dummy_weights: bool,
@@ -45,6 +48,7 @@ def __init__(
4548
self.model = model
4649
self.tokenizer = tokenizer
4750
self.tokenizer_mode = tokenizer_mode
51+
self.trust_remote_code = trust_remote_code
4852
self.download_dir = download_dir
4953
self.use_np_weights = use_np_weights
5054
self.use_dummy_weights = use_dummy_weights

vllm/engine/arg_utils.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class EngineArgs:
1313
model: str
1414
tokenizer: Optional[str] = None
1515
tokenizer_mode: str = 'auto'
16+
trust_remote_code: bool = False
1617
download_dir: Optional[str] = None
1718
use_np_weights: bool = False
1819
use_dummy_weights: bool = False
@@ -55,6 +56,9 @@ def add_cli_args(
5556
help='tokenizer mode. "auto" will use the fast '
5657
'tokenizer if available, and "slow" will '
5758
'always use the slow tokenizer.')
59+
parser.add_argument('--trust-remote-code',
60+
action='store_true',
61+
help='trust remote code from huggingface')
5862
parser.add_argument('--download-dir',
5963
type=str,
6064
default=EngineArgs.download_dir,
@@ -141,9 +145,10 @@ def create_engine_configs(
141145
) -> Tuple[ModelConfig, CacheConfig, ParallelConfig, SchedulerConfig]:
142146
# Initialize the configs.
143147
model_config = ModelConfig(self.model, self.tokenizer,
144-
self.tokenizer_mode, self.download_dir,
145-
self.use_np_weights, self.use_dummy_weights,
146-
self.dtype, self.seed)
148+
self.tokenizer_mode, self.trust_remote_code,
149+
self.download_dir, self.use_np_weights,
150+
self.use_dummy_weights, self.dtype,
151+
self.seed)
147152
cache_config = CacheConfig(self.block_size,
148153
self.gpu_memory_utilization,
149154
self.swap_space)

vllm/engine/llm_engine.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def __init__(
6262
f"model={model_config.model!r}, "
6363
f"tokenizer={model_config.tokenizer!r}, "
6464
f"tokenizer_mode={model_config.tokenizer_mode}, "
65+
f"trust_remote_code={model_config.trust_remote_code}, "
6566
f"dtype={model_config.dtype}, "
6667
f"use_dummy_weights={model_config.use_dummy_weights}, "
6768
f"download_dir={model_config.download_dir!r}, "
@@ -78,7 +79,9 @@ def __init__(
7879
self._verify_args()
7980

8081
self.tokenizer = get_tokenizer(
81-
model_config.tokenizer, tokenizer_mode=model_config.tokenizer_mode)
82+
model_config.tokenizer,
83+
tokenizer_mode=model_config.tokenizer_mode,
84+
trust_remote_code=model_config.trust_remote_code)
8285
self.seq_counter = Counter()
8386

8487
# Create the parallel GPU workers.

vllm/entrypoints/llm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class LLM:
2828
tokenizer: The name or path of a HuggingFace Transformers tokenizer.
2929
tokenizer_mode: The tokenizer mode. "auto" will use the fast tokenizer
3030
if available, and "slow" will always use the slow tokenizer.
31+
trust_remote_code: Trust remote code (e.g., from HuggingFace) when
32+
downloading the model and tokenizer.
3133
tensor_parallel_size: The number of GPUs to use for distributed
3234
execution with tensor parallelism.
3335
dtype: The data type for the model weights and activations. Currently,
@@ -43,6 +45,7 @@ def __init__(
4345
model: str,
4446
tokenizer: Optional[str] = None,
4547
tokenizer_mode: str = "auto",
48+
trust_remote_code: bool = False,
4649
tensor_parallel_size: int = 1,
4750
dtype: str = "auto",
4851
seed: int = 0,
@@ -54,6 +57,7 @@ def __init__(
5457
model=model,
5558
tokenizer=tokenizer,
5659
tokenizer_mode=tokenizer_mode,
60+
trust_remote_code=trust_remote_code,
5761
tensor_parallel_size=tensor_parallel_size,
5862
dtype=dtype,
5963
seed=seed,

vllm/transformers_utils/tokenizer.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def get_tokenizer(
1515
tokenizer_name: str,
1616
*args,
1717
tokenizer_mode: str = "auto",
18+
trust_remote_code: bool = False,
1819
**kwargs,
1920
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
2021
"""Gets a tokenizer for the given model name via Huggingface."""
@@ -31,15 +32,31 @@ def get_tokenizer(
3132
f"using '{_FAST_LLAMA_TOKENIZER}' instead of the original "
3233
"tokenizer.")
3334
try:
34-
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, *args,
35-
**kwargs)
35+
tokenizer = AutoTokenizer.from_pretrained(
36+
tokenizer_name,
37+
trust_remote_code=trust_remote_code,
38+
*args,
39+
**kwargs)
3640
except TypeError as e:
3741
# The LLaMA tokenizer causes a protobuf error in some environments.
3842
err_msg = (
3943
"Failed to load the tokenizer. If you are using a LLaMA-based "
4044
f"model, use '{_FAST_LLAMA_TOKENIZER}' instead of the original "
4145
"tokenizer.")
4246
raise RuntimeError(err_msg) from e
47+
except ValueError as e:
48+
# If the error pertains to the tokenizer class not existing or not
49+
# currently being imported, suggest using the --trust-remote-code flag.
50+
if (e is not None and
51+
("does not exist or is not currently imported." in str(e)
52+
or "requires you to execute the tokenizer file" in str(e))):
53+
err_msg = (
54+
"Failed to load the tokenizer. If the tokenizer is a custom "
55+
"tokenizer not yet available in the HuggingFace transformers "
56+
"library, consider using the --trust-remote-code flag.")
57+
raise RuntimeError(err_msg) from e
58+
else:
59+
raise e
4360

4461
if not isinstance(tokenizer, PreTrainedTokenizerFast):
4562
logger.warning(

0 commit comments

Comments (0)