Skip to content

Commit 8e9f19d

Browse files
DarkLight1337 and choprahetarth
authored and committed
[Optimization] Cache chat template result when processor fails to be loaded (vllm-project#25341)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent c26434e commit 8e9f19d

File tree

1 file changed

+49
-22
lines changed

1 file changed

+49
-22
lines changed

vllm/entrypoints/chat_utils.py

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,51 @@ def resolve_mistral_chat_template(
421421
return None
422422

423423

424+
_PROCESSOR_CHAT_TEMPLATES = dict[tuple[str, bool], Optional[str]]()
425+
"""
426+
Used in `_try_get_processor_chat_template` to avoid calling
427+
`cached_get_processor` again if the processor fails to be loaded.
428+
429+
This is needed because `lru_cache` does not cache when an exception happens.
430+
"""
431+
432+
433+
def _try_get_processor_chat_template(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    model_config: ModelConfig,
) -> Optional[str]:
    """Return the chat template exposed by the model's ``AutoProcessor``.

    Returns ``None`` when the processor cannot be loaded, is not a
    ``ProcessorMixin``, or defines no chat template. Both successes and
    failures are memoized in ``_PROCESSOR_CHAT_TEMPLATES`` so that a
    failing processor load is only attempted once per
    ``(name_or_path, trust_remote_code)`` pair.
    """
    key = (tokenizer.name_or_path, model_config.trust_remote_code)
    if key in _PROCESSOR_CHAT_TEMPLATES:
        # Cache hit covers prior failures (stored as None) as well.
        return _PROCESSOR_CHAT_TEMPLATES[key]

    resolved: Optional[str] = None
    try:
        processor = cached_get_processor(
            tokenizer.name_or_path,
            processor_cls=(
                PreTrainedTokenizer,
                PreTrainedTokenizerFast,
                ProcessorMixin,
            ),
            trust_remote_code=model_config.trust_remote_code,
        )
        if isinstance(processor, ProcessorMixin):
            # getattr with a default collapses the original
            # hasattr + is-not-None check into one lookup.
            resolved = getattr(processor, "chat_template", None)
    except Exception:
        # Loading can fail for many reasons (missing files, remote-code
        # restrictions, ...); all of them mean "no processor template".
        logger.debug(
            "Failed to load AutoProcessor chat template for %s",
            tokenizer.name_or_path,
            exc_info=True,
        )

    _PROCESSOR_CHAT_TEMPLATES[key] = resolved
    return resolved
467+
468+
424469
def resolve_hf_chat_template(
425470
tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
426471
chat_template: Optional[str],
@@ -434,28 +479,10 @@ def resolve_hf_chat_template(
434479

435480
# 2nd priority: AutoProcessor chat template, unless tool calling is enabled
436481
if tools is None:
437-
try:
438-
processor = cached_get_processor(
439-
tokenizer.name_or_path,
440-
processor_cls=(
441-
PreTrainedTokenizer,
442-
PreTrainedTokenizerFast,
443-
ProcessorMixin,
444-
),
445-
trust_remote_code=model_config.trust_remote_code,
446-
)
447-
if (
448-
isinstance(processor, ProcessorMixin)
449-
and hasattr(processor, "chat_template")
450-
and processor.chat_template is not None
451-
):
452-
return processor.chat_template
453-
except Exception:
454-
logger.debug(
455-
"Failed to load AutoProcessor chat template for %s",
456-
tokenizer.name_or_path,
457-
exc_info=True,
458-
) # noqa: E501
482+
chat_template = _try_get_processor_chat_template(tokenizer,
483+
model_config)
484+
if chat_template is not None:
485+
return chat_template
459486

460487
# 3rd priority: AutoTokenizer chat template
461488
try:

0 commit comments

Comments
 (0)