From 3bb86d69cd4ff50d99cfc64a9d559d5dcf21af12 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:20:00 +0200
Subject: [PATCH] Fix early CUDA initialisation

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .../generation/continuous_batching/continuous_api.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/generation/continuous_batching/continuous_api.py b/src/transformers/generation/continuous_batching/continuous_api.py
index e9adc98fc6af..dcc6f287c51c 100644
--- a/src/transformers/generation/continuous_batching/continuous_api.py
+++ b/src/transformers/generation/continuous_batching/continuous_api.py
@@ -27,7 +27,6 @@
 
 from ...configuration_utils import PreTrainedConfig
 from ...generation.configuration_utils import GenerationConfig
-from ...integrations.hub_kernels import load_and_register_kernel
 from ...utils.logging import logging
 from ...utils.metrics import ContinuousBatchProcessorMetrics, attach_tracer, traced
 from .cache import PagedAttentionCache
@@ -609,6 +608,8 @@ def __init__(
         """
         self.model = model.eval()
         if "paged|" not in model.config._attn_implementation:
+            from ...integrations.hub_kernels import load_and_register_kernel
+
             attn_implementation = "paged|" + self.model.config._attn_implementation
             load_and_register_kernel(attn_implementation)
             model.set_attn_implementation(attn_implementation)
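
The fix moves the `load_and_register_kernel` import out of module scope and into the branch
that actually needs it, so merely importing `continuous_api` no longer pulls in `hub_kernels`
(and whatever CUDA initialisation its import performs) up front. The sketch below shows the
same deferred-import pattern in isolation; the function name and the `torch.cuda.init()`
stand-in are illustrative assumptions, not the real `hub_kernels` API.

    # lazy_cuda_import_sketch.py -- hypothetical, mirrors only the structure of the change
    def maybe_enable_paged_attention(attn_implementation: str) -> str:
        """Prefix the implementation with "paged|", loading CUDA-touching code lazily."""
        if "paged|" not in attn_implementation:
            # Deferred import: any import-time side effects (e.g. creating a CUDA
            # context) happen only when this branch runs, not when the enclosing
            # module is imported.
            import torch

            if torch.cuda.is_available():
                torch.cuda.init()  # stands in for the kernel-registration side effect

            attn_implementation = "paged|" + attn_implementation
        return attn_implementation


    if __name__ == "__main__":
        # Importing this file alone never touches CUDA; only calling the function can.
        print(maybe_enable_paged_attention("sdpa"))  # -> "paged|sdpa"

Keeping heavyweight, optional dependencies off the module import path like this is a common
way to avoid paying their import-time side effects in processes that never hit the code path.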