@@ -384,15 +384,19 @@ def _load_model(
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return AutoTokenizer.from_pretrained(tokenizer_path)
+                # Use local_files_only=True to prevent network requests for validation.
+                # The tokenizer files should already exist locally in the model directory.
+                return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
+                # Use local_files_only=True to prevent network requests for validation.
                 return Qwen3ForCausalLM.from_pretrained(
                     text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
+                    local_files_only=True,
                 )
 
         raise ValueError(
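
The change in this hunk hinges on one contract in `transformers`: with `local_files_only=True`, `from_pretrained` resolves everything from disk and raises `OSError` instead of reaching for the Hugging Face Hub when files are missing. A minimal sketch of that behavior, with a hypothetical local path:

```python
# Minimal sketch of the local_files_only contract; the path is a
# hypothetical stand-in for the model's tokenizer directory.
from transformers import AutoTokenizer

try:
    # Resolves the tokenizer entirely from disk (a local directory or
    # the HF cache); no network connection is opened.
    tokenizer = AutoTokenizer.from_pretrained(
        "/path/to/model/tokenizer",
        local_files_only=True,
    )
except OSError:
    # Raised when the files are absent locally; without the flag,
    # from_pretrained would try to download them instead.
    print("tokenizer files not found locally")
```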
@@ -526,12 +530,27 @@ def _load_model(
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace.
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try the local cache first to support offline usage after the initial download.
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network
+        download if the tokenizer hasn't been cached yet. This ensures offline
+        operation after the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage).
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in the cache yet; download from HuggingFace.
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,
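
One way to sanity-check the new fallback is to run it in a process that cannot reach the network at all: `HF_HUB_OFFLINE=1` is a real `huggingface_hub` switch that makes any download attempt fail fast, so only a warm cache can satisfy the first branch. A sketch, assuming the tokenizer was downloaded on an earlier run (the repo id is a placeholder for whatever `DEFAULT_TOKENIZER_SOURCE` points at):

```python
# Sketch: exercising the cache-first branch in a fully offline process.
import os

# Must be set before transformers/huggingface_hub are imported.
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoTokenizer

# "org/repo-id" is a placeholder for DEFAULT_TOKENIZER_SOURCE. This
# succeeds only if a previous run populated the HF cache; otherwise it
# raises OSError, the same error the helper catches to trigger a download.
tokenizer = AutoTokenizer.from_pretrained("org/repo-id", local_files_only=True)
```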
@@ -686,12 +705,27 @@ def _load_model(
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace.
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try the local cache first to support offline usage after the initial download.
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network
+        download if the tokenizer hasn't been cached yet. This ensures offline
+        operation after the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage).
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in the cache yet; download from HuggingFace.
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
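
With the same helper now duplicated across two loader classes, a small unit test guards the behavior both copies depend on: the cache-only attempt must come first, and the plain download must be reached only on `OSError`. A hypothetical pytest-style sketch, with the helper inlined so it is self-contained:

```python
# Hypothetical test sketch for the fallback order.
from unittest import mock

from transformers import AutoTokenizer


def load_with_fallback(source: str):
    # Inlined stand-in for _load_tokenizer_with_offline_fallback.
    try:
        return AutoTokenizer.from_pretrained(source, local_files_only=True)
    except OSError:
        return AutoTokenizer.from_pretrained(source)


def test_network_is_reached_only_on_cache_miss():
    with mock.patch.object(AutoTokenizer, "from_pretrained") as fp:
        # First call (cache-only) misses; second call (download) succeeds.
        fp.side_effect = [OSError("not cached"), mock.sentinel.tokenizer]
        assert load_with_fallback("org/repo-id") is mock.sentinel.tokenizer
        first, second = fp.call_args_list
        assert first.kwargs.get("local_files_only") is True
        assert "local_files_only" not in second.kwargs
```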