@@ -526,12 +526,27 @@ def _load_model(
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,
@@ -686,12 +701,27 @@ def _load_model(
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
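Both hunks add an identical cache-first helper: the single-file and GGUF Qwen3 loaders each try `AutoTokenizer.from_pretrained(..., local_files_only=True)` and only hit the network when the tokenizer is not yet in the local HuggingFace cache. The snippet below is a minimal, standalone sketch of that pattern for trying it outside InvokeAI; the repo id and function name are illustrative placeholders, not the loaders' actual `DEFAULT_TOKENIZER_SOURCE` or helper.

```python
# Hedged sketch of the cache-first fallback, assuming transformers is installed.
from transformers import AutoTokenizer

TOKENIZER_SOURCE = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative repo id, not the PR's value


def load_tokenizer_with_offline_fallback(source: str = TOKENIZER_SOURCE):
    try:
        # local_files_only=True restricts transformers to the local HF cache and
        # raises OSError if the tokenizer files were never downloaded.
        return AutoTokenizer.from_pretrained(source, local_files_only=True)
    except OSError:
        # First run (or cleared cache): fall back to a networked download, after
        # which the cache-first branch succeeds on later, fully offline runs.
        return AutoTokenizer.from_pretrained(source)


if __name__ == "__main__":
    tokenizer = load_tokenizer_with_offline_fallback()
    print(type(tokenizer).__name__)
```

Because the helper is duplicated verbatim in both loader classes, the fallback behaves the same for both model formats; the only difference is which `_load_model` dispatches to it.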