@@ -526,12 +526,27 @@ def _load_model(
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,
@@ -686,12 +701,27 @@ def _load_model(
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
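Both hunks add an identical cache-first helper: the single-file and GGUF Qwen3 loaders each try `AutoTokenizer.from_pretrained(..., local_files_only=True)` and only hit the network when the tokenizer is not yet in the local HuggingFace cache. The snippet below is a minimal, standalone sketch of that pattern for trying it outside InvokeAI; the repo id and function name are illustrative placeholders, not the loaders' actual `DEFAULT_TOKENIZER_SOURCE` or helper.

```python
# Hedged sketch of the cache-first fallback, assuming transformers is installed.
from transformers import AutoTokenizer

TOKENIZER_SOURCE = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative repo id, not the PR's value


def load_tokenizer_with_offline_fallback(source: str = TOKENIZER_SOURCE):
    try:
        # local_files_only=True restricts transformers to the local HF cache and
        # raises OSError if the tokenizer files were never downloaded.
        return AutoTokenizer.from_pretrained(source, local_files_only=True)
    except OSError:
        # First run (or cleared cache): fall back to a networked download, after
        # which the cache-first branch succeeds on later, fully offline runs.
        return AutoTokenizer.from_pretrained(source)


if __name__ == "__main__":
    tokenizer = load_tokenizer_with_offline_fallback()
    print(type(tokenizer).__name__)
```

Because the helper is duplicated verbatim in both loader classes, the fallback behaves the same for both model formats; the only difference is which `_load_model` dispatches to it.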