@@ -384,15 +384,19 @@ def _load_model(
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return AutoTokenizer.from_pretrained(tokenizer_path)
+                # Use local_files_only=True to prevent network requests for validation.
+                # The tokenizer files should already exist locally in the model directory.
+                return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
+                # Use local_files_only=True to prevent network requests for validation.
                 return Qwen3ForCausalLM.from_pretrained(
                     text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
+                    local_files_only=True,
                 )
 
         raise ValueError(
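
The change in this hunk hinges on one contract in `transformers`: with `local_files_only=True`, `from_pretrained` resolves everything from disk and raises `OSError` instead of reaching for the Hugging Face Hub when files are missing. A minimal sketch of that behavior, with a hypothetical local path:

```python
# Minimal sketch of the local_files_only contract; the path is a
# hypothetical stand-in for the model's tokenizer directory.
from transformers import AutoTokenizer

try:
    # Resolves the tokenizer entirely from disk (a local directory or
    # the HF cache); no network connection is opened.
    tokenizer = AutoTokenizer.from_pretrained(
        "/path/to/model/tokenizer",
        local_files_only=True,
    )
except OSError:
    # Raised when the files are absent locally; without the flag,
    # from_pretrained would try to download them instead.
    print("tokenizer files not found locally")
```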
@@ -526,12 +530,27 @@ def _load_model(
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace.
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try the local cache first to support offline usage after the initial download.
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network
+        download if the tokenizer hasn't been cached yet. This ensures offline
+        operation after the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage).
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in the cache yet; download from HuggingFace.
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,
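
One way to sanity-check the new fallback is to run it in a process that cannot reach the network at all: `HF_HUB_OFFLINE=1` is a real `huggingface_hub` switch that makes any download attempt fail fast, so only a warm cache can satisfy the first branch. A sketch, assuming the tokenizer was downloaded on an earlier run (the repo id is a placeholder for whatever `DEFAULT_TOKENIZER_SOURCE` points at):

```python
# Sketch: exercising the cache-first branch in a fully offline process.
import os

# Must be set before transformers/huggingface_hub are imported.
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoTokenizer

# "org/repo-id" is a placeholder for DEFAULT_TOKENIZER_SOURCE. This
# succeeds only if a previous run populated the HF cache; otherwise it
# raises OSError, the same error the helper catches to trigger a download.
tokenizer = AutoTokenizer.from_pretrained("org/repo-id", local_files_only=True)
```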
@@ -686,12 +705,27 @@ def _load_model(
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace.
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try the local cache first to support offline usage after the initial download.
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from the local cache (offline), falling back to a network
+        download if the tokenizer hasn't been cached yet. This ensures offline
+        operation after the initial download.
+        """
+        try:
+            # Try loading from the local cache first (supports offline usage).
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in the cache yet; download from HuggingFace.
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
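
With the same helper now duplicated across two loader classes, a small unit test guards the behavior both copies depend on: the cache-only attempt must come first, and the plain download must be reached only on `OSError`. A hypothetical pytest-style sketch, with the helper inlined so it is self-contained:

```python
# Hypothetical test sketch for the fallback order.
from unittest import mock

from transformers import AutoTokenizer


def load_with_fallback(source: str):
    # Inlined stand-in for _load_tokenizer_with_offline_fallback.
    try:
        return AutoTokenizer.from_pretrained(source, local_files_only=True)
    except OSError:
        return AutoTokenizer.from_pretrained(source)


def test_network_is_reached_only_on_cache_miss():
    with mock.patch.object(AutoTokenizer, "from_pretrained") as fp:
        # First call (cache-only) misses; second call (download) succeeds.
        fp.side_effect = [OSError("not cached"), mock.sentinel.tokenizer]
        assert load_with_fallback("org/repo-id") is mock.sentinel.tokenizer
        first, second = fp.call_args_list
        assert first.kwargs.get("local_files_only") is True
        assert "local_files_only" not in second.kwargs
```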