Commit 6546df7

Merge branch 'main' into chore/update-whats-new-6.10.0

2 parents: f926575 + 3b2d2ef
4 files changed, 56 insertions(+), 10 deletions(-)


.github/workflows/mkdocs-material.yml

Lines changed: 3 additions & 3 deletions
@@ -22,12 +22,12 @@ jobs:
 
     steps:
       - name: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: setup python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
-          python-version: '3.10'
+          python-version: '3.12'
           cache: pip
           cache-dependency-path: pyproject.toml

invokeai/backend/model_manager/load/model_loaders/z_image.py

Lines changed: 37 additions & 3 deletions
@@ -384,15 +384,19 @@ def _load_model(
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return AutoTokenizer.from_pretrained(tokenizer_path)
+                # Use local_files_only=True to prevent network requests for validation
+                # The tokenizer files should already exist locally in the model directory
+                return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
+                # Use local_files_only=True to prevent network requests for validation
                 return Qwen3ForCausalLM.from_pretrained(
                     text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
+                    local_files_only=True,
                 )
 
         raise ValueError(
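
For context on the flag: local_files_only=True is a standard argument to from_pretrained in Hugging Face transformers. It resolves the model entirely against files already on disk and raises OSError instead of making network requests. A minimal sketch, assuming a hypothetical local directory ./models/qwen3-tokenizer:

from transformers import AutoTokenizer

# Hypothetical directory holding tokenizer.json, tokenizer_config.json, etc.
tokenizer_dir = "./models/qwen3-tokenizer"

try:
    # No HTTP requests are made; missing files raise OSError instead.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, local_files_only=True)
except OSError:
    print(f"Tokenizer files not found in {tokenizer_dir}")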
@@ -526,12 +530,27 @@ def _load_model(
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from local cache (offline), falling back to network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,
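
The helper above is a cache-first pattern: attempt the load offline, and only on OSError retry with networking enabled so transformers can download and cache the files. A standalone sketch of the same idea, using "Qwen/Qwen3-0.6B" purely as a placeholder repo id:

from transformers import AutoTokenizer

def load_tokenizer_cache_first(repo_id: str):
    """Prefer the local HF cache; fall back to a network download on a cache miss."""
    try:
        # Succeeds with no network access when the repo is already cached.
        return AutoTokenizer.from_pretrained(repo_id, local_files_only=True)
    except OSError:
        # Cache miss: let transformers download (and cache) the files.
        return AutoTokenizer.from_pretrained(repo_id)

tokenizer = load_tokenizer_cache_first("Qwen/Qwen3-0.6B")  # placeholder repo id

A trade-off worth noting: once cached, the files are never refreshed by this path, which is exactly the behavior wanted for reproducible offline use.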
@@ -686,12 +705,27 @@ def _load_model(
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace
-                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                # Try local cache first to support offline usage after initial download
+                return self._load_tokenizer_with_offline_fallback()
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+        """Load tokenizer with local_files_only fallback for offline support.
+
+        First tries to load from local cache (offline), falling back to network download
+        if the tokenizer hasn't been cached yet. This ensures offline operation after
+        the initial download.
+        """
+        try:
+            # Try loading from local cache first (supports offline usage)
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+        except OSError:
+            # Not in cache yet, download from HuggingFace
+            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
     def _load_from_gguf(
         self,
         config: AnyModelConfig,

invokeai/backend/quantization/gguf/ggml_tensor.py

Lines changed: 15 additions & 4 deletions
@@ -17,21 +17,32 @@ def dequantize_and_run(func, args, kwargs):
     Also casts other floating point tensors to match the compute_dtype of GGMLTensors
     to avoid dtype mismatches in matrix operations.
     """
-    # Find the compute_dtype from any GGMLTensor in the args
+    # Find the compute_dtype and target_device from any GGMLTensor in the args
     compute_dtype = None
+    target_device = None
     for a in args:
         if hasattr(a, "compute_dtype"):
             compute_dtype = a.compute_dtype
+        if isinstance(a, torch.Tensor) and target_device is None:
+            target_device = a.device
+        if compute_dtype is not None and target_device is not None:
             break
-    if compute_dtype is None:
+    if compute_dtype is None or target_device is None:
         for v in kwargs.values():
-            if hasattr(v, "compute_dtype"):
+            if hasattr(v, "compute_dtype") and compute_dtype is None:
                 compute_dtype = v.compute_dtype
+            if isinstance(v, torch.Tensor) and target_device is None:
+                target_device = v.device
+            if compute_dtype is not None and target_device is not None:
                 break
 
     def process_tensor(t):
         if hasattr(t, "get_dequantized_tensor"):
-            return t.get_dequantized_tensor()
+            result = t.get_dequantized_tensor()
+            # Ensure the dequantized tensor is on the target device
+            if target_device is not None and result.device != target_device:
+                result = result.to(target_device)
+            return result
         elif isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
             # Cast other floating point tensors to match the GGUF compute_dtype
             return t.to(compute_dtype)
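
For intuition, here is a self-contained sketch of the harmonization this hunk performs, with a hypothetical FakeQuantized wrapper standing in for InvokeAI's GGMLTensor: pick a reference dtype and device from the operands, then dequantize, move, and cast before the matrix op:

import torch

class FakeQuantized:
    """Hypothetical stand-in for a quantized tensor wrapper (not InvokeAI's GGMLTensor)."""

    def __init__(self, data: torch.Tensor, compute_dtype: torch.dtype):
        self._data = data
        self.compute_dtype = compute_dtype

    def get_dequantized_tensor(self) -> torch.Tensor:
        # Real code would decode quantized blocks; a cast stands in for that here.
        return self._data.to(self.compute_dtype)

def harmonized_matmul(a, b):
    # Take the reference dtype from whichever operand carries one, and the
    # reference device from the first real tensor (mirrors the loops above).
    compute_dtype = getattr(a, "compute_dtype", None) or getattr(b, "compute_dtype", None)
    target_device = next(
        (t.device for t in (a, b) if isinstance(t, torch.Tensor)), torch.device("cpu")
    )

    def prep(t):
        if hasattr(t, "get_dequantized_tensor"):
            t = t.get_dequantized_tensor().to(target_device)  # align device after dequant
        if compute_dtype is not None and t.is_floating_point():
            t = t.to(compute_dtype)  # align dtype for the matmul
        return t

    return prep(a) @ prep(b)

w = FakeQuantized(torch.randn(4, 4), compute_dtype=torch.float16)
x = torch.randn(4, 4)
print(harmonized_matmul(x, w).dtype)  # torch.float16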
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+{}
