
Commit 4f040f1

Merge branch 'main' into feature/z-image-control
2 parents bde3acc + ab6b672 commit 4f040f1

File tree

19 files changed (+1391, -705 lines)


invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 25 deletions
@@ -2,12 +2,6 @@
 
 import torch
 from diffusers.image_processor import VaeImageProcessor
-from diffusers.models.attention_processor import (
-    AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
-    XFormersAttnProcessor,
-)
 from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
 from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
 
@@ -77,26 +71,9 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
         latents = latents.to(TorchDevice.choose_torch_device())
         if self.fp32:
+            # FP32 mode: convert everything to float32 for maximum precision
             vae.to(dtype=torch.float32)
-
-            use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
-                vae.decoder.mid_block.attentions[0].processor,
-                (
-                    AttnProcessor2_0,
-                    XFormersAttnProcessor,
-                    LoRAXFormersAttnProcessor,
-                    LoRAAttnProcessor2_0,
-                ),
-            )
-            # if xformers or torch_2_0 is used attention block does not need
-            # to be in float32 which can save lots of memory
-            if use_torch_2_0_or_xformers:
-                vae.post_quant_conv.to(latents.dtype)
-                vae.decoder.conv_in.to(latents.dtype)
-                vae.decoder.mid_block.to(latents.dtype)
-            else:
-                latents = latents.float()
-
+            latents = latents.float()
         else:
             vae.to(dtype=torch.float16)
             latents = latents.half()
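
For readers skimming the hunk above: with the attention-processor check gone, the fp32 branch reduces to casting both the VAE and the latents to float32 before decoding. A minimal sketch of the resulting dtype handling, pulled out of the invocation for illustration (the standalone helper `_prepare_for_decode` is hypothetical, not part of this commit):

import torch

def _prepare_for_decode(vae, latents: torch.Tensor, fp32: bool) -> torch.Tensor:
    """Cast the VAE and the latents to a matching dtype ahead of VAE decode."""
    if fp32:
        # FP32 mode: convert everything to float32 for maximum precision
        vae.to(dtype=torch.float32)
        latents = latents.float()
    else:
        # Default path: half precision, matching the diff's else branch
        vae.to(dtype=torch.float16)
        latents = latents.half()
    return latents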

invokeai/app/invocations/z_image_denoise.py

Lines changed: 3 additions & 7 deletions
@@ -17,13 +17,9 @@
     Input,
     InputField,
     LatentsField,
-    WithBoard,
-    WithMetadata,
     ZImageConditioningField,
 )
-from invokeai.app.invocations.z_image_control import ZImageControlField
-from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
-from invokeai.app.invocations.model import LoRAField, TransformerField, VAEField
+from invokeai.app.invocations.model import TransformerField
 from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat
@@ -43,10 +39,10 @@
     title="Denoise - Z-Image",
     tags=["image", "z-image"],
     category="image",
-    version="1.0.0",
+    version="1.1.0",
     classification=Classification.Prototype,
 )
-class ZImageDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
+class ZImageDenoiseInvocation(BaseInvocation):
     """Run the denoising process with a Z-Image model."""
 
     # If latents is provided, this means we are doing image-to-image.

invokeai/app/invocations/z_image_latents_to_image.py

Lines changed: 1 addition & 3 deletions
@@ -55,9 +55,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
 
         # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL
         seamless_context = (
-            nullcontext()
-            if is_flux_vae
-            else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
+            nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
         )
 
         with seamless_context, vae_info.model_on_device() as (_, vae):
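
The change above is purely a reflow, but the idiom is worth calling out: when the loaded VAE can't be patched for seamless tiling, contextlib.nullcontext() serves as a no-op context manager so the with-statement stays unconditional. A self-contained sketch of that pattern, with illustrative names rather than the repo's own:

from contextlib import contextmanager, nullcontext

@contextmanager
def patch_for_seamless(model):
    # Stand-in for SeamlessExt.static_patch_model: temporarily modify the model.
    try:
        yield model
    finally:
        pass  # restore the original model state here

def decode(model, supports_seamless: bool):
    ctx = patch_for_seamless(model) if supports_seamless else nullcontext()
    with ctx:
        pass  # decode latents with the (possibly patched) model

decode(object(), supports_seamless=False)  # the with-block still runs, no patching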

invokeai/backend/model_manager/configs/factory.py

Lines changed: 5 additions & 5 deletions
@@ -73,17 +73,17 @@
     Main_GGUF_ZImage_Config,
     MainModelDefaultSettings,
 )
+from invokeai.backend.model_manager.configs.qwen3_encoder import (
+    Qwen3Encoder_Checkpoint_Config,
+    Qwen3Encoder_GGUF_Config,
+    Qwen3Encoder_Qwen3Encoder_Config,
+)
 from invokeai.backend.model_manager.configs.siglip import SigLIP_Diffusers_Config
 from invokeai.backend.model_manager.configs.spandrel import Spandrel_Checkpoint_Config
 from invokeai.backend.model_manager.configs.t2i_adapter import (
     T2IAdapter_Diffusers_SD1_Config,
     T2IAdapter_Diffusers_SDXL_Config,
 )
-from invokeai.backend.model_manager.configs.qwen3_encoder import (
-    Qwen3Encoder_Checkpoint_Config,
-    Qwen3Encoder_GGUF_Config,
-    Qwen3Encoder_Qwen3Encoder_Config,
-)
 from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config
 from invokeai.backend.model_manager.configs.textual_inversion import (
     TI_File_SD1_Config,

invokeai/backend/model_manager/configs/qwen3_encoder.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,6 @@
 from pydantic import Field
 
 from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Config_Base
-from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.model_manager.configs.identification_utils import (
     NotAMatchError,
     raise_for_class_name,
@@ -13,6 +12,7 @@
 )
 from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType
+from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 
 
 def _has_qwen3_keys(state_dict: dict[str | int, Any]) -> bool:

invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_diffusers_rms_norm.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 import torch
-
 from diffusers.models.normalization import RMSNorm as DiffusersRMSNorm
 
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.cast_to_device import cast_to_device

invokeai/backend/model_manager/load/model_loaders/z_image.py

Lines changed: 6 additions & 7 deletions
@@ -6,7 +6,6 @@
 
 import accelerate
 import torch
-
 from transformers import AutoTokenizer, Qwen3ForCausalLM
 
 from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Diffusers_Config_Base
@@ -288,8 +287,7 @@ def _load_from_singlefile(
 
         if not isinstance(config, Main_GGUF_ZImage_Config):
             raise TypeError(
-                f"Expected Main_GGUF_ZImage_Config, got {type(config).__name__}. "
-                "Model configuration type mismatch."
+                f"Expected Main_GGUF_ZImage_Config, got {type(config).__name__}. Model configuration type mismatch."
             )
         model_path = Path(config.path)
 
@@ -511,10 +509,11 @@ def _load_from_singlefile(
         self,
         config: AnyModelConfig,
     ) -> AnyModel:
-        from invokeai.backend.util.logging import InvokeAILogger
         from safetensors.torch import load_file
         from transformers import Qwen3Config, Qwen3ForCausalLM
 
+        from invokeai.backend.util.logging import InvokeAILogger
+
         logger = InvokeAILogger.get_logger(self.__class__.__name__)
 
         if not isinstance(config, Qwen3Encoder_Checkpoint_Config):
@@ -670,15 +669,15 @@ def _load_from_gguf(
         self,
         config: AnyModelConfig,
     ) -> AnyModel:
-        from invokeai.backend.util.logging import InvokeAILogger
         from transformers import Qwen3Config, Qwen3ForCausalLM
 
+        from invokeai.backend.util.logging import InvokeAILogger
+
         logger = InvokeAILogger.get_logger(self.__class__.__name__)
 
         if not isinstance(config, Qwen3Encoder_GGUF_Config):
             raise TypeError(
-                f"Expected Qwen3Encoder_GGUF_Config, got {type(config).__name__}. "
-                "Model configuration type mismatch."
+                f"Expected Qwen3Encoder_GGUF_Config, got {type(config).__name__}. Model configuration type mismatch."
            )
         model_path = Path(config.path)

invokeai/backend/patches/layer_patcher.py

Lines changed: 1 addition & 3 deletions
@@ -175,9 +175,7 @@ def _apply_model_layer_patch(
 
         # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
         # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-        params_dict = patch.get_parameters(
-            dict(module_to_patch.named_parameters(recurse=False)), weight=patch_weight
-        )
+        params_dict = patch.get_parameters(dict(module_to_patch.named_parameters(recurse=False)), weight=patch_weight)
         if not params_dict:
             logger = InvokeAILogger.get_logger(LayerPatcher.__name__)
             logger.warning(f"LoRA patch returned no parameters for module: {module_to_patch_key}")

invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py

Lines changed: 11 additions & 10 deletions
@@ -28,17 +28,18 @@ def is_state_dict_likely_z_image_lora(state_dict: dict[str | int, torch.Tensor])
     # Check for Z-Image transformer keys (S3-DiT architecture)
     # Various training frameworks use different prefixes
     has_transformer_keys = any(
-        k.startswith((
-            "transformer.",
-            "base_model.model.transformer.",
-            "diffusion_model.",
-        )) for k in str_keys
+        k.startswith(
+            (
+                "transformer.",
+                "base_model.model.transformer.",
+                "diffusion_model.",
+            )
+        )
+        for k in str_keys
     )
 
     # Check for Qwen3 text encoder keys
-    has_qwen3_keys = any(
-        k.startswith(("text_encoder.", "base_model.model.text_encoder.")) for k in str_keys
-    )
+    has_qwen3_keys = any(k.startswith(("text_encoder.", "base_model.model.text_encoder.")) for k in str_keys)
 
     return has_transformer_keys or has_qwen3_keys
 
@@ -96,15 +97,15 @@ def lora_model_from_z_image_state_dict(
         # Check and strip text encoder prefixes first
         for prefix in text_encoder_prefixes:
             if layer_key.startswith(prefix):
-                clean_key = layer_key[len(prefix):]
+                clean_key = layer_key[len(prefix) :]
                 is_text_encoder = True
                 break
 
         # If not text encoder, check transformer prefixes
         if not is_text_encoder:
            for prefix in transformer_prefixes:
                if layer_key.startswith(prefix):
-                    clean_key = layer_key[len(prefix):]
+                    clean_key = layer_key[len(prefix) :]
                    break
 
        # Apply the appropriate internal prefix
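
Both hunks reformat the same two idioms rather than changing behavior: str.startswith() with a tuple of candidate prefixes to detect which family a LoRA key belongs to, and slicing off the matched prefix to recover the bare module key. A small self-contained sketch of those idioms (the example keys are made up for illustration):

# Hypothetical LoRA-style keys, just to exercise the idioms.
keys = [
    "diffusion_model.blocks.0.attn.to_q.lora_A.weight",
    "text_encoder.layers.0.q_proj.lora_B.weight",
]

transformer_prefixes = ("transformer.", "base_model.model.transformer.", "diffusion_model.")
text_encoder_prefixes = ("text_encoder.", "base_model.model.text_encoder.")

# Detection: startswith() accepts a tuple and matches if any entry matches.
has_transformer_keys = any(k.startswith(transformer_prefixes) for k in keys)
has_text_encoder_keys = any(k.startswith(text_encoder_prefixes) for k in keys)

# Stripping: slice off the first matching prefix to get the bare key.
clean_keys = []
for key in keys:
    for prefix in transformer_prefixes + text_encoder_prefixes:
        if key.startswith(prefix):
            clean_keys.append(key[len(prefix):])
            break
    else:
        clean_keys.append(key)  # no known prefix; keep the key as-is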

invokeai/frontend/web/public/locales/en.json

Lines changed: 8 additions & 0 deletions
@@ -501,6 +501,14 @@
       "title": "Next Prompt in History",
       "desc": "When the prompt is focused, move to the next (newer) prompt in your history."
     },
+    "promptWeightUp": {
+      "title": "Increase Weight of Prompt Selection",
+      "desc": "When the prompt is focused and text is selected, increase the weight of the selected prompt."
+    },
+    "promptWeightDown": {
+      "title": "Decrease Weight of Prompt Selection",
+      "desc": "When the prompt is focused and text is selected, decrease the weight of the selected prompt."
+    },
     "toggleLeftPanel": {
       "title": "Toggle Left Panel",
       "desc": "Show or hide the left panel."

0 commit comments
