
Commit e211ac9

Merge branch 'feat/z-image-turbo-support' into feature/z-image-control
2 parents: 78d0e1d + 1e72feb

File tree: 31 files changed (+1775 additions, -292 deletions)


invokeai/app/api/routers/model_manager.py

Lines changed: 53 additions & 0 deletions
@@ -447,6 +447,59 @@ async def delete_model(
         raise HTTPException(status_code=404, detail=str(e))
 
 
+class BulkDeleteModelsRequest(BaseModel):
+    """Request body for bulk model deletion."""
+
+    keys: List[str] = Field(description="List of model keys to delete")
+
+
+class BulkDeleteModelsResponse(BaseModel):
+    """Response body for bulk model deletion."""
+
+    deleted: List[str] = Field(description="List of successfully deleted model keys")
+    failed: List[dict] = Field(description="List of failed deletions with error messages")
+
+
+@model_manager_router.post(
+    "/i/bulk_delete",
+    operation_id="bulk_delete_models",
+    responses={
+        200: {"description": "Models deleted (possibly with some failures)"},
+    },
+    status_code=200,
+)
+async def bulk_delete_models(
+    request: BulkDeleteModelsRequest = Body(description="List of model keys to delete"),
+) -> BulkDeleteModelsResponse:
+    """
+    Delete multiple model records from database.
+
+    The configuration records will be removed. The corresponding weights files will be
+    deleted as well if they reside within the InvokeAI "models" directory.
+    Returns a list of successfully deleted keys and failed deletions with error messages.
+    """
+    logger = ApiDependencies.invoker.services.logger
+    installer = ApiDependencies.invoker.services.model_manager.install
+
+    deleted = []
+    failed = []
+
+    for key in request.keys:
+        try:
+            installer.delete(key)
+            deleted.append(key)
+            logger.info(f"Deleted model: {key}")
+        except UnknownModelException as e:
+            logger.error(f"Failed to delete model {key}: {str(e)}")
+            failed.append({"key": key, "error": str(e)})
+        except Exception as e:
+            logger.error(f"Failed to delete model {key}: {str(e)}")
+            failed.append({"key": key, "error": str(e)})
+
+    logger.info(f"Bulk delete completed: {len(deleted)} deleted, {len(failed)} failed")
+    return BulkDeleteModelsResponse(deleted=deleted, failed=failed)
+
+
 @model_manager_router.delete(
     "/i/{key}/image",
     operation_id="delete_model_image",
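
Below the new endpoint, a minimal client-side sketch of how the bulk delete route could be called. It assumes a local InvokeAI instance and the usual /api/v2/models router prefix; the host, port, and model keys are placeholders, not values from this commit.

import requests

# Assumed base URL for a local InvokeAI instance; adjust host/port/prefix as needed.
BASE_URL = "http://127.0.0.1:9090/api/v2/models"

def bulk_delete(keys: list[str]) -> None:
    # POST the list of keys to the bulk_delete route and report the outcome.
    resp = requests.post(f"{BASE_URL}/i/bulk_delete", json={"keys": keys}, timeout=60)
    resp.raise_for_status()
    body = resp.json()
    print(f"deleted: {body['deleted']}")
    for failure in body["failed"]:
        print(f"failed: {failure['key']} -> {failure['error']}")

if __name__ == "__main__":
    bulk_delete(["example-key-1", "example-key-2"])  # placeholder keys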

invokeai/app/invocations/metadata.py

Lines changed: 5 additions & 1 deletion
@@ -170,7 +170,7 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
     title="Core Metadata",
     tags=["metadata"],
     category="metadata",
-    version="2.0.0",
+    version="2.1.0",
     classification=Classification.Internal,
 )
 class CoreMetadataInvocation(BaseInvocation):
@@ -221,6 +221,10 @@ class CoreMetadataInvocation(BaseInvocation):
         default=None,
         description="The VAE used for decoding, if the main model's default was not used",
     )
+    qwen3_encoder: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="The Qwen3 text encoder model used for Z-Image inference",
+    )
 
     # High resolution fix metadata.
     hrf_enabled: Optional[bool] = InputField(

invokeai/app/invocations/z_image_denoise.py

Lines changed: 23 additions & 7 deletions
@@ -130,9 +130,17 @@ def _load_text_conditioning(
     ) -> torch.Tensor:
         """Load Z-Image text conditioning."""
         cond_data = context.conditioning.load(conditioning_name)
-        assert len(cond_data.conditionings) == 1
+        if len(cond_data.conditionings) != 1:
+            raise ValueError(
+                f"Expected exactly 1 conditioning entry for Z-Image, got {len(cond_data.conditionings)}. "
+                "Ensure you are using the Z-Image text encoder."
+            )
         z_image_conditioning = cond_data.conditionings[0]
-        assert isinstance(z_image_conditioning, ZImageConditioningInfo)
+        if not isinstance(z_image_conditioning, ZImageConditioningInfo):
+            raise TypeError(
+                f"Expected ZImageConditioningInfo, got {type(z_image_conditioning).__name__}. "
+                "Ensure you are using the Z-Image text encoder."
+            )
         z_image_conditioning = z_image_conditioning.to(dtype=dtype, device=device)
         return z_image_conditioning.prompt_embeds
 
@@ -147,8 +155,10 @@ def _get_noise(
         seed: int,
     ) -> torch.Tensor:
         """Generate initial noise tensor."""
+        # Generate noise as float32 on CPU for maximum compatibility,
+        # then cast to target dtype/device
         rand_device = "cpu"
-        rand_dtype = torch.float16
+        rand_dtype = torch.float32
 
         return torch.randn(
             batch_size,
@@ -204,8 +214,8 @@ def time_shift(mu: float, sigma: float, t: float) -> float:
         return sigmas
 
     def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
-        inference_dtype = torch.bfloat16
         device = TorchDevice.choose_torch_device()
+        inference_dtype = TorchDevice.choose_bfloat16_safe_dtype(device)
 
         transformer_info = context.models.load(self.transformer.transformer)
 
@@ -221,7 +231,8 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
        neg_prompt_embeds: torch.Tensor | None = None
        do_classifier_free_guidance = self.guidance_scale > 0.0 and self.negative_conditioning is not None
        if do_classifier_free_guidance:
-            assert self.negative_conditioning is not None
+            if self.negative_conditioning is None:
+                raise ValueError("Negative conditioning is required when guidance_scale > 0")
            neg_prompt_embeds = self._load_text_conditioning(
                context=context,
                conditioning_name=self.negative_conditioning.conditioning_name,
@@ -283,7 +294,8 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
        inpaint_mask = self._prep_inpaint_mask(context, latents)
        inpaint_extension: RectifiedFlowInpaintExtension | None = None
        if inpaint_mask is not None:
-            assert init_latents is not None
+            if init_latents is None:
+                raise ValueError("Initial latents are required when using an inpaint mask (image-to-image inpainting)")
            inpaint_extension = RectifiedFlowInpaintExtension(
                init_latents=init_latents,
                inpaint_mask=inpaint_mask,
@@ -527,6 +539,10 @@ def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]:
         """Iterate over LoRA models to apply to the transformer."""
         for lora in self.transformer.loras:
             lora_info = context.models.load(lora.lora)
-            assert isinstance(lora_info.model, ModelPatchRaw)
+            if not isinstance(lora_info.model, ModelPatchRaw):
+                raise TypeError(
+                    f"Expected ModelPatchRaw for LoRA '{lora.lora.key}', got {type(lora_info.model).__name__}. "
+                    "The LoRA model may be corrupted or incompatible."
+                )
             yield (lora_info.model, lora.weight)
             del lora_info
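
The _get_noise change above switches the random draw from float16 to float32 on CPU before casting. A small standalone sketch of that pattern; the shapes, channel count, and dtypes here are illustrative, not the invocation's actual values.

import torch

def get_noise(batch: int, channels: int, height: int, width: int, seed: int,
              device: torch.device, dtype: torch.dtype) -> torch.Tensor:
    # Draw noise on CPU in float32 with a seeded generator so the same seed
    # reproduces the same latents regardless of the eventual inference dtype.
    generator = torch.Generator(device="cpu").manual_seed(seed)
    noise = torch.randn(batch, channels, height, width,
                        dtype=torch.float32, device="cpu", generator=generator)
    # Cast once, after generation, to the device/dtype used for denoising.
    return noise.to(device=device, dtype=dtype)

noise = get_noise(1, 16, 128, 128, seed=42,
                  device=torch.device("cpu"), dtype=torch.bfloat16)
print(noise.dtype, noise.shape)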

invokeai/app/invocations/z_image_image_to_latents.py

Lines changed: 15 additions & 3 deletions
@@ -41,10 +41,18 @@ class ZImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
 
     @staticmethod
     def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
-        assert isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder))
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
 
         with vae_info.model_on_device() as (_, vae):
-            assert isinstance(vae, (AutoencoderKL, FluxAutoEncoder))
+            if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
+                raise TypeError(
+                    f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
+                    "VAE model type changed unexpectedly after loading."
+                )
 
             vae_dtype = next(iter(vae.parameters())).dtype
             image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
@@ -80,7 +88,11 @@ def invoke(self, context: InvocationContext) -> LatentsOutput:
         image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
 
         vae_info = context.models.load(self.vae.vae)
-        assert isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder))
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
 
         context.util.signal_progress("Running VAE")
         latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
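
The same "isinstance or raise TypeError" guard recurs across the Z-Image VAE invocations. A hypothetical helper (not part of this commit) showing how that pattern could be factored out:

def require_type(value: object, expected: tuple[type, ...], what: str) -> None:
    # Raise a descriptive TypeError if `value` is not one of the expected types.
    if not isinstance(value, expected):
        names = " or ".join(t.__name__ for t in expected)
        raise TypeError(
            f"Expected {names} for {what}, got {type(value).__name__}. "
            "Ensure you are using a compatible model."
        )

# Usage with the classes imported in the diff (assumed to be in scope):
#     require_type(vae_info.model, (AutoencoderKL, FluxAutoEncoder), "Z-Image VAE")
require_type("demo", (str,), "demo value")  # passes; a non-str value would raise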

invokeai/app/invocations/z_image_latents_to_image.py

Lines changed: 10 additions & 2 deletions
@@ -45,7 +45,11 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         latents = context.tensors.load(self.latents.latents_name)
 
         vae_info = context.models.load(self.vae.vae)
-        assert isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder))
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
 
         is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)
 
@@ -58,7 +62,11 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
 
         with seamless_context, vae_info.model_on_device() as (_, vae):
             context.util.signal_progress("Running VAE")
-            assert isinstance(vae, (AutoencoderKL, FluxAutoEncoder))
+            if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
+                raise TypeError(
+                    f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
+                    "VAE model type changed unexpectedly after loading."
+                )
 
             vae_dtype = next(iter(vae.parameters())).dtype
             latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)

invokeai/app/invocations/z_image_lora_loader.py

Lines changed: 5 additions & 1 deletion
@@ -136,7 +136,11 @@ def invoke(self, context: InvocationContext) -> ZImageLoRALoaderOutput:
             if not context.models.exists(lora.lora.key):
                 raise Exception(f"Unknown lora: {lora.lora.key}!")
 
-            assert lora.lora.base is BaseModelType.ZImage
+            if lora.lora.base is not BaseModelType.ZImage:
+                raise ValueError(
+                    f"LoRA '{lora.lora.key}' is for {lora.lora.base.value if lora.lora.base else 'unknown'} models, "
+                    "not Z-Image models. Ensure you are using a Z-Image compatible LoRA."
+                )
 
             added_loras.append(lora.lora.key)

invokeai/app/invocations/z_image_text_encoder.py

Lines changed: 55 additions & 13 deletions
@@ -63,27 +63,41 @@ def _encode_prompt(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
             (_, tokenizer) = exit_stack.enter_context(tokenizer_info.model_on_device())
 
             # Apply LoRA models to the text encoder
+            lora_dtype = TorchDevice.choose_bfloat16_safe_dtype(device)
             exit_stack.enter_context(
                 LayerPatcher.apply_smart_model_patches(
                     model=text_encoder,
                     patches=self._lora_iterator(context),
                     prefix=Z_IMAGE_LORA_QWEN3_PREFIX,
-                    dtype=torch.bfloat16,
+                    dtype=lora_dtype,
                 )
             )
 
             context.util.signal_progress("Running Qwen3 text encoder")
-            assert isinstance(text_encoder, PreTrainedModel)
-            assert isinstance(tokenizer, PreTrainedTokenizerBase)
+            if not isinstance(text_encoder, PreTrainedModel):
+                raise TypeError(
+                    f"Expected PreTrainedModel for text encoder, got {type(text_encoder).__name__}. "
+                    "The Qwen3 encoder model may be corrupted or incompatible."
+                )
+            if not isinstance(tokenizer, PreTrainedTokenizerBase):
+                raise TypeError(
+                    f"Expected PreTrainedTokenizerBase for tokenizer, got {type(tokenizer).__name__}. "
+                    "The Qwen3 tokenizer may be corrupted or incompatible."
+                )
 
             # Apply chat template similar to diffusers ZImagePipeline
             # The chat template formats the prompt for the Qwen3 model
-            prompt_formatted = tokenizer.apply_chat_template(
-                [{"role": "user", "content": prompt}],
-                tokenize=False,
-                add_generation_prompt=True,
-                enable_thinking=True,
-            )
+            try:
+                prompt_formatted = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    tokenize=False,
+                    add_generation_prompt=True,
+                    enable_thinking=True,
+                )
+            except (AttributeError, TypeError) as e:
+                # Fallback if tokenizer doesn't support apply_chat_template or enable_thinking
+                context.logger.warning(f"Chat template failed ({e}), using raw prompt.")
+                prompt_formatted = prompt
 
             # Tokenize the formatted prompt
             text_inputs = tokenizer(
@@ -97,8 +111,16 @@ def _encode_prompt(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
 
             text_input_ids = text_inputs.input_ids
             attention_mask = text_inputs.attention_mask
-            assert isinstance(text_input_ids, torch.Tensor)
-            assert isinstance(attention_mask, torch.Tensor)
+            if not isinstance(text_input_ids, torch.Tensor):
+                raise TypeError(
+                    f"Expected torch.Tensor for input_ids, got {type(text_input_ids).__name__}. "
+                    "Tokenizer returned unexpected type."
+                )
+            if not isinstance(attention_mask, torch.Tensor):
+                raise TypeError(
+                    f"Expected torch.Tensor for attention_mask, got {type(attention_mask).__name__}. "
+                    "Tokenizer returned unexpected type."
+                )
 
             # Check for truncation
             untruncated_ids = tokenizer(prompt_formatted, padding="longest", return_tensors="pt").input_ids
@@ -119,6 +141,18 @@ def _encode_prompt(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
                 attention_mask=prompt_mask,
                 output_hidden_states=True,
             )
+
+            # Validate hidden_states output
+            if not hasattr(outputs, "hidden_states") or outputs.hidden_states is None:
+                raise RuntimeError(
+                    "Text encoder did not return hidden_states. "
+                    "Ensure output_hidden_states=True is supported by this model."
+                )
+            if len(outputs.hidden_states) < 2:
+                raise RuntimeError(
+                    f"Expected at least 2 hidden states from text encoder, got {len(outputs.hidden_states)}. "
+                    "This may indicate an incompatible model or configuration."
+                )
             prompt_embeds = outputs.hidden_states[-2]
 
             # Z-Image expects a 2D tensor [seq_len, hidden_dim] with only valid tokens
@@ -127,13 +161,21 @@ def _encode_prompt(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
             # Since batch_size=1, we take the first item and filter by mask
             prompt_embeds = prompt_embeds[0][prompt_mask[0]]
 
-            assert isinstance(prompt_embeds, torch.Tensor)
+            if not isinstance(prompt_embeds, torch.Tensor):
+                raise TypeError(
+                    f"Expected torch.Tensor for prompt embeddings, got {type(prompt_embeds).__name__}. "
+                    "Text encoder returned unexpected type."
+                )
             return prompt_embeds
 
     def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]:
         """Iterate over LoRA models to apply to the Qwen3 text encoder."""
         for lora in self.qwen3_encoder.loras:
             lora_info = context.models.load(lora.lora)
-            assert isinstance(lora_info.model, ModelPatchRaw)
+            if not isinstance(lora_info.model, ModelPatchRaw):
+                raise TypeError(
+                    f"Expected ModelPatchRaw for LoRA '{lora.lora.key}', got {type(lora_info.model).__name__}. "
+                    "The LoRA model may be corrupted or incompatible."
+                )
             yield (lora_info.model, lora.weight)
             del lora_info
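
A standalone sketch of the chat-template fallback added above, using a plain Hugging Face tokenizer. The checkpoint name is only an example, and ValueError is caught here in addition to the diff's AttributeError/TypeError because some transformers versions raise it when no chat template is defined.

from transformers import AutoTokenizer

def format_prompt(tokenizer, prompt: str) -> str:
    try:
        # Wrap the prompt in a single user turn, mirroring the diff above.
        return tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )
    except (AttributeError, TypeError, ValueError):
        # Tokenizers without a chat template fall back to the raw prompt
        # instead of failing the whole invocation.
        return prompt

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")  # example checkpoint
print(format_prompt(tokenizer, "A photo of a red fox in the snow"))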

invokeai/backend/model_manager/configs/factory.py

Lines changed: 9 additions & 1 deletion
@@ -61,6 +61,7 @@
     Main_Checkpoint_SD2_Config,
     Main_Checkpoint_SDXL_Config,
     Main_Checkpoint_SDXLRefiner_Config,
+    Main_Checkpoint_ZImage_Config,
     Main_Diffusers_CogView4_Config,
     Main_Diffusers_SD1_Config,
     Main_Diffusers_SD2_Config,
@@ -78,7 +79,11 @@
     T2IAdapter_Diffusers_SD1_Config,
     T2IAdapter_Diffusers_SDXL_Config,
 )
-from invokeai.backend.model_manager.configs.qwen3_encoder import Qwen3Encoder_Qwen3Encoder_Config
+from invokeai.backend.model_manager.configs.qwen3_encoder import (
+    Qwen3Encoder_Checkpoint_Config,
+    Qwen3Encoder_GGUF_Config,
+    Qwen3Encoder_Qwen3Encoder_Config,
+)
 from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config
 from invokeai.backend.model_manager.configs.textual_inversion import (
     TI_File_SD1_Config,
@@ -151,6 +156,7 @@
     Annotated[Main_Checkpoint_SDXL_Config, Main_Checkpoint_SDXL_Config.get_tag()],
     Annotated[Main_Checkpoint_SDXLRefiner_Config, Main_Checkpoint_SDXLRefiner_Config.get_tag()],
     Annotated[Main_Checkpoint_FLUX_Config, Main_Checkpoint_FLUX_Config.get_tag()],
+    Annotated[Main_Checkpoint_ZImage_Config, Main_Checkpoint_ZImage_Config.get_tag()],
     # Main (Pipeline) - quantized formats
     Annotated[Main_BnBNF4_FLUX_Config, Main_BnBNF4_FLUX_Config.get_tag()],
     Annotated[Main_GGUF_FLUX_Config, Main_GGUF_FLUX_Config.get_tag()],
@@ -196,6 +202,8 @@
     Annotated[T5Encoder_BnBLLMint8_Config, T5Encoder_BnBLLMint8_Config.get_tag()],
     # Qwen3 Encoder
     Annotated[Qwen3Encoder_Qwen3Encoder_Config, Qwen3Encoder_Qwen3Encoder_Config.get_tag()],
+    Annotated[Qwen3Encoder_Checkpoint_Config, Qwen3Encoder_Checkpoint_Config.get_tag()],
+    Annotated[Qwen3Encoder_GGUF_Config, Qwen3Encoder_GGUF_Config.get_tag()],
     # TI - file format
     Annotated[TI_File_SD1_Config, TI_File_SD1_Config.get_tag()],
     Annotated[TI_File_SD2_Config, TI_File_SD2_Config.get_tag()],
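
For context on what these Annotated entries do: the factory builds a pydantic tagged union, and each get_tag() call attaches the discriminator that routes a stored record to its config class. A simplified, self-contained sketch with hypothetical classes and a plain field discriminator (the real factory uses get_tag()-based tags rather than a single field name):

from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field, TypeAdapter

class CheckpointConfig(BaseModel):
    format: Literal["checkpoint"] = "checkpoint"
    path: str

class GGUFConfig(BaseModel):
    format: Literal["gguf_quantized"] = "gguf_quantized"
    path: str

# Tagged union: pydantic inspects "format" and validates against the matching class.
AnyConfig = Annotated[Union[CheckpointConfig, GGUFConfig], Field(discriminator="format")]

adapter = TypeAdapter(AnyConfig)
cfg = adapter.validate_python({"format": "gguf_quantized", "path": "/models/qwen3.gguf"})
print(type(cfg).__name__)  # GGUFConfig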
