
Commit f456400

Merge branch 'main' into feature/xlabs-flux-lora-support
2 parents: e5b756b + 5a0b227

92 files changed (+45558, -930 lines)


invokeai/app/api/dependencies.py

Lines changed: 2 additions & 0 deletions

@@ -49,6 +49,7 @@
     FLUXConditioningInfo,
     SD3ConditioningInfo,
     SDXLConditioningInfo,
+    ZImageConditioningInfo,
 )
 from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.version.invokeai_version import __version__
@@ -129,6 +130,7 @@ def initialize(
                 FLUXConditioningInfo,
                 SD3ConditioningInfo,
                 CogView4ConditioningInfo,
+                ZImageConditioningInfo,
             ],
             ephemeral=True,
         ),

invokeai/app/api/routers/workflows.py

Lines changed: 9 additions & 0 deletions

@@ -223,6 +223,15 @@ async def get_workflow_thumbnail(
     raise HTTPException(status_code=404)
 
 
+@workflows_router.get("/tags", operation_id="get_all_tags")
+async def get_all_tags(
+    categories: Optional[list[WorkflowCategory]] = Query(default=None, description="The categories to include"),
+) -> list[str]:
+    """Gets all unique tags from workflows"""
+
+    return ApiDependencies.invoker.services.workflow_records.get_all_tags(categories=categories)
+
+
 @workflows_router.get("/counts_by_tag", operation_id="get_counts_by_tag")
 async def get_counts_by_tag(
     tags: list[str] = Query(description="The tags to get counts for"),
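
For reference, a minimal sketch of calling the new tags endpoint from a client. The server URL, the /api/v1/workflows route prefix, and the category names are assumptions based on InvokeAI's usual API layout, not confirmed by this diff.

# Minimal sketch (stdlib only): query the new GET /tags endpoint.
# URL prefix, port, and category values below are assumptions.
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode([("categories", "user"), ("categories", "default")])
url = f"http://127.0.0.1:9090/api/v1/workflows/tags?{params}"  # assumed prefix/port
with urllib.request.urlopen(url) as resp:
    tags: list[str] = json.load(resp)
print(tags)  # e.g. ["portrait", "upscaling", ...]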

invokeai/app/invocations/fields.py

Lines changed: 8 additions & 0 deletions

@@ -154,6 +154,7 @@ class FieldDescriptions:
     clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
     t5_encoder = "T5 tokenizer and text encoder"
     glm_encoder = "GLM (THUDM) tokenizer and text encoder"
+    qwen3_encoder = "Qwen3 tokenizer and text encoder"
     clip_embed_model = "CLIP Embed loader"
     clip_g_model = "CLIP-G Embed loader"
     unet = "UNet (scheduler, LoRAs)"
@@ -169,6 +170,7 @@
     flux_model = "Flux model (Transformer) to load"
     sd3_model = "SD3 model (MMDiTX) to load"
     cogview4_model = "CogView4 model (Transformer) to load"
+    z_image_model = "Z-Image model (Transformer) to load"
     sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
     sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
     onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -321,6 +323,12 @@ class CogView4ConditioningField(BaseModel):
     conditioning_name: str = Field(description="The name of conditioning tensor")
 
 
+class ZImageConditioningField(BaseModel):
+    """A Z-Image conditioning tensor primitive value"""
+
+    conditioning_name: str = Field(description="The name of conditioning tensor")
+
+
 class ConditioningField(BaseModel):
     """A conditioning tensor primitive value"""
 
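
A small sketch of the new conditioning field as a plain pydantic model; it serializes and validates like the other primitive fields. Assumes pydantic v2 (which InvokeAI uses); the tensor name is an arbitrary example value.

# Minimal sketch: round-trip the new field through pydantic v2.
from invokeai.app.invocations.fields import ZImageConditioningField

field = ZImageConditioningField(conditioning_name="z_image_cond_example")
payload = field.model_dump()  # {'conditioning_name': 'z_image_cond_example'}
assert ZImageConditioningField.model_validate(payload) == field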

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 25 deletions

@@ -2,12 +2,6 @@
 
 import torch
 from diffusers.image_processor import VaeImageProcessor
-from diffusers.models.attention_processor import (
-    AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
-    XFormersAttnProcessor,
-)
 from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
 from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
 
@@ -77,26 +71,9 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
         latents = latents.to(TorchDevice.choose_torch_device())
         if self.fp32:
+            # FP32 mode: convert everything to float32 for maximum precision
             vae.to(dtype=torch.float32)
-
-            use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
-                vae.decoder.mid_block.attentions[0].processor,
-                (
-                    AttnProcessor2_0,
-                    XFormersAttnProcessor,
-                    LoRAXFormersAttnProcessor,
-                    LoRAAttnProcessor2_0,
-                ),
-            )
-            # if xformers or torch_2_0 is used attention block does not need
-            # to be in float32 which can save lots of memory
-            if use_torch_2_0_or_xformers:
-                vae.post_quant_conv.to(latents.dtype)
-                vae.decoder.conv_in.to(latents.dtype)
-                vae.decoder.mid_block.to(latents.dtype)
-            else:
-                latents = latents.float()
-
+            latents = latents.float()
         else:
             vae.to(dtype=torch.float16)
             latents = latents.half()
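
A condensed sketch of the decode-dtype logic after this change, isolated from the invocation plumbing. The vae and latents arguments are stand-ins for the objects the node actually loads; the point is that the per-submodule attention-processor casts are gone and the fp32 flag now just picks one dtype for both the VAE and the latents.

# Condensed sketch of the simplified dtype handling above (stand-in objects).
import torch

def prepare_for_decode(vae: torch.nn.Module, latents: torch.Tensor, fp32: bool) -> torch.Tensor:
    if fp32:
        # FP32 mode: everything in float32 for maximum precision
        vae.to(dtype=torch.float32)
        return latents.float()
    # FP16 mode: half precision for speed and lower VRAM
    vae.to(dtype=torch.float16)
    return latents.half()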

invokeai/app/invocations/metadata.py

Lines changed: 9 additions & 1 deletion

@@ -158,6 +158,10 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
     "cogview4_img2img",
     "cogview4_inpaint",
     "cogview4_outpaint",
+    "z_image_txt2img",
+    "z_image_img2img",
+    "z_image_inpaint",
+    "z_image_outpaint",
 ]
 
 
@@ -166,7 +170,7 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
     title="Core Metadata",
     tags=["metadata"],
     category="metadata",
-    version="2.0.0",
+    version="2.1.0",
     classification=Classification.Internal,
 )
 class CoreMetadataInvocation(BaseInvocation):
@@ -217,6 +221,10 @@ class CoreMetadataInvocation(BaseInvocation):
         default=None,
         description="The VAE used for decoding, if the main model's default was not used",
     )
+    qwen3_encoder: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="The Qwen3 text encoder model used for Z-Image inference",
+    )
 
     # High resolution fix metadata.
     hrf_enabled: Optional[bool] = InputField(
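
As an illustration of what these additions carry at runtime, the fragment below shows the two new metadata entries a Z-Image text-to-image run would contribute. The surrounding metadata keys are not shown, and the qwen3_encoder value is left as None here because the serialized shape of ModelIdentifierField is not part of this diff.

# Illustrative only: metadata fragment for a Z-Image txt2img run.
metadata_fragment = {
    "generation_mode": "z_image_txt2img",  # one of the four new modes
    "qwen3_encoder": None,                 # set to the Qwen3 encoder model identifier when used
}
print(metadata_fragment)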

invokeai/app/invocations/model.py

Lines changed: 8 additions & 0 deletions

@@ -72,6 +72,14 @@ class GlmEncoderField(BaseModel):
     text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
 
 
+class Qwen3EncoderField(BaseModel):
+    """Field for Qwen3 text encoder used by Z-Image models."""
+
+    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
+    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
+    loras: List[LoRAField] = Field(default_factory=list, description="LoRAs to apply on model loading")
+
+
 class VAEField(BaseModel):
     vae: ModelIdentifierField = Field(description="Info to load vae submodel")
     seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')
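
A small sketch inspecting the schema of the new field rather than constructing it, since in practice the tokenizer and text_encoder identifiers come from the model manager. Assumes pydantic v2, which exposes model_fields.

# Minimal sketch: list the Qwen3EncoderField fields and their descriptions.
from invokeai.app.invocations.model import Qwen3EncoderField

for name, info in Qwen3EncoderField.model_fields.items():
    print(name, "->", info.description)
# tokenizer -> Info to load tokenizer submodel
# text_encoder -> Info to load text_encoder submodel
# loras -> LoRAs to apply on model loading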

invokeai/app/invocations/primitives.py

Lines changed: 12 additions & 0 deletions

@@ -27,6 +27,7 @@
     SD3ConditioningField,
     TensorField,
     UIComponent,
+    ZImageConditioningField,
 )
 from invokeai.app.services.images.images_common import ImageDTO
 from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -461,6 +462,17 @@ def build(cls, conditioning_name: str) -> "CogView4ConditioningOutput":
         return cls(conditioning=CogView4ConditioningField(conditioning_name=conditioning_name))
 
 
+@invocation_output("z_image_conditioning_output")
+class ZImageConditioningOutput(BaseInvocationOutput):
+    """Base class for nodes that output a Z-Image text conditioning tensor."""
+
+    conditioning: ZImageConditioningField = OutputField(description=FieldDescriptions.cond)
+
+    @classmethod
+    def build(cls, conditioning_name: str) -> "ZImageConditioningOutput":
+        return cls(conditioning=ZImageConditioningField(conditioning_name=conditioning_name))
+
+
 @invocation_output("conditioning_output")
 class ConditioningOutput(BaseInvocationOutput):
     """Base class for nodes that output a single conditioning tensor"""
Lines changed: 112 additions & 0 deletions

@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team
+"""Z-Image Control invocation for spatial conditioning."""
+
+from pydantic import BaseModel, Field
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    InputField,
+    OutputField,
+)
+from invokeai.app.invocations.model import ModelIdentifierField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType
+
+
+class ZImageControlField(BaseModel):
+    """A Z-Image control conditioning field for spatial control (Canny, HED, Depth, Pose, MLSD)."""
+
+    image_name: str = Field(description="The name of the preprocessed control image")
+    control_model: ModelIdentifierField = Field(description="The Z-Image ControlNet adapter model")
+    control_context_scale: float = Field(
+        default=0.75,
+        ge=0.0,
+        le=2.0,
+        description="The strength of the control signal. Recommended range: 0.65-0.80.",
+    )
+    begin_step_percent: float = Field(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is first applied (% of total steps)",
+    )
+    end_step_percent: float = Field(
+        default=1.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is last applied (% of total steps)",
+    )
+
+
+@invocation_output("z_image_control_output")
+class ZImageControlOutput(BaseInvocationOutput):
+    """Z-Image Control output containing control configuration."""
+
+    control: ZImageControlField = OutputField(description="Z-Image control conditioning")
+
+
+@invocation(
+    "z_image_control",
+    title="Z-Image ControlNet",
+    tags=["image", "z-image", "control", "controlnet"],
+    category="control",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class ZImageControlInvocation(BaseInvocation):
+    """Configure Z-Image ControlNet for spatial conditioning.
+
+    Takes a preprocessed control image (e.g., Canny edges, depth map, pose)
+    and a Z-Image ControlNet adapter model to enable spatial control.
+
+    Supports 5 control modes: Canny, HED, Depth, Pose, MLSD.
+    Recommended control_context_scale: 0.65-0.80.
+    """
+
+    image: ImageField = InputField(
+        description="The preprocessed control image (Canny, HED, Depth, Pose, or MLSD)",
+    )
+    control_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.controlnet_model,
+        title="Control Model",
+        ui_model_base=BaseModelType.ZImage,
+        ui_model_type=ModelType.ControlNet,
+    )
+    control_context_scale: float = InputField(
+        default=0.75,
+        ge=0.0,
+        le=2.0,
+        description="Strength of the control signal. Recommended range: 0.65-0.80.",
+        title="Control Scale",
+    )
+    begin_step_percent: float = InputField(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is first applied (% of total steps)",
+    )
+    end_step_percent: float = InputField(
+        default=1.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is last applied (% of total steps)",
+    )
+
+    def invoke(self, context: InvocationContext) -> ZImageControlOutput:
+        return ZImageControlOutput(
+            control=ZImageControlField(
+                image_name=self.image.image_name,
+                control_model=self.control_model,
+                control_context_scale=self.control_context_scale,
+                begin_step_percent=self.begin_step_percent,
+                end_step_percent=self.end_step_percent,
+            )
+        )
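
To illustrate the begin/end step window these fields describe, the sketch below shows how a downstream denoise step could gate the control signal by comparing the current step fraction against the configured window. The control_is_active helper and the step bookkeeping are assumptions about the consumer, not part of this diff.

# Illustrative sketch (assumed consumer logic) of begin/end step gating.
def control_is_active(step_index: int, total_steps: int, begin: float, end: float) -> bool:
    # Express the current step as a fraction of the schedule and check the window.
    fraction = step_index / max(total_steps - 1, 1)
    return begin <= fraction <= end

# Example: with begin=0.0 and end=0.8, control applies for the first 80% of steps.
assert control_is_active(step_index=0, total_steps=30, begin=0.0, end=0.8)
assert not control_is_active(step_index=29, total_steps=30, begin=0.0, end=0.8)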
