
Commit f456400

Merge branch 'main' into feature/xlabs-flux-lora-support
2 parents: e5b756b + 5a0b227

92 files changed (+45558, -930 lines)


invokeai/app/api/dependencies.py

Lines changed: 2 additions & 0 deletions

@@ -49,6 +49,7 @@
     FLUXConditioningInfo,
     SD3ConditioningInfo,
     SDXLConditioningInfo,
+    ZImageConditioningInfo,
 )
 from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.version.invokeai_version import __version__
@@ -129,6 +130,7 @@ def initialize(
                 FLUXConditioningInfo,
                 SD3ConditioningInfo,
                 CogView4ConditioningInfo,
+                ZImageConditioningInfo,
             ],
             ephemeral=True,
         ),

invokeai/app/api/routers/workflows.py

Lines changed: 9 additions & 0 deletions

@@ -223,6 +223,15 @@ async def get_workflow_thumbnail(
     raise HTTPException(status_code=404)
 
 
+@workflows_router.get("/tags", operation_id="get_all_tags")
+async def get_all_tags(
+    categories: Optional[list[WorkflowCategory]] = Query(default=None, description="The categories to include"),
+) -> list[str]:
+    """Gets all unique tags from workflows"""
+
+    return ApiDependencies.invoker.services.workflow_records.get_all_tags(categories=categories)
+
+
 @workflows_router.get("/counts_by_tag", operation_id="get_counts_by_tag")
 async def get_counts_by_tag(
     tags: list[str] = Query(description="The tags to get counts for"),
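
For reference, a minimal sketch of calling the new tags endpoint from a client. The server URL, the /api/v1/workflows route prefix, and the category names are assumptions based on InvokeAI's usual API layout, not confirmed by this diff.

# Minimal sketch (stdlib only): query the new GET /tags endpoint.
# URL prefix, port, and category values below are assumptions.
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode([("categories", "user"), ("categories", "default")])
url = f"http://127.0.0.1:9090/api/v1/workflows/tags?{params}"  # assumed prefix/port
with urllib.request.urlopen(url) as resp:
    tags: list[str] = json.load(resp)
print(tags)  # e.g. ["portrait", "upscaling", ...]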

invokeai/app/invocations/fields.py

Lines changed: 8 additions & 0 deletions

@@ -154,6 +154,7 @@ class FieldDescriptions:
     clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
     t5_encoder = "T5 tokenizer and text encoder"
     glm_encoder = "GLM (THUDM) tokenizer and text encoder"
+    qwen3_encoder = "Qwen3 tokenizer and text encoder"
     clip_embed_model = "CLIP Embed loader"
     clip_g_model = "CLIP-G Embed loader"
     unet = "UNet (scheduler, LoRAs)"
@@ -169,6 +170,7 @@
     flux_model = "Flux model (Transformer) to load"
     sd3_model = "SD3 model (MMDiTX) to load"
     cogview4_model = "CogView4 model (Transformer) to load"
+    z_image_model = "Z-Image model (Transformer) to load"
     sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
     sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
     onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -321,6 +323,12 @@ class CogView4ConditioningField(BaseModel):
     conditioning_name: str = Field(description="The name of conditioning tensor")
 
 
+class ZImageConditioningField(BaseModel):
+    """A Z-Image conditioning tensor primitive value"""
+
+    conditioning_name: str = Field(description="The name of conditioning tensor")
+
+
 class ConditioningField(BaseModel):
     """A conditioning tensor primitive value"""
 
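
A small sketch of the new conditioning field as a plain pydantic model; it serializes and validates like the other primitive fields. Assumes pydantic v2 (which InvokeAI uses); the tensor name is an arbitrary example value.

# Minimal sketch: round-trip the new field through pydantic v2.
from invokeai.app.invocations.fields import ZImageConditioningField

field = ZImageConditioningField(conditioning_name="z_image_cond_example")
payload = field.model_dump()  # {'conditioning_name': 'z_image_cond_example'}
assert ZImageConditioningField.model_validate(payload) == field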

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 25 deletions

@@ -2,12 +2,6 @@
 
 import torch
 from diffusers.image_processor import VaeImageProcessor
-from diffusers.models.attention_processor import (
-    AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
-    XFormersAttnProcessor,
-)
 from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
 from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
 
@@ -77,26 +71,9 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
         latents = latents.to(TorchDevice.choose_torch_device())
         if self.fp32:
+            # FP32 mode: convert everything to float32 for maximum precision
             vae.to(dtype=torch.float32)
-
-            use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
-                vae.decoder.mid_block.attentions[0].processor,
-                (
-                    AttnProcessor2_0,
-                    XFormersAttnProcessor,
-                    LoRAXFormersAttnProcessor,
-                    LoRAAttnProcessor2_0,
-                ),
-            )
-            # if xformers or torch_2_0 is used attention block does not need
-            # to be in float32 which can save lots of memory
-            if use_torch_2_0_or_xformers:
-                vae.post_quant_conv.to(latents.dtype)
-                vae.decoder.conv_in.to(latents.dtype)
-                vae.decoder.mid_block.to(latents.dtype)
-            else:
-                latents = latents.float()
-
+            latents = latents.float()
         else:
             vae.to(dtype=torch.float16)
             latents = latents.half()
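
A condensed sketch of the decode-dtype logic after this change, isolated from the invocation plumbing. The vae and latents arguments are stand-ins for the objects the node actually loads; the point is that the per-submodule attention-processor casts are gone and the fp32 flag now just picks one dtype for both the VAE and the latents.

# Condensed sketch of the simplified dtype handling above (stand-in objects).
import torch

def prepare_for_decode(vae: torch.nn.Module, latents: torch.Tensor, fp32: bool) -> torch.Tensor:
    if fp32:
        # FP32 mode: everything in float32 for maximum precision
        vae.to(dtype=torch.float32)
        return latents.float()
    # FP16 mode: half precision for speed and lower VRAM
    vae.to(dtype=torch.float16)
    return latents.half()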

invokeai/app/invocations/metadata.py

Lines changed: 9 additions & 1 deletion

@@ -158,6 +158,10 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
     "cogview4_img2img",
     "cogview4_inpaint",
     "cogview4_outpaint",
+    "z_image_txt2img",
+    "z_image_img2img",
+    "z_image_inpaint",
+    "z_image_outpaint",
 ]
 
 
@@ -166,7 +170,7 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
     title="Core Metadata",
     tags=["metadata"],
     category="metadata",
-    version="2.0.0",
+    version="2.1.0",
     classification=Classification.Internal,
 )
 class CoreMetadataInvocation(BaseInvocation):
@@ -217,6 +221,10 @@ class CoreMetadataInvocation(BaseInvocation):
         default=None,
         description="The VAE used for decoding, if the main model's default was not used",
     )
+    qwen3_encoder: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="The Qwen3 text encoder model used for Z-Image inference",
+    )
 
     # High resolution fix metadata.
     hrf_enabled: Optional[bool] = InputField(
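
As an illustration of what these additions carry at runtime, the fragment below shows the two new metadata entries a Z-Image text-to-image run would contribute. The surrounding metadata keys are not shown, and the qwen3_encoder value is left as None here because the serialized shape of ModelIdentifierField is not part of this diff.

# Illustrative only: metadata fragment for a Z-Image txt2img run.
metadata_fragment = {
    "generation_mode": "z_image_txt2img",  # one of the four new modes
    "qwen3_encoder": None,                 # set to the Qwen3 encoder model identifier when used
}
print(metadata_fragment)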

invokeai/app/invocations/model.py

Lines changed: 8 additions & 0 deletions

@@ -72,6 +72,14 @@ class GlmEncoderField(BaseModel):
     text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
 
 
+class Qwen3EncoderField(BaseModel):
+    """Field for Qwen3 text encoder used by Z-Image models."""
+
+    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
+    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
+    loras: List[LoRAField] = Field(default_factory=list, description="LoRAs to apply on model loading")
+
+
 class VAEField(BaseModel):
     vae: ModelIdentifierField = Field(description="Info to load vae submodel")
     seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')
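
A small sketch inspecting the schema of the new field rather than constructing it, since in practice the tokenizer and text_encoder identifiers come from the model manager. Assumes pydantic v2, which exposes model_fields.

# Minimal sketch: list the Qwen3EncoderField fields and their descriptions.
from invokeai.app.invocations.model import Qwen3EncoderField

for name, info in Qwen3EncoderField.model_fields.items():
    print(name, "->", info.description)
# tokenizer -> Info to load tokenizer submodel
# text_encoder -> Info to load text_encoder submodel
# loras -> LoRAs to apply on model loading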

invokeai/app/invocations/primitives.py

Lines changed: 12 additions & 0 deletions

@@ -27,6 +27,7 @@
     SD3ConditioningField,
     TensorField,
     UIComponent,
+    ZImageConditioningField,
 )
 from invokeai.app.services.images.images_common import ImageDTO
 from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -461,6 +462,17 @@ def build(cls, conditioning_name: str) -> "CogView4ConditioningOutput":
         return cls(conditioning=CogView4ConditioningField(conditioning_name=conditioning_name))
 
 
+@invocation_output("z_image_conditioning_output")
+class ZImageConditioningOutput(BaseInvocationOutput):
+    """Base class for nodes that output a Z-Image text conditioning tensor."""
+
+    conditioning: ZImageConditioningField = OutputField(description=FieldDescriptions.cond)
+
+    @classmethod
+    def build(cls, conditioning_name: str) -> "ZImageConditioningOutput":
+        return cls(conditioning=ZImageConditioningField(conditioning_name=conditioning_name))
+
+
 @invocation_output("conditioning_output")
 class ConditioningOutput(BaseInvocationOutput):
     """Base class for nodes that output a single conditioning tensor"""
Lines changed: 112 additions & 0 deletions

@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team
+"""Z-Image Control invocation for spatial conditioning."""
+
+from pydantic import BaseModel, Field
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    InputField,
+    OutputField,
+)
+from invokeai.app.invocations.model import ModelIdentifierField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType
+
+
+class ZImageControlField(BaseModel):
+    """A Z-Image control conditioning field for spatial control (Canny, HED, Depth, Pose, MLSD)."""
+
+    image_name: str = Field(description="The name of the preprocessed control image")
+    control_model: ModelIdentifierField = Field(description="The Z-Image ControlNet adapter model")
+    control_context_scale: float = Field(
+        default=0.75,
+        ge=0.0,
+        le=2.0,
+        description="The strength of the control signal. Recommended range: 0.65-0.80.",
+    )
+    begin_step_percent: float = Field(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is first applied (% of total steps)",
+    )
+    end_step_percent: float = Field(
+        default=1.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is last applied (% of total steps)",
+    )
+
+
+@invocation_output("z_image_control_output")
+class ZImageControlOutput(BaseInvocationOutput):
+    """Z-Image Control output containing control configuration."""
+
+    control: ZImageControlField = OutputField(description="Z-Image control conditioning")
+
+
+@invocation(
+    "z_image_control",
+    title="Z-Image ControlNet",
+    tags=["image", "z-image", "control", "controlnet"],
+    category="control",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class ZImageControlInvocation(BaseInvocation):
+    """Configure Z-Image ControlNet for spatial conditioning.
+
+    Takes a preprocessed control image (e.g., Canny edges, depth map, pose)
+    and a Z-Image ControlNet adapter model to enable spatial control.
+
+    Supports 5 control modes: Canny, HED, Depth, Pose, MLSD.
+    Recommended control_context_scale: 0.65-0.80.
+    """
+
+    image: ImageField = InputField(
+        description="The preprocessed control image (Canny, HED, Depth, Pose, or MLSD)",
+    )
+    control_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.controlnet_model,
+        title="Control Model",
+        ui_model_base=BaseModelType.ZImage,
+        ui_model_type=ModelType.ControlNet,
+    )
+    control_context_scale: float = InputField(
+        default=0.75,
+        ge=0.0,
+        le=2.0,
+        description="Strength of the control signal. Recommended range: 0.65-0.80.",
+        title="Control Scale",
+    )
+    begin_step_percent: float = InputField(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is first applied (% of total steps)",
+    )
+    end_step_percent: float = InputField(
+        default=1.0,
+        ge=0.0,
+        le=1.0,
+        description="When the control is last applied (% of total steps)",
+    )
+
+    def invoke(self, context: InvocationContext) -> ZImageControlOutput:
+        return ZImageControlOutput(
+            control=ZImageControlField(
+                image_name=self.image.image_name,
+                control_model=self.control_model,
+                control_context_scale=self.control_context_scale,
+                begin_step_percent=self.begin_step_percent,
+                end_step_percent=self.end_step_percent,
+            )
+        )
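
To illustrate the begin/end step window these fields describe, the sketch below shows how a downstream denoise step could gate the control signal by comparing the current step fraction against the configured window. The control_is_active helper and the step bookkeeping are assumptions about the consumer, not part of this diff.

# Illustrative sketch (assumed consumer logic) of begin/end step gating.
def control_is_active(step_index: int, total_steps: int, begin: float, end: float) -> bool:
    # Express the current step as a fraction of the schedule and check the window.
    fraction = step_index / max(total_steps - 1, 1)
    return begin <= fraction <= end

# Example: with begin=0.0 and end=0.8, control applies for the first 80% of steps.
assert control_is_active(step_index=0, total_steps=30, begin=0.0, end=0.8)
assert not control_is_active(step_index=29, total_steps=30, begin=0.0, end=0.8)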
