Support Flux 2 Klein 9B variant

Acly · Acly · commit f00ed73ffcc8 · 2026-01-16T22:00:22.000+01:00
diff --git a/ai_diffusion/comfy_client.py b/ai_diffusion/comfy_client.py
@@ -727,7 +727,7 @@ def _find_text_encoder_models(model_list: Sequence[str]):
     kind = ResourceKind.text_encoder
     return {
         resource_id(kind, Arch.all, te): _find_model(model_list, kind, Arch.all, te)
-        for te in ["clip_l", "clip_g", "t5", "qwen", "qwen_3"]
+        for te in ["clip_l", "clip_g", "t5", "qwen", "qwen_3_4b", "qwen_3_8b"]
     }
 
 
diff --git a/ai_diffusion/comfy_workflow.py b/ai_diffusion/comfy_workflow.py
@@ -601,7 +601,7 @@ def empty_latent_image(self, extent: Extent, arch: Arch, batch_size=1):
         w, h = extent.width, extent.height
         if arch.is_flux_like or arch.is_qwen_like or arch in (Arch.sd3, Arch.chroma, Arch.zimage):
             return self.add("EmptySD3LatentImage", 1, width=w, height=h, batch_size=batch_size)
-        if arch is Arch.flux2:
+        if arch.is_flux2:
             return self.add("EmptyFlux2LatentImage", 1, width=w, height=h, batch_size=batch_size)
         else:
             return self.add("EmptyLatentImage", 1, width=w, height=h, batch_size=batch_size)
diff --git a/ai_diffusion/resolution.py b/ai_diffusion/resolution.py
@@ -183,7 +183,7 @@ def prepare_diffusion_input(
 
     # The checkpoint may require a different resolution than what is requested.
     mult = 8
-    if arch.is_flux_like or arch in (Arch.chroma, Arch.flux2):
+    if arch.is_flux_like or arch is Arch.chroma or arch.is_flux2:
         mult = 16
     if arch is Arch.sd3:
         mult = 64
diff --git a/ai_diffusion/resources.py b/ai_diffusion/resources.py
@@ -80,7 +80,8 @@ class Arch(Enum):
     sd3 = "SD 3"
     flux = "Flux"
     flux_k = "Flux Kontext"
-    flux2 = "Flux 2 Klein 4B"
+    flux2_4b = "Flux 2 Klein 4B"
+    flux2_9b = "Flux 2 Klein 9B"
     illu = "Illustrious"
     illu_v = "Illustrious v-prediction"
     chroma = "Chroma"
@@ -109,7 +110,9 @@ def from_string(string: str, model_type: str = "eps", filename: str | None = Non
         if string == "flux" or string == "flux-schnell":
             return Arch.flux
         if string == "flux2" and model_type == "klein-4b":
-            return Arch.flux2
+            return Arch.flux2_4b
+        if string == "flux2" and model_type == "klein-9b":
+            return Arch.flux2_9b
         if string == "illu":
             return Arch.illu
         if string == "illu_v":
@@ -190,7 +193,7 @@ def is_edit(self):  # edit models make changes to input images
 
     @property
     def supports_edit(self):  # includes text-to-image models that can also edit
-        return self.is_edit or self is Arch.flux2
+        return self.is_edit or self.is_flux2
 
     @property
     def is_sdxl_like(self):
@@ -201,6 +204,10 @@ def is_sdxl_like(self):
     def is_flux_like(self):
         return self in [Arch.flux, Arch.flux_k]
 
+    @property
+    def is_flux2(self):
+        return self in [Arch.flux2_4b, Arch.flux2_9b]
+
     @property
     def is_qwen_like(self):
         return self in [Arch.qwen, Arch.qwen_e, Arch.qwen_e_p, Arch.qwen_l]
@@ -216,12 +223,16 @@ def text_encoders(self):
                 return ["clip_l", "clip_g"]
             case Arch.flux | Arch.flux_k:
                 return ["clip_l", "t5"]
+            case Arch.flux2_4b:
+                return ["qwen_3_4b"]
+            case Arch.flux2_9b:
+                return ["qwen_3_8b"]
             case Arch.chroma:
                 return ["t5"]
             case Arch.qwen | Arch.qwen_e | Arch.qwen_e_p | Arch.qwen_l:
                 return ["qwen"]
-            case Arch.zimage | Arch.flux2:
-                return ["qwen_3"]
+            case Arch.zimage:
+                return ["qwen_3_4b"]
         raise ValueError(f"Unsupported architecture: {self}")
 
     @staticmethod
@@ -232,7 +243,8 @@ def list():
             Arch.sd3,
             Arch.flux,
             Arch.flux_k,
-            Arch.flux2,
+            Arch.flux2_4b,
+            Arch.flux2_9b,
             Arch.illu,
             Arch.illu_v,
             Arch.chroma,
@@ -753,15 +765,17 @@ def is_required(kind: ResourceKind, arch: Arch, identifier: ControlMode | Upscal
     resource_id(ResourceKind.text_encoder, Arch.all, "clip_g"): ["clip_g"],
     resource_id(ResourceKind.text_encoder, Arch.all, "t5"): ["t5xxl_fp16", "t5xxl_fp8_e4m3fn", "t5xxl_fp8_e4m3fn_scaled", "t5-v1_1-xxl", "t5"],
     resource_id(ResourceKind.text_encoder, Arch.all, "qwen"): ["qwen_2.5_vl_7b", "qwen_2", "qwen-2", "qwen"],
-    resource_id(ResourceKind.text_encoder, Arch.all, "qwen_3"): ["qwen_3_4b", "qwen3-4b", "qwen_3", "qwen-3"],
+    resource_id(ResourceKind.text_encoder, Arch.all, "qwen_3_4b"): ["qwen_3_4b", "qwen3-4b", "qwen_3", "qwen-3"],
+    resource_id(ResourceKind.text_encoder, Arch.all, "qwen_3_8b"): ["qwen_3_8b", "qwen3-8b"],
     resource_id(ResourceKind.vae, Arch.sd15, "default"): ["vae-ft-mse-840000-ema"],
     resource_id(ResourceKind.vae, Arch.sdxl, "default"): ["sdxl_vae"],
     resource_id(ResourceKind.vae, Arch.illu, "default"): ["sdxl_vae"],
     resource_id(ResourceKind.vae, Arch.illu_v, "default"): ["sdxl_vae"],
     resource_id(ResourceKind.vae, Arch.sd3, "default"): ["sd3"],
     resource_id(ResourceKind.vae, Arch.flux, "default"): ["flux-", "flux_", "flux/", "flux1", "ae.s"],
     resource_id(ResourceKind.vae, Arch.flux_k, "default"): ["flux-", "flux_", "flux/", "flux1", "ae.s"],
-    resource_id(ResourceKind.vae, Arch.flux2, "default"): ["flux2"],
+    resource_id(ResourceKind.vae, Arch.flux2_4b, "default"): ["flux2"],
+    resource_id(ResourceKind.vae, Arch.flux2_9b, "default"): ["flux2"],
     resource_id(ResourceKind.vae, Arch.chroma, "default"): ["flux-", "flux_", "flux/", "flux1", "ae.s"],
     resource_id(ResourceKind.vae, Arch.qwen, "default"): ["qwen"],
     resource_id(ResourceKind.vae, Arch.qwen_e, "default"): ["qwen"],
diff --git a/ai_diffusion/ui/theme.py b/ai_diffusion/ui/theme.py
@@ -63,7 +63,7 @@ def checkpoint_icon(arch: Arch, format: FileFormat | None = None, client: Client
         return icon("sd-version-flux")
     elif arch is Arch.flux_k:
         return icon("sd-version-flux-k")
-    elif arch is Arch.flux2:
+    elif arch.is_flux2:
         return icon("sd-version-flux-2")
     elif arch is Arch.illu:
         return icon("sd-version-illu")
diff --git a/ai_diffusion/workflow.py b/ai_diffusion/workflow.py
@@ -133,15 +133,17 @@ def load_checkpoint_with_lora(w: ComfyWorkflow, checkpoint: CheckpointInput, mod
                     clip = w.load_dual_clip(te["clip_g"], te["clip_l"], type="sd3")
             case Arch.flux | Arch.flux_k:
                 clip = w.load_dual_clip(te["clip_l"], te["t5"], type="flux")
-            case Arch.flux2:
-                clip = w.load_clip(te["qwen_3"], type="flux2")
+            case Arch.flux2_4b:
+                clip = w.load_clip(te["qwen_3_4b"], type="flux2")
+            case Arch.flux2_9b:
+                clip = w.load_clip(te["qwen_3_8b"], type="flux2")
             case Arch.chroma:
                 clip = w.load_clip(te["t5"], type="chroma")
                 clip = w.t5_tokenizer_options(clip, min_padding=1, min_length=0)
             case Arch.qwen | Arch.qwen_e | Arch.qwen_e_p | Arch.qwen_l:
                 clip = w.load_clip(te["qwen"], type="qwen_image")
             case Arch.zimage:
-                clip = w.load_clip(te["qwen_3"], type="lumina2")
+                clip = w.load_clip(te["qwen_3_4b"], type="lumina2")
             case _:
                 raise RuntimeError(f"No text encoder for model architecture {arch.name}")
 
@@ -701,7 +703,7 @@ def apply_reference_conditioning(
     extra_input = (c.image for c in cond.all_control if c.mode.is_ip_adapter)
     extra_images = [i.load(w) for i in extra_input]
     match arch:
-        case Arch.flux2 | Arch.qwen_e_p:
+        case Arch.flux2_4b | Arch.flux2_9b | Arch.qwen_e_p:
             if cond.edit_reference and input_latent:
                 positive = w.reference_latent(positive, input_latent)
             for extra_image in extra_images:
@@ -1455,7 +1457,7 @@ def prepare_prompts(
         "negative_prompt": cond.negative,
     }
     models = style.get_models([])
-    layer_replace = "Picture {}" if arch in (Arch.qwen_e_p, Arch.flux2) else ""
+    layer_replace = "Picture {}" if arch is Arch.qwen_e_p or arch.is_flux2 else ""
 
     cond.style = style.style_prompt
     cond.positive = strip_prompt_comments(cond.positive)
diff --git a/tests/config.py b/tests/config.py
@@ -20,6 +20,6 @@
     Arch.sdxl: "RealVisXL_V5.0_fp16.safetensors",
     Arch.flux: "svdq-int4_r32-flux.1-krea-dev.safetensors",
     Arch.flux_k: "svdq-int4_r32-flux.1-kontext-dev.safetensors",
-    Arch.flux2: "flux-2-klein-4b.safetensors",
+    Arch.flux2_4b: "flux-2-klein-4b.safetensors",
     Arch.zimage: "z_image_turbo_bf16.safetensors",
 }
diff --git a/tests/test_workflow.py b/tests/test_workflow.py
@@ -85,7 +85,7 @@ def default_style(client: Client, sd_ver=Arch.sd15):
         style.sampler = "Flux - Euler simple"
         style.cfg_scale = 1.0
         style.sampler_steps = 8
-    if sd_ver is Arch.flux2:
+    if sd_ver.is_flux2:
         style.sampler = "Flux 2 - Euler"
         style.cfg_scale = 1.0
         style.sampler_steps = 4
@@ -815,7 +815,7 @@ def test_refine_live(qtapp, client, sdver):
     run_and_save(qtapp, client, job, f"test_refine_live_{sdver.name}")
 
 
-@pytest.mark.parametrize("arch", [Arch.flux_k, Arch.flux2])
+@pytest.mark.parametrize("arch", [Arch.flux_k, Arch.flux2_4b])
 def test_edit(qtapp, local_client, arch):
     image = Image.load(image_dir / "flowers.webp")
     style = default_style(local_client, arch)
@@ -825,7 +825,7 @@ def test_edit(qtapp, local_client, arch):
     run_and_save(qtapp, local_client, job, f"test_edit_{arch.name}")
 
 
-@pytest.mark.parametrize("arch", [Arch.flux_k, Arch.flux2])
+@pytest.mark.parametrize("arch", [Arch.flux_k, Arch.flux2_4b])
 def test_edit_selection(qtapp, local_client, arch):
     image = Image.load(image_dir / "flowers.webp")
     mask = Mask.load(image_dir / "flowers_mask.png")

Original file line number	Diff line number	Diff line change
`@@ -727,7 +727,7 @@ def _find_text_encoder_models(model_list: Sequence[str]):`
`727`	`727`	`kind = ResourceKind.text_encoder`
`728`	`728`	`return {`
`729`	`729`	`resource_id(kind, Arch.all, te): _find_model(model_list, kind, Arch.all, te)`
`730`		`- for te in ["clip_l", "clip_g", "t5", "qwen", "qwen_3"]`
	`730`	`+ for te in ["clip_l", "clip_g", "t5", "qwen", "qwen_3_4b", "qwen_3_8b"]`
`731`	`731`	`}`
`732`	`732`
`733`	`733`
Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,6 @@`
`20`	`20`	`Arch.sdxl: "RealVisXL_V5.0_fp16.safetensors",`
`21`	`21`	`Arch.flux: "svdq-int4_r32-flux.1-krea-dev.safetensors",`
`22`	`22`	`Arch.flux_k: "svdq-int4_r32-flux.1-kontext-dev.safetensors",`
`23`		`- Arch.flux2: "flux-2-klein-4b.safetensors",`
	`23`	`+ Arch.flux2_4b: "flux-2-klein-4b.safetensors",`
`24`	`24`	`Arch.zimage: "z_image_turbo_bf16.safetensors",`
`25`	`25`	`}`