leejet
diff --git a/README.md b/README.md
Lines changed: 2 additions & 0 deletions
diff --git a/assets/z_image/bf16.png b/assets/z_image/bf16.png
Binary file, 1.01 MB
diff --git a/assets/z_image/q2_K.png b/assets/z_image/q2_K.png
Binary file, 1.15 MB
diff --git a/assets/z_image/q3_K.png b/assets/z_image/q3_K.png
Binary file, 1.07 MB
diff --git a/assets/z_image/q4_0.png b/assets/z_image/q4_0.png
Binary file, 1.01 MB
diff --git a/assets/z_image/q4_K.png b/assets/z_image/q4_K.png
Binary file, 1.02 MB
diff --git a/assets/z_image/q5_0.png b/assets/z_image/q5_0.png
Binary file, 1.01 MB
diff --git a/assets/z_image/q6_K.png b/assets/z_image/q6_K.png
Binary file, 1.02 MB
diff --git a/assets/z_image/q8_0.png b/assets/z_image/q8_0.png
Binary file, 1.01 MB
diff --git a/conditioner.hpp b/conditioner.hpp
Lines changed: 28 additions & 4 deletions
@@ -45,6 +45,7 @@ API and command-line option may change frequently.***
     - [Chroma](./docs/chroma.md)
     - [Chroma1-Radiance](./docs/chroma_radiance.md)
     - [Qwen Image](./docs/qwen_image.md)
+    - [Z-Image](./docs/z_image.md)
   - Image Edit Models
     - [FLUX.1-Kontext-dev](./docs/kontext.md)
     - [Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
@@ -129,6 +130,7 @@ If you want to improve performance or reduce VRAM/RAM usage, please refer to [pe
 - [🔥Qwen Image](./docs/qwen_image.md)
 - [🔥Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
 - [🔥Wan2.1/Wan2.2](./docs/wan.md)
+- [🔥Z-Image](./docs/z_image.md)
 - [LoRA](./docs/lora.md)
 - [LCM/LCM-LoRA](./docs/lcm.md)
 - [Using PhotoMaker to personalize image generation](./docs/photo_maker.md)
 
@@ -1638,6 +1638,8 @@ struct LLMEmbedder : public Conditioner {
         LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL;
         if (sd_version_is_flux2(version)) {
             arch = LLM::LLMArch::MISTRAL_SMALL_3_2;
+        } else if (sd_version_is_z_image(version)) {
+            arch = LLM::LLMArch::QWEN3;
         }
         if (arch == LLM::LLMArch::MISTRAL_SMALL_3_2) {
             tokenizer = std::make_shared<LLM::MistralTokenizer>();
@@ -1785,9 +1787,31 @@ struct LLMEmbedder : public Conditioner {
             prompt = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n";
             prompt += img_prompt;
 
-            prompt_attn_range.first = prompt.size();
+            prompt_attn_range.first = static_cast<int>(prompt.size());
             prompt += conditioner_params.text;
-            prompt_attn_range.second = prompt.size();
+            prompt_attn_range.second = static_cast<int>(prompt.size());
+
+            prompt += "<|im_end|>\n<|im_start|>assistant\n";
+        } else if (sd_version_is_flux2(version)) {
+            prompt_template_encode_start_idx = 0;
+            out_layers                       = {10, 20, 30};
+
+            prompt = "[SYSTEM_PROMPT]You are an AI that reasons about image descriptions. You give structured responses focusing on object relationships, object\nattribution and actions without speculation.[/SYSTEM_PROMPT][INST]";
+
+            prompt_attn_range.first = static_cast<int>(prompt.size());
+            prompt += conditioner_params.text;
+            prompt_attn_range.second = static_cast<int>(prompt.size());
+
+            prompt += "[/INST]";
+        } else if (sd_version_is_z_image(version)) {
+            prompt_template_encode_start_idx = 0;
+            out_layers                       = {35};  // -2
+
+            prompt = "<|im_start|>user\n";
+
+            prompt_attn_range.first = static_cast<int>(prompt.size());
+            prompt += conditioner_params.text;
+            prompt_attn_range.second = static_cast<int>(prompt.size());
 
             prompt += "<|im_end|>\n<|im_start|>assistant\n";
         } else if (sd_version_is_flux2(version)) {
@@ -1806,9 +1830,9 @@ struct LLMEmbedder : public Conditioner {
 
             prompt = "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n";
 
-            prompt_attn_range.first = prompt.size();
+            prompt_attn_range.first = static_cast<int>(prompt.size());
             prompt += conditioner_params.text;
-            prompt_attn_range.second = prompt.size();
+            prompt_attn_range.second = static_cast<int>(prompt.size());
 
             prompt += "<|im_end|>\n<|im_start|>assistant\n";
         }