diff --git a/docs.json b/docs.json index 6a58ebc97..3c5fe1490 100644 --- a/docs.json +++ b/docs.json @@ -142,6 +142,12 @@ "tutorials/image/hidream/hidream-e1" ] }, + { + "group": "Z-Image", + "pages": [ + "tutorials/image/z-image/z-image-turbo" + ] + }, "tutorials/image/cosmos/cosmos-predict2-t2i", "tutorials/image/omnigen/omnigen2" ] @@ -757,6 +763,12 @@ "zh-CN/tutorials/image/hidream/hidream-e1" ] }, + { + "group": "Z-Image", + "pages": [ + "zh-CN/tutorials/image/z-image/z-image-turbo" + ] + }, "zh-CN/tutorials/image/cosmos/cosmos-predict2-t2i", "zh-CN/tutorials/image/omnigen/omnigen2" ] diff --git a/tutorials/image/z-image/z-image-turbo.mdx b/tutorials/image/z-image/z-image-turbo.mdx new file mode 100644 index 000000000..17a97b020 --- /dev/null +++ b/tutorials/image/z-image/z-image-turbo.mdx @@ -0,0 +1,63 @@ +--- +title: "Z-Image ComfyUI Workflow Example" +description: "Z-Image is an efficient 6B-parameter image generation foundation model built on a single-stream diffusion transformer, offering sub-second inference latency." +sidebarTitle: "Z-Image" +--- + +import UpdateReminder from '/snippets/tutorials/update-reminder.mdx' + +**Z-Image (造相)** is a powerful and highly efficient image generation model with **6B** parameters, developed by Alibaba's Tongyi Lab. It uses a **Scalable Single-Stream DiT** (S3-DiT) architecture where text, visual semantic tokens, and image VAE tokens are concatenated at the sequence level to serve as a unified input stream, maximizing parameter efficiency. + +**Model Variants**: +- 🚀 **Z-Image-Turbo** – A distilled version that matches or exceeds leading competitors with only **8 NFEs** (Number of Function Evaluations). It offers **sub-second inference latency** on enterprise-grade H800 GPUs and runs on **consumer devices with 16GB of VRAM**. +- 🧱 **Z-Image-Base** – The non-distilled foundation model for community-driven fine-tuning and custom development. 
+- ✍️ **Z-Image-Edit** – A variant fine-tuned for image editing tasks with impressive instruction-following capabilities. + +**Model Highlights**: +- **Photorealistic Quality**: Delivers strong photorealistic image generation while maintaining excellent aesthetic quality +- **Accurate Bilingual Text Rendering**: Excels at accurately rendering complex Chinese and English text +- **Prompt Enhancing & Reasoning**: The Prompt Enhancer gives the model reasoning capabilities +- **Sub-second Inference**: Achieves fast generation speed on supported hardware + +**Related Links**: +- [GitHub](https://github.com/Tongyi-MAI/Z-Image) +- [Hugging Face](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) + +## Z-Image-Turbo text-to-image workflow + + + Download JSON Workflow File + + + + Run on ComfyUI Cloud + + + + +## Model links + +**text_encoders** + +- [qwen_3_4b.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors) + +**diffusion_models** + +- [z_image_turbo_bf16.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors) + +**vae** + +- [ae.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors) + +**Model Storage Location** + +``` +📂 ComfyUI/ +├── 📂 models/ +│ ├── 📂 text_encoders/ +│ │ └── qwen_3_4b.safetensors +│ ├── 📂 diffusion_models/ +│ │ └── z_image_turbo_bf16.safetensors +│ └── 📂 vae/ +│ └── ae.safetensors +``` diff --git a/zh-CN/tutorials/image/z-image/z-image-turbo.mdx b/zh-CN/tutorials/image/z-image/z-image-turbo.mdx new file mode 100644 index 000000000..c16b5c3ef --- /dev/null +++ b/zh-CN/tutorials/image/z-image/z-image-turbo.mdx @@ -0,0 +1,63 @@ +--- +title: "Z-Image ComfyUI 工作流示例" +description: "Z-Image 是一个拥有 6B 参数的高效图像生成基础模型,采用单流扩散变换器架构,可实现亚秒级推理延迟。" +sidebarTitle: "Z-Image" +--- + +import UpdateReminder from '/snippets/zh/tutorials/update-reminder.mdx' + +**Z-Image(造相)** 
是阿里巴巴通义实验室开发的一个强大且高效的图像生成模型,拥有 **6B** 参数。它采用 **可扩展单流 DiT**(S3-DiT)架构,将文本、视觉语义 token 和图像 VAE token 在序列级别进行拼接,作为统一的输入流,最大化参数效率。 + +**模型变体**: +- 🚀 **Z-Image-Turbo** – 蒸馏版本,仅需 **8 NFEs**(函数评估次数)即可达到或超越领先竞争对手。在企业级 H800 GPU 上可实现**亚秒级推理延迟**,并可在 **16GB 显存的消费级设备**上运行。 +- 🧱 **Z-Image-Base** – 非蒸馏基础模型,用于社区驱动的微调和自定义开发。 +- ✍️ **Z-Image-Edit** – 针对图像编辑任务微调的变体,具有出色的指令遵循能力。 + +**模型亮点**: +- **照片级真实质量**:在保持出色美学质量的同时,提供强大的照片级真实图像生成 +- **精准的双语文本渲染**:擅长准确渲染复杂的中英文文本 +- **提示词增强与推理**:提示词增强器赋予模型推理能力 +- **亚秒级推理**:在支持的硬件上实现快速生成 + +**相关链接**: +- [GitHub](https://github.com/Tongyi-MAI/Z-Image) +- [Hugging Face](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) + +## Z-Image-Turbo 文生图工作流 + + + 下载 JSON 工作流文件 + + + + 在 ComfyUI Cloud 上运行 + + + + +## 模型链接 + +**text_encoders(文本编码器)** + +- [qwen_3_4b.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors) + +**diffusion_models(扩散模型)** + +- [z_image_turbo_bf16.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors) + +**vae** + +- [ae.safetensors](https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors) + +**模型存储位置** + +``` +📂 ComfyUI/ +├── 📂 models/ +│ ├── 📂 text_encoders/ +│ │ └── qwen_3_4b.safetensors +│ ├── 📂 diffusion_models/ +│ │ └── z_image_turbo_bf16.safetensors +│ └── 📂 vae/ +│ └── ae.safetensors +```
Download JSON Workflow File
Run on ComfyUI Cloud
下载 JSON 工作流文件
在 ComfyUI Cloud 上运行