Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions config/examples/train_lora_zimage_base_32gb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
---
# Z-Image Base - Character/Person LoRA training config (32GB VRAM, e.g. RTX 5090).
# Best practices applied here: Prodigy (or Prodigy Schedule Free) optimizer,
# batch_size 2 (or an effective batch of 4 via gradient_accumulation), DOP for
# identity preservation, 1024 resolution, linear rank 128.
# Before training: replace folder_path and the [trigger] sample prompts below.
job: extension
config:
  # Run name; checkpoints and samples are written under training_folder/<name>.
  name: "my_zimage_base_character_lora_v1"
  process:
    - type: 'diffusion_trainer'
      training_folder: "output"
      device: cuda:0
      # LoRA: rank 128 works well for character identity; use 64 if VRAM is tight.
      # Z-Image typically needs no conv-layer training.
      network:
        type: "lora"
        linear: 128
        linear_alpha: 128
      save:
        dtype: bf16                    # precision of the saved LoRA weights
        save_every: 500                # checkpoint interval, in steps
        max_step_saves_to_keep: 6      # older step checkpoints are rotated out
        save_format: safetensors
      datasets:
        - folder_path: "/path/to/images/folder"  # replace with your dataset folder
          caption_ext: "txt"                     # one .txt caption file per image
          caption_dropout_rate: 0.05             # train a small fraction of steps uncaptioned
          cache_latents_to_disk: true            # cache VAE latents so they are not recomputed
          # 1024 matches Z-Image's native resolution; use [512, 768, 1024] for
          # multi-resolution training if preferred.
          resolution: [ 1024, 1024 ]
      train:
        batch_size: 2 # 32GB allows 2; Prodigy works well with larger batches. Try 4, or gradient_accumulation: 2, if there is headroom
        gradient_accumulation: 1
        steps: 3000 # 2500-3000 is typical for character identity
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: "flowmatch"
        timestep_type: "weighted"
        content_or_style: "balanced"
        loss_type: "mse"
        dtype: bf16
        # Prodigy is adaptive: set the nominal lr to 1.0 and let it scale itself.
        # Use optimizer "prodigy_schedulefree" for the schedule-free variant.
        optimizer: "prodigy"
        lr: 1.0
        optimizer_params:
          weight_decay: 0.01
        lr_scheduler: "constant"
        # DOP (diff output preservation): preserves the base model's output for the
        # class word without the trigger, reducing overfitting for character LoRAs.
        diff_output_preservation: true
        diff_output_preservation_multiplier: 1.0
        diff_output_preservation_class: "person"
        switch_boundary_every: 1
        unload_text_encoder: false
        # cache_text_embeddings: true # optional; saves VRAM when captions are used
        ema_config:
          use_ema: false
          ema_decay: 0.99
        skip_first_sample: false
        disable_sampling: false
      logging:
        log_every: 1
        use_ui_logger: true
      model:
        name_or_path: "Tongyi-MAI/Z-Image"
        arch: "zimage"
        quantize: true        # float8-quantize the transformer to fit in 32GB
        qtype: "qfloat8"
        quantize_te: true     # also quantize the text encoder
        qtype_te: "qfloat8"
        low_vram: false # set true if you hit OOM on 32GB
        model_kwargs: {}
      sample:
        sampler: "flowmatch"
        sample_every: 250     # sample-generation interval, in steps
        width: 1024
        height: 1024
        # [trigger] is substituted with the trained trigger word at sample time.
        samples:
          - prompt: "[trigger], studio portrait, soft lighting"
          - prompt: "[trigger] on a beach, golden hour"
          - prompt: "[trigger], casual outfit, urban background"
        neg: ""
        seed: 42
        walk_seed: true
        guidance_scale: 4 # the non-distilled Base model uses CFG
        sample_steps: 30
meta:
  name: "[name]"
  version: '1.0'
84 changes: 84 additions & 0 deletions config/examples/train_lora_zimage_turbo_32gb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
---
# Z-Image Turbo - Character/Person LoRA training config (32GB VRAM, e.g. RTX 5090).
# Turbo is a distilled model: a training adapter (assistant_lora_path) is required
# to avoid quality loss while training; use the v2 adapter.
# Best practices applied here: Prodigy or Prodigy Schedule Free, batch_size 2,
# DOP for identity preservation, 1024 resolution, rank 128.
# Before training: replace folder_path and the [trigger] sample prompts below.
job: extension
config:
  # Run name; checkpoints and samples are written under training_folder/<name>.
  name: "my_zimage_turbo_character_lora_v1"
  process:
    - type: 'diffusion_trainer'
      training_folder: "output"
      device: cuda:0
      # LoRA: rank 128 for character identity; use 64 if VRAM is tight.
      network:
        type: "lora"
        linear: 128
        linear_alpha: 128
      save:
        dtype: bf16                    # precision of the saved LoRA weights
        save_every: 500                # checkpoint interval, in steps
        max_step_saves_to_keep: 6      # older step checkpoints are rotated out
        save_format: safetensors
      datasets:
        - folder_path: "/path/to/images/folder"  # replace with your dataset folder
          caption_ext: "txt"                     # one .txt caption file per image
          caption_dropout_rate: 0.05             # train a small fraction of steps uncaptioned
          cache_latents_to_disk: true            # cache VAE latents so they are not recomputed
          resolution: [ 1024, 1024 ]             # Z-Image native resolution
      train:
        batch_size: 2 # Prodigy works well with batch 2-4 on 32GB
        gradient_accumulation: 1
        steps: 3000
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: "flowmatch"
        timestep_type: "weighted"
        content_or_style: "balanced"
        loss_type: "mse"
        dtype: bf16
        # Prodigy is adaptive: nominal lr 1.0; or use "prodigy_schedulefree"
        optimizer: "prodigy"
        lr: 1.0
        optimizer_params:
          weight_decay: 0.01
        lr_scheduler: "constant"
        diff_output_preservation: true # DOP reduces overfitting for character identity
        diff_output_preservation_multiplier: 1.0
        diff_output_preservation_class: "person"
        switch_boundary_every: 1
        unload_text_encoder: false
        ema_config:
          use_ema: false
          ema_decay: 0.99
        skip_first_sample: false
        disable_sampling: false
      logging:
        log_every: 1
        use_ui_logger: true
      model:
        name_or_path: "Tongyi-MAI/Z-Image-Turbo"
        arch: "zimage"
        # Required for Turbo: the training adapter prevents the quality degradation
        # that otherwise occurs when training directly on the distilled model.
        assistant_lora_path: "ostris/zimage_turbo_training_adapter/zimage_turbo_training_adapter_v2.safetensors"
        quantize: true        # float8-quantize the transformer to fit in 32GB
        qtype: "qfloat8"
        quantize_te: true     # also quantize the text encoder
        qtype_te: "qfloat8"
        low_vram: false
        model_kwargs: {}
      sample:
        sampler: "flowmatch"
        sample_every: 250     # sample-generation interval, in steps
        width: 1024
        height: 1024
        # [trigger] is substituted with the trained trigger word at sample time.
        samples:
          - prompt: "[trigger], studio portrait, soft lighting"
          - prompt: "[trigger] on a beach, golden hour"
          - prompt: "[trigger], casual outfit, urban background"
        neg: ""
        seed: 42
        walk_seed: true
        guidance_scale: 1 # Turbo is CFG-distilled: use 1
        sample_steps: 8 # Turbo needs far fewer steps than Base
meta:
  name: "[name]"
  version: '1.0'
1 change: 1 addition & 0 deletions dgx_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ k-diffusion
open_clip_torch
timm
prodigyopt
prodigy-plus-schedule-free
controlnet_aux==0.0.10
python-dotenv
bitsandbytes
Expand Down
13 changes: 13 additions & 0 deletions jobs/process/BaseSDTrainProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,9 @@ def run(self):
### HOOK ###
if self.torch_profiler is not None:
self.torch_profiler.start()
# Schedule-Free optimizers (e.g. Prodigy Schedule Free) need train() during training step
if hasattr(optimizer, 'train') and callable(optimizer.train):
optimizer.train()
did_oom = False
loss_dict = None
try:
Expand Down Expand Up @@ -2262,8 +2265,13 @@ def run(self):
# print above the progress bar
if self.progress_bar is not None:
self.progress_bar.pause()
# Schedule-Free: use averaged params for checkpoint
if hasattr(optimizer, 'eval') and callable(optimizer.eval):
optimizer.eval()
print_acc(f"\nSaving at step {self.step_num}")
self.save(self.step_num)
if hasattr(optimizer, 'train') and callable(optimizer.train):
optimizer.train()
self.ensure_params_requires_grad()
# clear any grads
optimizer.zero_grad()
Expand All @@ -2276,10 +2284,15 @@ def run(self):
if self.progress_bar is not None:
self.progress_bar.pause()
flush()
# Schedule-Free: use averaged params for sampling
if hasattr(optimizer, 'eval') and callable(optimizer.eval):
optimizer.eval()
# print above the progress bar
if self.train_config.free_u:
self.sd.pipeline.disable_freeu()
self.sample(self.step_num)
if hasattr(optimizer, 'train') and callable(optimizer.train):
optimizer.train()
if self.train_config.unload_text_encoder:
# make sure the text encoder is unloaded
self.sd.text_encoder_to('cpu')
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ k-diffusion
open_clip_torch
timm
prodigyopt
prodigy-plus-schedule-free
controlnet_aux==0.0.10
python-dotenv
bitsandbytes
Expand Down
14 changes: 14 additions & 0 deletions toolkit/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ def get_optimizer(
# let net be the neural network you want to train
# you can choose weight decay value based on your problem, 0 by default
optimizer = Prodigy8bit(params, lr=use_lr, eps=1e-6, **optimizer_params)
elif (lower_type.startswith("prodigy_schedulefree") or
lower_type.replace("-", "_") == "prodigy_schedule_free"):
try:
from prodigyplus.prodigy_plus_schedulefree import ProdigyPlusScheduleFree
except ImportError:
raise ImportError(
"Prodigy Schedule Free requires: pip install prodigy-plus-schedule-free"
)
print("Using Prodigy + Schedule-Free optimizer")
use_lr = learning_rate
if use_lr < 0.1:
use_lr = 1.0
print(f"Using lr {use_lr}")
optimizer = ProdigyPlusScheduleFree(params, lr=use_lr, **optimizer_params)
elif lower_type.startswith("prodigy"):
from prodigyopt import Prodigy

Expand Down
4 changes: 4 additions & 0 deletions ui/src/app/jobs/new/SimpleJob.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,11 @@ export default function SimpleJob({
onChange={value => setJobConfig(value, 'config.process[0].train.optimizer')}
options={[
{ value: 'adamw8bit', label: 'AdamW8Bit' },
{ value: 'adamw', label: 'AdamW' },
{ value: 'adafactor', label: 'Adafactor' },
{ value: 'prodigy', label: 'Prodigy' },
{ value: 'prodigy8bit', label: 'Prodigy 8-bit' },
{ value: 'prodigy_schedulefree', label: 'Prodigy Schedule Free' },
]}
/>
<NumberInput
Expand Down