Add unconditional SLG (skip layer guidance) variant

stduhpf · stduhpf · commit ff67c30738b2 · 2025-07-18T16:24:27.000+02:00
fix default slg params
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -105,6 +105,7 @@ struct SDParams {
     float slg_scale              = 0.0f;
     float skip_layer_start       = 0.01f;
     float skip_layer_end         = 0.2f;
+    bool slg_uncond              = false;
 
     float apg_eta            = 1.0f;
     float apg_momentum       = 0.0f;
@@ -216,11 +217,14 @@ void print_usage(int argc, const char* argv[]) {
     printf("                                     (replaces saturation with a smooth approximation)\n");
     printf("  --slg-scale SCALE                  skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
     printf("                                     0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
-    printf("  --eta SCALE                        eta in DDIM, only for DDIM and TCD: (default: 0)\n");
+    printf("  --slg-uncond                       Use CFG's forward pass for SLG instead of a separate pass, only for DiT models\n");
+    printf("                                     To use this, it's recommended to keep slg-scale to 0, both for performance and quality reasons\n");
+    printf("                                     This should be slightly faster than normal cfg when cfg_scale != 1.\n");
     printf("  --skip-layers LAYERS               Layers to skip for SLG steps: (default: [7,8,9])\n");
     printf("  --skip-layer-start START           SLG enabling point: (default: 0.01)\n");
     printf("  --skip-layer-end END               SLG disabling point: (default: 0.2)\n");
     printf("                                     SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])\n");
+    printf("  --eta SCALE                        eta in DDIM, only for DDIM and TCD: (default: 0)\n");
     printf("  --strength STRENGTH                strength for noising/unnoising (default: 0.75)\n");
     printf("  --style-ratio STYLE-RATIO          strength for keeping input identity (default: 20)\n");
     printf("  --control-strength STRENGTH        strength to apply Control Net (default: 0.9)\n");
@@ -683,6 +687,7 @@ std::string get_image_params(SDParams params, int64_t seed) {
         }
     }
     if (params.slg_scale != 0 && params.skip_layers.size() != 0) {
+        parameter_string += "Unconditional SLG: " + std::string(params.slg_uncond ? "True" : "False") + ", ";
         parameter_string += "SLG scale: " + std::to_string(params.cfg_scale) + ", ";
         parameter_string += "Skip layers: [";
         for (const auto& layer : params.skip_layers) {
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -886,7 +886,7 @@ class StableDiffusionGGML {
 
         bool has_unconditioned = img_cfg_scale != 1.0 && uncond.c_crossattn != NULL;
         bool has_img_cond      = cfg_scale != img_cfg_scale && img_cond.c_crossattn != NULL;
-        bool has_skiplayer     = slg_scale != 0.0 && skip_layers.size() > 0;
+        bool has_skiplayer     = (slg_scale != 0.0 || guidance.slg.uncond) && skip_layers.size() > 0;
 
         // denoise wrapper
         struct ggml_tensor* out_cond     = ggml_dup_tensor(work_ctx, x);
@@ -899,7 +899,9 @@ class StableDiffusionGGML {
         }
         if (has_skiplayer) {
             if (sd_version_is_dit(version)) {
-                out_skip = ggml_dup_tensor(work_ctx, x);
+                if (slg_scale != 0.0) {
+                    out_skip = ggml_dup_tensor(work_ctx, x);
+                }
             } else {
                 has_skiplayer = false;
                 LOG_WARN("SLG is incompatible with %s models", model_version_to_str[version]);
@@ -973,6 +975,8 @@ class StableDiffusionGGML {
                                          control_strength,
                                          &out_cond);
             }
+            int step_count         = sigmas.size();
+            bool is_skiplayer_step = has_skiplayer && step > (int)(guidance.slg.layer_start * step_count) && step < (int)(guidance.slg.layer_end * step_count);
 
             float* negative_data = NULL;
             if (has_unconditioned) {
@@ -981,18 +985,36 @@ class StableDiffusionGGML {
                     control_net->compute(n_threads, noised_input, control_hint, timesteps, uncond.c_crossattn, uncond.c_vector);
                     controls = control_net->controls;
                 }
-                diffusion_model->compute(n_threads,
-                                         noised_input,
-                                         timesteps,
-                                         uncond.c_crossattn,
-                                         uncond.c_concat,
-                                         uncond.c_vector,
-                                         guidance_tensor,
-                                         ref_latents,
-                                         -1,
-                                         controls,
-                                         control_strength,
-                                         &out_uncond);
+                if (is_skiplayer_step && guidance.slg.uncond) {
+                    LOG_DEBUG("Skipping layers at uncond step %d\n", step);
+                    diffusion_model->compute(n_threads,
+                                             noised_input,
+                                             timesteps,
+                                             uncond.c_crossattn,
+                                             uncond.c_concat,
+                                             uncond.c_vector,
+                                             guidance_tensor,
+                                             ref_latents,
+                                             -1,
+                                             controls,
+                                             control_strength,
+                                             &out_uncond,
+                                             NULL,
+                                             skip_layers);
+                } else {
+                    diffusion_model->compute(n_threads,
+                                             noised_input,
+                                             timesteps,
+                                             uncond.c_crossattn,
+                                             uncond.c_concat,
+                                             uncond.c_vector,
+                                             guidance_tensor,
+                                             ref_latents,
+                                             -1,
+                                             controls,
+                                             control_strength,
+                                             &out_uncond);
+                }
                 negative_data = (float*)out_uncond->data;
             }
 
@@ -1013,10 +1035,8 @@ class StableDiffusionGGML {
                 img_cond_data = (float*)out_img_cond->data;
             }
 
-            int step_count         = sigmas.size();
-            bool is_skiplayer_step = has_skiplayer && step > (int)(guidance.slg.layer_start * step_count) && step < (int)(guidance.slg.layer_end * step_count);
             float* skip_layer_data = NULL;
-            if (is_skiplayer_step) {
+            if (is_skiplayer_step && slg_scale != 0.0) {
                 LOG_DEBUG("Skipping layers at step %d\n", step);
                 // skip layer (same as conditionned)
                 diffusion_model->compute(n_threads,
@@ -1106,7 +1126,7 @@ class StableDiffusionGGML {
                     } else {
                         float delta = deltas[i];
 
-                        if(cfg_scale != 1) {
+                        if (cfg_scale != 1) {
                             latent_result = positive_data[i] + (cfg_scale - 1) * delta;
                         } else if (has_img_cond) {
                             latent_result = positive_data[i] + (img_cfg_scale - 1) * delta;
@@ -1116,7 +1136,7 @@ class StableDiffusionGGML {
                     // img_cfg_scale == 1
                     latent_result = img_cond_data[i] + cfg_scale * (positive_data[i] - img_cond_data[i]);
                 }
-                if (is_skiplayer_step) {
+                if (is_skiplayer_step && slg_scale != 0.0) {
                     latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
                 }
                 // v = latent_result, eps = latent_result
diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -152,6 +152,7 @@ typedef struct {
     float layer_start;
     float layer_end;
     float scale;
+    bool uncond;
 } sd_slg_params_t;
 
 typedef struct {