From 46698f2940f18254fe2dc7271543fe6eeb75f65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 29 Aug 2025 23:29:39 +0200 Subject: [PATCH 1/2] Add flow shift parameter (for SD3 and Wan) --- denoiser.hpp | 2 +- examples/cli/main.cpp | 8 ++++++-- stable-diffusion.cpp | 16 ++++++++++++++-- stable-diffusion.h | 1 + 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/denoiser.hpp b/denoiser.hpp index 385bcfbeb..b0cde8a8d 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -382,7 +382,7 @@ struct DiscreteFlowDenoiser : public Denoiser { float sigma_data = 1.0f; - DiscreteFlowDenoiser() { + DiscreteFlowDenoiser(float shift = 3.0f) : shift(shift) { set_parameters(); } diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 91d74f173..de62ee197 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -115,6 +115,7 @@ struct SDParams { bool chroma_use_dit_mask = true; bool chroma_use_t5_mask = false; int chroma_t5_mask_pad = 1; + float flow_shift = INFINITY; SDParams() { sd_sample_params_init(&sample_params); @@ -278,8 +279,9 @@ void print_usage(int argc, const char* argv[]) { printf(" --chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma\n"); printf(" --video-frames video frames (default: 1)\n"); printf(" --fps fps (default: 24)\n"); - printf(" --moe-boundary BOUNDARY Timestep boundary for Wan2.2 MoE model. (default: 0.875)\n"); - printf(" Only enabled if `--high-noise-steps` is set to -1\n"); + printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. 
(default: 0.875)\n"); + printf(" only enabled if `--high-noise-steps` is set to -1\n"); + printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n"); printf(" -v, --verbose print extra info\n"); } @@ -514,6 +516,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { {"", "--style-ratio", "", &params.style_ratio}, {"", "--control-strength", "", &params.control_strength}, {"", "--moe-boundary", "", &params.moe_boundary}, + {"", "--flow-shift", "", &params.flow_shift}, }; options.bool_options = { @@ -1181,6 +1184,7 @@ int main(int argc, const char* argv[]) { params.chroma_use_dit_mask, params.chroma_use_t5_mask, params.chroma_t5_mask_pad, + params.flow_shift, }; sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index db89cbb74..69ababd19 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -681,7 +681,11 @@ class StableDiffusionGGML { if (sd_version_is_sd3(version)) { LOG_INFO("running in FLOW mode"); - denoiser = std::make_shared(); + float shift = sd_ctx_params->flow_shift; + if( shift == INFINITY){ + shift = 3.0; + } + denoiser = std::make_shared(shift); } else if (sd_version_is_flux(version)) { LOG_INFO("running in Flux FLOW mode"); float shift = 1.0f; // TODO: validate @@ -694,7 +698,14 @@ class StableDiffusionGGML { denoiser = std::make_shared(shift); } else if (sd_version_is_wan(version)) { LOG_INFO("running in FLOW mode"); - denoiser = std::make_shared(); + float shift = sd_ctx_params->flow_shift; + if(shift == INFINITY) { + shift = 5.0; + if (version == VERSION_WAN2){ + shift = 12.0; + } + } + denoiser = std::make_shared(shift); } else if (is_using_v_parameterization) { LOG_INFO("running in v-prediction mode"); denoiser = std::make_shared(); @@ -1553,6 +1564,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { sd_ctx_params->chroma_use_dit_mask = true; sd_ctx_params->chroma_use_t5_mask = false; sd_ctx_params->chroma_t5_mask_pad = 1; + 
sd_ctx_params->flow_shift = INFINITY; } char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { diff --git a/stable-diffusion.h b/stable-diffusion.h index 5ffe50618..7bfb52700 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -142,6 +142,7 @@ typedef struct { bool chroma_use_dit_mask; bool chroma_use_t5_mask; int chroma_t5_mask_pad; + float flow_shift; } sd_ctx_params_t; typedef struct { From 2dea4ead088bf4bee5c58a92f599b8714e0de43a Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 7 Sep 2025 02:13:28 +0800 Subject: [PATCH 2/2] unify code style and fix some issues --- denoiser.hpp | 3 ++- examples/cli/main.cpp | 3 ++- stable-diffusion.cpp | 7 ++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/denoiser.hpp b/denoiser.hpp index b0cde8a8d..339c0cfb9 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -382,7 +382,8 @@ struct DiscreteFlowDenoiser : public Denoiser { float sigma_data = 1.0f; - DiscreteFlowDenoiser(float shift = 3.0f) : shift(shift) { + DiscreteFlowDenoiser(float shift = 3.0f) + : shift(shift) { set_parameters(); } diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index de62ee197..a3bf0ced3 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -172,6 +172,7 @@ void print_params(SDParams params) { printf(" sample_params: %s\n", SAFE_STR(sample_params_str)); printf(" high_noise_sample_params: %s\n", SAFE_STR(high_noise_sample_params_str)); printf(" moe_boundary: %.3f\n", params.moe_boundary); + printf(" flow_shift: %.2f\n", params.flow_shift); printf(" strength(img2img): %.2f\n", params.strength); printf(" rng: %s\n", sd_rng_type_name(params.rng_type)); printf(" seed: %ld\n", params.seed); @@ -281,7 +282,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --fps fps (default: 24)\n"); printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. 
(default: 0.875)\n"); printf(" only enabled if `--high-noise-steps` is set to -1\n"); - printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n"); + printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n"); printf(" -v, --verbose print extra info\n"); } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 69ababd19..64164a2fb 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -682,7 +682,7 @@ class StableDiffusionGGML { if (sd_version_is_sd3(version)) { LOG_INFO("running in FLOW mode"); float shift = sd_ctx_params->flow_shift; - if( shift == INFINITY){ + if (shift == INFINITY) { shift = 3.0; } denoiser = std::make_shared(shift); @@ -699,11 +699,8 @@ class StableDiffusionGGML { } else if (sd_version_is_wan(version)) { LOG_INFO("running in FLOW mode"); float shift = sd_ctx_params->flow_shift; - if(shift == INFINITY) { + if (shift == INFINITY) { shift = 5.0; - if (version == VERSION_WAN2){ - shift = 12.0; - } } denoiser = std::make_shared(shift); } else if (is_using_v_parameterization) {