diff --git a/denoiser.hpp b/denoiser.hpp index 385bcfbeb..339c0cfb9 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -382,7 +382,8 @@ struct DiscreteFlowDenoiser : public Denoiser { float sigma_data = 1.0f; - DiscreteFlowDenoiser() { + DiscreteFlowDenoiser(float shift = 3.0f) + : shift(shift) { set_parameters(); } diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 91d74f173..a3bf0ced3 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -115,6 +115,7 @@ struct SDParams { bool chroma_use_dit_mask = true; bool chroma_use_t5_mask = false; int chroma_t5_mask_pad = 1; + float flow_shift = INFINITY; SDParams() { sd_sample_params_init(&sample_params); @@ -171,6 +172,7 @@ void print_params(SDParams params) { printf(" sample_params: %s\n", SAFE_STR(sample_params_str)); printf(" high_noise_sample_params: %s\n", SAFE_STR(high_noise_sample_params_str)); printf(" moe_boundary: %.3f\n", params.moe_boundary); + printf(" flow_shift: %.2f\n", params.flow_shift); printf(" strength(img2img): %.2f\n", params.strength); printf(" rng: %s\n", sd_rng_type_name(params.rng_type)); printf(" seed: %ld\n", params.seed); @@ -278,8 +280,9 @@ void print_usage(int argc, const char* argv[]) { printf(" --chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma\n"); printf(" --video-frames video frames (default: 1)\n"); printf(" --fps fps (default: 24)\n"); - printf(" --moe-boundary BOUNDARY Timestep boundary for Wan2.2 MoE model. (default: 0.875)\n"); - printf(" Only enabled if `--high-noise-steps` is set to -1\n"); + printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. 
(default: 0.875)\n"); + printf(" only enabled if `--high-noise-steps` is set to -1\n"); + printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n"); printf(" -v, --verbose print extra info\n"); } @@ -514,6 +517,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { {"", "--style-ratio", "", &params.style_ratio}, {"", "--control-strength", "", &params.control_strength}, {"", "--moe-boundary", "", &params.moe_boundary}, + {"", "--flow-shift", "", &params.flow_shift}, }; options.bool_options = { @@ -1181,6 +1185,7 @@ int main(int argc, const char* argv[]) { params.chroma_use_dit_mask, params.chroma_use_t5_mask, params.chroma_t5_mask_pad, + params.flow_shift, }; sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index db89cbb74..64164a2fb 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -681,7 +681,11 @@ class StableDiffusionGGML { if (sd_version_is_sd3(version)) { LOG_INFO("running in FLOW mode"); - denoiser = std::make_shared<DiscreteFlowDenoiser>(); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 3.0; + } + denoiser = std::make_shared<DiscreteFlowDenoiser>(shift); } else if (sd_version_is_flux(version)) { LOG_INFO("running in Flux FLOW mode"); float shift = 1.0f; // TODO: validate @@ -694,7 +698,11 @@ class StableDiffusionGGML { denoiser = std::make_shared(shift); } else if (sd_version_is_wan(version)) { LOG_INFO("running in FLOW mode"); - denoiser = std::make_shared<DiscreteFlowDenoiser>(); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 5.0; + } + denoiser = std::make_shared<DiscreteFlowDenoiser>(shift); } else if (is_using_v_parameterization) { LOG_INFO("running in v-prediction mode"); denoiser = std::make_shared(); @@ -1553,6 +1561,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { sd_ctx_params->chroma_use_dit_mask = true; sd_ctx_params->chroma_use_t5_mask = false; sd_ctx_params->chroma_t5_mask_pad = 1; + sd_ctx_params->flow_shift = INFINITY; } char* 
sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { diff --git a/stable-diffusion.h b/stable-diffusion.h index 5ffe50618..7bfb52700 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -142,6 +142,7 @@ typedef struct { bool chroma_use_dit_mask; bool chroma_use_t5_mask; int chroma_t5_mask_pad; + float flow_shift; } sd_ctx_params_t; typedef struct {