Skip to content

Commit 0eb04d9

Browse files
committed
update stable-diffusion.cpp to master-301-fd693ac
1 parent 5839282 commit 0eb04d9

File tree

6 files changed

+162
-52
lines changed

6 files changed

+162
-52
lines changed

otherarch/sdcpp/denoiser.hpp

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,25 @@ struct GITSSchedule : SigmaSchedule {
232232
}
233233
};
234234

235+
struct SGMUniformSchedule : SigmaSchedule {
236+
std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
237+
std::vector<float> result;
238+
if (n == 0) {
239+
result.push_back(0.0f);
240+
return result;
241+
}
242+
result.reserve(n + 1);
243+
int t_max = TIMESTEPS - 1;
244+
int t_min = 0;
245+
std::vector<float> timesteps = linear_space(static_cast<float>(t_max), static_cast<float>(t_min), n + 1);
246+
for (int i = 0; i < n; i++) {
247+
result.push_back(t_to_sigma_func(timesteps[i]));
248+
}
249+
result.push_back(0.0f);
250+
return result;
251+
}
252+
};
253+
235254
struct KarrasSchedule : SigmaSchedule {
236255
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
237256
// These *COULD* be function arguments here,
@@ -251,6 +270,35 @@ struct KarrasSchedule : SigmaSchedule {
251270
}
252271
};
253272

273+
struct SimpleSchedule : SigmaSchedule {
274+
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
275+
std::vector<float> result_sigmas;
276+
277+
if (n == 0) {
278+
return result_sigmas;
279+
}
280+
281+
result_sigmas.reserve(n + 1);
282+
283+
int model_sigmas_len = TIMESTEPS;
284+
285+
float step_factor = static_cast<float>(model_sigmas_len) / static_cast<float>(n);
286+
287+
for (uint32_t i = 0; i < n; ++i) {
288+
int offset_from_start_of_py_array = static_cast<int>(static_cast<float>(i) * step_factor);
289+
int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
290+
291+
if (timestep_index < 0) {
292+
timestep_index = 0;
293+
}
294+
295+
result_sigmas.push_back(t_to_sigma(static_cast<float>(timestep_index)));
296+
}
297+
result_sigmas.push_back(0.0f);
298+
return result_sigmas;
299+
}
300+
};
301+
254302
// Close to Beta Schedule, but incredibly simple in code.
255303
struct SmoothStepSchedule : SigmaSchedule {
256304
static constexpr float smoothstep(float x) {
@@ -722,7 +770,6 @@ static void sample_k_diffusion(sample_method_t method,
722770
} break;
723771
case DPMPP2S_A: {
724772
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
725-
struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x);
726773
struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x);
727774

728775
for (int i = 0; i < steps; i++) {
@@ -737,22 +784,15 @@ static void sample_k_diffusion(sample_method_t method,
737784
auto sigma_fn = [](float t) -> float { return exp(-t); };
738785

739786
if (sigma_down == 0) {
740-
// Euler step
741-
float* vec_d = (float*)d->data;
787+
// d = (x - denoised) / sigmas[i];
788+
// dt = sigma_down - sigmas[i];
789+
// x += d * dt;
790+
// => x = denoised
742791
float* vec_x = (float*)x->data;
743792
float* vec_denoised = (float*)denoised->data;
744793

745-
for (int j = 0; j < ggml_nelements(d); j++) {
746-
vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i];
747-
}
748-
749-
// TODO: If sigma_down == 0, isn't this wrong?
750-
// But
751-
// https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525
752-
// has this exactly the same way.
753-
float dt = sigma_down - sigmas[i];
754-
for (int j = 0; j < ggml_nelements(d); j++) {
755-
vec_x[j] = vec_x[j] + vec_d[j] * dt;
794+
for (int j = 0; j < ggml_nelements(x); j++) {
795+
vec_x[j] = vec_denoised[j];
756796
}
757797
} else {
758798
// DPM-Solver++(2S)
@@ -761,7 +801,6 @@ static void sample_k_diffusion(sample_method_t method,
761801
float h = t_next - t;
762802
float s = t + 0.5f * h;
763803

764-
float* vec_d = (float*)d->data;
765804
float* vec_x = (float*)x->data;
766805
float* vec_x2 = (float*)x2->data;
767806
float* vec_denoised = (float*)denoised->data;

otherarch/sdcpp/main.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ struct SDParams {
103103
bool verbose = false;
104104
bool offload_params_to_cpu = false;
105105
bool control_net_cpu = false;
106-
bool normalize_input = false;
107106
bool clip_on_cpu = false;
108107
bool vae_on_cpu = false;
109108
bool diffusion_flash_attn = false;
@@ -156,7 +155,6 @@ void print_params(SDParams params) {
156155
printf(" pm_id_images_dir: %s\n", params.pm_id_images_dir.c_str());
157156
printf(" pm_id_embed_path: %s\n", params.pm_id_embed_path.c_str());
158157
printf(" pm_style_strength: %.2f\n", params.pm_style_strength);
159-
printf(" normalize input image: %s\n", params.normalize_input ? "true" : "false");
160158
printf(" output_path: %s\n", params.output_path.c_str());
161159
printf(" init_image_path: %s\n", params.init_image_path.c_str());
162160
printf(" end_image_path: %s\n", params.end_image_path.c_str());
@@ -248,9 +246,10 @@ void print_usage(int argc, const char* argv[]) {
248246
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
249247
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
250248
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
251-
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
249+
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
252250
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
253251
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
252+
printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
254253
printf(" --steps STEPS number of sample steps (default: 20)\n");
255254
printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
256255
printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
@@ -261,7 +260,7 @@ void print_usage(int argc, const char* argv[]) {
261260
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
262261
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
263262
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
264-
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
263+
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
265264
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
266265
printf(" (high noise) sampling method (default: \"euler_a\")\n");
267266
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
@@ -274,7 +273,7 @@ void print_usage(int argc, const char* argv[]) {
274273
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
275274
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
276275
printf(" -b, --batch-count COUNT number of images to generate\n");
277-
printf(" --clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
276+
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
278277
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
279278
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
280279
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
@@ -305,7 +304,6 @@ void print_usage(int argc, const char* argv[]) {
305304
printf(" --pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n");
306305
printf(" --pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed\n");
307306
printf(" --pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20)\n");
308-
printf(" --normalize-input normalize PHOTOMAKER input id images\n");
309307
printf(" -v, --verbose print extra info\n");
310308
}
311309

@@ -520,6 +518,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
520518
{"", "--chroma-t5-mask-pad", "", &params.chroma_t5_mask_pad},
521519
{"", "--video-frames", "", &params.video_frames},
522520
{"", "--fps", "", &params.fps},
521+
{"", "--timestep-shift", "", &params.sample_params.shifted_timestep},
523522
};
524523

525524
options.float_options = {
@@ -550,7 +549,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
550549
{"", "--vae-tiling", "", true, &params.vae_tiling_params.enabled},
551550
{"", "--offload-to-cpu", "", true, &params.offload_params_to_cpu},
552551
{"", "--control-net-cpu", "", true, &params.control_net_cpu},
553-
{"", "--normalize-input", "", true, &params.normalize_input},
554552
{"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
555553
{"", "--vae-on-cpu", "", true, &params.vae_on_cpu},
556554
{"", "--diffusion-fa", "", true, &params.diffusion_flash_attn},
@@ -875,6 +873,11 @@ void parse_args(int argc, const char** argv, SDParams& params) {
875873
exit(1);
876874
}
877875

876+
if (params.sample_params.shifted_timestep < 0 || params.sample_params.shifted_timestep > 1000) {
877+
fprintf(stderr, "error: timestep-shift must be between 0 and 1000\n");
878+
exit(1);
879+
}
880+
878881
if (params.upscale_repeats < 1) {
879882
fprintf(stderr, "error: upscale multiplier must be at least 1\n");
880883
exit(1);
@@ -1372,7 +1375,6 @@ int main(int argc, const char* argv[]) {
13721375
params.batch_count,
13731376
control_image,
13741377
params.control_strength,
1375-
params.normalize_input,
13761378
{
13771379
pmid_images.data(),
13781380
(int)pmid_images.size(),

otherarch/sdcpp/model.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2473,6 +2473,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
24732473

24742474
auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
24752475

2476+
std::mutex tensor_mutex;
24762477
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
24772478
const std::string& name = tensor_storage.name;
24782479
ggml_type tensor_type = tensor_storage.type;
@@ -2490,6 +2491,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
24902491
tensor_type = dst_type;
24912492
}
24922493

2494+
std::lock_guard<std::mutex> lock(tensor_mutex);
24932495
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
24942496
if (tensor == NULL) {
24952497
LOG_ERROR("ggml_new_tensor failed");

otherarch/sdcpp/pmid.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,8 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
599599
return false;
600600
}
601601

602-
bool dry_run = true;
602+
bool dry_run = true;
603+
std::mutex tensor_mutex;
603604
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
604605
const std::string& name = tensor_storage.name;
605606

@@ -608,6 +609,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
608609
return true;
609610
}
610611
if (dry_run) {
612+
std::lock_guard<std::mutex> lock(tensor_mutex);
611613
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
612614
tensor_storage.type,
613615
tensor_storage.n_dims,

0 commit comments

Comments
 (0)