refactor: reuse some code

leejet · leejet · commit 7dac89ad7517 · 2025-07-01T23:33:50.000+08:00
diff --git a/common.hpp b/common.hpp
@@ -57,7 +57,7 @@ class UpSampleBlock : public GGMLBlock {
         auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
 
         x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);  // [N, channels, h*2, w*2]
-        x = conv->forward(ctx, x);    // [N, out_channels, h*2, w*2]
+        x = conv->forward(ctx, x);                             // [N, out_channels, h*2, w*2]
         return x;
     }
 };
diff --git a/denoiser.hpp b/denoiser.hpp
@@ -347,12 +347,13 @@ struct EDMVDenoiser : public CompVisVDenoiser {
     float min_sigma = 0.002;
     float max_sigma = 120.0;
 
-    EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0) : min_sigma(min_sigma), max_sigma(max_sigma) {
+    EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
+        : min_sigma(min_sigma), max_sigma(max_sigma) {
         schedule = std::make_shared<ExponentialSchedule>();
     }
 
     float t_to_sigma(float t) {
-        return std::exp(t * 4/(float)TIMESTEPS);
+        return std::exp(t * 4 / (float)TIMESTEPS);
     }
 
     float sigma_to_t(float s) {
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
@@ -118,7 +118,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                                      a->ne[1] * b->ne[1],
                                      a->ne[2] * b->ne[2],
                                      a->ne[3] * b->ne[3],
-                                     GGML_SCALE_MODE_NEAREST), 
+                                     GGML_SCALE_MODE_NEAREST),
                     b);
 }
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -1566,6 +1566,29 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
     return result_images;
 }
 
+ggml_tensor* generate_init_latent(sd_ctx_t* sd_ctx,
+                                  ggml_context* work_ctx,
+                                  int width,
+                                  int height) {
+    int C = 4;
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        C = 16;
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        C = 16;
+    }
+    int W                    = width / 8;
+    int H                    = height / 8;
+    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.0609f);
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.1159f);
+    } else {
+        ggml_set_f32(init_latent, 0.f);
+    }
+    return init_latent;
+}
+
 sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                     const char* prompt_c_str,
                     const char* negative_prompt_c_str,
@@ -1622,27 +1645,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
 
     std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
 
-    int C = 4;
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        C = 16;
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        C = 16;
-    }
-    int W                    = width / 8;
-    int H                    = height / 8;
-    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.0609f);
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.1159f);
-    } else {
-        ggml_set_f32(init_latent, 0.f);
-    }
-
     if (sd_version_is_inpaint(sd_ctx->sd->version)) {
         LOG_WARN("This is an inpainting model, this should only be used in img2img mode with a mask");
     }
 
+    ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
+
     sd_image_t* result_images = generate_image(sd_ctx,
                                                work_ctx,
                                                init_latent,
@@ -2046,23 +2054,6 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
     }
     sd_ctx->sd->rng->manual_seed(seed);
 
-    int C = 4;
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        C = 16;
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        C = 16;
-    }
-    int W                    = width / 8;
-    int H                    = height / 8;
-    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.0609f);
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.1159f);
-    } else {
-        ggml_set_f32(init_latent, 0.f);
-    }
-
     size_t t0 = ggml_time_ms();
 
     std::vector<struct ggml_tensor*> ref_latents;
@@ -2085,6 +2076,8 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
 
     std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
 
+    ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
+
     sd_image_t* result_images = generate_image(sd_ctx,
                                                work_ctx,
                                                init_latent,

Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ class UpSampleBlock : public GGMLBlock {`
`57`	`57`	`auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);`
`58`	`58`
`59`	`59`	`x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]`
`60`		`- x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]`
	`60`	`+ x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]`
`61`	`61`	`return x;`
`62`	`62`	`}`
`63`	`63`	`};`
Original file line number	Diff line number	Diff line change
`@@ -347,12 +347,13 @@ struct EDMVDenoiser : public CompVisVDenoiser {`
`347`	`347`	`float min_sigma = 0.002;`
`348`	`348`	`float max_sigma = 120.0;`
`349`	`349`
`350`		`- EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0) : min_sigma(min_sigma), max_sigma(max_sigma) {`
	`350`	`+ EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)`
	`351`	`+ : min_sigma(min_sigma), max_sigma(max_sigma) {`
`351`	`352`	`schedule = std::make_shared<ExponentialSchedule>();`
`352`	`353`	`}`
`353`	`354`
`354`	`355`	`float t_to_sigma(float t) {`
`355`		`- return std::exp(t * 4/(float)TIMESTEPS);`
	`356`	`+ return std::exp(t * 4 / (float)TIMESTEPS);`
`356`	`357`	`}`
`357`	`358`
`358`	`359`	`float sigma_to_t(float s) {`
Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g`
`118`	`118`	`a->ne[1] * b->ne[1],`
`119`	`119`	`a->ne[2] * b->ne[2],`
`120`	`120`	`a->ne[3] * b->ne[3],`
`121`		`- GGML_SCALE_MODE_NEAREST),`
	`121`	`+ GGML_SCALE_MODE_NEAREST),`
`122`	`122`	`b);`
`123`	`123`	`}`
`124`	`124`