Skip to content

Commit 7dac89a

Browse files
committed
refactor: reuse some code
1 parent 9251756 commit 7dac89a

File tree

4 files changed

+32
-38
lines changed

4 files changed

+32
-38
lines changed

common.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class UpSampleBlock : public GGMLBlock {
5757
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
5858

5959
x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
60-
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
60+
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
6161
return x;
6262
}
6363
};

denoiser.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,13 @@ struct EDMVDenoiser : public CompVisVDenoiser {
347347
float min_sigma = 0.002;
348348
float max_sigma = 120.0;
349349

350-
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0) : min_sigma(min_sigma), max_sigma(max_sigma) {
350+
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
351+
: min_sigma(min_sigma), max_sigma(max_sigma) {
351352
schedule = std::make_shared<ExponentialSchedule>();
352353
}
353354

354355
float t_to_sigma(float t) {
355-
return std::exp(t * 4/(float)TIMESTEPS);
356+
return std::exp(t * 4 / (float)TIMESTEPS);
356357
}
357358

358359
float sigma_to_t(float s) {

ggml_extend.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
118118
a->ne[1] * b->ne[1],
119119
a->ne[2] * b->ne[2],
120120
a->ne[3] * b->ne[3],
121-
GGML_SCALE_MODE_NEAREST),
121+
GGML_SCALE_MODE_NEAREST),
122122
b);
123123
}
124124

stable-diffusion.cpp

Lines changed: 27 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,6 +1566,29 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15661566
return result_images;
15671567
}
15681568

1569+
ggml_tensor* generate_init_latent(sd_ctx_t* sd_ctx,
1570+
ggml_context* work_ctx,
1571+
int width,
1572+
int height) {
1573+
int C = 4;
1574+
if (sd_version_is_sd3(sd_ctx->sd->version)) {
1575+
C = 16;
1576+
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
1577+
C = 16;
1578+
}
1579+
int W = width / 8;
1580+
int H = height / 8;
1581+
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
1582+
if (sd_version_is_sd3(sd_ctx->sd->version)) {
1583+
ggml_set_f32(init_latent, 0.0609f);
1584+
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
1585+
ggml_set_f32(init_latent, 0.1159f);
1586+
} else {
1587+
ggml_set_f32(init_latent, 0.f);
1588+
}
1589+
return init_latent;
1590+
}
1591+
15691592
sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15701593
const char* prompt_c_str,
15711594
const char* negative_prompt_c_str,
@@ -1622,27 +1645,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16221645

16231646
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
16241647

1625-
int C = 4;
1626-
if (sd_version_is_sd3(sd_ctx->sd->version)) {
1627-
C = 16;
1628-
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
1629-
C = 16;
1630-
}
1631-
int W = width / 8;
1632-
int H = height / 8;
1633-
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
1634-
if (sd_version_is_sd3(sd_ctx->sd->version)) {
1635-
ggml_set_f32(init_latent, 0.0609f);
1636-
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
1637-
ggml_set_f32(init_latent, 0.1159f);
1638-
} else {
1639-
ggml_set_f32(init_latent, 0.f);
1640-
}
1641-
16421648
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
16431649
LOG_WARN("This is an inpainting model, this should only be used in img2img mode with a mask");
16441650
}
16451651

1652+
ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
1653+
16461654
sd_image_t* result_images = generate_image(sd_ctx,
16471655
work_ctx,
16481656
init_latent,
@@ -2046,23 +2054,6 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
20462054
}
20472055
sd_ctx->sd->rng->manual_seed(seed);
20482056

2049-
int C = 4;
2050-
if (sd_version_is_sd3(sd_ctx->sd->version)) {
2051-
C = 16;
2052-
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
2053-
C = 16;
2054-
}
2055-
int W = width / 8;
2056-
int H = height / 8;
2057-
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
2058-
if (sd_version_is_sd3(sd_ctx->sd->version)) {
2059-
ggml_set_f32(init_latent, 0.0609f);
2060-
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
2061-
ggml_set_f32(init_latent, 0.1159f);
2062-
} else {
2063-
ggml_set_f32(init_latent, 0.f);
2064-
}
2065-
20662057
size_t t0 = ggml_time_ms();
20672058

20682059
std::vector<struct ggml_tensor*> ref_latents;
@@ -2085,6 +2076,8 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
20852076

20862077
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
20872078

2079+
ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
2080+
20882081
sd_image_t* result_images = generate_image(sd_ctx,
20892082
work_ctx,
20902083
init_latent,

0 commit comments

Comments
 (0)