@@ -1566,6 +1566,29 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15661566 return result_images;
15671567}
15681568
1569+ ggml_tensor* generate_init_latent (sd_ctx_t * sd_ctx,
1570+ ggml_context* work_ctx,
1571+ int width,
1572+ int height) {
1573+ int C = 4 ;
1574+ if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
1575+ C = 16 ;
1576+ } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
1577+ C = 16 ;
1578+ }
1579+ int W = width / 8 ;
1580+ int H = height / 8 ;
1581+ ggml_tensor* init_latent = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, W, H, C, 1 );
1582+ if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
1583+ ggml_set_f32 (init_latent, 0 .0609f );
1584+ } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
1585+ ggml_set_f32 (init_latent, 0 .1159f );
1586+ } else {
1587+ ggml_set_f32 (init_latent, 0 .f );
1588+ }
1589+ return init_latent;
1590+ }
1591+
15691592sd_image_t * txt2img (sd_ctx_t * sd_ctx,
15701593 const char * prompt_c_str,
15711594 const char * negative_prompt_c_str,
@@ -1622,27 +1645,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16221645
16231646 std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (sample_steps);
16241647
1625- int C = 4 ;
1626- if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
1627- C = 16 ;
1628- } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
1629- C = 16 ;
1630- }
1631- int W = width / 8 ;
1632- int H = height / 8 ;
1633- ggml_tensor* init_latent = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, W, H, C, 1 );
1634- if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
1635- ggml_set_f32 (init_latent, 0 .0609f );
1636- } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
1637- ggml_set_f32 (init_latent, 0 .1159f );
1638- } else {
1639- ggml_set_f32 (init_latent, 0 .f );
1640- }
1641-
16421648 if (sd_version_is_inpaint (sd_ctx->sd ->version )) {
16431649 LOG_WARN (" This is an inpainting model, this should only be used in img2img mode with a mask" );
16441650 }
16451651
1652+ ggml_tensor* init_latent = generate_init_latent (sd_ctx, work_ctx, width, height);
1653+
16461654 sd_image_t * result_images = generate_image (sd_ctx,
16471655 work_ctx,
16481656 init_latent,
@@ -2046,23 +2054,6 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
20462054 }
20472055 sd_ctx->sd ->rng ->manual_seed (seed);
20482056
2049- int C = 4 ;
2050- if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
2051- C = 16 ;
2052- } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
2053- C = 16 ;
2054- }
2055- int W = width / 8 ;
2056- int H = height / 8 ;
2057- ggml_tensor* init_latent = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, W, H, C, 1 );
2058- if (sd_version_is_sd3 (sd_ctx->sd ->version )) {
2059- ggml_set_f32 (init_latent, 0 .0609f );
2060- } else if (sd_version_is_flux (sd_ctx->sd ->version )) {
2061- ggml_set_f32 (init_latent, 0 .1159f );
2062- } else {
2063- ggml_set_f32 (init_latent, 0 .f );
2064- }
2065-
20662057 size_t t0 = ggml_time_ms ();
20672058
20682059 std::vector<struct ggml_tensor *> ref_latents;
@@ -2085,6 +2076,8 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
20852076
20862077 std::vector<float > sigmas = sd_ctx->sd ->denoiser ->get_sigmas (sample_steps);
20872078
2079+ ggml_tensor* init_latent = generate_init_latent (sd_ctx, work_ctx, width, height);
2080+
20882081 sd_image_t * result_images = generate_image (sd_ctx,
20892082 work_ctx,
20902083 init_latent,
0 commit comments