@@ -351,8 +351,8 @@ class StableDiffusionGGML {
351351 offload_params_to_cpu,
352352 model_loader.tensor_storages_types );
353353 diffusion_model = std::make_shared<MMDiTModel>(backend,
354- offload_params_to_cpu,
355- model_loader.tensor_storages_types );
354+ offload_params_to_cpu,
355+ model_loader.tensor_storages_types );
356356 } else if (sd_version_is_flux (version)) {
357357 bool is_chroma = false ;
358358 for (auto pair : model_loader.tensor_storages_types ) {
@@ -388,11 +388,11 @@ class StableDiffusionGGML {
388388 1 ,
389389 true );
390390 diffusion_model = std::make_shared<WanModel>(backend,
391- offload_params_to_cpu,
392- model_loader.tensor_storages_types ,
393- " model.diffusion_model" ,
394- version,
395- sd_ctx_params->diffusion_flash_attn );
391+ offload_params_to_cpu,
392+ model_loader.tensor_storages_types ,
393+ " model.diffusion_model" ,
394+ version,
395+ sd_ctx_params->diffusion_flash_attn );
396396 if (strlen (SAFE_STR (sd_ctx_params->high_noise_diffusion_model_path )) > 0 ) {
397397 high_noise_diffusion_model = std::make_shared<WanModel>(backend,
398398 offload_params_to_cpu,
@@ -1286,7 +1286,15 @@ class StableDiffusionGGML {
12861286 ggml_tensor* result = NULL ;
12871287 if (!use_tiny_autoencoder) {
12881288 process_vae_input_tensor (x);
1289- first_stage_model->compute (n_threads, x, false , &result, work_ctx);
1289+ if (vae_tiling && !decode_video) {
1290+ // split latent in 32x32 tiles and compute in several steps
1291+ auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
1292+ first_stage_model->compute (n_threads, in, true , &out, NULL );
1293+ };
1294+ sd_tiling (x, result, 8 , 32 , 0 .5f , on_tiling, false );
1295+ } else {
1296+ first_stage_model->compute (n_threads, x, false , &result, work_ctx);
1297+ }
12901298 first_stage_model->free_compute_buffer ();
12911299 } else {
12921300 tae_first_stage->compute (n_threads, x, false , &result, work_ctx);
@@ -1313,12 +1321,12 @@ class StableDiffusionGGML {
13131321 -0 .0313f , -0 .1649f , 0 .0117f , 0 .0723f , -0 .2839f , -0 .2083f , -0 .0520f , 0 .3748f ,
13141322 0 .0152f , 0 .1957f , 0 .1433f , -0 .2944f , 0 .3573f , -0 .0548f , -0 .1681f , -0 .0667f };
13151323 latents_std_vec = {
1316- 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1317- 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1318- 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1319- 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1320- 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1321- 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
1324+ 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1325+ 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1326+ 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1327+ 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1328+ 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1329+ 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
13221330 }
13231331 for (int i = 0 ; i < latent->ne [3 ]; i++) {
13241332 float mean = latents_mean_vec[i];
@@ -1353,12 +1361,12 @@ class StableDiffusionGGML {
13531361 -0 .0313f , -0 .1649f , 0 .0117f , 0 .0723f , -0 .2839f , -0 .2083f , -0 .0520f , 0 .3748f ,
13541362 0 .0152f , 0 .1957f , 0 .1433f , -0 .2944f , 0 .3573f , -0 .0548f , -0 .1681f , -0 .0667f };
13551363 latents_std_vec = {
1356- 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1357- 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1358- 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1359- 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1360- 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1361- 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
1364+ 0 .4765f , 1 .0364f , 0 .4514f , 1 .1677f , 0 .5313f , 0 .4990f , 0 .4818f , 0 .5013f ,
1365+ 0 .8158f , 1 .0344f , 0 .5894f , 1 .0901f , 0 .6885f , 0 .6165f , 0 .8454f , 0 .4978f ,
1366+ 0 .5759f , 0 .3523f , 0 .7135f , 0 .6804f , 0 .5833f , 1 .4146f , 0 .8986f , 0 .5659f ,
1367+ 0 .7069f , 0 .5338f , 0 .4889f , 0 .4917f , 0 .4069f , 0 .4999f , 0 .6866f , 0 .4093f ,
1368+ 0 .5709f , 0 .6065f , 0 .6415f , 0 .4944f , 0 .5726f , 1 .2042f , 0 .5458f , 1 .6887f ,
1369+ 0 .3971f , 1 .0600f , 0 .3943f , 0 .5537f , 0 .5444f , 0 .4089f , 0 .7468f , 0 .7744f };
13621370 }
13631371 for (int i = 0 ; i < latent->ne [3 ]; i++) {
13641372 float mean = latents_mean_vec[i];
@@ -1416,7 +1424,7 @@ class StableDiffusionGGML {
14161424 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
14171425 first_stage_model->compute (n_threads, in, true , &out, NULL );
14181426 };
1419- sd_tiling (x, result, 8 , 32 , 0 .5f , on_tiling);
1427+ sd_tiling (x, result, 8 , 32 , 0 .5f , on_tiling, true );
14201428 } else {
14211429 first_stage_model->compute (n_threads, x, true , &result, work_ctx);
14221430 }
@@ -1428,7 +1436,7 @@ class StableDiffusionGGML {
14281436 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
14291437 tae_first_stage->compute (n_threads, in, true , &out);
14301438 };
1431- sd_tiling (x, result, 8 , 64 , 0 .5f , on_tiling);
1439+ sd_tiling (x, result, 8 , 64 , 0 .5f , on_tiling, true );
14321440 } else {
14331441 tae_first_stage->compute (n_threads, x, true , &result);
14341442 }
0 commit comments