@@ -615,6 +615,44 @@ std::string convert_diffusers_dit_to_original_flux(std::string name) {
615615 return name;
616616}
617617
618+ std::string convert_diffusers_dit_to_original_lumina2 (std::string name) {
619+ int num_layers = 30 ;
620+ int num_refiner_layers = 2 ;
621+ static std::unordered_map<std::string, std::string> z_image_name_map;
622+
623+ if (z_image_name_map.empty ()) {
624+ z_image_name_map[" all_x_embedder.2-1." ] = " x_embedder." ;
625+ z_image_name_map[" all_final_layer.2-1." ] = " final_layer." ;
626+
627+ // --- transformer blocks ---
628+ auto add_attention_map = [&](const std::string& prefix, int num) {
629+ for (int i = 0 ; i < num; ++i) {
630+ std::string block_prefix = prefix + std::to_string (i) + " ." ;
631+ std::string dst_prefix = prefix + std::to_string (i) + " ." ;
632+
633+ z_image_name_map[block_prefix + " attention.norm_q." ] = dst_prefix + " attention.q_norm." ;
634+ z_image_name_map[block_prefix + " attention.norm_k." ] = dst_prefix + " attention.k_norm." ;
635+ z_image_name_map[block_prefix + " attention.to_out.0." ] = dst_prefix + " attention.out." ;
636+
637+ z_image_name_map[block_prefix + " attention.to_q.weight" ] = dst_prefix + " attention.qkv.weight" ;
638+ z_image_name_map[block_prefix + " attention.to_q.bias" ] = dst_prefix + " attention.qkv.bias" ;
639+ z_image_name_map[block_prefix + " attention.to_k.weight" ] = dst_prefix + " attention.qkv.weight.1" ;
640+ z_image_name_map[block_prefix + " attention.to_k.bias" ] = dst_prefix + " attention.qkv.bias.1" ;
641+ z_image_name_map[block_prefix + " attention.to_v.weight" ] = dst_prefix + " attention.qkv.weight.2" ;
642+ z_image_name_map[block_prefix + " attention.to_v.bias" ] = dst_prefix + " attention.qkv.bias.2" ;
643+ }
644+ };
645+
646+ add_attention_map (" noise_refiner." , num_refiner_layers);
647+ add_attention_map (" context_refiner." , num_refiner_layers);
648+ add_attention_map (" layers." , num_layers);
649+ }
650+
651+ replace_with_prefix_map (name, z_image_name_map);
652+
653+ return name;
654+ }
655+
618656std::string convert_diffusion_model_name (std::string name, std::string prefix, SDVersion version) {
619657 if (sd_version_is_sd1 (version) || sd_version_is_sd2 (version)) {
620658 name = convert_diffusers_unet_to_original_sd1 (name);
@@ -624,6 +662,8 @@ std::string convert_diffusion_model_name(std::string name, std::string prefix, S
624662 name = convert_diffusers_dit_to_original_sd3 (name);
625663 } else if (sd_version_is_flux (version) || sd_version_is_flux2 (version)) {
626664 name = convert_diffusers_dit_to_original_flux (name);
665+ } else if (sd_version_is_z_image (version)) {
666+ name = convert_diffusers_dit_to_original_lumina2 (name);
627667 }
628668 return name;
629669}
0 commit comments