Flux Lite (Freepik) support

stduhpf · stduhpf · commit 2210257201df · 2024-11-20T22:52:01.000+01:00
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -853,7 +853,7 @@ void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version
 
         if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
             latent_rgb_proj = sd3_latent_rgb_proj;
-        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL|| version == VERSION_FLUX_LITE) {
             latent_rgb_proj = flux_latent_rgb_proj;
         } else {
             // unknown model
diff --git a/flux.hpp b/flux.hpp
@@ -826,6 +826,9 @@ namespace Flux {
             if (version == VERSION_FLUX_SCHNELL) {
                 flux_params.guidance_embed = false;
             }
+            if (version == VERSION_FLUX_LITE){
+                flux_params.depth = 8;
+            }
             flux = Flux(flux_params);
             flux.init(params_ctx, tensor_types, prefix);
         }
diff --git a/model.cpp b/model.cpp
@@ -1393,15 +1393,20 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
 
 SDVersion ModelLoader::get_sd_version() {
     TensorStorage token_embedding_weight;
-    bool is_flux = false;
-    bool is_sd3  = false;
+    bool is_flux    = false;
+    bool is_schnell = true;
+    bool is_lite    = true;
+    bool is_sd3     = false;
     for (auto& tensor_storage : tensor_storages) {
         if (tensor_storage.name.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
-            return VERSION_FLUX_DEV;
+            is_schnell = false;
         }
         if (tensor_storage.name.find("model.diffusion_model.double_blocks.") != std::string::npos) {
             is_flux = true;
         }
+        if (tensor_storage.name.find("model.diffusion_model.double_blocks.8") != std::string::npos) {
+            is_lite = false;
+        }
         if (tensor_storage.name.find("joint_blocks.0.x_block.attn2.ln_q.weight") != std::string::npos) {
             return VERSION_SD3_5_2B;
         }
@@ -1432,7 +1437,14 @@ SDVersion ModelLoader::get_sd_version() {
         }
     }
     if (is_flux) {
-        return VERSION_FLUX_SCHNELL;
+        if (is_schnell) {
+            GGML_ASSERT(!is_lite);
+            return VERSION_FLUX_SCHNELL;
+        } else if (is_lite) {
+            return VERSION_FLUX_LITE;
+        } else {
+            return VERSION_FLUX_DEV;
+        }
     }
     if (is_sd3) {
         return VERSION_SD3_2B;
@@ -1856,7 +1868,21 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         const std::string& name = tensor_storage.name;
 
         ggml_type tensor_type = tensor_storage.type;
-        tensor_set_type(tensor_type, tensor_storage, type, fallback_type);
+        auto _type            = type;
+        // attempt to improve q2_k quant by using higher quants for the first and final blocks
+        if (type == GGML_TYPE_Q2_K) {
+            if (name.find("single_blocks.37") != std::string::npos ||
+                name.find("double_blocks.0") != std::string::npos) {
+                _type = GGML_TYPE_Q4_K;
+            } else if (name.find("single_blocks.36") != std::string::npos ||
+                       name.find("single_blocks.35") != std::string::npos ||
+                       name.find("single_blocks.0") != std::string::npos ||
+                       name.find("double_blocks.18") != std::string::npos ||
+                       name.find("double_blocks.1") != std::string::npos) {
+                _type = GGML_TYPE_Q3_K;
+            }
+        }
+        tensor_set_type(tensor_type, tensor_storage, _type, fallback_type);
 
         ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
         if (tensor == NULL) {
@@ -1890,7 +1916,8 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
     return success;
 }
 
-int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type, ggml_type fallback_type /*= GGML_TYPE_COUNT*/) {
+int64_t
+ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type, ggml_type fallback_type /*= GGML_TYPE_COUNT*/) {
     size_t alignment = 128;
     if (backend != NULL) {
         alignment = ggml_backend_get_alignment(backend);
@@ -1905,7 +1932,22 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type,
     }
 
     for (auto& tensor_storage : processed_tensor_storages) {
-        tensor_set_type(tensor_storage.type, tensor_storage, type, fallback_type);
+        auto _type = type;
+        auto name  = tensor_storage.name;
+        // attempt to improve q2_k quant by using higher quants for the first and final blocks
+        if (type == GGML_TYPE_Q2_K) {
+            if (name.find("single_blocks.37") != std::string::npos ||
+                name.find("double_blocks.0") != std::string::npos) {
+                _type = GGML_TYPE_Q4_K;
+            } else if (name.find("single_blocks.36") != std::string::npos ||
+                       name.find("single_blocks.35") != std::string::npos ||
+                       name.find("single_blocks.0") != std::string::npos ||
+                       name.find("double_blocks.18") != std::string::npos ||
+                       name.find("double_blocks.1") != std::string::npos) {
+                _type = GGML_TYPE_Q3_K;
+            }
+        }
+        tensor_set_type(tensor_storage.type, tensor_storage, _type, fallback_type);
         mem_size += tensor_storage.nbytes() + alignment;
     }
 
diff --git a/model.h b/model.h
@@ -27,6 +27,7 @@ enum SDVersion {
     VERSION_FLUX_SCHNELL,
     VERSION_SD3_5_8B,
     VERSION_SD3_5_2B,
+    VERSION_FLUX_LITE,
     VERSION_COUNT,
 };
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -33,7 +33,8 @@ const char* model_version_to_str[] = {
     "Flux Dev",
     "Flux Schnell",
     "SD3.5 8B",
-    "SD3.5 2B"};
+    "SD3.5 2B",
+    "Flux Lite 8B"};
 
 const char* sampling_methods_str[] = {
     "Euler A",
@@ -291,7 +292,7 @@ class StableDiffusionGGML {
             }
         } else if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
             scale_factor = 1.5305f;
-        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
             scale_factor = 0.3611;
             // TODO: shift_factor
         }
@@ -312,7 +313,7 @@ class StableDiffusionGGML {
         } else {
             clip_backend   = backend;
             bool use_t5xxl = false;
-            if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+            if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
                 use_t5xxl = true;
             }
             if (!ggml_backend_is_cpu(backend) && use_t5xxl && conditioner_wtype != GGML_TYPE_F32) {
@@ -326,7 +327,7 @@ class StableDiffusionGGML {
             if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
                 cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
                 diffusion_model  = std::make_shared<MMDiTModel>(backend, model_loader.tensor_storages_types, version);
-            } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+            } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
                 cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
                 diffusion_model  = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, version);
             } else {
@@ -525,7 +526,7 @@ class StableDiffusionGGML {
         if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
             LOG_INFO("running in FLOW mode");
             denoiser = std::make_shared<DiscreteFlowDenoiser>();
-        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+        } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
             LOG_INFO("running in Flux FLOW mode");
             float shift = 1.15f;
             if (version == VERSION_FLUX_SCHNELL) {
@@ -811,7 +812,7 @@ class StableDiffusionGGML {
             out_uncond = ggml_dup_tensor(tmp_ctx, x);
         }
         if (has_skiplayer) {
-            if (version == VERSION_SD3_2B || version == VERSION_SD3_5_2B || version == VERSION_SD3_5_8B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+            if (version == VERSION_SD3_2B || version == VERSION_SD3_5_2B || version == VERSION_SD3_5_8B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
                 out_skip = ggml_dup_tensor(tmp_ctx, x);
             } else {
                 has_skiplayer = false;
@@ -1008,7 +1009,7 @@ class StableDiffusionGGML {
         } else {
             if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
                 C = 32;
-            } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+            } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
                 C = 32;
             }
         }
@@ -1346,7 +1347,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
     int C = 4;
     if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
         C = 16;
-    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
+    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL || sd_ctx->sd->version == VERSION_FLUX_LITE) {
         C = 16;
     }
     int W = width / 8;
@@ -1471,7 +1472,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
         params.mem_size *= 3;
     }
-    if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
+    if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL || sd_ctx->sd->version == VERSION_FLUX_LITE) {
         params.mem_size *= 4;
     }
     if (sd_ctx->sd->stacked_id) {
@@ -1496,15 +1497,15 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     int C = 4;
     if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
         C = 16;
-    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
+    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL || sd_ctx->sd->version == VERSION_FLUX_LITE) {
         C = 16;
     }
     int W                    = width / 8;
     int H                    = height / 8;
     ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
     if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
         ggml_set_f32(init_latent, 0.0609f);
-    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
+    } else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL || sd_ctx->sd->version == VERSION_FLUX_LITE) {
         ggml_set_f32(init_latent, 0.1159f);
     } else {
         ggml_set_f32(init_latent, 0.f);
@@ -1575,7 +1576,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
     if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
         params.mem_size *= 2;
     }
-    if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
+    if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL || sd_ctx->sd->version == VERSION_FLUX_LITE) {
         params.mem_size *= 3;
     }
     if (sd_ctx->sd->stacked_id) {
diff --git a/vae.hpp b/vae.hpp
@@ -458,7 +458,7 @@ class AutoencodingEngine : public GGMLBlock {
                        bool use_video_decoder = false,
                        SDVersion version      = VERSION_SD1)
         : decode_only(decode_only), use_video_decoder(use_video_decoder) {
-        if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
+        if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
             dd_config.z_channels = 16;
             use_quant            = false;
         }

Original file line number	Diff line number	Diff line change
`@@ -826,6 +826,9 @@ namespace Flux {`
`826`	`826`	`if (version == VERSION_FLUX_SCHNELL) {`
`827`	`827`	`flux_params.guidance_embed = false;`
`828`	`828`	`}`
	`829`	`+ if (version == VERSION_FLUX_LITE){`
	`830`	`+ flux_params.depth = 8;`
	`831`	`+ }`
`829`	`832`	`flux = Flux(flux_params);`
`830`	`833`	`flux.init(params_ctx, tensor_types, prefix);`
`831`	`834`	`}`
Original file line number	Diff line number	Diff line change
`@@ -458,7 +458,7 @@ class AutoencodingEngine : public GGMLBlock {`
`458`	`458`	`bool use_video_decoder = false,`
`459`	`459`	`SDVersion version = VERSION_SD1)`
`460`	`460`	`: decode_only(decode_only), use_video_decoder(use_video_decoder) {`
`461`		`- if (version == VERSION_SD3_2B \|\| version == VERSION_SD3_5_8B \|\| version == VERSION_SD3_5_2B \|\| version == VERSION_FLUX_DEV \|\| version == VERSION_FLUX_SCHNELL) {`
	`461`	`+ if (version == VERSION_SD3_2B \|\| version == VERSION_SD3_5_8B \|\| version == VERSION_SD3_5_2B \|\| version == VERSION_FLUX_DEV \|\| version == VERSION_FLUX_SCHNELL \|\| version == VERSION_FLUX_LITE) {`
`462`	`462`	`dd_config.z_channels = 16;`
`463`	`463`	`use_quant = false;`
`464`	`464`	`}`