Refactor: Flexible Flux architecture

stduhpf · stduhpf · commit 4080c2914019 · 2024-11-29T01:44:44.000+01:00
diff --git a/diffusion_model.hpp b/diffusion_model.hpp
@@ -133,9 +133,8 @@ struct FluxModel : public DiffusionModel {
 
     FluxModel(ggml_backend_t backend,
               std::map<std::string, enum ggml_type>& tensor_types,
-              SDVersion version = VERSION_FLUX_DEV,
               bool flash_attn   = false)
-        : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
+        : flux(backend, tensor_types, "model.diffusion_model", flash_attn) {
     }
 
     void alloc_params_buffer() {
diff --git a/flux.hpp b/flux.hpp
@@ -834,16 +834,43 @@ namespace Flux {
         FluxRunner(ggml_backend_t backend,
                    std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
                    const std::string prefix                            = "",
-                   SDVersion version                                   = VERSION_FLUX_DEV,
-                   bool flash_attn   = false)
+                   bool flash_attn                                     = false)
             : GGMLRunner(backend) {
-            flux_params.flash_attn = flash_attn;
-            if (version == VERSION_FLUX_SCHNELL) {
-                flux_params.guidance_embed = false;
+            flux_params.flash_attn          = flash_attn;
+            flux_params.guidance_embed      = false;
+            flux_params.depth               = 0;
+            flux_params.depth_single_blocks = 0;
+            for (auto pair : tensor_types) {
+                std::string tensor_name = pair.first;
+                if (tensor_name.find("model.diffusion_model.") == std::string::npos)
+                    continue;
+                if (tensor_name.find("guidance_in.in_layer.weight") != std::string::npos) {
+                    // not schnell
+                    flux_params.guidance_embed = true;
+                }
+                size_t db = tensor_name.find("double_blocks.");
+                if (db != std::string::npos) {
+                    tensor_name     = tensor_name.substr(db);  // remove prefix
+                    int block_depth = atoi(tensor_name.substr(14, tensor_name.find(".", 14)).c_str());
+                    if (block_depth + 1 > flux_params.depth) {
+                        flux_params.depth = block_depth + 1;
+                    }
+                }
+                size_t sb = tensor_name.find("single_blocks.");
+                if (sb != std::string::npos) {
+                    tensor_name     = tensor_name.substr(sb);  // remove prefix
+                    int block_depth = atoi(tensor_name.substr(14, tensor_name.find(".", 14)).c_str());
+                    if (block_depth + 1 > flux_params.depth_single_blocks) {
+                        flux_params.depth_single_blocks = block_depth + 1;
+                    }
+                }
             }
-            if (version == VERSION_FLUX_LITE) {
-                flux_params.depth = 8;
+
+            LOG_INFO("Flux blocks: %d double, %d single", flux_params.depth, flux_params.depth_single_blocks);
+            if (!flux_params.guidance_embed) {
+                LOG_INFO("Flux guidance is disabled (Schnell mode)");
             }
+
             flux = Flux(flux_params);
             flux.init(params_ctx, tensor_types, prefix);
         }
diff --git a/model.cpp b/model.cpp
@@ -1459,18 +1459,9 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
 
 SDVersion ModelLoader::get_sd_version() {
     TensorStorage token_embedding_weight;
-    bool is_flux    = false;
-    bool is_schnell = true;
-    bool is_lite    = true;
     for (auto& tensor_storage : tensor_storages) {
-        if (tensor_storage.name.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
-            is_schnell = false;
-        }
         if (tensor_storage.name.find("model.diffusion_model.double_blocks.") != std::string::npos) {
-            is_flux = true;
-        }
-        if (tensor_storage.name.find("model.diffusion_model.double_blocks.8") != std::string::npos) {
-            is_lite = false;
+            return VERSION_FLUX;
         }
         if (tensor_storage.name.find("model.diffusion_model.joint_blocks.") != std::string::npos) {
             return VERSION_SD3;
@@ -1495,16 +1486,7 @@ SDVersion ModelLoader::get_sd_version() {
             // break;
         }
     }
-    if (is_flux) {
-        if (is_schnell) {
-            GGML_ASSERT(!is_lite);
-            return VERSION_FLUX_SCHNELL;
-        } else if (is_lite) {
-            return VERSION_FLUX_LITE;
-        } else {
-            return VERSION_FLUX_DEV;
-        }
-    }
+
     if (token_embedding_weight.ne[0] == 768) {
         return VERSION_SD1;
     } else if (token_embedding_weight.ne[0] == 1024) {
diff --git a/model.h b/model.h
@@ -23,14 +23,12 @@ enum SDVersion {
     VERSION_SDXL,
     VERSION_SVD,
     VERSION_SD3,
-    VERSION_FLUX_DEV,
-    VERSION_FLUX_SCHNELL,
-    VERSION_FLUX_LITE,
+    VERSION_FLUX,
     VERSION_COUNT,
 };
 
 static inline bool sd_version_is_flux(SDVersion version) {
-    if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL || version == VERSION_FLUX_LITE) {
+    if (version == VERSION_FLUX) {
         return true;
     }
     return false;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -30,9 +30,7 @@ const char* model_version_to_str[] = {
     "SDXL",
     "SVD",
     "SD3.x",
-    "Flux Dev",
-    "Flux Schnell",
-    "Flux Lite 8B"};
+    "Flux"};
 
 const char* sampling_methods_str[] = {
     "Euler A",
@@ -331,7 +329,7 @@ class StableDiffusionGGML {
                 diffusion_model  = std::make_shared<MMDiTModel>(backend, model_loader.tensor_storages_types);
             } else if (sd_version_is_flux(version)) {
                 cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
-                diffusion_model  = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, version, diffusion_flash_attn);
+                diffusion_model  = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, diffusion_flash_attn);
             } else {
                 if (id_embeddings_path.find("v2") != std::string::npos) {
                     cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_loader.tensor_storages_types, embeddings_path, version, PM_VERSION_2);
@@ -533,9 +531,12 @@ class StableDiffusionGGML {
             denoiser = std::make_shared<DiscreteFlowDenoiser>();
         } else if (sd_version_is_flux(version)) {
             LOG_INFO("running in Flux FLOW mode");
-            float shift = 1.15f;
-            if (version == VERSION_FLUX_SCHNELL) {
-                shift = 1.0f;  // TODO: validate
+            float shift = 1.0f;  // TODO: validate
+            for (auto pair : model_loader.tensor_storages_types) {
+                if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
+                    shift = 1.15f;
+                    break;
+                }
             }
             denoiser = std::make_shared<FluxFlowDenoiser>(shift);
         } else if (is_using_v_parameterization) {

Original file line number	Diff line number	Diff line change
`@@ -133,9 +133,8 @@ struct FluxModel : public DiffusionModel {`
`133`	`133`
`134`	`134`	`FluxModel(ggml_backend_t backend,`
`135`	`135`	`std::map<std::string, enum ggml_type>& tensor_types,`
`136`		`- SDVersion version = VERSION_FLUX_DEV,`
`137`	`136`	`bool flash_attn = false)`
`138`		`- : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn) {`
	`137`	`+ : flux(backend, tensor_types, "model.diffusion_model", flash_attn) {`
`139`	`138`	`}`
`140`	`139`
`141`	`140`	`void alloc_params_buffer() {`