Add direct conv2d support for ControlNet

daniandtheweb · daniandtheweb · commit 8974ec134faa · 2025-07-31T12:46:51.000+02:00
diff --git a/control.hpp b/control.hpp
@@ -27,13 +27,16 @@ class ControlNetBlock : public GGMLBlock {
     int num_heads                          = 8;
     int num_head_channels                  = -1;   // channels // num_heads
     int context_dim                        = 768;  // 1024 for VERSION_SD2, 2048 for VERSION_SDXL
+    bool direct                            = false;
 
 public:
     int model_channels  = 320;
     int adm_in_channels = 2816;  // only for VERSION_SDXL
 
-    ControlNetBlock(SDVersion version = VERSION_SD1)
-        : version(version) {
+    ControlNetBlock(SDVersion version = VERSION_SD1,
+                    bool direct       = false)
+        : version(version),
+          direct(direct) {
         if (sd_version_is_sd2(version)) {
             context_dim       = 1024;
             num_head_channels = 64;
@@ -65,7 +68,7 @@ class ControlNetBlock : public GGMLBlock {
         }
 
         // input_blocks
-        blocks["input_blocks.0.0"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, model_channels, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_blocks.0.0"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, model_channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
 
         std::vector<int> input_block_chans;
         input_block_chans.push_back(model_channels);
@@ -86,26 +89,26 @@ class ControlNetBlock : public GGMLBlock {
         };
 
         auto make_zero_conv = [&](int64_t channels) {
-            return new Conv2d(channels, channels, {1, 1});
+            return new Conv2d(channels, channels, {1, 1}, {1, 1}, {0, 0}, {1, 1}, true, direct);
         };
 
         blocks["zero_convs.0.0"] = std::shared_ptr<GGMLBlock>(make_zero_conv(model_channels));
 
-        blocks["input_hint_block.0"] = std::shared_ptr<GGMLBlock>(new Conv2d(hint_channels, 16, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_hint_block.0"] = std::shared_ptr<GGMLBlock>(new Conv2d(hint_channels, 16, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.2"] = std::shared_ptr<GGMLBlock>(new Conv2d(16, 16, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_hint_block.2"] = std::shared_ptr<GGMLBlock>(new Conv2d(16, 16, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.4"] = std::shared_ptr<GGMLBlock>(new Conv2d(16, 32, {3, 3}, {2, 2}, {1, 1}));
+        blocks["input_hint_block.4"] = std::shared_ptr<GGMLBlock>(new Conv2d(16, 32, {3, 3}, {2, 2}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.6"] = std::shared_ptr<GGMLBlock>(new Conv2d(32, 32, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_hint_block.6"] = std::shared_ptr<GGMLBlock>(new Conv2d(32, 32, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.8"] = std::shared_ptr<GGMLBlock>(new Conv2d(32, 96, {3, 3}, {2, 2}, {1, 1}));
+        blocks["input_hint_block.8"] = std::shared_ptr<GGMLBlock>(new Conv2d(32, 96, {3, 3}, {2, 2}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.10"] = std::shared_ptr<GGMLBlock>(new Conv2d(96, 96, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_hint_block.10"] = std::shared_ptr<GGMLBlock>(new Conv2d(96, 96, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.12"] = std::shared_ptr<GGMLBlock>(new Conv2d(96, 256, {3, 3}, {2, 2}, {1, 1}));
+        blocks["input_hint_block.12"] = std::shared_ptr<GGMLBlock>(new Conv2d(96, 256, {3, 3}, {2, 2}, {1, 1}, {1, 1}, true, direct));
         // nn.SiLU()
-        blocks["input_hint_block.14"] = std::shared_ptr<GGMLBlock>(new Conv2d(256, model_channels, {3, 3}, {1, 1}, {1, 1}));
+        blocks["input_hint_block.14"] = std::shared_ptr<GGMLBlock>(new Conv2d(256, model_channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
 
         size_t len_mults = channel_mult.size();
         for (int i = 0; i < len_mults; i++) {
@@ -318,8 +321,9 @@ struct ControlNet : public GGMLRunner {
 
     ControlNet(ggml_backend_t backend,
                const String2GGMLType& tensor_types = {},
-               SDVersion version                   = VERSION_SD1)
-        : GGMLRunner(backend), control_net(version) {
+               SDVersion version                   = VERSION_SD1,
+               bool direct                         = false)
+        : GGMLRunner(backend), control_net(version, direct) {
         control_net.init(params_ctx, tensor_types, "");
     }
 
@@ -455,4 +459,4 @@ struct ControlNet : public GGMLRunner {
     }
 };
 
-#endif  // __CONTROL_HPP__
+#endif  // __CONTROL_HPP__
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -423,7 +423,7 @@ class StableDiffusionGGML {
                 } else {
                     controlnet_backend = backend;
                 }
-                control_net = std::make_shared<ControlNet>(controlnet_backend, model_loader.tensor_storages_types, version);
+                control_net = std::make_shared<ControlNet>(controlnet_backend, model_loader.tensor_storages_types, version, sd_ctx_params->diffusion_conv_direct);
             }
 
             if (strstr(SAFE_STR(sd_ctx_params->stacked_id_embed_dir), "v2")) {

Original file line number	Diff line number	Diff line change
`@@ -423,7 +423,7 @@ class StableDiffusionGGML {`
`423`	`423`	`} else {`
`424`	`424`	`controlnet_backend = backend;`
`425`	`425`	`}`
`426`		`- control_net = std::make_shared<ControlNet>(controlnet_backend, model_loader.tensor_storages_types, version);`
	`426`	`+ control_net = std::make_shared<ControlNet>(controlnet_backend, model_loader.tensor_storages_types, version, sd_ctx_params->diffusion_conv_direct);`
`427`	`427`	`}`
`428`	`428`
`429`	`429`	`if (strstr(SAFE_STR(sd_ctx_params->stacked_id_embed_dir), "v2")) {`