LostRuins
diff --git a/otherarch/sdcpp/clip.hpp b/otherarch/sdcpp/clip.hpp
Lines changed: 226 additions & 440 deletions
diff --git a/otherarch/sdcpp/common.hpp b/otherarch/sdcpp/common.hpp
Lines changed: 5 additions & 20 deletions
diff --git a/otherarch/sdcpp/conditioner.hpp b/otherarch/sdcpp/conditioner.hpp
Lines changed: 1206 additions & 0 deletions
diff --git a/otherarch/sdcpp/control.hpp b/otherarch/sdcpp/control.hpp
Lines changed: 12 additions & 20 deletions
@@ -279,26 +279,11 @@ class CrossAttention : public GGMLBlock {
         int64_t n_context = context->ne[1];
         int64_t inner_dim = d_head * n_head;
 
-        auto q = to_q->forward(ctx, x);                                 // [N, n_token, inner_dim]
-        q      = ggml_reshape_4d(ctx, q, d_head, n_head, n_token, n);   // [N, n_token, n_head, d_head]
-        q      = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));      // [N, n_head, n_token, d_head]
-        q      = ggml_reshape_3d(ctx, q, d_head, n_token, n_head * n);  // [N * n_head, n_token, d_head]
+        auto q = to_q->forward(ctx, x);        // [N, n_token, inner_dim]
+        auto k = to_k->forward(ctx, context);  // [N, n_context, inner_dim]
+        auto v = to_v->forward(ctx, context);  // [N, n_context, inner_dim]
 
-        auto k = to_k->forward(ctx, context);                             // [N, n_context, inner_dim]
-        k      = ggml_reshape_4d(ctx, k, d_head, n_head, n_context, n);   // [N, n_context, n_head, d_head]
-        k      = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3));        // [N, n_head, n_context, d_head]
-        k      = ggml_reshape_3d(ctx, k, d_head, n_context, n_head * n);  // [N * n_head, n_context, d_head]
-
-        auto v = to_v->forward(ctx, context);                             // [N, n_context, inner_dim]
-        v      = ggml_reshape_4d(ctx, v, d_head, n_head, n_context, n);   // [N, n_context, n_head, d_head]
-        v      = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3));        // [N, n_head, d_head, n_context]
-        v      = ggml_reshape_3d(ctx, v, n_context, d_head, n_head * n);  // [N * n_head, d_head, n_context]
-
-        auto kqv = ggml_nn_attention(ctx, q, k, v, false);  // [N * n_head, n_token, d_head]
-        kqv      = ggml_reshape_4d(ctx, kqv, d_head, n_token, n_head, n);
-        kqv      = ggml_cont(ctx, ggml_permute(ctx, kqv, 0, 2, 1, 3));  // [N, n_token, n_head, d_head]
-
-        x = ggml_reshape_3d(ctx, kqv, d_head * n_head, n_token, n);  // [N, n_token, inner_dim]
+        x = ggml_nn_attention_ext(ctx, q, k, v, n_head, NULL, false);  // [N, n_token, inner_dim]
 
         x = to_out_0->forward(ctx, x);  // [N, n_token, query_dim]
         return x;
@@ -382,7 +367,7 @@ class SpatialTransformer : public GGMLBlock {
     int64_t n_head;
     int64_t d_head;
     int64_t depth       = 1;    // 1
-    int64_t context_dim = 768;  // hidden_size, 1024 for VERSION_2_x
+    int64_t context_dim = 768;  // hidden_size, 1024 for VERSION_SD2
 
 public:
     SpatialTransformer(int64_t in_channels,
 
@@ -14,7 +14,7 @@
 */
 class ControlNetBlock : public GGMLBlock {
 protected:
-    SDVersion version = VERSION_1_x;
+    SDVersion version = VERSION_SD1;
     // network hparams
     int in_channels                        = 4;
     int out_channels                       = 4;
@@ -26,19 +26,19 @@ class ControlNetBlock : public GGMLBlock {
     int time_embed_dim                     = 1280;  // model_channels*4
     int num_heads                          = 8;
     int num_head_channels                  = -1;   // channels // num_heads
-    int context_dim                        = 768;  // 1024 for VERSION_2_x, 2048 for VERSION_XL
+    int context_dim                        = 768;  // 1024 for VERSION_SD2, 2048 for VERSION_SDXL
 
 public:
     int model_channels  = 320;
-    int adm_in_channels = 2816;  // only for VERSION_XL
+    int adm_in_channels = 2816;  // only for VERSION_SDXL
 
-    ControlNetBlock(SDVersion version = VERSION_1_x)
+    ControlNetBlock(SDVersion version = VERSION_SD1)
         : version(version) {
-        if (version == VERSION_2_x) {
+        if (version == VERSION_SD2) {
             context_dim       = 1024;
             num_head_channels = 64;
             num_heads         = -1;
-        } else if (version == VERSION_XL) {
+        } else if (version == VERSION_SDXL) {
             context_dim           = 2048;
             attention_resolutions = {4, 2};
             channel_mult          = {1, 2, 4};
@@ -58,7 +58,7 @@ class ControlNetBlock : public GGMLBlock {
         // time_embed_1 is nn.SiLU()
         blocks["time_embed.2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, time_embed_dim));
 
-        if (version == VERSION_XL || version == VERSION_SVD) {
+        if (version == VERSION_SDXL || version == VERSION_SVD) {
             blocks["label_emb.0.0"] = std::shared_ptr<GGMLBlock>(new Linear(adm_in_channels, time_embed_dim));
             // label_emb_1 is nn.SiLU()
             blocks["label_emb.0.2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, time_embed_dim));
@@ -306,8 +306,8 @@ class ControlNetBlock : public GGMLBlock {
     }
 };
 
-struct ControlNet : public GGMLModule {
-    SDVersion version = VERSION_1_x;
+struct ControlNet : public GGMLRunner {
+    SDVersion version = VERSION_SD1;
     ControlNetBlock control_net;
 
     ggml_backend_buffer_t control_buffer = NULL;  // keep control output tensors in backend memory
@@ -318,8 +318,8 @@ struct ControlNet : public GGMLModule {
 
     ControlNet(ggml_backend_t backend,
                ggml_type wtype,
-               SDVersion version = VERSION_1_x)
-        : GGMLModule(backend, wtype), control_net(version) {
+               SDVersion version = VERSION_SD1)
+        : GGMLRunner(backend, wtype), control_net(version) {
         control_net.init(params_ctx, wtype);
     }
 
@@ -369,14 +369,6 @@ struct ControlNet : public GGMLModule {
         return "control_net";
     }
 
-    size_t get_params_mem_size() {
-        return control_net.get_params_mem_size();
-    }
-
-    size_t get_params_num() {
-        return control_net.get_params_num();
-    }
-
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
         control_net.get_param_tensors(tensors, prefix);
     }
@@ -434,7 +426,7 @@ struct ControlNet : public GGMLModule {
             return build_graph(x, hint, timesteps, context, y);
         };
 
-        GGMLModule::compute(get_graph, n_threads, false, output, output_ctx);
+        GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
         guided_hint_cached = true;
     }