Skip to content

Commit d232509

Browse files
committed
add qwen image model
1 parent d8d4c26 commit d232509

File tree

8 files changed

+725
-25
lines changed

8 files changed

+725
-25
lines changed

common.hpp

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ class ResBlock : public GGMLBlock {
177177
}
178178
};
179179

180-
class GEGLU : public GGMLBlock {
180+
class GEGLU : public UnaryBlock {
181181
protected:
182182
int64_t dim_in;
183183
int64_t dim_out;
@@ -216,14 +216,41 @@ class GEGLU : public GGMLBlock {
216216
}
217217
};
218218

219+
class GELU : public UnaryBlock {
220+
public:
221+
GELU(int64_t dim_in, int64_t dim_out, bool bias = true) {
222+
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
223+
}
224+
225+
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
226+
// x: [ne3, ne2, ne1, dim_in]
227+
// return: [ne3, ne2, ne1, dim_out]
228+
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
229+
230+
x = proj->forward(ctx, x);
231+
x = ggml_gelu_inplace(ctx, x);
232+
return x;
233+
}
234+
};
235+
219236
class FeedForward : public GGMLBlock {
220237
public:
238+
enum class Activation {
239+
GEGLU,
240+
GELU
241+
};
221242
FeedForward(int64_t dim,
222243
int64_t dim_out,
223-
int64_t mult = 4) {
244+
int64_t mult = 4,
245+
Activation activation = Activation::GEGLU) {
224246
int64_t inner_dim = dim * mult;
225247

226-
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GEGLU(dim, inner_dim));
248+
if (activation == Activation::GELU) {
249+
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GELU(dim, inner_dim));
250+
} else {
251+
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GEGLU(dim, inner_dim));
252+
}
253+
227254
// net_1 is nn.Dropout(), skip for inference
228255
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out));
229256
}
@@ -232,7 +259,7 @@ class FeedForward : public GGMLBlock {
232259
// x: [ne3, ne2, ne1, dim]
233260
// return: [ne3, ne2, ne1, dim_out]
234261

235-
auto net_0 = std::dynamic_pointer_cast<GEGLU>(blocks["net.0"]);
262+
auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]);
236263
auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
237264

238265
x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim]

examples/cli/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727

2828
#include "avi_writer.h"
2929

30-
#include "qwen.hpp"
30+
#include "qwen_image.hpp"
3131

3232
#if defined(_WIN32)
3333
#define NOMINMAX
@@ -1142,7 +1142,7 @@ int main(int argc, const char* argv[]) {
11421142
SDParams params;
11431143
params.verbose = true;
11441144
sd_set_log_callback(sd_log_cb, (void*)&params);
1145-
Qwen::Qwen2_5_VLEmbedder::load_from_file_and_test(argv[1]);
1145+
Qwen::QwenImageRunner::load_from_file_and_test(argv[1]);
11461146
exit(1);
11471147
parse_args(argc, argv, params);
11481148
params.sample_params.guidance.slg.layers = params.skip_layers.data();

ggml_extend.hpp

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1353,25 +1353,28 @@ __STATIC_INLINE__ std::vector<float> arange(float start, float end, float step =
13531353
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
13541354
__STATIC_INLINE__ std::vector<float> timestep_embedding(std::vector<float> timesteps,
13551355
int dim,
1356-
int max_period = 10000) {
1356+
int max_period = 10000,
1357+
bool flip_sin_to_cos = true,
1358+
float scale = 1.f) {
13571359
// timesteps: [N,]
13581360
// embedding: [N, dim]
1359-
size_t N = timesteps.size();
1360-
int acutual_dim = dim;
1361-
if (dim % 2 != 0) {
1362-
acutual_dim = dim + 1;
1363-
}
1364-
std::vector<float> embedding(N * acutual_dim, 0.f);
1361+
size_t N = timesteps.size();
1362+
std::vector<float> embedding(N * dim, 0.f);
13651363
int half = dim / 2;
13661364
std::vector<float> freqs(half);
13671365
for (int i = 0; i < half; ++i) {
13681366
freqs[i] = (float)std::exp(-std::log(max_period) * i / half);
13691367
}
13701368
for (int i = 0; i < N; ++i) {
13711369
for (int j = 0; j < half; ++j) {
1372-
float arg = timesteps[i] * freqs[j];
1373-
embedding[i * acutual_dim + j] = std::cos(arg);
1374-
embedding[i * acutual_dim + j + half] = std::sin(arg);
1370+
float arg = timesteps[i] * freqs[j] * scale;
1371+
if (flip_sin_to_cos) {
1372+
embedding[i * dim + j] = std::cos(arg);
1373+
embedding[i * dim + j + half] = std::sin(arg);
1374+
} else {
1375+
embedding[i * dim + j] = std::sin(arg);
1376+
embedding[i * dim + j + half] = std::cos(arg);
1377+
}
13751378
}
13761379
}
13771380
return embedding;
@@ -1392,11 +1395,7 @@ __STATIC_INLINE__ struct ggml_tensor* new_timestep_embedding(struct ggml_context
13921395
// timesteps: [N,]
13931396
// embedding: [N, dim]
13941397
std::vector<float> embedding_vec = timestep_embedding(timesteps, dim, max_period);
1395-
int acutual_dim = dim;
1396-
if (dim % 2 != 0) {
1397-
acutual_dim = dim + 1;
1398-
}
1399-
struct ggml_tensor* embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, acutual_dim, timesteps.size());
1398+
struct ggml_tensor* embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps.size());
14001399
if (embedding->data != NULL) {
14011400
memcpy(((char*)embedding->data), ((char*)embedding_vec.data()), ggml_nbytes(embedding));
14021401
} else {

model.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -728,6 +728,7 @@ void preprocess_tensor(TensorStorage tensor_storage,
728728

729729
// convert unet transformer linear to conv2d 1x1
730730
if (starts_with(new_name, "model.diffusion_model.") &&
731+
!starts_with(new_name, "model.diffusion_model.proj_out.") &&
731732
(ends_with(new_name, "proj_in.weight") || ends_with(new_name, "proj_out.weight"))) {
732733
tensor_storage.unsqueeze();
733734
}

0 commit comments

Comments (0)