Apply jeffbolz's F32 matmul-precision patch from leejet/stable-diffusion.cpp#851 (comment)

LostRuins · LostRuins · commit 15249baea1c4 · 2025-10-03T19:18:46.000+08:00
diff --git a/otherarch/sdcpp/common.hpp b/otherarch/sdcpp/common.hpp
@@ -242,7 +242,8 @@ class FeedForward : public GGMLBlock {
     FeedForward(int64_t dim,
                 int64_t dim_out,
                 int64_t mult          = 4,
-                Activation activation = Activation::GEGLU) {
+                Activation activation = Activation::GEGLU,
+                bool force_prec_f32 = false) {
         int64_t inner_dim = dim * mult;
 
         if (activation == Activation::GELU) {
@@ -252,7 +253,7 @@ class FeedForward : public GGMLBlock {
         }
 
         // net_1 is nn.Dropout(), skip for inference
-        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out));
+        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
diff --git a/otherarch/sdcpp/ggml_extend.hpp b/otherarch/sdcpp/ggml_extend.hpp
@@ -939,8 +939,12 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ct
 __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx,
                                                      struct ggml_tensor* x,
                                                      struct ggml_tensor* w,
-                                                     struct ggml_tensor* b) {
+                                                     struct ggml_tensor* b,
+                                                     bool force_prec_f32 = false) {
     x = ggml_mul_mat(ctx, w, x);
+    if (force_prec_f32) {
+        ggml_mul_mat_set_prec(x, GGML_PREC_F32);
+    }
     if (b != NULL) {
         x = ggml_add_inplace(ctx, x, b);
     }
@@ -1953,6 +1957,7 @@ class Linear : public UnaryBlock {
     int64_t out_features;
     bool bias;
     bool force_f32;
+    bool force_prec_f32;
 
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
@@ -1970,19 +1975,21 @@ class Linear : public UnaryBlock {
     Linear(int64_t in_features,
            int64_t out_features,
            bool bias      = true,
-           bool force_f32 = false)
+           bool force_f32 = false,
+           bool force_prec_f32 = false)
         : in_features(in_features),
           out_features(out_features),
           bias(bias),
-          force_f32(force_f32) {}
+          force_f32(force_f32),
+          force_prec_f32(force_prec_f32) {}
 
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
         struct ggml_tensor* w = params["weight"];
         struct ggml_tensor* b = NULL;
         if (bias) {
             b = params["bias"];
         }
-        return ggml_nn_linear(ctx, x, w, b);
+        return ggml_nn_linear(ctx, x, w, b, force_prec_f32);
     }
 };
 
diff --git a/otherarch/sdcpp/qwen_image.hpp b/otherarch/sdcpp/qwen_image.hpp
@@ -196,7 +196,7 @@ namespace Qwen {
 
             blocks["img_norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
             blocks["img_norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
-            blocks["img_mlp"]   = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU));
+            blocks["img_mlp"]   = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU, true));
 
             // txt_mod.0 is nn.SiLU()
             blocks["txt_mod.1"] = std::shared_ptr<GGMLBlock>(new Linear(dim, 6 * dim, true));