Skip to content

Commit 15249ba

Browse files
committed
1 parent f282362 commit 15249ba

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

otherarch/sdcpp/common.hpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,8 @@ class FeedForward : public GGMLBlock {
242242
FeedForward(int64_t dim,
243243
int64_t dim_out,
244244
int64_t mult = 4,
245-
Activation activation = Activation::GEGLU) {
245+
Activation activation = Activation::GEGLU,
246+
bool force_prec_f32 = false) {
246247
int64_t inner_dim = dim * mult;
247248

248249
if (activation == Activation::GELU) {
@@ -252,7 +253,7 @@ class FeedForward : public GGMLBlock {
252253
}
253254

254255
// net_1 is nn.Dropout(), skip for inference
255-
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out));
256+
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32));
256257
}
257258

258259
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {

otherarch/sdcpp/ggml_extend.hpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -939,8 +939,12 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ct
939939
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx,
940940
struct ggml_tensor* x,
941941
struct ggml_tensor* w,
942-
struct ggml_tensor* b) {
942+
struct ggml_tensor* b,
943+
bool force_prec_f32 = false) {
943944
x = ggml_mul_mat(ctx, w, x);
945+
if (force_prec_f32) {
946+
ggml_mul_mat_set_prec(x, GGML_PREC_F32);
947+
}
944948
if (b != NULL) {
945949
x = ggml_add_inplace(ctx, x, b);
946950
}
@@ -1953,6 +1957,7 @@ class Linear : public UnaryBlock {
19531957
int64_t out_features;
19541958
bool bias;
19551959
bool force_f32;
1960+
bool force_prec_f32;
19561961

19571962
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
19581963
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
@@ -1970,19 +1975,21 @@ class Linear : public UnaryBlock {
19701975
Linear(int64_t in_features,
19711976
int64_t out_features,
19721977
bool bias = true,
1973-
bool force_f32 = false)
1978+
bool force_f32 = false,
1979+
bool force_prec_f32 = false)
19741980
: in_features(in_features),
19751981
out_features(out_features),
19761982
bias(bias),
1977-
force_f32(force_f32) {}
1983+
force_f32(force_f32),
1984+
force_prec_f32(force_prec_f32) {}
19781985

19791986
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
19801987
struct ggml_tensor* w = params["weight"];
19811988
struct ggml_tensor* b = NULL;
19821989
if (bias) {
19831990
b = params["bias"];
19841991
}
1985-
return ggml_nn_linear(ctx, x, w, b);
1992+
return ggml_nn_linear(ctx, x, w, b, force_prec_f32);
19861993
}
19871994
};
19881995

otherarch/sdcpp/qwen_image.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ namespace Qwen {
196196

197197
blocks["img_norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
198198
blocks["img_norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
199-
blocks["img_mlp"] = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU));
199+
blocks["img_mlp"] = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU, true));
200200

201201
// txt_mod.0 is nn.SiLU()
202202
blocks["txt_mod.1"] = std::shared_ptr<GGMLBlock>(new Linear(dim, 6 * dim, true));

0 commit comments

Comments
 (0)