Use ggml_pad instead of ggml_concat with a zero-scaled placeholder tensor

stduhpf · stduhpf · commit 2bdc8470f7b0 · 2025-06-02T17:04:16.000+02:00
diff --git a/flux.hpp b/flux.hpp
@@ -884,7 +884,7 @@ namespace Flux {
                 vec = approx->forward(ctx, vec);                           // [344, N, hidden_size]
 
                 if (y != NULL) {
-                    txt_img_mask = ggml_concat(ctx, y, ggml_scale_inplace(ctx, ggml_new_tensor_1d(ctx, GGML_TYPE_F32, img->ne[1]), 0), 0);
+                    txt_img_mask = ggml_pad(ctx, y, img->ne[1], 0, 0, 0);
                 }
             } else {
                 auto time_in   = std::dynamic_pointer_cast<MLPEmbedder>(blocks["time_in"]);
diff --git a/t5.hpp b/t5.hpp
@@ -434,6 +434,7 @@ class T5UniGramTokenizer {
                 tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
                 weights.insert(weights.end(), length - weights.size(), 1.0);
                 if (attention_mask != nullptr) {
+                    // maybe keep some padding tokens unmasked? 
                     attention_mask->insert(attention_mask->end(), length - attention_mask->size(), -HUGE_VALF);
                 }
             }
@@ -594,7 +595,7 @@ class T5Attention : public GGMLBlock {
         }
         if (past_bias != NULL) {
             if (mask != NULL) {
-                mask = ggml_repeat(ctx,mask,past_bias);
+                mask = ggml_repeat(ctx, mask, past_bias);
                 mask = ggml_add(ctx, mask, past_bias);
             } else {
                 mask = past_bias;

Original file line number	Diff line number	Diff line change
`@@ -884,7 +884,7 @@ namespace Flux {`
`884`	`884`	`vec = approx->forward(ctx, vec); // [344, N, hidden_size]`
`885`	`885`
`886`	`886`	`if (y != NULL) {`
`887`		`- txt_img_mask = ggml_concat(ctx, y, ggml_scale_inplace(ctx, ggml_new_tensor_1d(ctx, GGML_TYPE_F32, img->ne[1]), 0), 0);`
	`887`	`+ txt_img_mask = ggml_pad(ctx, y, img->ne[1], 0, 0, 0);`
`888`	`888`	`}`
`889`	`889`	`} else {`
`890`	`890`	`auto time_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks["time_in"]);`
Original file line number	Diff line number	Diff line change
`@@ -434,6 +434,7 @@ class T5UniGramTokenizer {`
`434`	`434`	`tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);`
`435`	`435`	`weights.insert(weights.end(), length - weights.size(), 1.0);`
`436`	`436`	`if (attention_mask != nullptr) {`
	`437`	`+ // maybe keep some padding tokens unmasked?`
`437`	`438`	`attention_mask->insert(attention_mask->end(), length - attention_mask->size(), -HUGE_VALF);`
`438`	`439`	`}`
`439`	`440`	`}`
`@@ -594,7 +595,7 @@ class T5Attention : public GGMLBlock {`
`594`	`595`	`}`
`595`	`596`	`if (past_bias != NULL) {`
`596`	`597`	`if (mask != NULL) {`
`597`		`- mask = ggml_repeat(ctx,mask,past_bias);`
	`598`	`+ mask = ggml_repeat(ctx, mask, past_bias);`
`598`	`599`	`mask = ggml_add(ctx, mask, past_bias);`
`599`	`600`	`} else {`
`600`	`601`	`mask = past_bias;`