fix(temp): Fix CBdecay to make decay contiguous for Metal

gabe-l-hart · gabe-l-hart · commit 3da5c97bb18c · 2025-10-24T13:52:08.000-06:00
The extra ggml_cont on decay is a temporary workaround. We shouldn't need it
once cumsum can operate on other dims and we can avoid all the various
permutes elsewhere.

Branch: Mamba2SSD

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -11938,7 +11938,7 @@ struct llm_graph_context_mamba : public llm_graph_context {
                         cb(decay, "decay", il);
 
                         // step 5: compute surrogate_attention_matrix
-            /* !! */    ggml_tensor * CBdecay = ggml_mul(ctx, CB, decay);
+                        ggml_tensor * CBdecay = ggml_mul(ctx, CB, ggml_cont(ctx, decay));
                         ggml_tensor * surrogate_attention_matrix = ggml_tri_keep(ctx, CBdecay, GGML_TRI_TYPE_LOWER_DIAG);
                         cb(surrogate_attention_matrix, "surrogate_attention_matrix", il);