Commit 86788a2

temp: Cast ssm to F32

This will be needed until F16 support is added for SSM_SCAN

Branch: Mamba2SSD
Signed-off-by: Gabe Goodhart <[email protected]>

1 parent 204cd80 commit 86788a2

File tree: 1 file changed (+1 −0)


src/models/graph-context-mamba.cpp (1 addition, 0 deletions)

@@ -242,6 +242,7 @@ ggml_tensor * llm_graph_context_mamba::build_mamba2_layer(llm_graph_input_rs * i
     // while avoiding to make unnecessary copies of the states)
     auto get_ssm_rows = [&](ggml_context * ctx, ggml_tensor * states, ggml_tensor * ids) {
         ggml_tensor * ssm = ggml_reshape_4d(ctx, states, d_state, head_dim, n_head, mctx_cur->get_size());
+        ssm = ggml_cast(ctx, ssm, GGML_TYPE_F32);

         // Empty y that will be extended with each chunk of tokens
         ggml_tensor * y = ggml_new_tensor_4d(ctx, x->type, x->ne[0], x->ne[1], 0, x->ne[3]);
