Let the debug out.

pwilkin · pwilkin · commit 2cab86a09fe5 · 2025-10-16T16:29:01.000+02:00
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
@@ -2975,7 +2975,7 @@ static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggm
     const ggml_tensor * src0 = dst->src[0];
 
     ggml_tri_type ttype = (ggml_tri_type) dst->op_params[0];
-    float c = *((float *) &(dst->op_params[1]));
+    float c = ggml_get_op_params_f32(dst, 1);
     bool keep_org_val = isnan(c);
 
     GGML_ASSERT(ggml_is_contiguous(src0));
@@ -10902,7 +10902,6 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
     // src6, src7, src8 are nullptr in recurrent version
 
     const int64_t H_v               = (int64_t) dst->op_params[0];
-    const int64_t S_k               = (int64_t) dst->op_params[1];
     const int64_t S_v               = (int64_t) dst->op_params[2];
     const int64_t original_n_tokens = (int64_t) dst->op_params[3];  // Get original sequence length
     const int64_t n_tokens          = original_n_tokens;            // Use the original sequence length
@@ -10972,7 +10971,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_copy", token);
+        //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_copy", token);
 
         // 1. last_recurrent_state = last_recurrent_state * g_t (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {
@@ -10986,7 +10985,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_times_g_t", token);
+        //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_times_g_t", token);
         
         // 2. kv_mem = (last_recurrent_state * k_t.unsqueeze(-1)).sum(dim=-2) (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {
@@ -11001,7 +11000,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(kv_mem, n_seqs * H_v * S_v, "kv_mem", token);
+        //print_debug_info(kv_mem, n_seqs * H_v * S_v, "kv_mem", token);
         
         // 3. delta = (v_t - kv_mem) * beta_t (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {
@@ -11013,7 +11012,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(delta, n_seqs * H_v * S_v, "delta", token);
+        //print_debug_info(delta, n_seqs * H_v * S_v, "delta", token);
         
         // 4. last_recurrent_state = last_recurrent_state + k_t.unsqueeze(-1) * delta.unsqueeze(-2) (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {
@@ -11027,7 +11026,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state", token);
+        //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state", token);
         
         // 5. core_attn_out[:, :, i] = (last_recurrent_state * q_t.unsqueeze(-1)).sum(dim=-2) (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {
@@ -11041,7 +11040,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa
                 }
             }
         }
-        print_debug_info(attn_out_t, n_seqs * S_v * H_v, "attn_out_t", token);
+        //print_debug_info(attn_out_t, n_seqs * S_v * H_v, "attn_out_t", token);
         
         // Store the output for this token (for all seqs and heads)
         for (int64_t seq = 0; seq < n_seqs; seq++) {

Original file line number	Diff line number	Diff line change
`@@ -2975,7 +2975,7 @@ static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggm`
`2975`	`2975`	`const ggml_tensor * src0 = dst->src[0];`
`2976`	`2976`
`2977`	`2977`	`ggml_tri_type ttype = (ggml_tri_type) dst->op_params[0];`
`2978`		`- float c = *((float *) &(dst->op_params[1]));`
	`2978`	`+ float c = ggml_get_op_params_f32(dst, 1);`
`2979`	`2979`	`bool keep_org_val = isnan(c);`
`2980`	`2980`
`2981`	`2981`	`GGML_ASSERT(ggml_is_contiguous(src0));`
`@@ -10902,7 +10902,6 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`10902`	`10902`	`// src6, src7, src8 are nullptr in recurrent version`
`10903`	`10903`
`10904`	`10904`	`const int64_t H_v = (int64_t) dst->op_params[0];`
`10905`		`- const int64_t S_k = (int64_t) dst->op_params[1];`
`10906`	`10905`	`const int64_t S_v = (int64_t) dst->op_params[2];`
`10907`	`10906`	`const int64_t original_n_tokens = (int64_t) dst->op_params[3]; // Get original sequence length`
`10908`	`10907`	`const int64_t n_tokens = original_n_tokens; // Use the original sequence length`
`@@ -10972,7 +10971,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`10972`	`10971`	`}`
`10973`	`10972`	`}`
`10974`	`10973`	`}`
`10975`		`- print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_copy", token);`
	`10974`	`+ //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_copy", token);`
`10976`	`10975`
`10977`	`10976`	`// 1. last_recurrent_state = last_recurrent_state * g_t (for all seqs and heads)`
`10978`	`10977`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`
`@@ -10986,7 +10985,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`10986`	`10985`	`}`
`10987`	`10986`	`}`
`10988`	`10987`	`}`
`10989`		`- print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_times_g_t", token);`
	`10988`	`+ //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state_times_g_t", token);`
`10990`	`10989`
`10991`	`10990`	`// 2. kv_mem = (last_recurrent_state * k_t.unsqueeze(-1)).sum(dim=-2) (for all seqs and heads)`
`10992`	`10991`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`
`@@ -11001,7 +11000,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`11001`	`11000`	`}`
`11002`	`11001`	`}`
`11003`	`11002`	`}`
`11004`		`- print_debug_info(kv_mem, n_seqs * H_v * S_v, "kv_mem", token);`
	`11003`	`+ //print_debug_info(kv_mem, n_seqs * H_v * S_v, "kv_mem", token);`
`11005`	`11004`
`11006`	`11005`	`// 3. delta = (v_t - kv_mem) * beta_t (for all seqs and heads)`
`11007`	`11006`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`
`@@ -11013,7 +11012,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`11013`	`11012`	`}`
`11014`	`11013`	`}`
`11015`	`11014`	`}`
`11016`		`- print_debug_info(delta, n_seqs * H_v * S_v, "delta", token);`
	`11015`	`+ //print_debug_info(delta, n_seqs * H_v * S_v, "delta", token);`
`11017`	`11016`
`11018`	`11017`	`// 4. last_recurrent_state = last_recurrent_state + k_t.unsqueeze(-1) * delta.unsqueeze(-2) (for all seqs and heads)`
`11019`	`11018`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`
`@@ -11027,7 +11026,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`11027`	`11026`	`}`
`11028`	`11027`	`}`
`11029`	`11028`	`}`
`11030`		`- print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state", token);`
	`11029`	`+ //print_debug_info(temp_state, n_seqs * H_v * S_v * S_v, "temp_state", token);`
`11031`	`11030`
`11032`	`11031`	`// 5. core_attn_out[:, :, i] = (last_recurrent_state * q_t.unsqueeze(-1)).sum(dim=-2) (for all seqs and heads)`
`11033`	`11032`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`
`@@ -11041,7 +11040,7 @@ void ggml_compute_forward_delta_net_recurrent_f32(const ggml_compute_params * pa`
`11041`	`11040`	`}`
`11042`	`11041`	`}`
`11043`	`11042`	`}`
`11044`		`- print_debug_info(attn_out_t, n_seqs * S_v * H_v, "attn_out_t", token);`
	`11043`	`+ //print_debug_info(attn_out_t, n_seqs * S_v * H_v, "attn_out_t", token);`
`11045`	`11044`
`11046`	`11045`	`// Store the output for this token (for all seqs and heads)`
`11047`	`11046`	`for (int64_t seq = 0; seq < n_seqs; seq++) {`