Skip to content

Commit 4b52ca2

Browse files
zonglinpeng authored and facebook-github-bot committed
fix MM nullptr from zero bias (pytorch#13523)
Summary: solve ``` *Error* Unhandled user exception: LoadProhibitedCause (0x00000000) ``` Differential Revision: D80487955
1 parent b82f8f3 commit 4b52ca2

File tree

2 files changed

+26
-3
lines changed

2 files changed

+26
-3
lines changed

backends/cadence/hifi/kernels/kernels.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) {
2121
}
2222

2323
// Allocates `size` bytes from the kernel runtime context's temporary-memory
// allocator. Returns the allocated pointer on success, or nullptr when the
// allocation fails (the failure's error code is logged for diagnosis).
void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
  ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size);
  Result<void*> temp_mem_res = ctx.allocate_temp(size);
  if (!temp_mem_res.ok()) {
    // Surface the error code so a caller's nullptr check can be traced back
    // to the underlying allocator failure.
    ET_LOG(
        Error,
        "Failed to allocate temp memory, error: 0x%x",
        static_cast<uint32_t>(temp_mem_res.error()));
    return nullptr;
  }
  void* mem = temp_mem_res.get();
  ET_LOG(Info, "Successfully allocated temp memory at %p", mem);
  return mem;
}
2738

2839
// Quantize a fp32 value to an int8_t/uint8_t value

backends/cadence/hifi/operators/op_mm.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@ Tensor& mm_out(
7979
(WORD32* __restrict__)kernels::allocate_temp_memory(
8080
ctx, (n * p) * sizeof(WORD32));
8181

82+
// Allocate zero-initialized bias for matmul function (it doesn't accept
83+
// NULL)
84+
FLOAT32* __restrict__ p_bias_zero =
85+
(FLOAT32* __restrict__)kernels::allocate_temp_memory(
86+
ctx, m * sizeof(FLOAT32));
87+
88+
// Initialize bias to zero since mm operation has no bias
89+
for (int i = 0; i < m; i++) {
90+
p_bias_zero[i] = 0.0f;
91+
}
92+
8293
WORD32 p_inp_shape[2];
8394
p_inp_shape[0] = n;
8495
p_inp_shape[1] = p;
@@ -109,19 +120,20 @@ Tensor& mm_out(
109120

110121
const FLOAT32* __restrict__ p_vec = (const FLOAT32* __restrict__)p_o;
111122

123+
// mm will always be converted to addmm and to linear, and move transpose to
124+
// graph
112125
WORD32 val = xa_nn_matmul_f32xf32_f32(
113126
p_out,
114127
p_mat1,
115128
p_vec,
116-
NULL,
129+
p_bias_zero,
117130
rows,
118131
cols1,
119132
row_stride1,
120133
vec_count,
121134
vec_offset,
122135
out_offset,
123136
out_stride);
124-
125137
return out;
126138
}
127139

0 commit comments

Comments (0)