fix MM nullptr from zero bias

zonglinpeng · web-flow · commit a94350c61f5a · 2025-08-22T23:40:24.000-07:00
Differential Revision: D80487955 Pull Request resolved: #13523
diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
@@ -21,8 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) {
 }
 
 void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
+  ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size);
   Result<void*> temp_mem_res = ctx.allocate_temp(size);
-  return temp_mem_res.ok() ? temp_mem_res.get() : nullptr;
+  if (temp_mem_res.ok()) {
+    void* ptr = temp_mem_res.get();
+    ET_LOG(Info, "Successfully allocated temp memory at %p", ptr);
+    return ptr;
+  } else {
+    ET_LOG(
+        Error,
+        "Failed to allocate temp memory, error: 0x%x",
+        static_cast<uint32_t>(temp_mem_res.error()));
+    return nullptr;
+  }
 }
 
 // Quantize a fp32 value to an int8_t/uint8_t value
diff --git a/backends/cadence/hifi/operators/op_mm.cpp b/backends/cadence/hifi/operators/op_mm.cpp
@@ -79,6 +79,15 @@ Tensor& mm_out(
         (WORD32* __restrict__)kernels::allocate_temp_memory(
             ctx, (n * p) * sizeof(WORD32));
 
+    // Allocate zero-initialized bias for matmul function (it doesn't accept
+    // NULL)
+    FLOAT32* __restrict__ p_bias_zero =
+        (FLOAT32* __restrict__)kernels::allocate_temp_memory(
+            ctx, m * sizeof(FLOAT32));
+
+    // Initialize bias to zero since mm operation has no bias
+    memset(p_bias_zero, 0, m * sizeof(FLOAT32));
+
     WORD32 p_inp_shape[2];
     p_inp_shape[0] = n;
     p_inp_shape[1] = p;
@@ -109,19 +118,20 @@ Tensor& mm_out(
 
     const FLOAT32* __restrict__ p_vec = (const FLOAT32* __restrict__)p_o;
 
+    // mm will always be converted to addmm and to linear, and move transpose to
+    // graph
     WORD32 val = xa_nn_matmul_f32xf32_f32(
         p_out,
         p_mat1,
         p_vec,
-        NULL,
+        p_bias_zero,
         rows,
         cols1,
         row_stride1,
         vec_count,
         vec_offset,
         out_offset,
         out_stride);
-
     return out;
   }