feat: Add assertion and comment about relationship between simd size and num simd groups

gabe-l-hart · gabe-l-hart · commit d06d08769cfa · 2025-07-22T09:22:08.000-06:00
Branch: GraniteFourPerf

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
@@ -3022,6 +3022,13 @@ static bool ggml_metal_encode_node(
                     const int64_t shmem_size = d_state / 32;
                     GGML_ASSERT(shmem_size * 32 == d_state);
 
+                    // The final simd_sum won't work if the number of simd groups is
+                    // larger than the size of a single simd group. If this case is
+                    // hit at some point, the logic in the second simd_sum could be
+                    // expanded to handle this with one more sequential simd_sum to
+                    // collapse simd group sums another time.
+                    GGML_ASSERT(shmem_size <= 32);
+
                     // One thread per element in d_state
                     GGML_ASSERT(d_state <= (int64_t)pipeline.maxTotalThreadsPerThreadgroup);