Skip to content

Commit 01502f6

Browse files

Commit message: Update 2D reduction to only be enabled for non-packed dims

Commit: 01502f6 (1 parent: 22c69b9)

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

backends/vulkan/op_registry.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
all_memory_layouts,
2424
all_packed_dims,
2525
PackedDim,
26+
get_node_memory_layout,
2627
)
2728
from executorch.exir.dialects._ops import ops as exir_ops
2829

@@ -535,6 +536,27 @@ def check_reduce_node(node: torch.fx.Node) -> bool:
535536
if isinstance(dim_list, list) and len(dim_list) > 2:
536537
return False
537538

539+
if isinstance(dim_list, list) and len(dim_list) == 2:
540+
# Try to get the memory layout for this node
541+
try:
542+
memory_layout = get_node_memory_layout(node)
543+
544+
# If we have memory layout information, check if any dimension in dim_list corresponds to a packed dimension
545+
if memory_layout is not None:
546+
for dim in dim_list:
547+
# For WIDTH_PACKED layout, dimension 3 (W) is packed
548+
if memory_layout == VkMemoryLayout.TENSOR_WIDTH_PACKED and dim == 3:
549+
return False
550+
# For HEIGHT_PACKED layout, dimension 2 (H) is packed
551+
elif memory_layout == VkMemoryLayout.TENSOR_HEIGHT_PACKED and dim == 2:
552+
return False
553+
# For CHANNELS_PACKED layout, dimension 1 (C) is packed
554+
elif memory_layout == VkMemoryLayout.TENSOR_CHANNELS_PACKED and dim == 1:
555+
return False
556+
except (AssertionError, KeyError, AttributeError):
557+
# If we can't get memory layout information, we'll assume the dims aren't packed
558+
pass
559+
538560
keepdim = node.args[2]
539561
if isinstance(keepdim, bool) and not keepdim:
540562
return False

backends/vulkan/runtime/graph/ops/glsl/reduce2d.glsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ int tid_to_smi(const ivec2 tid) {
5858
// with the accumulator.
5959
#define POSTPROCESS(accum) ${POSTPROCESS}
6060

61-
void reduce_2d(const ivec2 tid, ivec3 scan_pos) {
61+
void reduce_2d_non_packed_dim(const ivec2 tid, ivec3 scan_pos) {
6262
// shared memory index of this thread
6363
const int smi = tid_to_smi(tid);
6464

@@ -124,5 +124,5 @@ void main() {
124124
return;
125125
}
126126

127-
reduce_2d(tid, scan_pos);
127+
reduce_2d_non_packed_dim(tid, scan_pos);
128128
}

backends/vulkan/runtime/graph/ops/impl/Reduce.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@ void add_reduce2d_node(
179179
reduce_dim1 = nchw_dim_to_whcn_dim(reduce_dim1, ndim);
180180
reduce_dim2 = nchw_dim_to_whcn_dim(reduce_dim2, ndim);
181181

182+
// Check that none of the reduction dims are packed
183+
VK_CHECK_COND(graph.packed_dim_of(in) != reduce_dim1);
184+
VK_CHECK_COND(graph.packed_dim_of(in) != reduce_dim2);
185+
VK_CHECK_COND(graph.packed_dim_of(out) != reduce_dim1);
186+
VK_CHECK_COND(graph.packed_dim_of(out) != reduce_dim2);
187+
182188
// Check that the concat dim is not one of the reduction dims
183189
if (graph.dim_of(in) == 4 && graph.size_at<int>(0, in) > 1) {
184190
VK_CHECK_COND(graph.concat_dim_of(in) != reduce_dim1);
@@ -232,13 +238,12 @@ void add_reduce2d_node(
232238
const ValueRef dim_ref = graph.get_or_add_value_for_int(dim_val); \
233239
return add_reduce_node( \
234240
graph, args[0], dim_ref, args[out_arg_idx], #op_name); \
235-
}
236-
if (dims_list.size() == 2) { \
241+
} \
242+
if (dims_list.size() == 2) { \
237243
return add_reduce2d_node( \
238244
graph, args[0], args[1], args[out_arg_idx], #op_name); \
239-
}
240-
VK_CHECK_COND(false, "Only 1 or 2 dimensions supported"); \
241245
} \
246+
VK_CHECK_COND(false, "Only 1 or 2 dimensions supported"); \
242247
}
243248

244249
DEFINE_REDUCE_FN(sum, 4)

0 commit comments

Comments (0)