Skip to content

Commit f63fa21

Browse files
committed
Update on "[ET-VK] Implement generic reduction shader + mean, sum, amax, amin"
## Context Introduce a generic shader that computes a reduction along a single dim, with `keepdim = True`. With the generic shader template, `mean`, `sum`, `amin`, and `amax` can be implemented. Differential Revision: [D64840504](https://our.internmc.facebook.com/intern/diff/D64840504/) [ghstack-poisoned]
2 parents a3f0b00 + ca23a9f commit f63fa21

File tree

9 files changed

+14
-413
lines changed

9 files changed

+14
-413
lines changed

backends/vulkan/partitioner/supported_ops.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,6 @@ def __contains__(self, op):
105105
]
106106

107107
NO_DYNAMIC_SHAPE = [
108-
# Reduction
109-
exir_ops.edge.aten.mean.dim,
110-
exir_ops.edge.aten.sum.dim_IntList,
111108
# Normalization
112109
exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
113110
exir_ops.edge.aten.native_layer_norm.default,

backends/vulkan/runtime/graph/ops/glsl/reduce.glsl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
109109
// Iterate over the partial outputs to obtain the overall output
110110
int group_i = tid.y * NWORKERS;
111111
accum = shared_vecs[group_i++];
112-
for (int i = 1; i < NWORKERS; ++i, group_i++) {
112+
for (int i = 1; i < NWORKERS; i++, group_i++) {
113113
accum = UPDATE_ACCUM(accum, shared_vecs[group_i]);
114114
}
115115

@@ -123,7 +123,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
123123

124124
// Explicitly set padding elements to 0
125125
if (is_last_texel && nspill > 0) {
126-
[[unroll]] for (int i = nspill; i < 4; ++i) {
126+
[[unroll]] for (int i = nspill; i < 4; i++) {
127127
accum[i] = 0;
128128
}
129129
}
@@ -165,7 +165,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
165165
// padding elements are ignored
166166
if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0) {
167167
const vec4 intex = load_texel(tin, scan_pos);
168-
for (int i = 0; i < nspill; ++i) {
168+
for (int i = 0; i < nspill; i++) {
169169
accum.x = UPDATE_ACCUM(accum.x, intex[i]);
170170
}
171171
}
@@ -179,13 +179,13 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
179179
// Iterate over the partial maximums to obtain the overall maximum
180180
int group_i = tid.y * NWORKERS;
181181
accum = shared_vecs[group_i++];
182-
for (int i = 1; i < NWORKERS; ++i, group_i++) {
182+
for (int i = 1; i < NWORKERS; i++, group_i++) {
183183
accum = UPDATE_ACCUM(accum, shared_vecs[group_i]);
184184
}
185185
// Each element of the texel is itself a partial maximum; iterate over the
186186
// texel to find the actual maximum
187187
float accum_final = accum.x;
188-
[[unroll]] for (int i = 1; i < 4; ++i) {
188+
[[unroll]] for (int i = 1; i < 4; i++) {
189189
accum_final = UPDATE_ACCUM(accum[i], accum_final);
190190
}
191191

backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl

Lines changed: 0 additions & 108 deletions
This file was deleted.

backends/vulkan/runtime/graph/ops/glsl/sum_dim.yaml

Lines changed: 0 additions & 16 deletions
This file was deleted.

backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl

Lines changed: 0 additions & 95 deletions
This file was deleted.

backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.yaml

Lines changed: 0 additions & 16 deletions
This file was deleted.

backends/vulkan/runtime/graph/ops/impl/Reduce.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ void resize_reduce_node(
2525
vTensorPtr out = graph->get_tensor(args[0].refs[0]);
2626
vTensorPtr in = graph->get_tensor(args[1].refs[0]);
2727

28-
std::vector<int64_t> in_sizes = in->sizes();
29-
// out->virtual_resize(in_sizes);
28+
int dim = extra_args[0];
29+
30+
std::vector<int64_t> new_sizes = in->sizes();
31+
new_sizes[normalize(dim, new_sizes.size())] = 1;
32+
out->virtual_resize(new_sizes);
3033
}
3134

3235
void add_reduce_node(
@@ -48,12 +51,8 @@ void add_reduce_node(
4851
// Check that the concat dim is not the reduction dim, if the tensor has a
4952
// batch dim greater than 1.
5053
if (graph.dim_of(in) == 4 && graph.size_at<int>(0, in) > 1) {
51-
VK_CHECK_COND(
52-
graph.concat_dim_of(in) != reduce_dim,
53-
"Reduce shader currently does not support concat dim == reduce dim");
54-
VK_CHECK_COND(
55-
graph.concat_dim_of(out) != reduce_dim,
56-
"Reduce shader currently does not support concat dim == reduce dim");
54+
VK_CHECK_COND(graph.concat_dim_of(in) != reduce_dim);
55+
VK_CHECK_COND(graph.concat_dim_of(out) != reduce_dim);
5756
}
5857

5958
vkapi::ShaderInfo shader_descriptor;
@@ -97,7 +96,8 @@ void add_reduce_node(
9796
// Specialization Constants
9897
{graph.packed_dim_of(out), reduce_dim, group_dim},
9998
// Resizing Logic
100-
resize_reduce_node));
99+
resize_reduce_node,
100+
{dim}));
101101
}
102102

103103
#define DEFINE_REDUCE_FN(op_name, out_arg_idx) \

0 commit comments

Comments (0)