Update on "[ET-VK][Ops] aten.var.dim in reduce"

morelos · morelos · commit 8c22723e0908 · 2025-05-28T13:00:45.000-07:00
Incorporated variance logic into reduce by adding additional logic. Differential Revision: [D75247432](https://our.internmc.facebook.com/intern/diff/D75247432/) [ghstack-poisoned]
diff --git a/backends/vulkan/runtime/graph/ops/glsl/reduce_texture3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/reduce_texture3d.glsl
@@ -46,10 +46,10 @@ $if VARIANCE_MODE:
 // work group will write into its assigned element in the shared array.
 #define MAX_NTHREADS 16
 
-shared vec4 shared_vecs[MAX_NTHREADS];
+shared VEC4_T shared_vecs[MAX_NTHREADS];
 // Second accumulator for variance mode - used for sum of squares; the
 // previous accumulator (shared_vecs) is used for sum of values
-shared vec4 shared_sum_sq[MAX_NTHREADS];
+shared VEC4_T shared_sum_sq[MAX_NTHREADS];
 shared int shared_count[MAX_NTHREADS];
 
 #include "indexing_utils.h"
@@ -58,9 +58,9 @@ int tid_to_smi(const ivec2 tid) {
   return tid.x + tid.y * NWORKERS;
 }
 
-vec4 calculate_variance(vec4 sum, vec4 sum_sq, int count) {
-  vec4 mean = sum / float(count);
-  vec4 variance = (sum_sq / float(count)) - (mean * mean);
+VEC4_T calculate_variance(VEC4_T sum, VEC4_T sum_sq, int count) {
+  VEC4_T mean = sum / float(count);
+  VEC4_T variance = (sum_sq / float(count)) - (mean * mean);
 
   if ((pc.unbiased != 0) && (count > 1)) {
     variance = variance * (float(count) / float(count - 1.0));
@@ -111,10 +111,10 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
   const int smi = tid_to_smi(tid);
 
   scan_pos[reduce_dim] = 0;
-  vec4 accum = INIT_ACCUM(load_texel(tin, scan_pos));
+  VEC4_T accum = INIT_ACCUM(load_texel(tin, scan_pos));
 
 #ifdef VARIANCE_MODE
-  vec4 sum_sq = VEC4_T(0);
+  VEC4_T sum_sq = VEC4_T(0);
   int count = 0;
 #endif
 
@@ -123,7 +123,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
   // the reduction row
   for (int i = tid.x; i < tin_sizes[reduce_dim];
        i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
-    vec4 val = load_texel(tin, scan_pos);
+    VEC4_T val = load_texel(tin, scan_pos);
     accum = UPDATE_ACCUM(accum, val);
 #ifdef VARIANCE_MODE
     sum_sq += val * val;
@@ -166,7 +166,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
         scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
 
 #ifdef VARIANCE_MODE
-    vec4 variance = calculate_variance(accum, sum_sq, count);
+    VEC4_T variance = calculate_variance(accum, sum_sq, count);
 #endif
 
     // Explicitly set padding elements to 0
@@ -208,10 +208,10 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
   const int reduce_len = tin_sizes[packed_dim] - nspill;
 
   scan_pos[reduce_dim] = 0;
-  vec4 accum = INIT_ACCUM(vec4(load_texel(tin, scan_pos).x));
+  VEC4_T accum = INIT_ACCUM(VEC4_T(load_texel(tin, scan_pos).x));
 
 #ifdef VARIANCE_MODE
-  vec4 sum_sq = VEC4_T(0);
+  VEC4_T sum_sq = VEC4_T(0);
   int count = 0;
 #endif
 
@@ -220,7 +220,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
   scan_pos[reduce_dim] = tid.x;
   for (int i = tid.x * 4; i < reduce_len;
        i += NWORKERS * 4, scan_pos[reduce_dim] += NWORKERS) {
-    vec4 val = load_texel(tin, scan_pos);
+    VEC4_T val = load_texel(tin, scan_pos);
     accum = UPDATE_ACCUM(accum, val);
 #ifdef VARIANCE_MODE
     sum_sq += val * val;
@@ -231,7 +231,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
   // element of the texel needs to be processed individually such that the
   // padding elements are ignored
   if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0) {
-    const vec4 val = load_texel(tin, scan_pos);
+    const VEC4_T val = load_texel(tin, scan_pos);
     for (int i = 0; i < nspill; i++) {
       accum.x = UPDATE_ACCUM(accum.x, val[i]);
 #ifdef VARIANCE_MODE
@@ -280,7 +280,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
     }
 
     scan_pos[reduce_dim] = tid.x;
-    write_texel(tout, scan_pos, vec4(variance, 0, 0, 0));
+    write_texel(tout, scan_pos, VEC4_T(variance, 0, 0, 0));
 #else
     // Each element of the texel is itself a partial maximum; iterate over the
     // texel to find the actual maximum
@@ -290,7 +290,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
     }
 
     scan_pos[reduce_dim] = tid.x;
-    write_texel(tout, scan_pos, POSTPROCESS(vec4(accum_final, 0, 0, 0)));
+    write_texel(tout, scan_pos, POSTPROCESS(VEC4_T(accum_final, 0, 0, 0)));
 #endif
   }
 }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/reduce_texture3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/reduce_texture3d.yaml
@@ -1,4 +1,3 @@
-
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
diff --git a/backends/vulkan/runtime/graph/ops/impl/Reduce.cpp b/backends/vulkan/runtime/graph/ops/impl/Reduce.cpp
@@ -60,8 +60,7 @@ void add_reduce_buffer_node(
 
   std::vector<PushConstantDataInfo> push_constants;
   int32_t unbiased_int = static_cast<int32_t>(unbiased);
-  push_constants.emplace_back(
-      PushConstantDataInfo(&unbiased_int, sizeof(unbiased_int)));
+  push_constants.emplace_back(&unbiased_int, sizeof(unbiased_int));
 
   graph.execute_nodes().emplace_back(new DispatchNode(
       graph,
@@ -137,8 +136,7 @@ void add_reduce_texture_node(
 
   std::vector<PushConstantDataInfo> push_constants;
   int32_t unbiased_int = static_cast<int32_t>(unbiased);
-  push_constants.emplace_back(
-      PushConstantDataInfo(&unbiased_int, sizeof(unbiased_int)));
+  push_constants.emplace_back(&unbiased_int, sizeof(unbiased_int));
 
   graph.execute_nodes().emplace_back(new DispatchNode(
       graph,
@@ -177,19 +175,11 @@ void add_reduce_node(
 }
 
 #define DEFINE_REDUCE_FN(op_name, out_arg_idx)                           \
-  void op_name(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
-    const IntListPtr dims_list = graph.get_int_list(args[1]);            \
-    VK_CHECK_COND(dims_list->size() == 1);                               \
-    return add_reduce_node(                                              \
-        graph, args[0], dims_list->at(0), args[out_arg_idx], #op_name);  \
-  }
-
-#define DEFINE_VAR_FN(op_name, out_arg_idx)                              \
   void op_name(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
     const IntListPtr dims_list = graph.get_int_list(args[1]);            \
     VK_CHECK_COND(dims_list->size() == 1);                               \
     bool unbiased = false;                                               \
-    if (args.size() > 2) {                                               \
+    if (strcmp(#op_name, "var") == 0 && args.size() > 2) {               \
       unbiased = graph.get_bool(args[2]);                                \
     }                                                                    \
     return add_reduce_node(                                              \
@@ -205,7 +195,7 @@ DEFINE_REDUCE_FN(sum, 4)
 DEFINE_REDUCE_FN(mean, 4)
 DEFINE_REDUCE_FN(amax, 3)
 DEFINE_REDUCE_FN(amin, 3)
-DEFINE_VAR_FN(var, 4)
+DEFINE_REDUCE_FN(var, 4)
 
 REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.sum.dim_IntList, sum);

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`# Copyright (c) Meta Platforms, Inc. and affiliates.`
`3`	`2`	`# All rights reserved.`
`4`	`3`	`#`