Fix test compilation and runtime error.

jgibson2 · jgibson2 · commit c0a3db950900 · 2025-12-03T11:28:14.000-05:00
diff --git a/kernels/portable/cpu/op_grid_sampler_2d.cpp b/kernels/portable/cpu/op_grid_sampler_2d.cpp
@@ -92,46 +92,46 @@ void grid_sample_2d_bilinear_kernel_impl_nchw(
             // For zeros padding, only sample if within bounds
             if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
               out_val += in_data
-                  [in_channel_offset + iy_nw * in.strides()[2] +
-                   ix_nw * in.strides()[3]] *
+                             [in_channel_offset + iy_nw * in.strides()[2] +
+                              ix_nw * in.strides()[3]] *
                   nw_weight;
             }
             if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
               out_val += in_data
-                  [in_channel_offset + iy_ne * in.strides()[2] +
-                   ix_ne * in.strides()[3]] *
+                             [in_channel_offset + iy_ne * in.strides()[2] +
+                              ix_ne * in.strides()[3]] *
                   ne_weight;
             }
             if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
               out_val += in_data
-                  [in_channel_offset + iy_sw * in.strides()[2] +
-                   ix_sw * in.strides()[3]] *
+                             [in_channel_offset + iy_sw * in.strides()[2] +
+                              ix_sw * in.strides()[3]] *
                   sw_weight;
             }
             if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
               out_val += in_data
-                  [in_channel_offset + iy_se * in.strides()[2] +
-                   ix_se * in.strides()[3]] *
+                             [in_channel_offset + iy_se * in.strides()[2] +
+                              ix_se * in.strides()[3]] *
                   se_weight;
             }
           } else {
             // For border/reflection padding, coordinates are already clipped
             out_val = in_data
                           [in_channel_offset + iy_nw * in.strides()[2] +
                            ix_nw * in.strides()[3]] *
-                      nw_weight +
-                  in_data
-                      [in_channel_offset + iy_ne * in.strides()[2] +
-                       ix_ne * in.strides()[3]] *
-                      ne_weight +
-                  in_data
-                      [in_channel_offset + iy_sw * in.strides()[2] +
-                       ix_sw * in.strides()[3]] *
-                      sw_weight +
-                  in_data
-                      [in_channel_offset + iy_se * in.strides()[2] +
-                       ix_se * in.strides()[3]] *
-                      se_weight;
+                    nw_weight +
+                in_data
+                        [in_channel_offset + iy_ne * in.strides()[2] +
+                         ix_ne * in.strides()[3]] *
+                    ne_weight +
+                in_data
+                        [in_channel_offset + iy_sw * in.strides()[2] +
+                         ix_sw * in.strides()[3]] *
+                    sw_weight +
+                in_data
+                        [in_channel_offset + iy_se * in.strides()[2] +
+                         ix_se * in.strides()[3]] *
+                    se_weight;
           }
 
           // Write output in NCHW order
@@ -197,7 +197,8 @@ void grid_sample_2d_nearest_kernel_impl_nchw(
           // Use nearbyint (not round) to match ATen's rounding behavior.
           // nearbyint uses the current rounding mode (typically round-to-even),
           // which matches PyTorch's (ATen's) behavior. In contrast, round may
-          // not always respect the rounding mode. See: aten/src/ATen/native/GridSampler.cpp
+          // not always respect the rounding mode. See:
+          // aten/src/ATen/native/GridSampler.cpp
           int64_t ix_nearest = static_cast<int64_t>(std::nearbyint(ix));
           int64_t iy_nearest = static_cast<int64_t>(std::nearbyint(iy));
 
@@ -214,7 +215,8 @@ void grid_sample_2d_nearest_kernel_impl_nchw(
             }
           } else {
             // For border/reflection padding, clip coordinates after rounding
-            // Rounding can push coordinates out of bounds even after grid_sampler_compute_source_index
+            // Rounding can push coordinates out of bounds even after
+            // grid_sampler_compute_source_index
             ix_nearest = clip_coordinates(ix_nearest, inp_W);
             iy_nearest = clip_coordinates(iy_nearest, inp_H);
             out_val = in_data
@@ -232,7 +234,6 @@ void grid_sample_2d_nearest_kernel_impl_nchw(
   }
 }
 
-
 template <typename CTYPE>
 void grid_sample_2d_bicubic_kernel_impl_nchw(
     const Tensor& in,
@@ -277,8 +278,9 @@ void grid_sample_2d_bicubic_kernel_impl_nchw(
           const CTYPE y = grid_data[grid_idx + grid.strides()[3]];
 
           // Compute source coordinates in pixel space
-          // For bicubic, we need raw unnormalized coordinates without padding applied
-          // Padding is applied later when fetching individual pixels from the 4x4 neighborhood
+          // For bicubic, we need raw unnormalized coordinates without padding
+          // applied. Padding is applied later when fetching individual pixels
+          // from the 4x4 neighborhood.
           CTYPE ix = grid_sampler_unnormalize(x, inp_W, align_corners);
           CTYPE iy = grid_sampler_unnormalize(y, inp_H, align_corners);
 
@@ -309,12 +311,10 @@ void grid_sample_2d_bicubic_kernel_impl_nchw(
               return static_cast<CTYPE>(0);
             } else if (padding_mode == GridSamplerPadding::Border) {
               // For border padding, clip coordinates to valid range
-              int64_t iy_safe = std::max(
-                  static_cast<int64_t>(0),
-                  std::min(iy, inp_H - 1));
-              int64_t ix_safe = std::max(
-                  static_cast<int64_t>(0),
-                  std::min(ix, inp_W - 1));
+              int64_t iy_safe =
+                  std::max(static_cast<int64_t>(0), std::min(iy, inp_H - 1));
+              int64_t ix_safe =
+                  std::max(static_cast<int64_t>(0), std::min(ix, inp_W - 1));
               return in_data
                   [in_channel_offset + iy_safe * in.strides()[2] +
                    ix_safe * in.strides()[3]];
@@ -324,16 +324,22 @@ void grid_sample_2d_bicubic_kernel_impl_nchw(
               CTYPE ix_reflected = static_cast<CTYPE>(ix);
 
               if (align_corners) {
-                iy_reflected = reflect_coordinates(iy_reflected, 0, 2 * (inp_H - 1));
-                ix_reflected = reflect_coordinates(ix_reflected, 0, 2 * (inp_W - 1));
+                iy_reflected =
+                    reflect_coordinates(iy_reflected, 0, 2 * (inp_H - 1));
+                ix_reflected =
+                    reflect_coordinates(ix_reflected, 0, 2 * (inp_W - 1));
               } else {
-                iy_reflected = reflect_coordinates(iy_reflected, -1, 2 * inp_H - 1);
-                ix_reflected = reflect_coordinates(ix_reflected, -1, 2 * inp_W - 1);
+                iy_reflected =
+                    reflect_coordinates(iy_reflected, -1, 2 * inp_H - 1);
+                ix_reflected =
+                    reflect_coordinates(ix_reflected, -1, 2 * inp_W - 1);
               }
 
               // Clip to ensure we're in bounds (reflection + clip for safety)
-              int64_t iy_safe = static_cast<int64_t>(clip_coordinates(iy_reflected, inp_H));
-              int64_t ix_safe = static_cast<int64_t>(clip_coordinates(ix_reflected, inp_W));
+              int64_t iy_safe =
+                  static_cast<int64_t>(clip_coordinates(iy_reflected, inp_H));
+              int64_t ix_safe =
+                  static_cast<int64_t>(clip_coordinates(ix_reflected, inp_W));
 
               return in_data
                   [in_channel_offset + iy_safe * in.strides()[2] +
@@ -375,7 +381,11 @@ void grid_sample_2d_bicubic_kernel_impl_nchw(
 
           // Interpolate in y-direction
           CTYPE out_val = cubic_interp1d(
-              coefficients[0], coefficients[1], coefficients[2], coefficients[3], ty);
+              coefficients[0],
+              coefficients[1],
+              coefficients[2],
+              coefficients[3],
+              ty);
 
           // Write output in NCHW order
           const int64_t out_idx =
@@ -409,7 +419,8 @@ Tensor& grid_sampler_2d_out(
       "Failed to validate arguments and resize output tensor");
 
   // Convert integer mode parameters to enums
-  GridSamplerInterpolation mode = static_cast<GridSamplerInterpolation>(interpolation_mode);
+  GridSamplerInterpolation mode =
+      static_cast<GridSamplerInterpolation>(interpolation_mode);
   GridSamplerPadding padding = static_cast<GridSamplerPadding>(padding_mode);
 
   // Validate mode and padding values
@@ -454,7 +465,6 @@ Tensor& grid_sampler_2d_out(
 // NOLINTEND(facebook-hte-ConstantArgumentPassByValue,
 // facebook-hte-ParameterMightThrowOnCopy)
 
-
 } // namespace native
 } // namespace executor
 } // namespace torch
diff --git a/kernels/portable/cpu/util/grid_sampler_2d_util.cpp b/kernels/portable/cpu/util/grid_sampler_2d_util.cpp
@@ -55,6 +55,12 @@ Error check_grid_sampler_2d_args_and_resize_out(
       InvalidArgument,
       "Input and grid must have same dtype");
 
+  // Input and output must have the same dtype
+  ET_CHECK_OR_RETURN_ERROR(
+      tensors_have_same_dtype(input, out),
+      InvalidArgument,
+      "Input and output must have the same dtype");
+
   // Resize output tensor to [N, C, H_out, W_out]
   std::array<exec_aten::SizesType, 4> out_sizes = {
       static_cast<exec_aten::SizesType>(input.size(0)),
@@ -64,9 +70,7 @@ Error check_grid_sampler_2d_args_and_resize_out(
 
   Error err = resize_tensor(out, {out_sizes.data(), 4});
   ET_CHECK_OR_RETURN_ERROR(
-      err == Error::Ok,
-      InvalidArgument,
-      "Failed to resize output tensor");
+      err == Error::Ok, InvalidArgument, "Failed to resize output tensor");
 
   return Error::Ok;
 }
diff --git a/kernels/portable/cpu/util/grid_sampler_2d_util.h b/kernels/portable/cpu/util/grid_sampler_2d_util.h
@@ -18,9 +18,8 @@ namespace executor {
 // Ported from aten/src/ATen/native/GridSampler.h
 // note that these need to be in the SAME ORDER as the enum in GridSampler.h
 // as they are mapped to integer values (0, 1, 2) in this order
-enum class GridSamplerInterpolation {Bilinear, Nearest, Bicubic};
-enum class GridSamplerPadding {Zeros, Border, Reflection};
-
+enum class GridSamplerInterpolation { Bilinear, Nearest, Bicubic };
+enum class GridSamplerPadding { Zeros, Border, Reflection };
 
 // Ported from aten/src/ATen/native/GridSampler.h
 // Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value,
@@ -34,10 +33,8 @@ enum class GridSamplerPadding {Zeros, Border, Reflection};
 //     +1 --> (size - 1) + 0.5 == size - 0.5
 //     scale_factor = size / 2
 template <typename scalar_t>
-inline scalar_t grid_sampler_unnormalize(
-    scalar_t coord,
-    int64_t size,
-    bool align_corners) {
+inline scalar_t
+grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) {
   if (align_corners) {
     // unnormalize coord from [-1, 1] to [0, size - 1]
     return ((coord + 1) / 2) * (size - 1);
@@ -61,10 +58,8 @@ inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) {
 // The bounds are passed as twice their value so that half-integer values
 // can be represented as ints.
 template <typename scalar_t>
-inline scalar_t reflect_coordinates(
-    scalar_t in,
-    int64_t twice_low,
-    int64_t twice_high) {
+inline scalar_t
+reflect_coordinates(scalar_t in, int64_t twice_low, int64_t twice_high) {
   if (twice_low == twice_high) {
     return static_cast<scalar_t>(0);
   }
@@ -120,14 +115,16 @@ inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) {
 }
 
 // Ported from aten/src/ATen/native/UpSample.h
-// Cubic convolution function 2 (for points between 1 and 2 units from the point)
+// Cubic convolution function 2 (for points between 1 and 2 units from the
+// point)
 template <typename scalar_t>
 inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) {
   return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
 }
 
 // Ported from aten/src/ATen/native/UpSample.h
-// Computes the 4 cubic interpolation coefficients for a given position t in [0, 1]
+// Computes the 4 cubic interpolation coefficients for a given position t in
+// [0, 1]
 template <typename scalar_t>
 inline void get_cubic_upsample_coefficients(scalar_t coeffs[4], scalar_t t) {
   // Standard bicubic interpolation uses alpha = -0.75
@@ -145,12 +142,8 @@ inline void get_cubic_upsample_coefficients(scalar_t coeffs[4], scalar_t t) {
 // Ported from aten/src/ATen/native/UpSample.h
 // Performs 1D cubic interpolation given 4 points and a position t in [0, 1]
 template <typename scalar_t>
-inline scalar_t cubic_interp1d(
-    scalar_t x0,
-    scalar_t x1,
-    scalar_t x2,
-    scalar_t x3,
-    scalar_t t) {
+inline scalar_t
+cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2, scalar_t x3, scalar_t t) {
   scalar_t coeffs[4];
   get_cubic_upsample_coefficients<scalar_t>(coeffs, t);
 
diff --git a/kernels/portable/test/op_grid_sampler_2d_test.py b/kernels/portable/test/op_grid_sampler_2d_test.py
@@ -232,4 +232,4 @@ def test_grid_sampler_2d_batch_processing(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
diff --git a/kernels/portable/test/targets.bzl b/kernels/portable/test/targets.bzl
@@ -68,7 +68,6 @@ def define_common_targets():
         op_test(name = "op_allclose_test")
         op_test(name = "op_div_test")
         op_test(name = "op_gelu_test")
-        op_test(name = "op_grid_sampler_2d_test")
         op_test(name = "op_mul_test")
 
     if is_xplat():
diff --git a/kernels/portable/test/test_grid_sampler_2d_executorch.py b/kernels/portable/test/test_grid_sampler_2d_executorch.py
diff --git a/kernels/test/op_grid_sampler_2d_test.cpp b/kernels/test/op_grid_sampler_2d_test.cpp

Original file line number	Diff line number	Diff line change
`@@ -232,4 +232,4 @@ def test_grid_sampler_2d_batch_processing(self):`
`232`	`232`
`233`	`233`
`234`	`234`	`if __name__ == "__main__":`
`235`		`- unittest.main()`
	`235`	`+ unittest.main()`