Skip to content

Commit cd8f804

Browse files
committed
Add aten::_upsample_bilinear2d_aa.out
Summary: Trying to resolve #7031
1 parent f2a7f46 commit cd8f804

File tree

9 files changed

+976
-0
lines changed

9 files changed

+976
-0
lines changed
Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
#include <c10/util/irange.h>
9+
#include <algorithm>
10+
#include <cmath>
11+
12+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
13+
#include <executorch/runtime/kernel/kernel_includes.h>
14+
15+
namespace torch {
16+
namespace executor {
17+
namespace native {
18+
19+
using executorch::aten::ArrayRef;
20+
using executorch::aten::SizesType;
21+
using std::optional;
22+
23+
namespace {
24+
25+
// Triangle (tent) filter used for anti-aliased bilinear resampling:
// f(x) = max(0, 1 - |x|). Adapted from PyTorch's implementation.
template <typename T>
inline T bilinear_aa_filter(T x) {
  const T distance = std::abs(x);
  return (distance < 1.0) ? (1.0 - distance) : 0.0;
}
35+
36+
// Compute weights and indices for a single output pixel with anti-aliasing
37+
template <typename T>
38+
void compute_aa_weights_for_pixel(
39+
int64_t output_idx,
40+
int64_t input_size,
41+
int64_t output_size,
42+
bool align_corners,
43+
int64_t* indices,
44+
T* weights,
45+
int64_t* num_contributors) {
46+
const T scale = area_pixel_compute_scale<T>(
47+
input_size, output_size, align_corners, optional<double>());
48+
49+
const T center = area_pixel_compute_source_index<T>(
50+
scale, output_idx, align_corners, /*cubic=*/false);
51+
52+
// Support is the filter radius - for downsampling, we need a larger filter
53+
const T support = (scale >= 1.0) ? scale : 1.0;
54+
55+
// Find the range of input pixels that contribute
56+
const int64_t xmin = std::max(
57+
static_cast<int64_t>(center - support + 0.5), static_cast<int64_t>(0));
58+
const int64_t xmax =
59+
std::min(static_cast<int64_t>(center + support + 0.5), input_size);
60+
61+
T total_weight = 0.0;
62+
*num_contributors = std::min(xmax - xmin, int64_t(4));
63+
64+
// Compute weights for contributing pixels
65+
for (int64_t j = 0; j < *num_contributors; ++j) {
66+
int64_t x = xmin + j;
67+
T weight = bilinear_aa_filter<T>(
68+
(x - center + 0.5) / (scale >= 1.0 ? scale : 1.0));
69+
indices[j] = x;
70+
weights[j] = weight;
71+
total_weight += weight;
72+
}
73+
74+
// Normalize weights
75+
if (total_weight > 0) {
76+
for (int64_t j = 0; j < *num_contributors; ++j) {
77+
weights[j] /= total_weight;
78+
}
79+
}
80+
}
81+
82+
// Typed kernel for anti-aliased bilinear 2D resampling over a rank-4 tensor.
// Dispatches on memory layout: contiguous dim order is treated as NCHW,
// otherwise the NHWC path is taken. For each output pixel it computes
// separable per-row and per-column AA weights (up to 4 taps each, matching
// the cap inside compute_aa_weights_for_pixel) and accumulates the weighted
// sum of contributing input pixels.
//
// NOTE(review): `ctx`, `scale_h`, and `scale_w` are currently unused —
// compute_aa_weights_for_pixel recomputes its own scale from the sizes and
// ignores the scales the caller derived; confirm whether the caller-provided
// scales should be threaded through instead.
template <typename CTYPE>
void upsample_bilinear2d_aa_kernel_impl(
    KernelRuntimeContext& ctx,
    const Tensor& in,
    bool align_corners,
    const float scale_h,
    const float scale_w,
    Tensor& out) {
  const auto in_data = in.const_data_ptr<CTYPE>();
  auto out_data = out.mutable_data_ptr<CTYPE>();

  // Contiguous dim order implies NCHW; anything else is handled as NHWC.
  const bool is_nchw =
      is_contiguous_dim_order(in.dim_order().data(), in.dim_order().size());

  if (is_nchw) {
    // NCHW layout: iterate planes (n, c), then output rows/cols.
    for (int64_t n = 0; n < out.size(0); ++n) {
      for (int64_t c = 0; c < out.size(1); ++c) {
        // Pointers to the start of this (n, c) spatial plane.
        const auto in_plane =
            in_data + (n * in.size(1) + c) * in.size(2) * in.size(3);
        auto out_plane =
            out_data + (n * out.size(1) + c) * out.size(2) * out.size(3);

        for (int64_t oh = 0; oh < out.size(2); ++oh) {
          // Compute height weights for this output row.
          // Buffers sized 4 to match the helper's contributor cap.
          int64_t h_indices[4];
          float h_weights[4];
          int64_t h_num_contributors;
          compute_aa_weights_for_pixel<float>(
              oh,
              in.size(2),
              out.size(2),
              align_corners,
              h_indices,
              h_weights,
              &h_num_contributors);

          for (int64_t ow = 0; ow < out.size(3); ++ow) {
            // Compute width weights for this output column
            int64_t w_indices[4];
            float w_weights[4];
            int64_t w_num_contributors;
            compute_aa_weights_for_pixel<float>(
                ow,
                in.size(3),
                out.size(3),
                align_corners,
                w_indices,
                w_weights,
                &w_num_contributors);

            CTYPE value = 0;

            // Apply anti-aliased interpolation: separable weighted sum over
            // the contributing rows and columns.
            for (int64_t ih_idx = 0; ih_idx < h_num_contributors; ++ih_idx) {
              int64_t ih = h_indices[ih_idx];
              float h_weight = h_weights[ih_idx];

              for (int64_t iw_idx = 0; iw_idx < w_num_contributors; ++iw_idx) {
                int64_t iw = w_indices[iw_idx];
                float w_weight = w_weights[iw_idx];

                value += in_plane[ih * in.size(3) + iw] * h_weight * w_weight;
              }
            }

            out_plane[oh * out.size(3) + ow] = value;
          }
        }
      }
    }
  } else {
    // NHWC layout: channels are innermost, so weights are computed once per
    // output (oh, ow) and reused across all channels.
    for (int64_t n = 0; n < out.size(0); ++n) {
      const auto in_batch = in_data + n * in.size(1) * in.size(2) * in.size(3);
      auto out_batch = out_data + n * out.size(1) * out.size(2) * out.size(3);

      for (int64_t oh = 0; oh < out.size(2); ++oh) {
        // Compute height weights for this output row
        int64_t h_indices[4];
        float h_weights[4];
        int64_t h_num_contributors;
        compute_aa_weights_for_pixel<float>(
            oh,
            in.size(2),
            out.size(2),
            align_corners,
            h_indices,
            h_weights,
            &h_num_contributors);

        for (int64_t ow = 0; ow < out.size(3); ++ow) {
          // Compute width weights for this output column
          int64_t w_indices[4];
          float w_weights[4];
          int64_t w_num_contributors;
          compute_aa_weights_for_pixel<float>(
              ow,
              in.size(3),
              out.size(3),
              align_corners,
              w_indices,
              w_weights,
              &w_num_contributors);

          for (int64_t c = 0; c < out.size(1); ++c) {
            CTYPE value = 0;

            // Apply anti-aliased interpolation
            for (int64_t ih_idx = 0; ih_idx < h_num_contributors; ++ih_idx) {
              int64_t ih = h_indices[ih_idx];
              float h_weight = h_weights[ih_idx];

              for (int64_t iw_idx = 0; iw_idx < w_num_contributors; ++iw_idx) {
                int64_t iw = w_indices[iw_idx];
                float w_weight = w_weights[iw_idx];

                // NHWC indexing: ((h * W) + w) * C + c.
                value += in_batch[(ih * in.size(3) + iw) * in.size(1) + c] *
                    h_weight * w_weight;
              }
            }

            out_batch[(oh * out.size(3) + ow) * out.size(1) + c] = value;
          }
        }
      }
    }
  }
}
211+
212+
} // namespace
213+
214+
// Check function for anti-aliased bilinear upsampling
215+
bool check_upsample_bilinear2d_aa_args(
216+
const Tensor& in,
217+
const executorch::aten::OptionalArrayRef<int64_t>& output_size,
218+
const bool align_corners,
219+
const executorch::aten::OptionalArrayRef<double>& scale_factors,
220+
Tensor& out) {
221+
// Use the same checks as regular bilinear upsampling
222+
return check_upsample_bilinear2d_args(
223+
in, output_size, align_corners, scale_factors, out);
224+
}
225+
226+
// Main entry point for anti-aliased bilinear upsampling
227+
Tensor& _upsample_bilinear2d_aa_out(
228+
KernelRuntimeContext& ctx,
229+
const Tensor& in,
230+
const executorch::aten::OptionalArrayRef<int64_t> output_size,
231+
bool align_corners,
232+
const executorch::aten::OptionalArrayRef<double> scale_factors,
233+
Tensor& out) {
234+
// Preconditions (checked in check_..._args):
235+
// In and out tensors have same dtype.
236+
// In and out tensors are rank 4 and have same dim[0] and dim[1].
237+
// In and out tensors are NHWC or NCHW dim order.
238+
ET_KERNEL_CHECK(
239+
ctx,
240+
check_upsample_bilinear2d_aa_args(
241+
in, output_size, align_corners, scale_factors, out),
242+
InvalidArgument,
243+
out);
244+
245+
double scale_h, scale_w;
246+
247+
ET_KERNEL_CHECK_MSG(
248+
ctx,
249+
resize_upsample_2d(
250+
in, output_size, scale_factors, scale_h, scale_w, out) == Error::Ok,
251+
InvalidArgument,
252+
out,
253+
"Failed to resize output tensor");
254+
255+
const auto kernel_scale_h = area_pixel_compute_scale<double>(
256+
in.sizes()[2], out.sizes()[2], align_corners, scale_h);
257+
const auto kernel_scale_w = area_pixel_compute_scale<double>(
258+
in.sizes()[3], out.sizes()[3], align_corners, scale_w);
259+
260+
ET_SWITCH_REALHBF16_TYPES(
261+
in.scalar_type(), ctx, "_upsample_bilinear2d_aa.out", CTYPE, [&]() {
262+
upsample_bilinear2d_aa_kernel_impl<CTYPE>(
263+
ctx, in, align_corners, kernel_scale_h, kernel_scale_w, out);
264+
});
265+
266+
return out;
267+
}
268+
269+
} // namespace native
270+
} // namespace executor
271+
} // namespace torch

kernels/portable/functions.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,11 @@
965965
- arg_meta: null
966966
kernel_name: torch::executor::upsample_bilinear2d_vec_out
967967

968+
- op: _upsample_bilinear2d_aa.out
969+
kernels:
970+
- arg_meta: null
971+
kernel_name: torch::executor::_upsample_bilinear2d_aa_out
972+
968973
- op: upsample_nearest2d.vec_out
969974
kernels:
970975
- arg_meta: null

kernels/portable/test/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ runtime.cxx_library(
2020
deps = [
2121
"//executorch/extension/aten_util:aten_bridge",
2222
"//executorch/kernels/portable/cpu:op_upsample_bilinear2d",
23+
"//executorch/kernels/portable/cpu:op_upsample_bilinear2d_aa",
2324
"//executorch/kernels/portable/cpu:op_upsample_nearest2d",
2425
"//executorch/runtime/core/exec_aten:lib",
2526
],

0 commit comments

Comments
 (0)