Skip to content

Commit 6e084a7

Browse files
GregoryComer and facebook-github-bot
authored and committed
Add portable upsample_bilinear2d kernel
Summary: Add a upsample_bilinear2d kernel to the portable kernel library. This implementation re-uses some of the inner logic from the ATen implementation (see Upsample.h and UpsampleKernel.cpp), however I have not ported the outer kernel structure as it relies on TensorIterator and runtime allocation. It may be worth re-visiting this in the future, either by looking at pulling in more of the ATen implementation or adding an optimized variant. Differential Revision: D65756150
1 parent e95f171 commit 6e084a7

File tree

8 files changed

+946
-0
lines changed

8 files changed

+946
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <algorithm>
10+
#include <array>
11+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
12+
#include <executorch/runtime/kernel/kernel_includes.h>
13+
14+
namespace torch {
15+
namespace executor {
16+
namespace native {
17+
18+
using exec_aten::ArrayRef;
19+
using exec_aten::SizesType;
20+
using exec_aten::optional;
21+
22+
namespace {
23+
template <typename CTYPE>
24+
void upsample_bilinear2d_kernel_impl(
25+
const Tensor& in,
26+
bool align_corners,
27+
const float scale_h,
28+
const float scale_w,
29+
Tensor& out) {
30+
const auto in_data = in.const_data_ptr<CTYPE>();
31+
auto out_data = out.mutable_data_ptr<CTYPE>();
32+
33+
auto in_plane = in_data;
34+
for (auto n = 0; n < out.size(0); n++) {
35+
for (auto c = 0; c < out.size(1); c++) {
36+
for (auto h = 0; h < out.size(2); h++) {
37+
for (auto w = 0; w < out.size(3); w++) {
38+
// Compute source index.
39+
// See area_pixel_compute_source_index in pytorch/aten/src/ATen/native/UpSample.h
40+
float in_h = area_pixel_compute_source_index(scale_h, h, align_corners, false);
41+
float in_w = area_pixel_compute_source_index(scale_w, w, align_corners, false);
42+
43+
int64_t in_h1, in_h2, in_w1, in_w2;
44+
float weight_h, inv_weight_h, weight_w, inv_weight_w;
45+
46+
compute_source_index_and_lambda(
47+
in_h1,
48+
in_h2,
49+
weight_h,
50+
inv_weight_h,
51+
scale_h,
52+
h,
53+
in.sizes()[2],
54+
out.sizes()[2],
55+
align_corners);
56+
57+
compute_source_index_and_lambda(
58+
in_w1,
59+
in_w2,
60+
weight_w,
61+
inv_weight_w,
62+
scale_w,
63+
w,
64+
in.sizes()[3],
65+
out.sizes()[3],
66+
align_corners);
67+
68+
const auto top_left = in_plane[in_h1 * in.strides()[2] + in_w1 * in.strides()[3]];
69+
const auto top_right = in_plane[in_h1 * in.strides()[2] + in_w2 * in.strides()[3]];
70+
const auto bottom_left = in_plane[in_h2 * in.strides()[2] + in_w1 * in.strides()[3]];
71+
const auto bottom_right = in_plane[in_h2 * in.strides()[2] + in_w2 * in.strides()[3]];
72+
73+
const auto top = top_left * weight_w + top_right * inv_weight_w;
74+
const auto bottom = bottom_left * weight_w + bottom_right * inv_weight_w;
75+
const auto val = top * weight_h + bottom * inv_weight_h;
76+
77+
*out_data = val;
78+
out_data++;
79+
}
80+
}
81+
82+
in_plane += in.strides()[1];
83+
}
84+
}
85+
}
86+
}
87+
88+
// Entry point for the portable upsample_bilinear2d.out kernel. Validates
// arguments, resizes `out` to the requested spatial size, then dispatches to
// the typed kernel implementation. Returns `out` (also used to signal kernel
// failure via ET_KERNEL_CHECK).
Tensor& upsample_bilinear2d_out(
    KernelRuntimeContext& ctx,
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    bool align_corners,
    const optional<double> scale_h,
    const optional<double> scale_w,
    Tensor& out) {
  // Preconditions (checked in check_..._args):
  // In and out tensors have same dtype.
  // In and out tensors are rank 4 and have same dim[0] and dim[1].
  // In and out tensors are default dim order (NCHW).
  ET_KERNEL_CHECK(
      ctx,
      check_upsample_bilinear2d_args(
          in, output_size, align_corners, scale_h, scale_w, out),
      InvalidArgument,
      out);

  ET_KERNEL_CHECK_MSG(
      ctx,
      resize_upsample_2d(in, output_size, scale_h, scale_w, out) == Error::Ok,
      InvalidArgument,
      out,
      "Failed to resize output tensor");

  // Convert the user-facing scales / output sizes into the internal sampling
  // scales, honoring align_corners semantics.
  const auto kernel_scale_h = area_pixel_compute_scale<double>(
      in.sizes()[2], out.sizes()[2], align_corners, scale_h);
  const auto kernel_scale_w = area_pixel_compute_scale<double>(
      in.sizes()[3], out.sizes()[3], align_corners, scale_w);

  ET_SWITCH_REAL_TYPES(
      in.scalar_type(), ctx, "upsample_bilinear2d.out", CTYPE, [&]() {
        upsample_bilinear2d_kernel_impl<CTYPE>(
            in, align_corners, kernel_scale_h, kernel_scale_w, out);
      });

  return out;
}
124+
125+
}
126+
}
127+
}

kernels/portable/cpu/util/targets.bzl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_common_targets():
3131
"//executorch/kernels/portable/cpu/util:advanced_index_util",
3232
"//executorch/kernels/portable/cpu/util:slice_util",
3333
"//executorch/kernels/portable/cpu/util:elementwise_util",
34+
"//executorch/kernels/portable/cpu/util:upsample_util",
3435
],
3536
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
3637
)
@@ -266,6 +267,16 @@ def define_common_targets():
266267
visibility = ["//executorch/kernels/portable/cpu/..."],
267268
)
268269

270+
runtime.cxx_library(
271+
name = "upsample_util",
272+
srcs = ["upsample_util.cpp"],
273+
exported_headers = ["upsample_util.h"],
274+
deps = [
275+
"//executorch/runtime/kernel:kernel_includes",
276+
],
277+
visibility = ["//executorch/kernels/portable/cpu/..."],
278+
)
279+
269280
# Utility functions that can be used by operators that perform reduction
270281
for aten_mode in [True, False]:
271282
suffix = "_aten" if aten_mode else ""
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
10+
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
11+
12+
namespace torch {
13+
namespace executor {
14+
15+
// Validates arguments shared by the 2D upsample ops. Returns true when all
// checks pass; otherwise logs the failing condition and returns false.
// Callers must supply EITHER a 2-element output_size OR both scale factors,
// never both and never neither (enforced below).
bool check_upsample_2d_common_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  // Same dtype, rank-4 (NCHW), default dim order on both tensors.
  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
  ET_LOG_AND_RETURN_IF_FALSE(in.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(out.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(in));
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(out));
  // Exactly one of {output_size, scale factors} must be provided.
  ET_LOG_AND_RETURN_IF_FALSE(
      (output_size.size() == 2 && !scale_h.has_value() && !scale_w.has_value()) ||
      (output_size.size() == 0 && scale_h.has_value() && scale_w.has_value()));
  // Whichever form was provided must be strictly positive.
  ET_LOG_AND_RETURN_IF_FALSE(!scale_h.has_value() || scale_h.value() > 0);
  ET_LOG_AND_RETURN_IF_FALSE(!scale_w.has_value() || scale_w.value() > 0);
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 1 || output_size[0] > 0);
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 2 || output_size[1] > 0);

  return true;
}
37+
38+
bool check_upsample_bilinear2d_args(
39+
const Tensor& in,
40+
const exec_aten::ArrayRef<int64_t> output_size,
41+
ET_UNUSED const bool align_corners,
42+
const exec_aten::optional<double> scale_h,
43+
const exec_aten::optional<double> scale_w,
44+
Tensor& out) {
45+
return check_upsample_2d_common_args(
46+
in,
47+
output_size,
48+
scale_h,
49+
scale_w,
50+
out);
51+
}
52+
53+
bool check_upsample_nearest2d_args(
54+
const Tensor& in,
55+
const exec_aten::ArrayRef<int64_t> output_size,
56+
const exec_aten::optional<double> scale_h,
57+
const exec_aten::optional<double> scale_w,
58+
Tensor& out) {
59+
return check_upsample_2d_common_args(
60+
in,
61+
output_size,
62+
scale_h,
63+
scale_w,
64+
out);
65+
}
66+
67+
// Resizes `out` to the target spatial size implied by either `output_size`
// (explicit H x W) or the scale factors applied to `in`'s H and W. Returns
// Error::Ok on success, or InvalidArgument if neither form was supplied or
// the resulting size would be empty.
Error resize_upsample_2d(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  // Either output_size or scale_factors are provided, not both. This
  // is checked in check_..._args.
  // Scales are transformed according to align_corners.
  std::array<Tensor::SizesType, kTensorDimensionLimit> target_size;

  const auto dim = in.dim();
  // Start from the input sizes; only the last two dims (H, W) change.
  std::copy(in.sizes().cbegin(), in.sizes().cend(), target_size.begin());

  if (scale_h.has_value() && scale_w.has_value()) {
    // Scale path: the static_cast truncates, i.e. output = floor(size * scale)
    // for positive scales.
    target_size[dim - 2] = static_cast<Tensor::SizesType>(in.sizes()[dim - 2] * scale_h.value());
    target_size[dim - 1] = static_cast<Tensor::SizesType>(in.sizes()[dim - 1] * scale_w.value());
  }
  else if (output_size.size() == 2) {
    // Explicit output size path.
    target_size[dim - 2] = output_size[0];
    target_size[dim - 1] = output_size[1];
  } else {
    ET_LOG(Error, "Invalid output_size or scale_factors");
    return Error::InvalidArgument;
  }

  // Reject degenerate (empty) output planes.
  ET_CHECK_OR_RETURN_ERROR(
      target_size[dim - 2] > 0 && target_size[dim - 1] > 0,
      InvalidArgument,
      "Upsampled output size must be non-empty, but was %ld x %ld.",
      static_cast<long>(target_size[dim - 2]),
      static_cast<long>(target_size[dim - 1]));

  return resize_tensor(out, {target_size.data(), static_cast<size_t>(dim)});
}
102+
103+
}
104+
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once

#include <algorithm>
#include <cmath>
#include <cstdint>

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
14+
15+
namespace torch {
16+
namespace executor {
17+
18+
// Returns true iff `in`/`out` satisfy the shared 2D upsample preconditions:
// same dtype, rank 4, default (NCHW) dim order, and exactly one of
// output_size / scale factors provided (with positive values). Logs and
// returns false otherwise.
bool check_upsample_2d_common_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Argument checks for upsample_bilinear2d. align_corners is part of the op
// signature but does not affect validity.
bool check_upsample_bilinear2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const bool align_corners,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Argument checks for upsample_nearest2d; same requirements as the common
// 2D checks.
bool check_upsample_nearest2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Resizes `out` to the spatial size implied by output_size or the scale
// factors; returns Error::Ok on success.
Error resize_upsample_2d(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);
46+
47+
// Ported from aten/src/ATen/native/UpSample.h
48+
template <typename scalar_t>
49+
inline scalar_t compute_scales_value(
50+
const exec_aten::optional<double> scale,
51+
int64_t input_size,
52+
int64_t output_size) {
53+
return scale.has_value()
54+
? static_cast<scalar_t>(1.0 / scale.value())
55+
: (static_cast<scalar_t>(input_size) / output_size);
56+
}
57+
58+
// Ported from aten/src/ATen/native/UpSample.h
59+
template <typename scalar_t>
60+
inline scalar_t area_pixel_compute_scale(
61+
int64_t input_size,
62+
int64_t output_size,
63+
bool align_corners,
64+
const exec_aten::optional<double> scale) {
65+
// see Note [area_pixel_compute_scale]
66+
if(align_corners) {
67+
if(output_size > 1) {
68+
return static_cast<scalar_t>(input_size - 1) / (output_size - 1);
69+
} else {
70+
return static_cast<scalar_t>(0);
71+
}
72+
} else {
73+
return compute_scales_value<scalar_t>(scale, input_size, output_size);
74+
}
75+
}
76+
77+
// Ported from aten/src/ATen/native/UpSample.h
// Maps an output index to its (fractional) source position in the input.
template <typename scalar_t>
inline scalar_t area_pixel_compute_source_index(
    scalar_t scale,
    int64_t dst_index,
    bool align_corners,
    bool cubic) {
  if (align_corners) {
    return scale * dst_index;
  }
  // Half-pixel mapping; non-cubic modes clamp negative positions to zero.
  const scalar_t src_idx =
      scale * (dst_index + static_cast<scalar_t>(0.5)) -
      static_cast<scalar_t>(0.5);
  if (cubic || src_idx >= static_cast<scalar_t>(0)) {
    return src_idx;
  }
  return static_cast<scalar_t>(0);
}
93+
94+
// Ported from aten/src/ATen/native/UpSample.h
// when `real_input_index` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_size`, causing overflow. So we guard it with `std::min` below.
template <typename scalar_t, typename opmath_t>
inline void guard_index_and_lambda(
    const opmath_t& real_input_index,
    const int64_t& input_size,
    int64_t& input_index,
    scalar_t& lambda) {
  // std::floor (not floorf): floorf would first truncate a double opmath_t
  // to float, losing precision for large indices.
  input_index = std::min(
      static_cast<int64_t>(std::floor(real_input_index)), input_size - 1);
  // Clamp the fractional interpolation weight to [0, 1].
  lambda = std::min(
      std::max(real_input_index - input_index, static_cast<opmath_t>(0)),
      static_cast<opmath_t>(1));
}
106+
107+
// Ported from aten/src/ATen/native/UpSample.h
108+
template<typename scalar_t, typename opmath_t>
109+
inline void compute_source_index_and_lambda(
110+
int64_t& input_index0,
111+
int64_t& input_index1,
112+
scalar_t& lambda0,
113+
scalar_t& lambda1,
114+
opmath_t ratio,
115+
int64_t output_index,
116+
int64_t input_size,
117+
int64_t output_size,
118+
bool align_corners) {
119+
if (output_size == input_size) {
120+
// scale_factor = 1, simply copy
121+
input_index0 = output_index;
122+
input_index1 = output_index;
123+
lambda0 = static_cast<scalar_t>(1);
124+
lambda1 = static_cast<scalar_t>(0);
125+
} else {
126+
const auto real_input_index =
127+
area_pixel_compute_source_index<opmath_t>(
128+
ratio, output_index, align_corners, /*cubic=*/false);
129+
guard_index_and_lambda(real_input_index, input_size, input_index0, lambda1);
130+
int64_t offset = (input_index0 < input_size - 1) ? 1 : 0;
131+
input_index1 = input_index0 + offset;
132+
lambda0 = static_cast<scalar_t>(1.) - lambda1;
133+
}
134+
}
135+
136+
}
137+
}

0 commit comments

Comments
 (0)