Skip to content

Commit 68691f8

Browse files
committed
Add portable upsample_bilinear2d kernel
Summary: Add an upsample_bilinear2d kernel to the portable kernel library. This implementation reuses some of the inner logic from the ATen implementation (see Upsample.h and UpsampleKernel.cpp); however, I have not ported the outer kernel structure, as it relies on TensorIterator and runtime allocation. It may be worth revisiting this in the future, either by pulling in more of the ATen implementation or by adding an optimized variant. Differential Revision: D65756150
1 parent e95f171 commit 68691f8

File tree

8 files changed

+950
-0
lines changed

8 files changed

+950
-0
lines changed
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
10+
#include <executorch/runtime/kernel/kernel_includes.h>
11+
#include <algorithm>
12+
#include <array>
13+
14+
namespace torch {
15+
namespace executor {
16+
namespace native {
17+
18+
using exec_aten::ArrayRef;
19+
using exec_aten::optional;
20+
using exec_aten::SizesType;
21+
22+
namespace {
23+
template <typename CTYPE>
24+
void upsample_bilinear2d_kernel_impl(
25+
const Tensor& in,
26+
bool align_corners,
27+
const float scale_h,
28+
const float scale_w,
29+
Tensor& out) {
30+
const auto in_data = in.const_data_ptr<CTYPE>();
31+
auto out_data = out.mutable_data_ptr<CTYPE>();
32+
33+
auto in_plane = in_data;
34+
for (auto n = 0; n < out.size(0); n++) {
35+
for (auto c = 0; c < out.size(1); c++) {
36+
for (auto h = 0; h < out.size(2); h++) {
37+
for (auto w = 0; w < out.size(3); w++) {
38+
// Compute source index.
39+
// See area_pixel_compute_source_index in
40+
// pytorch/aten/src/ATen/native/UpSample.h
41+
float in_h =
42+
area_pixel_compute_source_index(scale_h, h, align_corners, false);
43+
float in_w =
44+
area_pixel_compute_source_index(scale_w, w, align_corners, false);
45+
46+
int64_t in_h1, in_h2, in_w1, in_w2;
47+
float weight_h, inv_weight_h, weight_w, inv_weight_w;
48+
49+
compute_source_index_and_lambda(
50+
in_h1,
51+
in_h2,
52+
weight_h,
53+
inv_weight_h,
54+
scale_h,
55+
h,
56+
in.sizes()[2],
57+
out.sizes()[2],
58+
align_corners);
59+
60+
compute_source_index_and_lambda(
61+
in_w1,
62+
in_w2,
63+
weight_w,
64+
inv_weight_w,
65+
scale_w,
66+
w,
67+
in.sizes()[3],
68+
out.sizes()[3],
69+
align_corners);
70+
71+
const auto top_left =
72+
in_plane[in_h1 * in.strides()[2] + in_w1 * in.strides()[3]];
73+
const auto top_right =
74+
in_plane[in_h1 * in.strides()[2] + in_w2 * in.strides()[3]];
75+
const auto bottom_left =
76+
in_plane[in_h2 * in.strides()[2] + in_w1 * in.strides()[3]];
77+
const auto bottom_right =
78+
in_plane[in_h2 * in.strides()[2] + in_w2 * in.strides()[3]];
79+
80+
const auto top = top_left * weight_w + top_right * inv_weight_w;
81+
const auto bottom =
82+
bottom_left * weight_w + bottom_right * inv_weight_w;
83+
const auto val = top * weight_h + bottom * inv_weight_h;
84+
85+
*out_data = val;
86+
out_data++;
87+
}
88+
}
89+
90+
in_plane += in.strides()[1];
91+
}
92+
}
93+
}
94+
} // namespace
95+
96+
/**
 * upsample_bilinear2d kernel entry point for the portable kernel library.
 *
 * Validates arguments, resizes `out` to the requested spatial size, then
 * dispatches the bilinear interpolation kernel over all real dtypes via
 * ET_SWITCH_REAL_TYPES.
 *
 * @param ctx Kernel runtime context used for failure reporting.
 * @param in Rank-4 NCHW input tensor.
 * @param output_size Explicit (H, W) output size; empty when scale factors
 *     are provided instead (exactly one of the two is set — validated in
 *     check_upsample_bilinear2d_args).
 * @param align_corners Whether source indices are computed with corner
 *     alignment (ATen upsample semantics).
 * @param scale_h Optional height scale factor.
 * @param scale_w Optional width scale factor.
 * @param out Output tensor, resized and written in place.
 * @return `out`, for chaining. On failure the kernel context records the
 *     error and `out` is returned without being fully written.
 */
Tensor& upsample_bilinear2d_out(
    KernelRuntimeContext& ctx,
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    bool align_corners,
    const optional<double> scale_h,
    const optional<double> scale_w,
    Tensor& out) {
  // Preconditions (checked in check_..._args):
  // In and out tensors have same dtype.
  // In and out tensors are rank 4 and have same dim[0] and dim[1].
  // In and out tensors are default dim order (NCHW).
  ET_KERNEL_CHECK(
      ctx,
      check_upsample_bilinear2d_args(
          in, output_size, align_corners, scale_h, scale_w, out),
      InvalidArgument,
      out);

  ET_KERNEL_CHECK_MSG(
      ctx,
      resize_upsample_2d(in, output_size, scale_h, scale_w, out) == Error::Ok,
      InvalidArgument,
      out,
      "Failed to resize output tensor");

  // Derive the effective scales from the final input/output shapes so the
  // kernel sees values consistent with align_corners, regardless of whether
  // the caller passed explicit sizes or scale factors.
  const auto kernel_scale_h = area_pixel_compute_scale<double>(
      in.sizes()[2], out.sizes()[2], align_corners, scale_h);
  const auto kernel_scale_w = area_pixel_compute_scale<double>(
      in.sizes()[3], out.sizes()[3], align_corners, scale_w);

  ET_SWITCH_REAL_TYPES(
      in.scalar_type(), ctx, "upsample_bilinear2d.out", CTYPE, [&]() {
        upsample_bilinear2d_kernel_impl<CTYPE>(
            in, align_corners, kernel_scale_h, kernel_scale_w, out);
      });

  return out;
}
136+
137+
} // namespace native
138+
} // namespace executor
139+
} // namespace torch

kernels/portable/cpu/util/targets.bzl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_common_targets():
3131
"//executorch/kernels/portable/cpu/util:advanced_index_util",
3232
"//executorch/kernels/portable/cpu/util:slice_util",
3333
"//executorch/kernels/portable/cpu/util:elementwise_util",
34+
"//executorch/kernels/portable/cpu/util:upsample_util",
3435
],
3536
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
3637
)
@@ -266,6 +267,16 @@ def define_common_targets():
266267
visibility = ["//executorch/kernels/portable/cpu/..."],
267268
)
268269

270+
runtime.cxx_library(
271+
name = "upsample_util",
272+
srcs = ["upsample_util.cpp"],
273+
exported_headers = ["upsample_util.h"],
274+
deps = [
275+
"//executorch/runtime/kernel:kernel_includes",
276+
],
277+
visibility = ["//executorch/kernels/portable/cpu/..."],
278+
)
279+
269280
# Utility functions that can be used by operators that perform reduction
270281
for aten_mode in [True, False]:
271282
suffix = "_aten" if aten_mode else ""
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>

#include <algorithm>
#include <array>
12+
namespace torch {
13+
namespace executor {
14+
15+
// Validates the argument invariants shared by the 2D upsample operators.
//
// Returns true when all of the following hold, logging the failed condition
// and returning false otherwise:
//  - in and out have the same dtype and are both rank 4;
//  - both tensors use the default (NCHW) dim order;
//  - exactly one of output_size (size 2) or the scale_h/scale_w pair is
//    provided, never both and never neither;
//  - every provided scale is > 0 and every provided output size is > 0.
bool check_upsample_2d_common_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
  ET_LOG_AND_RETURN_IF_FALSE(in.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(out.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(in));
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(out));
  // Mutually exclusive size specification: explicit (H, W) output_size XOR
  // both scale factors.
  ET_LOG_AND_RETURN_IF_FALSE(
      (output_size.size() == 2 && !scale_h.has_value() &&
       !scale_w.has_value()) ||
      (output_size.size() == 0 && scale_h.has_value() && scale_w.has_value()));
  ET_LOG_AND_RETURN_IF_FALSE(!scale_h.has_value() || scale_h.value() > 0);
  ET_LOG_AND_RETURN_IF_FALSE(!scale_w.has_value() || scale_w.value() > 0);
  // Guarded element access: only check output_size entries that exist.
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 1 || output_size[0] > 0);
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 2 || output_size[1] > 0);

  return true;
}
37+
38+
// Validates arguments for upsample_bilinear2d. align_corners affects only
// the interpolation math, not argument validity, so all checks are shared
// with the other 2D upsample operators.
bool check_upsample_bilinear2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    ET_UNUSED const bool align_corners,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  return check_upsample_2d_common_args(in, output_size, scale_h, scale_w, out);
}
47+
48+
// Validates arguments for upsample_nearest2d. Nearest-neighbor upsampling
// adds no constraints beyond the common 2D upsample checks.
bool check_upsample_nearest2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  return check_upsample_2d_common_args(in, output_size, scale_h, scale_w, out);
}
56+
57+
// Resizes `out` to the 2D-upsampled shape derived from `in`.
//
// The spatial target size comes either from an explicit (H, W) output_size
// or from scale_h/scale_w applied to the input's last two dims — exactly one
// of the two is provided (validated in check_..._args). Batch/channel dims
// are copied from the input unchanged. Returns InvalidArgument if neither
// size source is usable or the computed size is empty; otherwise forwards
// the result of resize_tensor.
Error resize_upsample_2d(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  // Either output_size or scale_factors are provided, not both. This
  // is checked in check_..._args.
  // Scales are transformed according to align_corners.
  std::array<Tensor::SizesType, kTensorDimensionLimit> target_size;

  const auto dim = in.dim();
  // Seed target_size with the input shape so non-spatial dims carry over.
  std::copy(in.sizes().cbegin(), in.sizes().cend(), target_size.begin());

  if (scale_h.has_value() && scale_w.has_value()) {
    // Scale path: the cast truncates toward zero, which equals floor for
    // the positive sizes validated earlier.
    target_size[dim - 2] =
        static_cast<Tensor::SizesType>(in.sizes()[dim - 2] * scale_h.value());
    target_size[dim - 1] =
        static_cast<Tensor::SizesType>(in.sizes()[dim - 1] * scale_w.value());
  } else if (output_size.size() == 2) {
    // Explicit-size path.
    target_size[dim - 2] = output_size[0];
    target_size[dim - 1] = output_size[1];
  } else {
    ET_LOG(Error, "Invalid output_size or scale_factors");
    return Error::InvalidArgument;
  }

  // A scale < 1/size can still truncate to zero, so re-check positivity of
  // the computed spatial dims before resizing.
  ET_CHECK_OR_RETURN_ERROR(
      target_size[dim - 2] > 0 && target_size[dim - 1] > 0,
      InvalidArgument,
      "Upsampled output size must be non-empty, but was %ld x %ld.",
      static_cast<long>(target_size[dim - 2]),
      static_cast<long>(target_size[dim - 1]));

  return resize_tensor(out, {target_size.data(), static_cast<size_t>(dim)});
}
93+
94+
} // namespace executor
95+
} // namespace torch

0 commit comments

Comments
 (0)