Skip to content

Commit 6e084a7

Browse files
GregoryComer and facebook-github-bot
authored and committed
Add portable upsample_bilinear2d kernel
Summary: Add a upsample_bilinear2d kernel to the portable kernel library. This implementation re-uses some of the inner logic from the ATen implementation (see Upsample.h and UpsampleKernel.cpp), however I have not ported the outer kernel structure as it relies on TensorIterator and runtime allocation. It may be worth re-visiting this in the future, either by looking at pulling in more of the ATen implementation or adding an optimized variant. Differential Revision: D65756150
1 parent e95f171 commit 6e084a7

File tree

8 files changed

+946
-0
lines changed

8 files changed

+946
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <algorithm>
10+
#include <array>
11+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
12+
#include <executorch/runtime/kernel/kernel_includes.h>
13+
14+
namespace torch {
15+
namespace executor {
16+
namespace native {
17+
18+
using exec_aten::ArrayRef;
19+
using exec_aten::SizesType;
20+
using exec_aten::optional;
21+
22+
namespace {
23+
template <typename CTYPE>
24+
void upsample_bilinear2d_kernel_impl(
25+
const Tensor& in,
26+
bool align_corners,
27+
const float scale_h,
28+
const float scale_w,
29+
Tensor& out) {
30+
const auto in_data = in.const_data_ptr<CTYPE>();
31+
auto out_data = out.mutable_data_ptr<CTYPE>();
32+
33+
auto in_plane = in_data;
34+
for (auto n = 0; n < out.size(0); n++) {
35+
for (auto c = 0; c < out.size(1); c++) {
36+
for (auto h = 0; h < out.size(2); h++) {
37+
for (auto w = 0; w < out.size(3); w++) {
38+
// Compute source index.
39+
// See area_pixel_compute_source_index in pytorch/aten/src/ATen/native/UpSample.h
40+
float in_h = area_pixel_compute_source_index(scale_h, h, align_corners, false);
41+
float in_w = area_pixel_compute_source_index(scale_w, w, align_corners, false);
42+
43+
int64_t in_h1, in_h2, in_w1, in_w2;
44+
float weight_h, inv_weight_h, weight_w, inv_weight_w;
45+
46+
compute_source_index_and_lambda(
47+
in_h1,
48+
in_h2,
49+
weight_h,
50+
inv_weight_h,
51+
scale_h,
52+
h,
53+
in.sizes()[2],
54+
out.sizes()[2],
55+
align_corners);
56+
57+
compute_source_index_and_lambda(
58+
in_w1,
59+
in_w2,
60+
weight_w,
61+
inv_weight_w,
62+
scale_w,
63+
w,
64+
in.sizes()[3],
65+
out.sizes()[3],
66+
align_corners);
67+
68+
const auto top_left = in_plane[in_h1 * in.strides()[2] + in_w1 * in.strides()[3]];
69+
const auto top_right = in_plane[in_h1 * in.strides()[2] + in_w2 * in.strides()[3]];
70+
const auto bottom_left = in_plane[in_h2 * in.strides()[2] + in_w1 * in.strides()[3]];
71+
const auto bottom_right = in_plane[in_h2 * in.strides()[2] + in_w2 * in.strides()[3]];
72+
73+
const auto top = top_left * weight_w + top_right * inv_weight_w;
74+
const auto bottom = bottom_left * weight_w + bottom_right * inv_weight_w;
75+
const auto val = top * weight_h + bottom * inv_weight_h;
76+
77+
*out_data = val;
78+
out_data++;
79+
}
80+
}
81+
82+
in_plane += in.strides()[1];
83+
}
84+
}
85+
}
86+
}
87+
88+
// Entry point for the portable upsample_bilinear2d.out kernel. Validates
// arguments, resizes `out` to the requested spatial size, then dispatches to
// the typed kernel implementation. Returns `out` (also used to signal kernel
// failure via ET_KERNEL_CHECK).
Tensor& upsample_bilinear2d_out(
    KernelRuntimeContext& ctx,
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    bool align_corners,
    const optional<double> scale_h,
    const optional<double> scale_w,
    Tensor& out) {
  // Preconditions (checked in check_..._args):
  // In and out tensors have same dtype.
  // In and out tensors are rank 4 and have same dim[0] and dim[1].
  // In and out tensors are default dim order (NCHW).
  ET_KERNEL_CHECK(
      ctx,
      check_upsample_bilinear2d_args(
          in, output_size, align_corners, scale_h, scale_w, out),
      InvalidArgument,
      out);

  ET_KERNEL_CHECK_MSG(
      ctx,
      resize_upsample_2d(in, output_size, scale_h, scale_w, out) == Error::Ok,
      InvalidArgument,
      out,
      "Failed to resize output tensor");

  // Convert the user-facing scales / output sizes into the internal sampling
  // scales, honoring align_corners semantics.
  const auto kernel_scale_h = area_pixel_compute_scale<double>(
      in.sizes()[2], out.sizes()[2], align_corners, scale_h);
  const auto kernel_scale_w = area_pixel_compute_scale<double>(
      in.sizes()[3], out.sizes()[3], align_corners, scale_w);

  ET_SWITCH_REAL_TYPES(
      in.scalar_type(), ctx, "upsample_bilinear2d.out", CTYPE, [&]() {
        upsample_bilinear2d_kernel_impl<CTYPE>(
            in, align_corners, kernel_scale_h, kernel_scale_w, out);
      });

  return out;
}
124+
125+
}
126+
}
127+
}

kernels/portable/cpu/util/targets.bzl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_common_targets():
3131
"//executorch/kernels/portable/cpu/util:advanced_index_util",
3232
"//executorch/kernels/portable/cpu/util:slice_util",
3333
"//executorch/kernels/portable/cpu/util:elementwise_util",
34+
"//executorch/kernels/portable/cpu/util:upsample_util",
3435
],
3536
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
3637
)
@@ -266,6 +267,16 @@ def define_common_targets():
266267
visibility = ["//executorch/kernels/portable/cpu/..."],
267268
)
268269

270+
runtime.cxx_library(
271+
name = "upsample_util",
272+
srcs = ["upsample_util.cpp"],
273+
exported_headers = ["upsample_util.h"],
274+
deps = [
275+
"//executorch/runtime/kernel:kernel_includes",
276+
],
277+
visibility = ["//executorch/kernels/portable/cpu/..."],
278+
)
279+
269280
# Utility functions that can be used by operators that perform reduction
270281
for aten_mode in [True, False]:
271282
suffix = "_aten" if aten_mode else ""
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/kernels/portable/cpu/util/upsample_util.h>
10+
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
11+
12+
namespace torch {
13+
namespace executor {
14+
15+
// Validates arguments shared by the 2D upsample ops. Returns true when all
// checks pass; otherwise logs the failing condition and returns false.
// Callers must supply EITHER a 2-element output_size OR both scale factors,
// never both and never neither (enforced below).
bool check_upsample_2d_common_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  // Same dtype, rank-4 (NCHW), default dim order on both tensors.
  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
  ET_LOG_AND_RETURN_IF_FALSE(in.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(out.dim() == 4);
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(in));
  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(out));
  // Exactly one of {output_size, scale factors} must be provided.
  ET_LOG_AND_RETURN_IF_FALSE(
      (output_size.size() == 2 && !scale_h.has_value() && !scale_w.has_value()) ||
      (output_size.size() == 0 && scale_h.has_value() && scale_w.has_value()));
  // Whichever form was provided must be strictly positive.
  ET_LOG_AND_RETURN_IF_FALSE(!scale_h.has_value() || scale_h.value() > 0);
  ET_LOG_AND_RETURN_IF_FALSE(!scale_w.has_value() || scale_w.value() > 0);
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 1 || output_size[0] > 0);
  ET_LOG_AND_RETURN_IF_FALSE(output_size.size() < 2 || output_size[1] > 0);

  return true;
}
37+
38+
bool check_upsample_bilinear2d_args(
39+
const Tensor& in,
40+
const exec_aten::ArrayRef<int64_t> output_size,
41+
ET_UNUSED const bool align_corners,
42+
const exec_aten::optional<double> scale_h,
43+
const exec_aten::optional<double> scale_w,
44+
Tensor& out) {
45+
return check_upsample_2d_common_args(
46+
in,
47+
output_size,
48+
scale_h,
49+
scale_w,
50+
out);
51+
}
52+
53+
bool check_upsample_nearest2d_args(
54+
const Tensor& in,
55+
const exec_aten::ArrayRef<int64_t> output_size,
56+
const exec_aten::optional<double> scale_h,
57+
const exec_aten::optional<double> scale_w,
58+
Tensor& out) {
59+
return check_upsample_2d_common_args(
60+
in,
61+
output_size,
62+
scale_h,
63+
scale_w,
64+
out);
65+
}
66+
67+
// Resizes `out` to the target spatial size implied by either `output_size`
// (explicit H x W) or the scale factors applied to `in`'s H and W. Returns
// Error::Ok on success, or InvalidArgument if neither form was supplied or
// the resulting size would be empty.
Error resize_upsample_2d(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out) {
  // Either output_size or scale_factors are provided, not both. This
  // is checked in check_..._args.
  // Scales are transformed according to align_corners.
  std::array<Tensor::SizesType, kTensorDimensionLimit> target_size;

  const auto dim = in.dim();
  // Start from the input sizes; only the last two dims (H, W) change.
  std::copy(in.sizes().cbegin(), in.sizes().cend(), target_size.begin());

  if (scale_h.has_value() && scale_w.has_value()) {
    // Scale path: the static_cast truncates, i.e. output = floor(size * scale)
    // for positive scales.
    target_size[dim - 2] = static_cast<Tensor::SizesType>(in.sizes()[dim - 2] * scale_h.value());
    target_size[dim - 1] = static_cast<Tensor::SizesType>(in.sizes()[dim - 1] * scale_w.value());
  }
  else if (output_size.size() == 2) {
    // Explicit output size path.
    target_size[dim - 2] = output_size[0];
    target_size[dim - 1] = output_size[1];
  } else {
    ET_LOG(Error, "Invalid output_size or scale_factors");
    return Error::InvalidArgument;
  }

  // Reject degenerate (empty) output planes.
  ET_CHECK_OR_RETURN_ERROR(
      target_size[dim - 2] > 0 && target_size[dim - 1] > 0,
      InvalidArgument,
      "Upsampled output size must be non-empty, but was %ld x %ld.",
      static_cast<long>(target_size[dim - 2]),
      static_cast<long>(target_size[dim - 1]));

  return resize_tensor(out, {target_size.data(), static_cast<size_t>(dim)});
}
102+
103+
}
104+
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once

#include <algorithm>
#include <cmath>
#include <cstdint>

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
14+
15+
namespace torch {
16+
namespace executor {
17+
18+
// Returns true iff `in`/`out` satisfy the shared 2D upsample preconditions:
// same dtype, rank 4, default (NCHW) dim order, and exactly one of
// output_size / scale factors provided (with positive values). Logs and
// returns false otherwise.
bool check_upsample_2d_common_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Argument checks for upsample_bilinear2d. align_corners is part of the op
// signature but does not affect validity.
bool check_upsample_bilinear2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const bool align_corners,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Argument checks for upsample_nearest2d; same requirements as the common
// 2D checks.
bool check_upsample_nearest2d_args(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);

// Resizes `out` to the spatial size implied by output_size or the scale
// factors; returns Error::Ok on success.
Error resize_upsample_2d(
    const Tensor& in,
    const exec_aten::ArrayRef<int64_t> output_size,
    const exec_aten::optional<double> scale_h,
    const exec_aten::optional<double> scale_w,
    Tensor& out);
46+
47+
// Ported from aten/src/ATen/native/UpSample.h
48+
template <typename scalar_t>
49+
inline scalar_t compute_scales_value(
50+
const exec_aten::optional<double> scale,
51+
int64_t input_size,
52+
int64_t output_size) {
53+
return scale.has_value()
54+
? static_cast<scalar_t>(1.0 / scale.value())
55+
: (static_cast<scalar_t>(input_size) / output_size);
56+
}
57+
58+
// Ported from aten/src/ATen/native/UpSample.h
59+
template <typename scalar_t>
60+
inline scalar_t area_pixel_compute_scale(
61+
int64_t input_size,
62+
int64_t output_size,
63+
bool align_corners,
64+
const exec_aten::optional<double> scale) {
65+
// see Note [area_pixel_compute_scale]
66+
if(align_corners) {
67+
if(output_size > 1) {
68+
return static_cast<scalar_t>(input_size - 1) / (output_size - 1);
69+
} else {
70+
return static_cast<scalar_t>(0);
71+
}
72+
} else {
73+
return compute_scales_value<scalar_t>(scale, input_size, output_size);
74+
}
75+
}
76+
77+
// Ported from aten/src/ATen/native/UpSample.h
// Maps an output index to its (fractional) source position in the input.
template <typename scalar_t>
inline scalar_t area_pixel_compute_source_index(
    scalar_t scale,
    int64_t dst_index,
    bool align_corners,
    bool cubic) {
  if (align_corners) {
    return scale * dst_index;
  }
  // Half-pixel mapping; non-cubic modes clamp negative positions to zero.
  const scalar_t src_idx =
      scale * (dst_index + static_cast<scalar_t>(0.5)) -
      static_cast<scalar_t>(0.5);
  if (cubic || src_idx >= static_cast<scalar_t>(0)) {
    return src_idx;
  }
  return static_cast<scalar_t>(0);
}
93+
94+
// Ported from aten/src/ATen/native/UpSample.h
// when `real_input_index` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_size`, causing overflow. So we guard it with `std::min` below.
template <typename scalar_t, typename opmath_t>
inline void guard_index_and_lambda(
    const opmath_t& real_input_index,
    const int64_t& input_size,
    int64_t& input_index,
    scalar_t& lambda) {
  // std::floor (not floorf): floorf would first truncate a double opmath_t
  // to float, losing precision for large indices.
  input_index = std::min(
      static_cast<int64_t>(std::floor(real_input_index)), input_size - 1);
  // Clamp the fractional interpolation weight to [0, 1].
  lambda = std::min(
      std::max(real_input_index - input_index, static_cast<opmath_t>(0)),
      static_cast<opmath_t>(1));
}
106+
107+
// Ported from aten/src/ATen/native/UpSample.h
108+
template<typename scalar_t, typename opmath_t>
109+
inline void compute_source_index_and_lambda(
110+
int64_t& input_index0,
111+
int64_t& input_index1,
112+
scalar_t& lambda0,
113+
scalar_t& lambda1,
114+
opmath_t ratio,
115+
int64_t output_index,
116+
int64_t input_size,
117+
int64_t output_size,
118+
bool align_corners) {
119+
if (output_size == input_size) {
120+
// scale_factor = 1, simply copy
121+
input_index0 = output_index;
122+
input_index1 = output_index;
123+
lambda0 = static_cast<scalar_t>(1);
124+
lambda1 = static_cast<scalar_t>(0);
125+
} else {
126+
const auto real_input_index =
127+
area_pixel_compute_source_index<opmath_t>(
128+
ratio, output_index, align_corners, /*cubic=*/false);
129+
guard_index_and_lambda(real_input_index, input_size, input_index0, lambda1);
130+
int64_t offset = (input_index0 < input_size - 1) ? 1 : 0;
131+
input_index1 = input_index0 + offset;
132+
lambda0 = static_cast<scalar_t>(1.) - lambda1;
133+
}
134+
}
135+
136+
}
137+
}

0 commit comments

Comments
 (0)