
Commit 2f09fcd

swolchok authored and hinriksnaer committed
Use elementwise_util instead of functional_util in op_neg, delete optimized op (pytorch#11660)
Allows vectorization (checked assembly to make sure; it's different from the old optimized op because the optimized op wasn't parallelized), so we can delete the optimized op as well.
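
For intuition, here is a minimal standalone C++ sketch of the pattern the commit message alludes to: an elementwise op written as a tight loop over contiguous data with an inlinable per-element lambda, which an optimizing compiler can typically auto-vectorize. This is not ExecuTorch code; apply_elementwise and the sample data below are illustrative assumptions only.

#include <cstddef>
#include <cstdio>

// Illustrative only -- not ExecuTorch code. An elementwise negation written as
// a tight loop over contiguous data with an inlinable per-element lambda.
// Each iteration is independent, so an optimizing compiler can usually
// auto-vectorize the loop (e.g. into NEON/AVX instructions), which is the kind
// of codegen improvement the commit message describes checking in the assembly.
template <typename T, typename Op>
void apply_elementwise(const T* in, T* out, std::size_t n, Op op) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = op(in[i]);
  }
}

int main() {
  float in[8] = {1.f, -2.f, 3.f, -4.f, 5.f, -6.f, 7.f, -8.f};
  float out[8];
  apply_elementwise(in, out, 8, [](float v) { return -v; });
  std::printf("out[0] = %f\n", out[0]);  // prints out[0] = -1.000000
  return 0;
}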
1 parent 6a72709 commit 2f09fcd

File tree

6 files changed: +14, -62 lines


kernels/optimized/cpu/op_neg.cpp

Lines changed: 0 additions & 42 deletions
This file was deleted.

kernels/optimized/optimized.yaml

Lines changed: 0 additions & 5 deletions
@@ -92,11 +92,6 @@
     - arg_meta: null
       kernel_name: torch::executor::opt_native_layer_norm_out
 
-- op: neg.out
-  kernels:
-    - arg_meta: null
-      kernel_name: torch::executor::opt_neg_out
-
 - op: sub.out
   kernels:
     - arg_meta: null

kernels/portable/cpu/op_neg.cpp

Lines changed: 12 additions & 7 deletions
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/kernels/portable/cpu/util/functional_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 
@@ -33,12 +33,17 @@ Tensor& neg_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
 
-  ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "neg.out", CTYPE, [&] {
-    apply_unary_map_fn(
-        [](const CTYPE val_in) { return static_cast<CTYPE>(-val_in); },
-        in.const_data_ptr<CTYPE>(),
-        out.mutable_data_ptr<CTYPE>(),
-        in.numel());
+  static constexpr const char op_name[] = "neg.out";
+  ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] {
+    utils::internal::apply_unitensor_elementwise_fn<
+        CTYPE,
+        op_name,
+        utils::SupportedTensorDtypes::SAME_AS_COMMON>(
+        [](const auto val_in) { return -val_in; },
+        ctx,
+        in,
+        utils::SupportedTensorDtypes::REALHBF16,
+        out);
   });
 
   return out;

kernels/test/targets.bzl

Lines changed: 1 addition & 1 deletion
@@ -276,7 +276,7 @@ def define_common_targets():
     _common_op_test("op_native_group_norm_test", ["aten", "portable"])
     _common_op_test("op_native_layer_norm_test", ["aten", "portable", "optimized"])
     _common_op_test("op_ne_test", ["aten", "portable"])
-    _common_op_test("op_neg_test", ["aten", "portable", "optimized"])
+    _common_op_test("op_neg_test", ["aten", "portable"])
     _common_op_test("op_nonzero_test", ["aten", "portable"])
     _common_op_test("op_ones_test", ["aten", "portable"])
     _common_op_test("op_pdist_forward_test", ["aten", "portable"])

shim_et/xplat/executorch/kernels/optimized/op_registration_util.bzl

Lines changed: 0 additions & 6 deletions
@@ -259,12 +259,6 @@ OPTIMIZED_ATEN_OPS = (
             "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
         ],
     ),
-    op_target(
-        name = "op_neg",
-        deps = [
-            "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
-        ],
-    ),
     op_target(
         name = "op_sub",
         deps = [

shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl

Lines changed: 1 addition & 1 deletion
@@ -918,7 +918,7 @@ ATEN_OPS = (
     op_target(
         name = "op_neg",
         deps = [
-            "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
         ],
     ),
     op_target(
