Skip to content

Commit 41d7533

Browse files
authored
Merge pull request #8 from dijopaul/main
Adding sigmoid and tanh optimizations
2 parents 71d78a1 + c0b1005 commit 41d7533

File tree

4 files changed

+120
-1
lines changed

4 files changed

+120
-1
lines changed

backends/cadence/aot/functions_hifi.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@
9292
- arg_meta: null
9393
kernel_name: torch::executor::sub_out
9494

95+
- op: tanh.out
96+
kernels:
97+
- arg_meta: null
98+
kernel_name: torch::executor::tanh_out
99+
95100
- op: view_copy.out
96101
kernels:
97102
- arg_meta: null

backends/cadence/hifi/operators/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,22 @@ set(_aten_ops__srcs
2323
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
2424
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
2525
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
26+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp"
2627
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp"
28+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_tanh.cpp"
2729
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
2830
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
2931
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
3032
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
3133
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
3234
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
33-
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sigmoid.cpp"
3435
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp"
3536
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_softmax.cpp"
3637
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp"
3738
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
3839
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
3940
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp"
41+
"${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp"
4042
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
4143
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
4244
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <cmath>
10+
11+
#include <executorch/kernels/portable/cpu/util/functional_util.h>
12+
#include <executorch/runtime/kernel/kernel_includes.h>
13+
#include "kernels.h"
14+
15+
namespace torch {
16+
namespace executor {
17+
namespace native {
18+
19+
using Tensor = exec_aten::Tensor;
20+
21+
/**
 * Computes the element-wise logistic sigmoid of `in` into `out`.
 *
 * Fast path: when both tensors are float32, dispatches to the vectorized
 * Cadence HiFi NNLib kernel `xa_nn_vec_sigmoid_f32_f32`. All other supported
 * dtype combinations fall back to the portable elementwise implementation.
 *
 * @param ctx Kernel runtime context used for error reporting.
 * @param in  Input tensor (read-only); bool dtype is rejected.
 * @param out Output tensor; must have a floating-point dtype. Resized to
 *            `in.sizes()` to support dynamic shapes.
 * @return `out`, for chaining.
 */
Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
  ET_KERNEL_CHECK(
      ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out);
  ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);

  // Resize for dynamic shape; must happen before either path writes to out.
  ET_KERNEL_CHECK_MSG(
      ctx,
      resize_tensor(out, in.sizes()) == Error::Ok,
      InvalidArgument,
      out,
      "Failed to resize output tensor.");

  const ScalarType in_type = in.scalar_type();
  const ScalarType out_type = out.scalar_type();

  // Optimized path: vectorized HiFi kernel for the float32 -> float32 case.
  // The input is read-only, so take const_data_ptr (the original code
  // incorrectly requested a mutable pointer on the const input).
  if (in_type == ScalarType::Float && out_type == ScalarType::Float) {
    const float* data_in = in.const_data_ptr<float>();
    float* data_out = out.mutable_data_ptr<float>();
    xa_nn_vec_sigmoid_f32_f32(
        data_out, data_in, static_cast<int>(in.numel()));
    return out;
  }

  // Portable fallback covering the remaining real/half/bool input dtypes
  // paired with float/half output dtypes.
  ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
      apply_unary_map_fn(
          [](const CTYPE_IN val_in) {
            // Perform math in double to preserve precision.
            const double in_casted = static_cast<double>(val_in);
            return static_cast<CTYPE_OUT>(1.0 / (1.0 + std::exp(-in_casted)));
          },
          in.const_data_ptr<CTYPE_IN>(),
          out.mutable_data_ptr<CTYPE_OUT>(),
          in.numel());
    });
  });

  return out;
}
69+
70+
} // namespace native
71+
} // namespace executor
72+
} // namespace torch
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/kernels/portable/cpu/pattern/pattern.h>
10+
#include <executorch/runtime/kernel/kernel_includes.h>
11+
#include <cmath>
12+
#include "kernels.h"
13+
14+
namespace torch {
15+
namespace executor {
16+
namespace native {
17+
18+
/**
 * Computes the element-wise hyperbolic tangent of `in` into `out`.
 *
 * Fast path: when both tensors are float32, dispatches to the vectorized
 * Cadence HiFi NNLib kernel `xa_nn_vec_tanh_f32_f32`. All other supported
 * dtype combinations use the portable `unary_ufunc_realhb_to_floath`
 * pattern, which performs its own validation and resizing.
 *
 * @param ctx Kernel runtime context used for error reporting.
 * @param in  Input tensor (read-only).
 * @param out Output tensor; resized to `in.sizes()` on the optimized path.
 * @return `out`, for chaining.
 */
Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
  const bool use_optimized = in.scalar_type() == ScalarType::Float &&
      out.scalar_type() == ScalarType::Float;

  if (use_optimized) {
    // The portable fallback resizes `out` internally, but this path writes
    // in.numel() elements directly into `out`, so it must be resized first
    // or a stale (smaller) allocation would be overrun on dynamic shapes.
    ET_KERNEL_CHECK_MSG(
        ctx,
        resize_tensor(out, in.sizes()) == Error::Ok,
        InvalidArgument,
        out,
        "Failed to resize output tensor.");

    // Input is read-only: use const_data_ptr, not mutable_data_ptr.
    const float* data_in = in.const_data_ptr<float>();
    float* data_out = out.mutable_data_ptr<float>();
    xa_nn_vec_tanh_f32_f32(data_out, data_in, static_cast<int>(in.numel()));
    return out;
  }

  // Portable elementwise fallback for all other supported dtypes.
  return internal::unary_ufunc_realhb_to_floath(std::tanh, ctx, in, out);
}
37+
38+
} // namespace native
39+
} // namespace executor
40+
} // namespace torch

0 commit comments

Comments
 (0)