Merged
Changes from 26 commits
Commits (118)
31a49e0  Update (swolchok, Mar 19, 2025)
9fcd885  Update (swolchok, Mar 19, 2025)
29d6de9  Update (swolchok, Mar 19, 2025)
79b908c  Update (swolchok, Mar 19, 2025)
fd62a07  Update (swolchok, Mar 19, 2025)
854c991  Update (swolchok, Mar 19, 2025)
def7ed4  Update (swolchok, Mar 19, 2025)
40c1b1b  Update (swolchok, Mar 19, 2025)
7c78357  Update (swolchok, Mar 19, 2025)
7ba269a  Update (swolchok, Mar 19, 2025)
edd45fb  Update (swolchok, Mar 19, 2025)
b9c545f  Update (swolchok, Mar 20, 2025)
3091007  Update (swolchok, Mar 20, 2025)
4a00cac  Update (swolchok, Mar 20, 2025)
21b81bf  Update (swolchok, Mar 20, 2025)
4c4add0  Update (swolchok, Mar 20, 2025)
8782a90  Update (swolchok, Mar 20, 2025)
75f8970  Update (swolchok, Mar 20, 2025)
2d19e75  Update (swolchok, Mar 20, 2025)
b61a8a2  Update (swolchok, Mar 25, 2025)
91161bd  Update (swolchok, Mar 25, 2025)
4add706  Update (swolchok, Mar 25, 2025)
5348a92  Update (swolchok, Mar 25, 2025)
001d72c  Update (swolchok, Mar 25, 2025)
e49080d  Update (swolchok, Mar 25, 2025)
44ee51a  Update (swolchok, Mar 25, 2025)
61afee1  Update (swolchok, Mar 25, 2025)
f659627  Update (swolchok, Mar 25, 2025)
f1c5429  Update (swolchok, Mar 25, 2025)
b34f04f  Update (swolchok, Mar 25, 2025)
f934bc0  Update (swolchok, Mar 25, 2025)
3a74f25  Update (swolchok, Mar 25, 2025)
bbc7ba8  Update (swolchok, Mar 25, 2025)
151bf4a  Update (swolchok, Mar 25, 2025)
0654e25  Update (swolchok, Mar 25, 2025)
9a93839  Update (swolchok, Mar 26, 2025)
bb16a55  Update (swolchok, Mar 26, 2025)
2242f1e  Update (swolchok, Mar 26, 2025)
0822028  Update (swolchok, Mar 26, 2025)
f1b97dc  Update (swolchok, Mar 26, 2025)
7336ff1  Update (swolchok, Mar 26, 2025)
7f57a19  Update (swolchok, Mar 26, 2025)
5d95c06  Update (swolchok, Mar 26, 2025)
42623bb  Update (swolchok, Mar 26, 2025)
284bc17  Update (swolchok, Mar 26, 2025)
29c2cfd  Update (swolchok, Mar 26, 2025)
30a2145  Update (swolchok, Mar 26, 2025)
4553283  Update (swolchok, Mar 26, 2025)
39610ad  Update (swolchok, Mar 26, 2025)
b3120fa  Update (swolchok, Mar 26, 2025)
350bcd8  Update (swolchok, Mar 26, 2025)
37e5b7d  Update (swolchok, Mar 26, 2025)
f53bb31  Update (swolchok, Mar 26, 2025)
ff2c358  Update (swolchok, Mar 26, 2025)
9c2340f  Update (swolchok, Mar 26, 2025)
545777f  Update (swolchok, Mar 26, 2025)
61b2a26  Update (swolchok, Mar 26, 2025)
7086659  Update (swolchok, Mar 28, 2025)
e13de0e  Update (swolchok, Mar 28, 2025)
943ab82  Update (swolchok, Mar 28, 2025)
f22d039  Update (swolchok, Mar 28, 2025)
45ce46d  Update (swolchok, Mar 28, 2025)
754dba4  Update (swolchok, Mar 28, 2025)
d5dfe2f  Update (swolchok, Mar 28, 2025)
3f1b775  Update (swolchok, Mar 28, 2025)
e55ac4a  Update (swolchok, Mar 28, 2025)
34eb5d4  Update (swolchok, Mar 28, 2025)
ea9dc6f  Update (swolchok, Mar 28, 2025)
7d7859e  Update (swolchok, Mar 28, 2025)
b98829d  Update (swolchok, Mar 28, 2025)
3140910  Update (swolchok, Mar 28, 2025)
afad88e  Update (swolchok, Mar 28, 2025)
946f2e0  Update (swolchok, Mar 28, 2025)
242995d  Update (swolchok, Mar 28, 2025)
7c23fec  Update (swolchok, Mar 28, 2025)
7f2bbdb  Update (swolchok, Apr 2, 2025)
960315e  Update (swolchok, Apr 2, 2025)
9e42e93  Update (swolchok, Apr 2, 2025)
96d258e  Update (swolchok, Apr 2, 2025)
e6f66ab  Update (swolchok, Apr 2, 2025)
a756254  Update (swolchok, Apr 2, 2025)
de9d52f  Update (swolchok, Apr 2, 2025)
ef74fe1  Update (swolchok, Apr 2, 2025)
b2e23ae  Update (swolchok, Apr 2, 2025)
7dc5cee  Update (swolchok, Apr 2, 2025)
20f3046  Update (swolchok, Apr 2, 2025)
3aa266d  Update (swolchok, Apr 2, 2025)
3c88a56  Update (swolchok, Apr 2, 2025)
153735d  Update (swolchok, Apr 2, 2025)
cac4293  Update (swolchok, Apr 2, 2025)
85451ea  Update (swolchok, Apr 2, 2025)
77a4fc6  Update (swolchok, Apr 2, 2025)
21ae5da  Update (swolchok, Apr 2, 2025)
a61c9b8  Update (swolchok, Apr 2, 2025)
0beabbb  Update (swolchok, Apr 2, 2025)
a7876b5  Update (swolchok, May 27, 2025)
3d59208  add tests that are long enough to hit the vectorized path (swolchok, May 28, 2025)
6541b28  actually verified test coverage (swolchok, May 28, 2025)
61a2f32  split out some commits (swolchok, May 28, 2025)
aedb0fa  split out some commits (swolchok, May 28, 2025)
a6d2402  split out some commits (swolchok, May 28, 2025)
78e1abb  fix visibility (swolchok, May 28, 2025)
f72595a  fix visibility (swolchok, May 28, 2025)
5a0da3f  fix visibility (swolchok, May 28, 2025)
b10c80f  Update (swolchok, May 28, 2025)
b721485  Update (swolchok, May 28, 2025)
c0ad8ac  Update (swolchok, May 28, 2025)
14a42e0  Update (swolchok, May 28, 2025)
17e45db  Update (swolchok, May 28, 2025)
402caf2  rebase, fix lint in #9432 (swolchok, May 29, 2025)
a4df2da  rebase, fix lint in #9432 (swolchok, May 29, 2025)
84170e4  rebase, fix lint in #9432 (swolchok, May 29, 2025)
aa5009b  rebase, fix lint in #9432 (swolchok, May 29, 2025)
6e71130  rebase, fix lint in #9432 (swolchok, May 29, 2025)
4a0d1db  Update (swolchok, Jun 9, 2025)
ca9db4c  Update (swolchok, Jun 9, 2025)
aa453d7  Update (swolchok, Jun 9, 2025)
9967037  Update (swolchok, Jun 9, 2025)
2 changes: 2 additions & 0 deletions .lintrunner.toml
@@ -264,6 +264,8 @@ exclude_patterns = [
'examples/**',
'exir/verification/bindings.cpp',
'extension/**',
# Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
'kernels/portable/cpu/util/elementwise_util.h',
'kernels/optimized/**',
'runtime/core/exec_aten/**',
# Want to be able to keep c10 in sync with PyTorch core.
9 changes: 7 additions & 2 deletions CMakeLists.txt
@@ -643,13 +643,18 @@ target_link_options_shared_lib(executorch)
# Real integrations should supply their own YAML file that only lists the
# operators necessary for the models that will run.
#
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
# find pytorch lib here to make it available to all
# sub-directories. Find it before including portable so that
# optimized_portable_kernels can use it.
find_package_torch_headers()
endif()

if(BUILD_EXECUTORCH_PORTABLE_OPS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable)
endif()

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
# find pytorch lib here to make it available to all sub-directories
find_package_torch_headers()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
endif()

1 change: 1 addition & 0 deletions kernels/optimized/CMakeLists.txt
@@ -62,6 +62,7 @@ message("Generated files ${gen_command_sources}")
list(TRANSFORM _optimized_kernels__srcs PREPEND "${EXECUTORCH_ROOT}/")
add_library(optimized_kernels ${_optimized_kernels__srcs})
target_include_directories(optimized_kernels PRIVATE ${TORCH_INCLUDE_DIRS} "${EXECUTORCH_ROOT}/third-party/pocketfft")
target_compile_definitions(optimized_kernels PRIVATE ET_USE_PYTORCH_HEADERS)
target_link_libraries(
optimized_kernels PUBLIC executorch_core cpublas extension_threadpool
)
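Since this target now defines ET_USE_PYTORCH_HEADERS (and .lintrunner.toml above notes a "properly-gated" ATen include), here is a minimal sketch of the gating pattern. Only the macro name and the ATen header are real; the scale() function and namespace are illustrative assumptions, not code from this PR:

// Gate the ATen include so the header still compiles when PyTorch
// headers are unavailable.
#ifdef ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h>
#endif

#include <cstddef>

namespace example {
template <typename T>
void scale(T* data, std::size_t n, T factor) {
#ifdef ET_USE_PYTORCH_HEADERS
  // Fast path: ATen's SIMD wrapper, available only when we opted in.
  using Vec = at::vec::Vectorized<T>;
  std::size_t i = 0;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    (Vec::loadu(data + i) * Vec(factor)).store(data + i);
  }
  for (; i < n; ++i) {
    data[i] *= factor;
  }
#else
  // Portable fallback: plain scalar loop, no PyTorch dependency.
  for (std::size_t i = 0; i < n; ++i) {
    data[i] *= factor;
  }
#endif
}
} // namespace example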
25 changes: 1 addition & 24 deletions kernels/optimized/cpu/binary_ops.h
@@ -10,34 +10,11 @@

#include <executorch/kernels/optimized/vec/functional.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_indexes_range.h>
#include <executorch/runtime/kernel/kernel_includes.h>

namespace torch {
namespace executor {
namespace internal {
// NOTE: we bake ArrayRef iterators being pointers into the return
// type here because we assume that iterators are portable across
// ArrayRef copies.
inline const Tensor::SizesType* arrayref_begin_ignoring_leading_1s(
ArrayRef<Tensor::SizesType> arr) {
return std::find_if(
arr.begin(), arr.end(), [](Tensor::SizesType x) { return x != 1; });
}

inline bool sizes_match_ignoring_leading_1s(
ArrayRef<Tensor::SizesType> lhs,
ArrayRef<Tensor::SizesType> rhs) {
auto lhs_begin = arrayref_begin_ignoring_leading_1s(lhs);
auto lhs_end = lhs.end();

auto rhs_begin = arrayref_begin_ignoring_leading_1s(rhs);
auto rhs_end = rhs.end();

return ((lhs_end - lhs_begin) == (rhs_end - rhs_begin)) &&
std::equal(lhs_begin, lhs_end, rhs_begin);
}
} // namespace internal

enum class ElementwiseOptimizedPath {
kNone,
kTreatAs1d,
5 changes: 4 additions & 1 deletion kernels/optimized/cpu/targets.bzl
@@ -131,7 +131,10 @@ def define_common_targets():
srcs = [],
exported_headers = ["op_add_sub_impl.h"],
visibility = ["//executorch/kernels/optimized/cpu/..."],
exported_deps = ["//executorch/runtime/core:core"],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/kernels/portable/cpu/util:broadcast_indexes_range",
],
)

runtime.cxx_library(
2 changes: 2 additions & 0 deletions kernels/portable/CMakeLists.txt
@@ -73,6 +73,8 @@ if(BUILD_OPTIMIZED_PORTABLE_KERNELS)
target_link_libraries(optimized_portable_kernels PRIVATE executorch)
target_link_libraries(optimized_portable_kernels PUBLIC extension_threadpool)
target_compile_options(optimized_portable_kernels PUBLIC ${_common_compile_options})
target_include_directories(optimized_portable_kernels PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_definitions(optimized_portable_kernels PRIVATE ET_USE_PYTORCH_HEADERS)
install(
TARGETS optimized_portable_kernels
DESTINATION lib
12 changes: 6 additions & 6 deletions kernels/portable/cpu/op_mul.cpp
@@ -52,17 +52,17 @@ Tensor& mul_out(
out);

ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
utils::apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
return val_a * val_b;
},
utils::apply_bitensor_elementwise_fn<
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBBF16>(
[](const auto val_a, const auto val_b) { return val_a * val_b; },
ctx,
a,
utils::SupportedTensorDtypes::REALHBBF16,
b,
utils::SupportedTensorDtypes::REALHBBF16,
out,
utils::SupportedTensorDtypes::REALHBBF16);
out);
});

return out;
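Note the lambda change from fixed (const CTYPE_COMPUTE, const CTYPE_COMPUTE) parameters to (const auto, const auto): a generic lambda can be instantiated with SIMD operand types as well as scalars. A simplified sketch of why that matters; the real entry point is apply_bitensor_elementwise_fn in elementwise_util.h, while this standalone apply_elementwise (contiguous float only) is an illustrative assumption:

#include <cstddef>
#ifdef ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h>
#endif

// Simplified stand-in for the elementwise helper.
template <typename Op>
void apply_elementwise(
    Op op, const float* a, const float* b, float* out, std::size_t n) {
  std::size_t i = 0;
#ifdef ET_USE_PYTORCH_HEADERS
  using Vec = at::vec::Vectorized<float>;
  // The generic lambda instantiates with Vectorized<float> here...
  for (; i + Vec::size() <= n; i += Vec::size()) {
    op(Vec::loadu(a + i), Vec::loadu(b + i)).store(out + i);
  }
#endif
  // ...and with plain float here. A lambda whose parameters were
  // pinned to CTYPE_COMPUTE could not compile on the vector path.
  for (; i < n; ++i) {
    out[i] = op(a[i], b[i]);
  }
}

// Usage mirrors the new op_mul body:
//   apply_elementwise(
//       [](const auto val_a, const auto val_b) { return val_a * val_b; },
//       a, b, out, n);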
27 changes: 26 additions & 1 deletion kernels/portable/cpu/util/broadcast_indexes_range.h
@@ -21,6 +21,28 @@
namespace torch::executor {

namespace internal {
// NOTE: we bake ArrayRef iterators being pointers into the return
// type here because we assume that iterators are portable across
// ArrayRef copies.
inline const Tensor::SizesType* arrayref_begin_ignoring_leading_1s(
ArrayRef<Tensor::SizesType> arr) {
return std::find_if(
arr.begin(), arr.end(), [](Tensor::SizesType x) { return x != 1; });
}

inline bool sizes_match_ignoring_leading_1s(
ArrayRef<Tensor::SizesType> lhs,
ArrayRef<Tensor::SizesType> rhs) {
auto lhs_begin = arrayref_begin_ignoring_leading_1s(lhs);
auto lhs_end = lhs.end();

auto rhs_begin = arrayref_begin_ignoring_leading_1s(rhs);
auto rhs_end = rhs.end();

return ((lhs_end - lhs_begin) == (rhs_end - rhs_begin)) &&
std::equal(lhs_begin, lhs_end, rhs_begin);
}

template <std::size_t kNumInputs>
class BroadcastIndexesIterator {
public:
@@ -35,7 +57,10 @@
template <typename... Args>
explicit BroadcastIndexesIterator(const Tensor& output, const Args&... args)
: output_dim_or_zero_if_no_broadcasting_(
((args.sizes() == output.sizes()) && ...) ? 0 : output.dim()),
(sizes_match_ignoring_leading_1s(args.sizes(), output.sizes()) &&
...)
? 0
: output.dim()),
output_shape_(output.sizes()) {
static_assert(
sizeof...(args) == kNumInputs && (std::is_same_v<Args, Tensor> && ...),
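A standalone sketch of what these helpers (moved here from kernels/optimized/cpu/binary_ops.h) compute; plain std::vector<int> stands in for ArrayRef<Tensor::SizesType>, so this is illustrative only:

#include <algorithm>
#include <cassert>
#include <vector>

// Two shapes broadcast as a no-op if they agree after dropping
// leading dimensions of size 1.
bool sizes_match_ignoring_leading_1s(
    const std::vector<int>& lhs,
    const std::vector<int>& rhs) {
  auto skip_1s = [](const std::vector<int>& v) {
    return std::find_if(v.begin(), v.end(), [](int x) { return x != 1; });
  };
  auto lb = skip_1s(lhs);
  auto rb = skip_1s(rhs);
  return (lhs.end() - lb) == (rhs.end() - rb) &&
      std::equal(lb, lhs.end(), rb);
}

int main() {
  // {1, 2, 3} vs {2, 3}: same trailing shape, so no real broadcasting;
  // BroadcastIndexesIterator can fall back to cheap linear indexing.
  assert(sizes_match_ignoring_leading_1s({1, 2, 3}, {2, 3}));
  // {2, 1, 3} vs {2, 3}: the interior 1 actually broadcasts, so they differ.
  assert(!sizes_match_ignoring_leading_1s({2, 1, 3}, {2, 3}));
}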
4 changes: 4 additions & 0 deletions kernels/portable/cpu/util/dtype_util.cpp
@@ -23,10 +23,14 @@ bool check_tensor_dtype(
return executorch::runtime::tensor_is_realhbbf16_type(t);
case SupportedTensorDtypes::REALHBF16:
return executorch::runtime::tensor_is_realhbf16_type(t);
case SupportedTensorDtypes::REALH:
[Review thread on this line]
JacobSzwejbka (Contributor), Mar 25, 2025: what is the h in realh? half?
swolchok (Contributor, Author): yes, it matches the naming scheme in
  #define ET_SWITCH_REALH_TYPES(TYPE, CONTEXT, NAME, CTYPE_ALIAS, ...) \
return executorch::runtime::tensor_is_realh_type(t);
case SupportedTensorDtypes::FLOATHBF16:
return executorch::runtime::tensor_is_floating_type(t);
case SupportedTensorDtypes::INTB:
return executorch::runtime::tensor_is_integral_type(t, true);
case SupportedTensorDtypes::BOOL:
return executorch::runtime::tensor_is_type(t, ScalarType::Bool);
case SupportedTensorDtypes::BOOL_OR_BYTE:
return (executorch::runtime::tensor_is_type(
t, ScalarType::Bool, ScalarType::Byte));
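For readers with the same question as the thread above, the dtype-set suffixes compose. This is a hedged summary inferred from the ET_SWITCH_* macro names and the checks in this file; the macros themselves are authoritative: REALH = real (integral + floating) types plus Half; REALHBF16 adds BFloat16 to that; REALHBBF16 adds both Bool and BFloat16; FLOATHBF16 = floating types plus Half and BFloat16; INTB = integral types plus Bool; BOOL and BOOL_OR_BYTE are literal.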
108 changes: 77 additions & 31 deletions kernels/portable/cpu/util/dtype_util.h
@@ -51,6 +51,15 @@ load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_realhbf16(
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_realh(const Tensor& t) {
CTYPE_COMMON (*result)(const void*) = nullptr;
ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() {
result = internal::load_and_convert<CTYPE_COMMON, TENSOR_CTYPE>;
});
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_floathbf16(
const Tensor& t) {
@@ -72,6 +81,16 @@ load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_intb(const Tensor& t) {
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_bool(const Tensor& t) {
ET_CHECK_MSG(
t.scalar_type() == ScalarType::Bool,
"Unhandled dtype %s for %s",
::executorch::runtime::toString(t.scalar_type()),
op_name);
return internal::load_and_convert<CTYPE_COMMON, bool>;
}

template <typename CTYPE_COMMON, const char* op_name>
load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_bool_or_byte(
const Tensor& t) {
@@ -86,12 +105,6 @@ template <typename CTYPE_COMMON, const char* op_name>
template <typename CTYPE_COMMON, const char* op_name>
load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn_same_as_compute(
const Tensor& t) {
constexpr auto common_scalar_type = CppTypeToScalarType<CTYPE_COMMON>::value;
ET_CHECK_MSG(
t.scalar_type() == common_scalar_type,
"Unhandled dtype %s for %s",
::executorch::runtime::toString(common_scalar_type),
op_name);
return internal::load_and_convert<CTYPE_COMMON, CTYPE_COMMON>;
}

@@ -143,6 +156,16 @@ store_common_to_tensor_fn<CTYPE_COMMON> get_store_common_to_tensor_fn_realhbf16(
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON> get_store_common_to_tensor_fn_realh(
const Tensor& t) {
void (*result)(CTYPE_COMMON, void*) = nullptr;
ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() {
result = internal::convert_and_store<TENSOR_CTYPE, CTYPE_COMMON>;
});
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON>
get_store_common_to_tensor_fn_floathbf16(const Tensor& t) {
@@ -165,6 +188,17 @@ store_common_to_tensor_fn<CTYPE_COMMON> get_store_common_to_tensor_fn_intb(
return result;
}

template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON> get_store_common_to_tensor_fn_bool(
const Tensor& t) {
ET_CHECK_MSG(
t.scalar_type() == ScalarType::Bool,
"Unhandled dtype %s for %s",
::executorch::runtime::toString(t.scalar_type()),
op_name);
return internal::convert_and_store<bool, CTYPE_COMMON>;
}

template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON>
get_store_common_to_tensor_fn_bool_or_byte(const Tensor& t) {
@@ -179,33 +213,13 @@ get_store_common_to_tensor_fn_bool_or_byte(const Tensor& t) {
template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON>
get_store_common_to_tensor_fn_same_as_compute(const Tensor& t) {
constexpr auto common_scalar_type = CppTypeToScalarType<CTYPE_COMMON>::value;
ET_CHECK_MSG(
t.scalar_type() == common_scalar_type,
"Unhandled dtype %s for %s",
::executorch::runtime::toString(common_scalar_type),
op_name);
return internal::convert_and_store<CTYPE_COMMON, CTYPE_COMMON>;
// We already validate tensor types earlier in the process, so at
// this phase, treat same_as_compute the same as our widest
// SupportedTensorDtypes set.
return get_store_common_to_tensor_fn_realhbf16<CTYPE_COMMON, op_name>(t);
}

template <
typename CTYPE_COMMON,
const char* op_name,
std::enable_if_t<std::is_same_v<CTYPE_COMMON, float>, bool> = true>
store_common_to_tensor_fn<CTYPE_COMMON>
get_store_common_to_tensor_fn_same_as_common(const Tensor& t) {
void (*result)(CTYPE_COMMON, void*) = nullptr;
ET_SWITCH_THREE_TYPES(
Float, Half, BFloat16, t.scalar_type(), unused, op_name, CTYPE, [&]() {
result = internal::convert_and_store<CTYPE, CTYPE_COMMON>;
});
return result;
}

template <
typename CTYPE_COMMON,
const char* op_name,
std::enable_if_t<!std::is_same_v<CTYPE_COMMON, float>, bool> = true>
template <typename CTYPE_COMMON, const char* op_name>
store_common_to_tensor_fn<CTYPE_COMMON>
get_store_common_to_tensor_fn_same_as_common(const Tensor& t) {
return get_store_common_to_tensor_fn_same_as_compute<CTYPE_COMMON, op_name>(
@@ -217,8 +231,10 @@ get_store_common_to_tensor_fn_same_as_common(const Tensor& t) {
enum class SupportedTensorDtypes {
REALHBBF16,
REALHBF16,
REALH,
FLOATHBF16,
INTB,
BOOL,
BOOL_OR_BYTE,
SAME_AS_COMPUTE,
SAME_AS_COMMON,
@@ -235,10 +251,14 @@ load_to_common_fn<CTYPE_COMMON> get_load_to_common_fn(
return get_load_to_common_fn_realhbbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::REALHBF16:
return get_load_to_common_fn_realhbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::REALH:
return get_load_to_common_fn_realh<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::FLOATHBF16:
return get_load_to_common_fn_realhbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::INTB:
return get_load_to_common_fn_intb<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::BOOL:
return get_load_to_common_fn_bool<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::BOOL_OR_BYTE:
return get_load_to_common_fn_bool_or_byte<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::SAME_AS_COMPUTE:
@@ -259,10 +279,14 @@ store_common_to_tensor_fn<CTYPE_COMMON> get_store_common_to_tensor_fn(
return get_store_common_to_tensor_fn_realhbbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::REALHBF16:
return get_store_common_to_tensor_fn_realhbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::REALH:
return get_store_common_to_tensor_fn_realh<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::FLOATHBF16:
return get_store_common_to_tensor_fn_floathbf16<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::INTB:
return get_store_common_to_tensor_fn_intb<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::BOOL:
return get_store_common_to_tensor_fn_bool<CTYPE_COMMON, op_name>(t);
case SupportedTensorDtypes::BOOL_OR_BYTE:
return get_store_common_to_tensor_fn_bool_or_byte<CTYPE_COMMON, op_name>(
t);
@@ -285,6 +309,28 @@ bool check_tensor_dtype(
SupportedTensorDtypes dtypes,
const ScalarType compute_type);

/// Return the one output type we are willing to emit specialized code
/// to handle, given a compute type of CTYPE_COMMON and supported
/// output types of out_dtypes.
template <typename CTYPE_COMMON>
inline constexpr ScalarType specialized_output_scalar_type(
SupportedTensorDtypes out_dtypes) {
switch (out_dtypes) {
case SupportedTensorDtypes::BOOL:
return ScalarType::Bool;
case SupportedTensorDtypes::BOOL_OR_BYTE:
return ScalarType::Bool;
case SupportedTensorDtypes::REALHBBF16:
case SupportedTensorDtypes::REALHBF16:
case SupportedTensorDtypes::REALH:
case SupportedTensorDtypes::FLOATHBF16:
case SupportedTensorDtypes::INTB:
case SupportedTensorDtypes::SAME_AS_COMPUTE:
case SupportedTensorDtypes::SAME_AS_COMMON:
return CppTypeToScalarType<CTYPE_COMMON>::value;
}
}

} // namespace internal
} // namespace utils
} // namespace native
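Taken together, dtype_util.h resolves a conversion function pointer once per tensor (get_load_to_common_fn / get_store_common_to_tensor_fn) so the per-element loop pays only an indirect call, not a dtype switch. A minimal self-contained sketch of that type-erased pattern, with simplified stand-in names rather than the real ExecuTorch API:

#include <cstddef>

enum class ScalarTypeLite { Float, Int, Bool };

// Mirrors internal::load_and_convert: read an element of type From
// through a void pointer and widen it to the compute type To.
template <typename To, typename From>
To load_and_convert(const void* p) {
  return static_cast<To>(*static_cast<const From*>(p));
}

using LoadFn = float (*)(const void*);

// Resolved once per tensor (cf. get_load_to_common_fn); the real code
// uses ET_SWITCH_* macros over t.scalar_type() instead of this switch.
LoadFn get_load_fn(ScalarTypeLite t) {
  switch (t) {
    case ScalarTypeLite::Float:
      return load_and_convert<float, float>;
    case ScalarTypeLite::Int:
      return load_and_convert<float, int>;
    case ScalarTypeLite::Bool:
      return load_and_convert<float, bool>;
  }
  return nullptr;
}

// The hot loop then works on any supported dtype without re-dispatching.
float sum_as_float(
    const void* data, std::size_t n, std::size_t elem_size, ScalarTypeLite t) {
  LoadFn load = get_load_fn(t);
  float acc = 0;
  const char* p = static_cast<const char*>(data);
  for (std::size_t i = 0; i < n; ++i) {
    acc += load(p + i * elem_size);
  }
  return acc;
}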