6 changes: 3 additions & 3 deletions build/cmake_deps.toml
@@ -117,9 +117,9 @@ deps = [
"executorch",
]

[targets.optimized_native_cpu_ops_oss]
[targets.optimized_native_cpu_ops]
buck_targets = [
"//configurations:optimized_native_cpu_ops_oss",
"//configurations:optimized_native_cpu_ops",
]
filters = [
".cpp$",
@@ -437,6 +437,6 @@ deps = [
"portable_kernels",
"quantized_kernels",
"xnnpack_backend",
"optimized_native_cpu_ops_oss",
"optimized_native_cpu_ops",
]
# ---------------------------------- LLama end ----------------------------------
2 changes: 1 addition & 1 deletion configurations/CMakeLists.txt
@@ -30,7 +30,7 @@ include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
# Merge optimized and portable definitions, taking optimized where available.
merge_yaml(
FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized-oss.yaml
FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized.yaml
FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml OUTPUT_DIR
${CMAKE_CURRENT_BINARY_DIR}
)
18 changes: 0 additions & 18 deletions configurations/targets.bzl
@@ -50,21 +50,3 @@ def define_common_targets():
"@EXECUTORCH_CLIENTS",
],
)

# TODO(T183193812): delete this target after optimized-oss.yaml is gone
executorch_generated_lib(
name = "optimized_native_cpu_ops_oss",
deps = [
"//executorch/kernels/optimized:optimized_operators",
"//executorch/kernels/optimized:optimized_oplist",
"//executorch/kernels/portable:executorch_aten_ops",
"//executorch/kernels/portable:operators",
],
functions_yaml_target = "//executorch/kernels/optimized:optimized-oss.yaml",
fallback_yaml_target = "//executorch/kernels/portable:functions.yaml",
define_static_targets = True,
visibility = [
"//executorch/examples/...",
"@EXECUTORCH_CLIENTS",
],
)
5 changes: 1 addition & 4 deletions examples/models/llama/runner/targets.bzl
@@ -3,17 +3,14 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
def _get_operator_lib(aten = False):
if aten:
return ["//executorch/kernels/aten:generated_lib"]
elif runtime.is_oss:
# TODO(T183193812): delete this path after optimized-oss.yaml is no more.
return ["//executorch/configurations:optimized_native_cpu_ops_oss", "//executorch/extension/llm/custom_ops:custom_ops"]
else:
return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"]

def get_qnn_dependency():
# buck build -c executorch.enable_qnn=true //executorch/examples/models/llama/runner:runner
# Check if QNN is enabled before including the dependency
if native.read_config("executorch", "enable_qnn", "false") == "true":
# //executorch/backends/qualcomm:qnn_executorch_backend doesn't work,
# //executorch/backends/qualcomm:qnn_executorch_backend doesn't work,
# likely because it is an empty library that only carries dependencies
return [
"//executorch/backends/qualcomm/runtime:runtime",
4 changes: 2 additions & 2 deletions kernels/optimized/CMakeLists.txt
@@ -49,12 +49,12 @@ target_compile_options(cpublas PUBLIC ${_common_compile_options})

# Generate C++ bindings to register kernels into both PyTorch (for AOT) and
# Executorch (for runtime). Here select all ops in optimized.yaml
set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized-oss.yaml")
set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized.yaml")
gen_selected_ops(LIB_NAME "optimized_ops_lib" OPS_SCHEMA_YAML "${_yaml}")

generate_bindings_for_kernels(
LIB_NAME "optimized_ops_lib" FUNCTIONS_YAML
${CMAKE_CURRENT_SOURCE_DIR}/optimized-oss.yaml
${CMAKE_CURRENT_SOURCE_DIR}/optimized.yaml
ADD_EXCEPTION_BOUNDARY
)
message("Generated files ${gen_command_sources}")
19 changes: 11 additions & 8 deletions kernels/optimized/cpu/op_log_softmax.cpp
@@ -75,17 +75,20 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) {
static_assert(
std::is_same_v<OUT_T, float>,
"Below loop actually only supports float.");
const VecIn max_input_vec(max_input);
for (; d + VecOut::size() < dim_size; d += VecOut::size()) {
auto index = d * dim_stride;
auto in = VecIn::loadu(&input_data[index]);
auto out_ = (in - max_input_vec).exp();
out_.store(&output_data[index]);
// It is not correct to vectorize if dim is not contiguous!
if (dim_stride == 1) {
const VecIn max_input_vec(max_input);
for (; d + VecOut::size() < dim_size; d += VecOut::size()) {
auto index = d * dim_stride;
auto in = VecIn::loadu(&input_data[index]);
auto out_ = (in - max_input_vec).exp();
out_.store(&output_data[index]);
#if defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE)
temp_sum += vaddvq_f32(out_);
temp_sum += vaddvq_f32(out_);
#else
temp_sum += at::vec::vec_reduce_all<float>(std::plus<VecOut>(), out_);
temp_sum += at::vec::vec_reduce_all<float>(std::plus<VecOut>(), out_);
#endif
}
}
for (; d < dim_size; ++d) {
output_data[d * dim_stride] =
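The new `dim_stride == 1` guard exists because `VecIn::loadu` reads a packed run of consecutive floats; when the softmax dim is not the innermost one, the elements being reduced sit `dim_stride` floats apart, so a packed load mixes values from unrelated slices. A minimal standalone sketch of the indexing problem (plain C++ with a hypothetical row-major {9, 3} buffer; illustrative only, not the ExecuTorch kernel):

// Sketch: why a packed vector load is wrong when dim_stride != 1.
// Assumptions (hypothetical): a row-major {9, 3} float buffer, reducing over dim 0.
#include <array>
#include <cstdio>

int main() {
  constexpr int rows = 9, cols = 3;
  std::array<float, rows * cols> data{};
  for (int i = 0; i < rows * cols; ++i) {
    data[i] = static_cast<float>(i);
  }

  // Walking dim 0 of a row-major {9, 3} tensor means a stride of `cols` floats.
  const int dim_stride = cols;

  // Correct: gather the strided elements of column 0 one at a time.
  std::printf("strided column 0:        ");
  for (int d = 0; d < 4; ++d) {
    std::printf("%g ", data[d * dim_stride]);  // 0 3 6 9
  }

  // Wrong: a packed 4-wide load at &data[0] (what a loadu-style call would do)
  // touches four consecutive floats, i.e. entries of three different columns.
  std::printf("\npacked load at &data[0]: ");
  for (int i = 0; i < 4; ++i) {
    std::printf("%g ", data[i]);  // 0 1 2 3 -- mixes columns
  }
  std::printf("\n");
  return 0;
}

With the guard in place, the non-contiguous case simply skips the vectorized loop and falls through to the scalar loop that follows it.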
8 changes: 3 additions & 5 deletions kernels/optimized/cpu/targets.bzl
@@ -1,5 +1,5 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "is_op_disabled", "op_target")
load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "op_target")

_OPTIMIZED_ATEN_OPS = (
op_target(
@@ -111,13 +111,11 @@ def define_common_targets():
TARGETS and BUCK files that call this function.
"""

enabled_ops = [op for op in _OPTIMIZED_ATEN_OPS if not is_op_disabled(op["name"])]

# Define build targets for all operators registered in the tables above.
for op in enabled_ops:
for op in _OPTIMIZED_ATEN_OPS:
define_op_target(**op)

aten_op_targets = [":{}".format(op["name"]) for op in enabled_ops]
aten_op_targets = [":{}".format(op["name"]) for op in _OPTIMIZED_ATEN_OPS]
all_op_targets = aten_op_targets

runtime.cxx_library(
6 changes: 1 addition & 5 deletions kernels/optimized/op_registration_util.bzl
@@ -2,8 +2,8 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
load(
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
"get_vec_preprocessor_flags",
"get_vec_deps",
"get_vec_preprocessor_flags",
)
load(
"@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl",
@@ -137,7 +137,3 @@ def define_op_target(name, compiler_flags, deps):
compiler_flags = compiler_flags,
deps = deps,
)

def is_op_disabled(name):
# All ops are enabled for internal builds.
return False
96 changes: 0 additions & 96 deletions kernels/optimized/optimized-oss.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions kernels/optimized/targets.bzl
@@ -19,14 +19,6 @@ def define_common_targets(is_fbcode=False):
],
)

runtime.export_file(
name = "optimized-oss.yaml",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
)

runtime.cxx_library(
name = "optimized_operators",
srcs = [],
10 changes: 3 additions & 7 deletions kernels/test/CMakeLists.txt
@@ -66,7 +66,7 @@ foreach(kernel ${_kernels})
cp
"${CMAKE_CURRENT_BINARY_DIR}/../../kernels/${kernel}/${kernel}_ops_lib/*.h"
"${CMAKE_CURRENT_BINARY_DIR}/include/${kernel}/executorch/kernels/${kernel}/"
DEPENDS "${kernel}_ops_lib"
DEPENDS "${kernel}_ops_lib"
)
endforeach()

@@ -270,17 +270,13 @@ set(_optimized_kernels_test_sources
"op_le_test.cpp"
"op_linear_test.cpp"
"op_log_softmax_test.cpp"
"op_mm_test.cpp"
"op_mul_test.cpp"
"op_native_layer_norm_test.cpp"
"op_neg_test.cpp"
"op_sub_test.cpp"
"UnaryUfuncRealHBBF16ToFloatHBF16Test.cpp"
${CMAKE_CURRENT_BINARY_DIR}/include/portable/executorch/kernels/test/supported_features.cpp
)

# We don't have sleef on OSS so we don't have gelu and log_softmax
list(REMOVE_ITEM _optimized_kernels_test_sources "op_gelu_test.cpp"
"op_log_softmax_test.cpp"
${CMAKE_CURRENT_BINARY_DIR}/include/optimized/executorch/kernels/test/supported_features.cpp
)

et_cxx_test(
Expand Down
57 changes: 57 additions & 0 deletions kernels/test/op_log_softmax_test.cpp
@@ -72,6 +72,59 @@ class OpLogSoftmaxOutTest : public OperatorTest {
EXPECT_TENSOR_CLOSE(out, expected);
}
}

template <class CTYPE, executorch::aten::ScalarType DTYPE>
void test_dtype_noncontiguous_dim() {
TensorFactory<DTYPE> tf;

// Dim 0 must be longer than the vector width of the machine (for
// float, this is 4 for ARM64 and 8 for AVX2) to exhibit problems.
// clang-format off
Tensor x = tf.make(
{9, 3},
{
0, 9, 18,
1, 10, 19,
2, 11, 20,
3, 12, 21,
4, 13, 22,
5, 14, 23,
6, 15, 24,
7, 16, 25,
8, 17, 26,
});
// clang-format on

Tensor out = tf.zeros({9, 3});

op_log_softmax_out(x, /*dim=*/0, /*half_to_float*/ false, out);

// clang-format off
Tensor expected = tf.make(
{9, 3},
{
-8.45855, -8.45855, -8.45855,
-7.45855, -7.45855, -7.45855,
-6.45855, -6.45855, -6.45855,
-5.45855, -5.45855, -5.45855,
-4.45855, -4.45855, -4.45855,
-3.45855, -3.45855, -3.45855,
-2.45855, -2.45855, -2.45855,
-1.45855, -1.45855, -1.45855,
-0.458552, -0.458552, -0.458552
});
// clang-format on

if constexpr (DTYPE == ScalarType::BFloat16) {
EXPECT_TENSOR_CLOSE_WITH_TOL(
out,
expected,
1e-2,
executorch::runtime::testing::internal::kDefaultAtol);
} else {
EXPECT_TENSOR_CLOSE(out, expected);
}
}
};

TEST_F(OpLogSoftmaxOutTest, Smoke) {
@@ -101,6 +154,10 @@ TEST_F(OpLogSoftmaxOutTest, AllDtypesSupported) {
#undef TEST_ENTRY
}

TEST_F(OpLogSoftmaxOutTest, NonContiguous) {
test_dtype_noncontiguous_dim<float, ScalarType::Float>();
}

TEST_F(OpLogSoftmaxOutTest, MismatchedDimensionsDies) {
if (SupportedFeatures::get()->is_aten) {
GTEST_SKIP() << "ATen currently supports mismatched dimensions";
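For reference, the `expected` values in the NonContiguous test follow directly from log_softmax(x)_d = x_d - log Σ_k exp(x_k) taken down dim 0: each column of the {9, 3} input is {0, …, 8} plus a per-column constant, the constant cancels, so every column yields the same outputs, from -8.45855 at row 0 to -0.458552 at row 8. The dim-0 length of 9 also exceeds the 4- and 8-wide float vectors mentioned in the test comment, so the (previously buggy) vectorized path is actually exercised. A small self-contained check (a sketch using plain loops with double accumulation; not the actual test harness):

// Reproduce the expected log-softmax values used in the NonContiguous test.
// Sketch only: plain loops, double accumulation, not the ExecuTorch op.
#include <cmath>
#include <cstdio>

int main() {
  constexpr int rows = 9, cols = 3;
  float x[rows][cols];
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      x[r][c] = static_cast<float>(c * rows + r);  // column c holds {9c, ..., 9c + 8}
    }
  }

  // log_softmax over dim 0: subtract logsumexp down each column (max-shifted
  // for numerical stability, matching the kernel's approach).
  for (int c = 0; c < cols; ++c) {
    const double max = x[rows - 1][c];  // columns are increasing, so the last row is the max
    double sum = 0.0;
    for (int r = 0; r < rows; ++r) {
      sum += std::exp(static_cast<double>(x[r][c]) - max);
    }
    const double lse = max + std::log(sum);
    // Prints -8.45855 and -0.458552 for every column, matching `expected`.
    std::printf("column %d: row 0 -> %.5f, row 8 -> %.6f\n",
                c, x[0][c] - lse, x[rows - 1][c] - lse);
  }
  return 0;
}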