diff --git a/kernels/portable/cpu/util/allocate_tensor_util.cpp b/kernels/portable/cpu/util/allocate_tensor_util.cpp
new file mode 100644
index 00000000000..0bb10b6caff
--- /dev/null
+++ b/kernels/portable/cpu/util/allocate_tensor_util.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "executorch/kernels/portable/cpu/util/allocate_tensor_util.h"
+
+#include <cstring>
+#include <new>
+
+namespace torch {
+namespace executor {
+
+using Tensor = exec_aten::Tensor;
+using ScalarType = exec_aten::ScalarType;
+
+Tensor allocate_tensor(
+    KernelRuntimeContext& ctx,
+    const ArrayRef<Tensor::SizesType>& sizes,
+    const ArrayRef<Tensor::DimOrderType>& dim_order,
+    const ArrayRef<Tensor::StridesType>& strides,
+    const ScalarType& dtype) {
+  int dim = sizes.size();
+  size_t size_nbytes = dim * sizeof(Tensor::SizesType);
+  Result<void*> temp_mem_res_size = ctx.allocate_temp(size_nbytes);
+  void* size_data_ptr =
+      temp_mem_res_size.ok() ? temp_mem_res_size.get() : nullptr;
+  ET_CHECK_MSG(size_data_ptr != nullptr, "Failed to malloc for size bytes");
+  memcpy(size_data_ptr, sizes.data(), size_nbytes);
+
+  // TODO(T145322324): can we remove the static cast once size is unsigned?
+  size_t dim_order_nbytes =
+      static_cast<size_t>(dim) * sizeof(Tensor::DimOrderType);
+  Result<void*> temp_mem_res_dim_order = ctx.allocate_temp(dim_order_nbytes);
+  void* dim_order_data_ptr =
+      temp_mem_res_dim_order.ok() ? temp_mem_res_dim_order.get() : nullptr;
+  ET_CHECK_MSG(
+      dim_order_data_ptr != nullptr, "Failed to malloc for dim order bytes");
+  memcpy(dim_order_data_ptr, dim_order.data(), dim_order_nbytes);
+
+  size_t strides_nbytes = dim * sizeof(Tensor::StridesType);
+  Result<void*> temp_mem_res_strides = ctx.allocate_temp(strides_nbytes);
+  void* strides_data_ptr =
+      temp_mem_res_strides.ok() ? temp_mem_res_strides.get() : nullptr;
+  ET_CHECK_MSG(
+      strides_data_ptr != nullptr, "Failed to malloc for strides bytes");
+  memcpy(strides_data_ptr, strides.data(), strides_nbytes);
+
+  Result<void*> temp_mem_res_tensor = ctx.allocate_temp(sizeof(TensorImpl));
+  auto tensor_impl = static_cast<TensorImpl*>(
+      temp_mem_res_tensor.ok() ? temp_mem_res_tensor.get() : nullptr);
+  ET_CHECK_MSG(tensor_impl != nullptr, "Failed to malloc for data TensorImpl");
+
+  new (tensor_impl) TensorImpl(
+      dtype,
+      dim,
+      reinterpret_cast<Tensor::SizesType*>(size_data_ptr),
+      nullptr,
+      reinterpret_cast<Tensor::DimOrderType*>(dim_order_data_ptr),
+      reinterpret_cast<Tensor::StridesType*>(strides_data_ptr));
+
+  Result<void*> temp_mem_res_data = ctx.allocate_temp(tensor_impl->nbytes());
+  void* data_ptr = temp_mem_res_data.ok() ? temp_mem_res_data.get() : nullptr;
+  ET_CHECK_MSG(data_ptr != nullptr, "Failed to malloc for data buffer");
+  tensor_impl->set_data(data_ptr);
+
+  return Tensor{tensor_impl};
+}
+
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/allocate_tensor_util.h b/kernels/portable/cpu/util/allocate_tensor_util.h
new file mode 100644
index 00000000000..cd9b10e0444
--- /dev/null
+++ b/kernels/portable/cpu/util/allocate_tensor_util.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+
+Tensor allocate_tensor(
+    KernelRuntimeContext& ctx,
+    const ArrayRef<Tensor::SizesType>& sizes,
+    const ArrayRef<Tensor::DimOrderType>& dim_order,
+    const ArrayRef<Tensor::StridesType>& strides,
+    const ScalarType& dtype);
+
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/sort_util.cpp b/kernels/portable/cpu/util/sort_util.cpp
new file mode 100644
index 00000000000..c57053a5088
--- /dev/null
+++ b/kernels/portable/cpu/util/sort_util.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "executorch/kernels/portable/cpu/util/sort_util.h"
+#include <algorithm>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+
+using Tensor = exec_aten::Tensor;
+
+Error sort_tensor(
+    const Tensor& tensor,
+    Tensor& sorted_tensor,
+    Tensor& sorted_indices,
+    bool descending) {
+  // Check if the input tensor is a valid input
+  ET_CHECK_MSG(tensor.dim() == 1, "Input tensor must be 1D");
+
+  // Check if the output tensors are valid
+  ET_CHECK_MSG(sorted_tensor.dim() == 1, "Output tensor must be 1D");
+  ET_CHECK_MSG(sorted_indices.dim() == 1, "Output indices tensor must be 1D");
+
+  // Check if the tensors have the expected dtypes
+  ET_CHECK_MSG(
+      tensor.scalar_type() == sorted_tensor.scalar_type(),
+      "Input and output tensors must have the same dtype");
+  ET_CHECK_MSG(
+      tensor.scalar_type() == ScalarType::Float,
+      "Only float inputs are supported currently");
+  ET_CHECK_MSG(
+      sorted_indices.scalar_type() == exec_aten::ScalarType::Long,
+      "Output indices tensor must be of type int64");
+
+  // Get the number of elements in the tensor
+  int size = tensor.numel();
+
+  // Initialize the indices with the identity permutation
+  for (int i = 0; i < size; i++) {
+    sorted_indices.mutable_data_ptr<int64_t>()[i] = i;
+  }
+
+  // Sort the indices based on the corresponding tensor values
+  std::sort(
+      sorted_indices.mutable_data_ptr<int64_t>(),
+      sorted_indices.mutable_data_ptr<int64_t>() + size,
+      [&tensor, descending](int64_t i, int64_t j) {
+        if (descending) {
+          return tensor.const_data_ptr<float>()[i] >
+              tensor.const_data_ptr<float>()[j];
+        } else {
+          return tensor.const_data_ptr<float>()[i] <
+              tensor.const_data_ptr<float>()[j];
+        }
+      });
+
+  // Rearrange the tensor values based on the sorted indices
+  for (int i = 0; i < size; i++) {
+    sorted_tensor.mutable_data_ptr<float>()[i] = tensor.const_data_ptr<
+        float>()[sorted_indices.const_data_ptr<int64_t>()[i]];
+  }
+
+  return Error::Ok;
+}
+
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/sort_util.h b/kernels/portable/cpu/util/sort_util.h
new file mode 100644
index 00000000000..9095490b327
--- /dev/null
+++ b/kernels/portable/cpu/util/sort_util.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include + +namespace torch { +namespace executor { + +using Tensor = exec_aten::Tensor; + +Error sort_tensor( + const Tensor& tensor, + Tensor& sorted_tensor, + Tensor& sorted_indice, + bool descending = false); + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index 82d3d84fa23..7212915c5f9 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -237,6 +237,27 @@ def define_common_targets(): visibility = ["//executorch/kernels/portable/cpu/..."], ) + runtime.cxx_library( + name = "allocate_tensor_util", + srcs = ["allocate_tensor_util.cpp"], + exported_headers = ["allocate_tensor_util.h"], + deps = [ + "//executorch/runtime/kernel:kernel_includes", + ], + visibility = ["//executorch/kernels/portable/cpu/..."], + ) + + runtime.cxx_library( + name = "sort_util", + srcs = ["sort_util.cpp"], + exported_headers = ["sort_util.h"], + deps = [ + "//executorch/runtime/kernel:kernel_includes", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ], + visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/torchvision/..."], + ) + # Utility functions that can be used by operators that perform reduction for aten_mode in [True, False]: suffix = "_aten" if aten_mode else "" diff --git a/kernels/portable/cpu/util/test/allocate_tensor_test.cpp b/kernels/portable/cpu/util/test/allocate_tensor_test.cpp new file mode 100644 index 00000000000..dcfea3687a6 --- /dev/null +++ b/kernels/portable/cpu/util/test/allocate_tensor_test.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +using ScalarType = exec_aten::ScalarType; + +class AllocateTest : public ::testing::Test { + protected: + void SetUp() override { + // Since these tests cause ET_LOG to be called, the PAL must be initialized + // first. 
+    torch::executor::runtime_init();
+  }
+};
+
+TEST_F(AllocateTest, AllocateTensor) {
+  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(2048);
+  executorch::runtime::MemoryAllocator temp_allocator(2048, temp_allocator_ptr);
+  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);
+
+  executorch::aten::SizesType sizes[3] = {1, 2, 3};
+  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
+  executorch::aten::StridesType strides[3] = {3, 3, 1};
+
+  torch::executor::ArrayRef<executorch::aten::SizesType> sizes_ref(sizes, 3);
+  torch::executor::ArrayRef<executorch::aten::StridesType> strides_ref(
+      strides, 3);
+  torch::executor::ArrayRef<executorch::aten::DimOrderType> dim_orders_ref(
+      dim_order, 3);
+
+  torch::executor::allocate_tensor(
+      ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float);
+
+  free(temp_allocator_ptr);
+}
+
+TEST_F(AllocateTest, FailAllocateTensor) {
+  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(16);
+  executorch::runtime::MemoryAllocator temp_allocator(16, temp_allocator_ptr);
+  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);
+
+  executorch::aten::SizesType sizes[3] = {1, 2, 3};
+  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
+  executorch::aten::StridesType strides[3] = {3, 3, 1};
+
+  torch::executor::ArrayRef<executorch::aten::SizesType> sizes_ref(sizes, 3);
+  torch::executor::ArrayRef<executorch::aten::StridesType> strides_ref(
+      strides, 3);
+  torch::executor::ArrayRef<executorch::aten::DimOrderType> dim_orders_ref(
+      dim_order, 3);
+
+  ET_EXPECT_DEATH(
+      torch::executor::allocate_tensor(
+          ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float),
+      "Failed to malloc");
+
+  free(temp_allocator_ptr);
+}
diff --git a/kernels/portable/cpu/util/test/sort_util_test.cpp b/kernels/portable/cpu/util/test/sort_util_test.cpp
new file mode 100644
index 00000000000..e5dbfbd4b30
--- /dev/null
+++ b/kernels/portable/cpu/util/test/sort_util_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#include +#include +#include +#include +#include + +#include + +using namespace ::testing; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using torch::executor::ArrayRef; +using torch::executor::testing::TensorFactory; + +TEST(SortUtilTest, SortTensorTest) { + TensorFactory tf; + TensorFactory lf; + + Tensor a = tf.make({4}, {3, 2, 1, 4}); + Tensor b = tf.zeros({4}); + Tensor c = lf.zeros({4}); + + // Ascending order sort test + sort_tensor(a, b, c); + + Tensor expected = tf.make({4}, {1, 2, 3, 4}); + Tensor expected_indices = lf.make({4}, {2, 1, 0, 3}); + EXPECT_TENSOR_EQ(b, expected); + EXPECT_TENSOR_EQ(c, expected_indices); + + // Descending order sort test + sort_tensor(a, b, c, true); + expected = tf.make({4}, {4, 3, 2, 1}); + expected_indices = lf.make({4}, {3, 0, 1, 2}); + EXPECT_TENSOR_EQ(b, expected); + EXPECT_TENSOR_EQ(c, expected_indices); +} diff --git a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl index 28988b90dcc..45687fd28bb 100644 --- a/kernels/portable/cpu/util/test/targets.bzl +++ b/kernels/portable/cpu/util/test/targets.bzl @@ -21,3 +21,21 @@ def define_common_targets(): "//executorch/kernels/portable/cpu/util:reduce_util", ], ) + + runtime.cxx_test( + name = "allocate_tensor_test", + srcs = ["allocate_tensor_test.cpp"], + deps = [ + "//executorch/runtime/core/exec_aten:lib", + "//executorch/kernels/portable/cpu/util:allocate_tensor_util", + "//executorch/runtime/kernel:kernel_includes", + + runtime.cxx_test( + name = "sort_util_test", + srcs = ["sort_util_test.cpp"], + deps = [ + "//executorch/runtime/core/exec_aten:lib", + "//executorch/runtime/core/exec_aten/testing_util:tensor_util", + "//executorch/kernels/portable/cpu/util:sort_util", + ], + )