Skip to content

Commit f2ee743

Browse files
tarun292 authored and facebook-github-bot committed
Add allocate tensor util that uses temp allocator
Differential Revision: D64072692
1 parent f663ba6 commit f2ee743

File tree

5 files changed

+182
-0
lines changed

5 files changed

+182
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include "executorch/kernels/portable/cpu/util/allocate_tensor_util.h"
10+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
11+
12+
namespace torch {
13+
namespace executor {
14+
15+
using Tensor = exec_aten::Tensor;
16+
using ScalarType = exec_aten::ScalarType;
17+
18+
/**
 * Allocates a Tensor whose metadata (sizes, dim order, strides), TensorImpl,
 * and data buffer all live in the kernel runtime context's temp allocator.
 *
 * NOTE(review): the returned tensor is only valid while the temp allocator's
 * backing memory is alive — confirm lifetime expectations with callers.
 *
 * @param ctx Kernel runtime context providing the temp allocator.
 * @param sizes Size of each dimension; its length defines the tensor rank.
 * @param dim_order Dimension order; assumed to match sizes in length.
 * @param strides Strides; assumed to match sizes in length.
 * @param dtype Scalar type of the tensor elements.
 * @return A Tensor backed entirely by temp-allocated memory. Aborts via
 *         ET_CHECK_MSG if any temp allocation fails.
 */
Tensor allocate_tensor(
    KernelRuntimeContext& ctx,
    const ArrayRef<Tensor::SizesType>& sizes,
    const ArrayRef<Tensor::DimOrderType>& dim_order,
    const ArrayRef<Tensor::StridesType>& strides,
    const ScalarType& dtype) {
  const int dim = sizes.size();

  // Copy the sizes into temp-allocated storage so the TensorImpl can keep
  // referring to them after this function returns.
  // TODO(T145322324): can we remove the static cast once size is unsigned?
  const size_t size_nbytes =
      static_cast<size_t>(dim) * sizeof(Tensor::SizesType);
  Result<void*> temp_mem_res_size = ctx.allocate_temp(size_nbytes);
  void* size_data_ptr =
      temp_mem_res_size.ok() ? temp_mem_res_size.get() : nullptr;
  ET_CHECK_MSG(size_data_ptr != nullptr, "Failed to malloc for size bytes");
  memcpy(size_data_ptr, sizes.data(), size_nbytes);

  // Copy the dim order.
  const size_t dim_order_nbytes =
      static_cast<size_t>(dim) * sizeof(Tensor::DimOrderType);
  Result<void*> temp_mem_res_dim_order = ctx.allocate_temp(dim_order_nbytes);
  void* dim_order_data_ptr =
      temp_mem_res_dim_order.ok() ? temp_mem_res_dim_order.get() : nullptr;
  ET_CHECK_MSG(
      dim_order_data_ptr != nullptr, "Failed to malloc for dim order bytes");
  memcpy(dim_order_data_ptr, dim_order.data(), dim_order_nbytes);

  // Copy the strides. (Debug printf/fflush removed; byte count made size_t
  // for consistency with dim_order_nbytes above.)
  const size_t strides_nbytes =
      static_cast<size_t>(dim) * sizeof(Tensor::StridesType);
  Result<void*> temp_mem_res_strides = ctx.allocate_temp(strides_nbytes);
  void* strides_data_ptr =
      temp_mem_res_strides.ok() ? temp_mem_res_strides.get() : nullptr;
  ET_CHECK_MSG(
      strides_data_ptr != nullptr, "Failed to malloc for strides bytes");
  memcpy(strides_data_ptr, strides.data(), strides_nbytes);

  // Placement-new the TensorImpl itself into temp memory.
  Result<void*> temp_mem_res_tensor = ctx.allocate_temp(sizeof(TensorImpl));
  auto tensor_impl = static_cast<TensorImpl*>(
      temp_mem_res_tensor.ok() ? temp_mem_res_tensor.get() : nullptr);
  ET_CHECK_MSG(tensor_impl != nullptr, "Failed to malloc for data TensorImpl");

  new (tensor_impl) TensorImpl(
      dtype,
      dim,
      reinterpret_cast<Tensor::SizesType*>(size_data_ptr),
      nullptr,
      reinterpret_cast<Tensor::DimOrderType*>(dim_order_data_ptr),
      reinterpret_cast<Tensor::StridesType*>(strides_data_ptr));

  // Allocate the data buffer last, once nbytes() is computable from the impl.
  Result<void*> temp_mem_res_data = ctx.allocate_temp(tensor_impl->nbytes());
  void* data_ptr = temp_mem_res_data.ok() ? temp_mem_res_data.get() : nullptr;
  ET_CHECK_MSG(data_ptr != nullptr, "Failed to malloc for data buffer");
  tensor_impl->set_data(data_ptr);

  return Tensor{tensor_impl};
}
72+
73+
} // namespace executor
74+
} // namespace torch
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#pragma once
4+
5+
#include <executorch/runtime/kernel/kernel_includes.h>
6+
7+
namespace torch {
8+
namespace executor {
9+
10+
/// Allocates a Tensor (metadata, TensorImpl, and data buffer) entirely from
/// the kernel runtime context's temp allocator.
///
/// NOTE(review): the returned tensor presumably lives only as long as the
/// temp allocator's backing memory — confirm lifetime with callers.
///
/// @param ctx Kernel runtime context providing the temp allocator.
/// @param sizes Size of each dimension; its length defines the tensor rank.
/// @param dim_order Dimension order; expected to match sizes in length.
/// @param strides Strides; expected to match sizes in length.
/// @param dtype Scalar type of the tensor elements.
/// @return A Tensor backed by temp-allocated memory.
Tensor allocate_tensor(
    KernelRuntimeContext& ctx,
    const ArrayRef<Tensor::SizesType>& sizes,
    const ArrayRef<Tensor::DimOrderType>& dim_order,
    const ArrayRef<Tensor::StridesType>& strides,
    const ScalarType& dtype);
16+
17+
} // namespace executor
18+
} // namespace torch

kernels/portable/cpu/util/targets.bzl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,16 @@ def define_common_targets():
237237
visibility = ["//executorch/kernels/portable/cpu/..."],
238238
)
239239

240+
runtime.cxx_library(
241+
name = "allocate_tensor_util",
242+
srcs = ["allocate_tensor_util.cpp"],
243+
exported_headers = ["allocate_tensor_util.cpp"],
244+
deps = [
245+
"//executorch/runtime/kernel:kernel_includes",
246+
],
247+
visibility = ["//executorch/kernels/portable/cpu/..."],
248+
)
249+
240250
# Utility functions that can be used by operators that perform reduction
241251
for aten_mode in [True, False]:
242252
suffix = "_aten" if aten_mode else ""
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <gtest/gtest.h>
10+
11+
#include <executorch/kernels/portable/cpu/util/allocate_tensor_util.h>
12+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
13+
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
14+
#include <executorch/runtime/kernel/kernel_includes.h>
15+
#include <executorch/runtime/platform/runtime.h>
16+
#include <executorch/test/utils/DeathTest.h>
17+
using ScalarType = exec_aten::ScalarType;
18+
19+
// Test fixture that initializes the ExecuTorch Platform Abstraction Layer
// before each test.
// NOTE(review): SetUp() runs only for tests declared with
// TEST_F(AllocateTest, ...). Plain TEST(...) cases do not instantiate this
// fixture, so this initialization does not apply to them — verify the test
// macros below match the intent.
class AllocateTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Since these tests cause ET_LOG to be called, the PAL must be initialized
    // first.
    torch::executor::runtime_init();
  }
};
27+
28+
// Verifies allocate_tensor succeeds when the temp allocator is large enough
// for the metadata, the TensorImpl, and the 1x2x3 float data buffer.
// Fix: use TEST_F so the AllocateTest fixture's SetUp() actually runs and
// initializes the PAL (TEST(...) bypassed the fixture entirely).
TEST_F(AllocateTest, AllocateTensor) {
  // 2 KiB is ample for the metadata plus 6 floats of data.
  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(2048);
  executorch::runtime::MemoryAllocator temp_allocator(2048, temp_allocator_ptr);
  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);

  executorch::aten::SizesType sizes[3] = {1, 2, 3};
  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
  executorch::aten::StridesType strides[3] = {3, 3, 1};

  torch::executor::ArrayRef<exec_aten::SizesType> sizes_ref(sizes, 3);
  torch::executor::ArrayRef<exec_aten::StridesType> strides_ref(strides, 3);
  torch::executor::ArrayRef<exec_aten::DimOrderType> dim_orders_ref(
      dim_order, 3);

  // Fix: pass the ArrayRefs that were constructed above (previously they
  // were unused and the raw arrays were converted implicitly instead).
  torch::executor::allocate_tensor(
      ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float);

  free(temp_allocator_ptr);
}
47+
48+
// Verifies allocate_tensor dies with a "Failed to malloc" message when the
// temp allocator is too small to satisfy every allocation.
// Fix: use TEST_F so the fixture's SetUp() initializes the PAL, and drop the
// redundant manual runtime_init() call that compensated for TEST(...) not
// running the fixture.
TEST_F(AllocateTest, FailAllocateTensor) {
  // 16 bytes cannot satisfy all of the temp allocations.
  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(16);
  executorch::runtime::MemoryAllocator temp_allocator(16, temp_allocator_ptr);
  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);

  executorch::aten::SizesType sizes[3] = {1, 2, 3};
  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
  executorch::aten::StridesType strides[3] = {3, 3, 1};

  torch::executor::ArrayRef<exec_aten::SizesType> sizes_ref(sizes, 3);
  torch::executor::ArrayRef<exec_aten::StridesType> strides_ref(strides, 3);
  torch::executor::ArrayRef<exec_aten::DimOrderType> dim_orders_ref(
      dim_order, 3);

  // Fix: pass the ArrayRefs that were constructed above (previously unused).
  ET_EXPECT_DEATH(
      torch::executor::allocate_tensor(
          ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float),
      "Failed to malloc");

  free(temp_allocator_ptr);
}

kernels/portable/cpu/util/test/targets.bzl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,13 @@ def define_common_targets():
2121
"//executorch/kernels/portable/cpu/util:reduce_util",
2222
],
2323
)
24+
25+
    runtime.cxx_test(
        name = "allocate_tensor_test",
        srcs = ["allocate_tensor_test.cpp"],
        # NOTE(review): the test source also includes
        # executorch/runtime/platform/runtime.h and
        # executorch/test/utils/DeathTest.h — confirm those are provided
        # transitively by the deps below, or add explicit deps for them.
        deps = [
            "//executorch/runtime/core/exec_aten:lib",
            "//executorch/kernels/portable/cpu/util:allocate_tensor_util",
            "//executorch/runtime/kernel:kernel_includes",
        ],
    )

0 commit comments

Comments
 (0)