diff --git a/backends/aoti/slim/factory/from_etensor.h b/backends/aoti/slim/factory/from_etensor.h
new file mode 100644
index 00000000000..2a7f7063ad5
--- /dev/null
+++ b/backends/aoti/slim/factory/from_etensor.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/aoti/slim/factory/empty.h>
+#include <executorch/backends/aoti/slim/util/array_ref_util.h>
+#include <executorch/runtime/core/portable_type/tensor.h>
+#include <executorch/runtime/platform/assert.h>
+
+namespace executorch::backends::aoti::slim {
+
+/// Creates a SlimTensor from an ETensor (ExecuTorch portable tensor).
+///
+/// This factory function converts an ETensor to a SlimTensor, copying the
+/// data to the target device (CPU by default). The ETensor is assumed to
+/// always reside on CPU.
+///
+/// @param etensor The source ETensor (always on CPU).
+/// @param target_device The target device for the output SlimTensor.
+/// @return A new SlimTensor with data copied to the target device.
+///
+/// @note ETensor uses int32_t (SizesType/StridesType) for sizes and strides,
+/// while SlimTensor uses int64_t. This function handles the conversion.
+///
+/// Example usage:
+/// @code
+/// auto* cpu_tensor = &(args[i]->toTensor()); // ETensor from EValue
+/// SlimTensor gpu_tensor = from_etensor(*cpu_tensor, DEFAULT_CUDA_DEVICE);
+/// @endcode
+inline SlimTensor from_etensor(
+    const executorch::runtime::etensor::Tensor& etensor,
+    const c10::Device& target_device = CPU_DEVICE) {
+  // Step 1: Extract metadata from ETensor
+  const auto ndim = static_cast<size_t>(etensor.dim());
+
+  // Convert sizes from exec_aten::SizesType (int32_t) to int64_t
+  std::vector<int64_t> sizes_vec(ndim);
+  for (size_t i = 0; i < ndim; ++i) {
+    sizes_vec[i] = static_cast<int64_t>(etensor.size(static_cast<ssize_t>(i)));
+  }
+
+  // Convert strides from exec_aten::StridesType (int32_t) to int64_t
+  std::vector<int64_t> strides_vec(ndim);
+  auto etensor_strides = etensor.strides();
+  for (size_t i = 0; i < ndim; ++i) {
+    strides_vec[i] = static_cast<int64_t>(etensor_strides[i]);
+  }
+
+  // Map ETensor ScalarType to SlimTensor ScalarType
+  c10::ScalarType dtype = static_cast<c10::ScalarType>(etensor.scalar_type());
+
+  // Step 2: Create SlimTensor on target device
+  SlimTensor result = empty_strided(
+      makeArrayRef(sizes_vec), makeArrayRef(strides_vec), dtype, target_device);
+
+  // Step 3: Copy data from ETensor (CPU) to SlimTensor (target device)
+  // ETensor is always on CPU, so this handles CPU→CPU or CPU→CUDA copy
+  const void* src_data = etensor.const_data_ptr();
+  void* dst_data = result.data_ptr();
+  size_t nbytes = etensor.nbytes();
+
+  if (nbytes > 0) {
+    // const_cast is safe here because copy_ only reads from src_data
+    result.storage()->copy_(
+        dst_data, const_cast<void*>(src_data), nbytes, CPU_DEVICE);
+  }
+
+  return result;
+}
+
+/// Creates a SlimTensor from an ETensor pointer.
+///
+/// Convenience overload that accepts a pointer instead of a reference.
+///
+/// @param etensor Pointer to the source ETensor (must not be null).
+/// @param target_device The target device for the output SlimTensor.
+/// @return A new SlimTensor with data copied to the target device.
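+///
+/// Example usage (mirrors the reference-overload example above; `args` and
+/// `i` are assumed to come from the same EValue-unpacking context):
+/// @code
+/// auto* cpu_tensor = &(args[i]->toTensor()); // ETensor from EValue
+/// SlimTensor gpu_tensor = from_etensor(cpu_tensor, DEFAULT_CUDA_DEVICE);
+/// @endcode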
+inline SlimTensor from_etensor(
+    const executorch::runtime::etensor::Tensor* etensor,
+    const c10::Device& target_device = CPU_DEVICE) {
+  ET_CHECK_MSG(
+      etensor != nullptr, "from_etensor: etensor pointer cannot be nullptr");
+  return from_etensor(*etensor, target_device);
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/factory/targets.bzl b/backends/aoti/slim/factory/targets.bzl
index b26549c01a2..24434c73e66 100644
--- a/backends/aoti/slim/factory/targets.bzl
+++ b/backends/aoti/slim/factory/targets.bzl
@@ -29,3 +29,16 @@ def define_common_targets():
             "//executorch/backends/aoti/slim/util:size_util",
         ],
     )
+
+    runtime.cxx_library(
+        name = "from_etensor",
+        headers = [
+            "from_etensor.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/backends/aoti/slim/factory:empty",
+            "//executorch/backends/aoti/slim/util:array_ref_util",
+            "//executorch/runtime/core/portable_type:portable_type",
+        ],
+    )
diff --git a/backends/aoti/slim/factory/test/targets.bzl b/backends/aoti/slim/factory/test/targets.bzl
index 668d7f75385..f9f4fe17923 100644
--- a/backends/aoti/slim/factory/test/targets.bzl
+++ b/backends/aoti/slim/factory/test/targets.bzl
@@ -44,3 +44,16 @@ def define_common_targets():
             ],
             **backend_kwargs
         )
+
+    runtime.cxx_test(
+        name = "test_from_etensor" + backend_suffix,
+        srcs = [
+            "test_from_etensor.cpp",
+        ],
+        deps = [
+            "//executorch/backends/aoti/slim/core:storage",
+            "//executorch/backends/aoti/slim/factory:from_etensor",
+            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
+        ],
+        **backend_kwargs
+    )
diff --git a/backends/aoti/slim/factory/test/test_from_etensor.cpp b/backends/aoti/slim/factory/test/test_from_etensor.cpp
new file mode 100644
index 00000000000..5c6702c9d2d
--- /dev/null
+++ b/backends/aoti/slim/factory/test/test_from_etensor.cpp
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+#include <executorch/backends/aoti/slim/core/storage.h>
+#include <executorch/backends/aoti/slim/factory/from_etensor.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#ifdef CUDA_AVAILABLE
+#include <cuda_runtime.h>
+#endif
+
+namespace executorch::backends::aoti::slim {
+
+using executorch::runtime::etensor::ScalarType;
+using executorch::runtime::testing::TensorFactory;
+
+// =============================================================================
+// Test Device Helpers
+// =============================================================================
+
+inline std::vector<c10::Device> getTestDevices() {
+  std::vector<c10::Device> devices = {CPU_DEVICE};
+#ifdef CUDA_AVAILABLE
+  devices.push_back(DEFAULT_CUDA_DEVICE);
+#endif
+  return devices;
+}
+
+inline std::string deviceToString(
+    const testing::TestParamInfo<c10::Device>& info) {
+  return info.param.is_cpu() ? "CPU" : "CUDA";
+}
"CPU" : "CUDA"; +} + +// ============================================================================= +// Helper Functions +// ============================================================================= + +namespace { + +// Helper: Verify SlimTensor data matches expected values +// Handles GPU data by copying to host first +template +void verify_slimtensor_data( + const SlimTensor& tensor, + const T* expected_data, + size_t num_elements) { + size_t nbytes = num_elements * sizeof(T); + + if (tensor.is_cpu()) { + const T* actual = static_cast(tensor.data_ptr()); + for (size_t i = 0; i < num_elements; ++i) { + EXPECT_EQ(actual[i], expected_data[i]) + << "Mismatch at index " << i << ": expected " << expected_data[i] + << ", got " << actual[i]; + } + } else { +#ifdef CUDA_AVAILABLE + // Copy GPU data to host for verification + std::vector host_data(num_elements); + DeviceTraits::memcpy( + host_data.data(), + tensor.data_ptr(), + nbytes, + CPU_DEVICE, + tensor.device()); + for (size_t i = 0; i < num_elements; ++i) { + EXPECT_EQ(host_data[i], expected_data[i]) + << "Mismatch at index " << i << ": expected " << expected_data[i] + << ", got " << host_data[i]; + } +#else + FAIL() << "CUDA not available but tensor is on CUDA device"; +#endif + } +} + +} // namespace + +// ============================================================================= +// FromETensor Parameterized Tests (CPU and CUDA) +// ============================================================================= + +class FromETensorParamTest : public testing::TestWithParam { + protected: + void SetUp() override { + executorch::runtime::runtime_init(); + } + + c10::Device device() const { + return GetParam(); + } +}; + +TEST_P(FromETensorParamTest, BasicConversion) { + TensorFactory tf; + + // Create ETensor on CPU with known values + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + auto etensor = tf.make({2, 3}, data); + + // Convert to SlimTensor on target device + SlimTensor result = from_etensor(etensor, device()); + + // Verify metadata + EXPECT_EQ(result.dim(), 2u); + EXPECT_EQ(result.size(0), 2); + EXPECT_EQ(result.size(1), 3); + EXPECT_EQ(result.dtype(), c10::ScalarType::Float); + EXPECT_EQ(result.device().type(), device().type()); + EXPECT_EQ(result.numel(), 6u); + EXPECT_TRUE(result.is_contiguous()); + + // Verify data + verify_slimtensor_data(result, data.data(), data.size()); +} + +TEST_P(FromETensorParamTest, PreservesStrides) { + TensorFactory tf; + + // Create ETensor with non-default strides (column-major order) + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector strides = {1, 2}; // Column-major for 2x3 tensor + auto etensor = tf.make({2, 3}, data, strides); + + // Convert to SlimTensor + SlimTensor result = from_etensor(etensor, device()); + + // Verify strides are preserved + EXPECT_EQ(result.stride(0), 1); + EXPECT_EQ(result.stride(1), 2); + EXPECT_FALSE(result.is_contiguous()); +} + +TEST_P(FromETensorParamTest, Float32Dtype) { + TensorFactory tf; + std::vector data = {1.5f, 2.5f, 3.5f, 4.5f}; + auto etensor = tf.make({2, 2}, data); + + SlimTensor result = from_etensor(etensor, device()); + + EXPECT_EQ(result.dtype(), c10::ScalarType::Float); + EXPECT_EQ(result.itemsize(), sizeof(float)); + verify_slimtensor_data(result, data.data(), data.size()); +} + +TEST_P(FromETensorParamTest, Int64Dtype) { + TensorFactory tf; + std::vector data = {10, 20, 30, 40, 50, 60}; + auto etensor = tf.make({2, 3}, data); + + SlimTensor result = from_etensor(etensor, device()); + + 
+  auto etensor = tf.make({2, 3}, data, strides);
+
+  // Convert to SlimTensor
+  SlimTensor result = from_etensor(etensor, device());
+
+  // Verify strides are preserved
+  EXPECT_EQ(result.stride(0), 1);
+  EXPECT_EQ(result.stride(1), 2);
+  EXPECT_FALSE(result.is_contiguous());
+}
+
+TEST_P(FromETensorParamTest, Float32Dtype) {
+  TensorFactory<ScalarType::Float> tf;
+  std::vector<float> data = {1.5f, 2.5f, 3.5f, 4.5f};
+  auto etensor = tf.make({2, 2}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Float);
+  EXPECT_EQ(result.itemsize(), sizeof(float));
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, Int64Dtype) {
+  TensorFactory<ScalarType::Long> tf;
+  std::vector<int64_t> data = {10, 20, 30, 40, 50, 60};
+  auto etensor = tf.make({2, 3}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Long);
+  EXPECT_EQ(result.itemsize(), sizeof(int64_t));
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, Int32Dtype) {
+  TensorFactory<ScalarType::Int> tf;
+  std::vector<int32_t> data = {100, 200, 300, 400};
+  auto etensor = tf.make({4}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Int);
+  EXPECT_EQ(result.itemsize(), sizeof(int32_t));
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, Int16Dtype) {
+  TensorFactory<ScalarType::Short> tf;
+  std::vector<int16_t> data = {-1, 0, 1, 2, 3, 4};
+  auto etensor = tf.make({2, 3}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Short);
+  EXPECT_EQ(result.itemsize(), sizeof(int16_t));
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, Int8Dtype) {
+  TensorFactory<ScalarType::Char> tf;
+  std::vector<int8_t> data = {-128, -1, 0, 1, 127};
+  auto etensor = tf.make({5}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Char);
+  EXPECT_EQ(result.itemsize(), sizeof(int8_t));
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, BoolDtype) {
+  TensorFactory<ScalarType::Bool> tf;
+  // TensorFactory uses uint8_t internally
+  std::vector<uint8_t> data = {1, 0, 1, 0, 1, 1};
+  auto etensor = tf.make({2, 3}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dtype(), c10::ScalarType::Bool);
+  EXPECT_EQ(result.numel(), 6u);
+
+  // Verify data using uint8_t representation
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, LargeTensor) {
+  TensorFactory<ScalarType::Float> tf;
+
+  // Create a larger tensor
+  constexpr size_t kSize = 1024;
+  std::vector<float> data(kSize);
+  for (size_t i = 0; i < kSize; ++i) {
+    data[i] = static_cast<float>(i) * 0.5f;
+  }
+  auto etensor = tf.make({32, 32}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.numel(), kSize);
+  EXPECT_EQ(result.size(0), 32);
+  EXPECT_EQ(result.size(1), 32);
+
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, OneDimensional) {
+  TensorFactory<ScalarType::Float> tf;
+  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+  auto etensor = tf.make({5}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dim(), 1u);
+  EXPECT_EQ(result.size(0), 5);
+  EXPECT_EQ(result.stride(0), 1);
+  EXPECT_TRUE(result.is_contiguous());
+
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, ThreeDimensional) {
+  TensorFactory<ScalarType::Float> tf;
+  std::vector<float> data(24);
+  for (size_t i = 0; i < 24; ++i) {
+    data[i] = static_cast<float>(i);
+  }
+  auto etensor = tf.make({2, 3, 4}, data);
+
+  SlimTensor result = from_etensor(etensor, device());
+
+  EXPECT_EQ(result.dim(), 3u);
+  EXPECT_EQ(result.size(0), 2);
+  EXPECT_EQ(result.size(1), 3);
+  EXPECT_EQ(result.size(2), 4);
+  EXPECT_TRUE(result.is_contiguous());
+
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
+TEST_P(FromETensorParamTest, PointerOverload) {
+  TensorFactory<ScalarType::Float> tf;
+  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
+  auto etensor = tf.make({2, 2}, data);
+
+  // Use pointer overload
+  SlimTensor result = from_etensor(&etensor, device());
+
+  EXPECT_EQ(result.dim(), 2u);
+  EXPECT_EQ(result.numel(), 4u);
+
+  verify_slimtensor_data(result, data.data(), data.size());
+}
+
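+// Instantiate the parameterized suite once per device returned by
+// getTestDevices(): CPU always, plus CUDA when CUDA_AVAILABLE is defined.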
+INSTANTIATE_TEST_SUITE_P(
+    DeviceTests,
+    FromETensorParamTest,
+    testing::ValuesIn(getTestDevices()),
+    deviceToString);
+
+// =============================================================================
+// CPU-Only Tests (require direct data access without CUDA memcpy)
+// =============================================================================
+
+TEST(FromETensorCPUTest, DataIsIndependent) {
+  executorch::runtime::runtime_init();
+  TensorFactory<ScalarType::Float> tf;
+
+  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
+  auto etensor = tf.make({2, 2}, data);
+
+  SlimTensor result = from_etensor(etensor, CPU_DEVICE);
+
+  // Modify source data
+  float* etensor_data = etensor.mutable_data_ptr<float>();
+  etensor_data[0] = 999.0f;
+
+  // SlimTensor should have its own copy
+  const float* result_data = static_cast<const float*>(result.data_ptr());
+  EXPECT_FLOAT_EQ(result_data[0], 1.0f);
+}
+
+TEST(FromETensorCPUTest, ModifySlimTensorDoesNotAffectETensor) {
+  executorch::runtime::runtime_init();
+  TensorFactory<ScalarType::Float> tf;
+
+  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
+  auto etensor = tf.make({2, 2}, data);
+
+  SlimTensor result = from_etensor(etensor, CPU_DEVICE);
+
+  // Modify SlimTensor
+  float* result_data = static_cast<float*>(result.data_ptr());
+  result_data[0] = 999.0f;
+
+  // ETensor should be unchanged
+  const float* etensor_data = etensor.const_data_ptr<float>();
+  EXPECT_FLOAT_EQ(etensor_data[0], 1.0f);
+}
+
+} // namespace executorch::backends::aoti::slim