diff --git a/backends/aoti/slim/c10/core/Device.h b/backends/aoti/slim/c10/core/Device.h
new file mode 100644
index 00000000000..5638f6f80e8
--- /dev/null
+++ b/backends/aoti/slim/c10/core/Device.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <ostream>
+#include <string>
+
+#include <executorch/backends/aoti/slim/c10/core/DeviceType.h>
+#include <executorch/runtime/platform/assert.h>
+
+namespace executorch::backends::aoti::slim::c10 {
+
+/// An index representing a specific device; e.g., the 1 in GPU 1.
+/// A DeviceIndex is not independently meaningful without knowing
+/// the DeviceType it is associated with; try to use Device rather than
+/// DeviceIndex directly.
+using DeviceIndex = int8_t;
+
+/// Represents a compute device on which a tensor is located.
+/// A device is uniquely identified by a type (e.g., CPU) and a device index.
+struct Device final {
+  /// Constructs a new Device from a DeviceType and an optional device index.
+  /// @param type The type of device.
+  /// @param index The device index. For CPU, this should be -1 or 0.
+  explicit Device(DeviceType type, DeviceIndex index = -1)
+      : type_(type), index_(index) {
+    validate();
+  }
+
+  /// Constructs a Device from a string description.
+  /// The string must be "cpu" or "cpu:0".
+  /* implicit */ Device(const std::string& device_string)
+      : Device(DeviceType::CPU) {
+    ET_CHECK_MSG(!device_string.empty(), "Device string must not be empty");
+
+    if (device_string == "cpu" || device_string == "CPU") {
+      type_ = DeviceType::CPU;
+      index_ = -1;
+    } else if (device_string == "cpu:0" || device_string == "CPU:0") {
+      type_ = DeviceType::CPU;
+      index_ = static_cast<DeviceIndex>(device_string.back() - '0');
+    } else {
+      ET_CHECK_MSG(
+          false,
+          "Invalid device string: %s. Currently only 'cpu' is supported.",
+          device_string.c_str());
+    }
+    validate();
+  }
+
+  /// Returns true if the type and index of this Device matches that of other.
+  bool operator==(const Device& other) const noexcept {
+    return this->type_ == other.type_ && this->index_ == other.index_;
+  }
+
+  /// Returns true if the type or index of this Device differs from that of
+  /// other.
+  bool operator!=(const Device& other) const noexcept {
+    return !(*this == other);
+  }
+
+  /// Sets the device index.
+  void set_index(DeviceIndex index) {
+    index_ = index;
+  }
+
+  /// Returns the type of device this is.
+  DeviceType type() const noexcept {
+    return type_;
+  }
+
+  /// Returns the device index.
+  DeviceIndex index() const noexcept {
+    return index_;
+  }
+
+  /// Returns true if the device has a non-default index.
+  bool has_index() const noexcept {
+    return index_ != -1;
+  }
+
+  /// Returns true if the device is of CPU type.
+  bool is_cpu() const noexcept {
+    return type_ == DeviceType::CPU;
+  }
+
+  /// Returns a string representation of the device (e.g., "cpu" or "cpu:0").
+  std::string str() const {
+    std::string str = DeviceTypeName(type(), /* lower_case */ true);
+    if (has_index()) {
+      str.push_back(':');
+      str.append(std::to_string(index()));
+    }
+    return str;
+  }
+
+ private:
+  DeviceType type_;
+  DeviceIndex index_ = -1;
+
+  void validate() {
+    ET_DCHECK_MSG(
+        index_ >= -1,
+        "Device index must be -1 or non-negative, got %d",
+        static_cast<int>(index_));
+    ET_DCHECK_MSG(
+        !is_cpu() || index_ <= 0,
+        "CPU device index must be -1 or zero, got %d",
+        static_cast<int>(index_));
+  }
+};
+
+inline std::ostream& operator<<(std::ostream& stream, const Device& device) {
+  stream << device.str();
+  return stream;
+}
+
+} // namespace executorch::backends::aoti::slim::c10
+
+namespace std {
+template <>
+struct hash<executorch::backends::aoti::slim::c10::Device> {
+  size_t operator()(
+      executorch::backends::aoti::slim::c10::Device d) const noexcept {
+    static_assert(
+        sizeof(executorch::backends::aoti::slim::c10::DeviceType) == 1,
+        "DeviceType is not 8-bit");
+    static_assert(
+        sizeof(executorch::backends::aoti::slim::c10::DeviceIndex) == 1,
+        "DeviceIndex is not 8-bit");
+    uint32_t bits = static_cast<uint32_t>(static_cast<uint8_t>(d.type()))
+            << 16 |
+        static_cast<uint32_t>(static_cast<uint8_t>(d.index()));
+    return std::hash<uint32_t>{}(bits);
+  }
+};
+} // namespace std
diff --git a/backends/aoti/slim/c10/core/DeviceType.h b/backends/aoti/slim/c10/core/DeviceType.h
new file mode 100644
index 00000000000..c8c36c7faab
--- /dev/null
+++ b/backends/aoti/slim/c10/core/DeviceType.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <ostream>
+#include <string>
+
+#include <executorch/runtime/platform/assert.h>
+
+namespace executorch::backends::aoti::slim::c10 {
+
+/// Enum representing the type of device.
+enum class DeviceType : int8_t {
+  CPU = 0,
+  COMPILE_TIME_MAX_DEVICE_TYPES = 1,
+};
+
+constexpr DeviceType kCPU = DeviceType::CPU;
+
+/// Maximum number of device types at compile time.
+constexpr int COMPILE_TIME_MAX_DEVICE_TYPES =
+    static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES);
+
+/// Returns the name of the device type as a string.
+/// @param d The device type.
+/// @param lower_case If true, returns the name in lower case.
+/// @return The name of the device type.
+inline std::string DeviceTypeName(DeviceType d, bool lower_case = false) {
+  switch (d) {
+    case DeviceType::CPU:
+      return lower_case ? "cpu" : "CPU";
+    default:
+      ET_CHECK_MSG(false, "Unknown device type: %d", static_cast<int>(d));
+  }
+}
+
+/// Checks if the device type is valid.
+/// @param d The device type to check.
+/// @return true if the device type is valid, false otherwise.
+inline bool isValidDeviceType(DeviceType d) {
+  return d == DeviceType::CPU;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, DeviceType type) {
+  stream << DeviceTypeName(type, /* lower_case */ true);
+  return stream;
+}
+
+} // namespace executorch::backends::aoti::slim::c10
+
+namespace std {
+template <>
+struct hash<executorch::backends::aoti::slim::c10::DeviceType> {
+  std::size_t operator()(
+      executorch::backends::aoti::slim::c10::DeviceType k) const {
+    return std::hash<int>()(static_cast<int>(k));
+  }
+};
+} // namespace std
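As a quick illustration of the Device API above, here is a minimal usage sketch (illustrative only, not part of the diff; it uses nothing beyond what Device.h and DeviceType.h declare):

#include <executorch/backends/aoti/slim/c10/core/Device.h>

#include <unordered_set>

using executorch::backends::aoti::slim::c10::Device;
using executorch::backends::aoti::slim::c10::DeviceType;

int main() {
  Device a(DeviceType::CPU); // default index -1; a.str() == "cpu"
  Device b("cpu:0"); // parsed from a string; b.str() == "cpu:0"
  // Devices compare by (type, index), so a != b here, and the std::hash
  // specialization packs both fields into one 32-bit key for containers.
  std::unordered_set<Device> devices{a, b};
  return devices.size() == 2 ? 0 : 1;
}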
diff --git a/backends/aoti/slim/c10/core/ScalarType.h b/backends/aoti/slim/c10/core/ScalarType.h
new file mode 100644
index 00000000000..1ca1a1429ed
--- /dev/null
+++ b/backends/aoti/slim/c10/core/ScalarType.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+
+#include <executorch/runtime/platform/assert.h>
+
+namespace executorch::backends::aoti::slim::c10 {
+
+/// Enum representing the scalar type (dtype) of tensor elements.
+/// Note: Enum values must match PyTorch's c10::ScalarType for compatibility.
+enum class ScalarType : int8_t {
+  // Byte = 0,
+  // Char = 1,
+  // Short = 2,
+  // Int = 3,
+  // Long = 4,
+  Float = 6,
+  // Bool = 11,
+  // BFloat16 = 15,
+  Undefined = -1,
+  NumOptions = 7,
+};
+
+/// Constant for Float scalar type.
+constexpr ScalarType kFloat = ScalarType::Float;
+
+/// Returns the size in bytes of a single element of the given scalar type.
+/// @param t The scalar type.
+/// @return The size in bytes of a single element.
+inline size_t elementSize(ScalarType t) {
+  switch (t) {
+    case ScalarType::Float:
+      return sizeof(float);
+    default:
+      ET_CHECK_MSG(false, "Unknown ScalarType: %d", static_cast<int>(t));
+  }
+}
+
+/// Returns the name of the scalar type as a string.
+/// @param t The scalar type.
+/// @return The name of the scalar type.
+inline const char* toString(ScalarType t) {
+  switch (t) {
+    case ScalarType::Float:
+      return "Float";
+    case ScalarType::Undefined:
+      return "Undefined";
+    default:
+      return "UNKNOWN_SCALAR";
+  }
+}
+
+/// Checks if the scalar type is a floating point type.
+/// @param t The scalar type to check.
+/// @return true if the scalar type is floating point, false otherwise.
+inline bool isFloatingType(ScalarType t) {
+  return t == ScalarType::Float;
+}
+
+/// Checks if the scalar type is an integral type (including bool).
+/// @param t The scalar type to check.
+/// @param includeBool Whether to consider Bool as integral.
+/// @return true if the scalar type is integral, false otherwise.
+inline bool isIntegralType(ScalarType t, bool /*includeBool*/) {
+  (void)t;
+  return false;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, ScalarType scalar_type) {
+  return stream << toString(scalar_type);
+}
+
+} // namespace executorch::backends::aoti::slim::c10
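To show how elementSize() is meant to be combined with an element count, a small sketch (illustrative only; contiguous_nbytes is a hypothetical helper, not part of the diff):

#include <executorch/backends/aoti/slim/c10/core/ScalarType.h>

#include <cstddef>

using executorch::backends::aoti::slim::c10::ScalarType;
using executorch::backends::aoti::slim::c10::elementSize;

// Bytes required by a contiguous buffer of `numel` elements of dtype `t`.
inline size_t contiguous_nbytes(size_t numel, ScalarType t) {
  return numel * elementSize(t); // e.g., 8 * elementSize(Float) == 32
}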
diff --git a/backends/aoti/slim/c10/core/TARGETS b/backends/aoti/slim/c10/core/TARGETS
new file mode 100644
index 00000000000..77871de4469
--- /dev/null
+++ b/backends/aoti/slim/c10/core/TARGETS
@@ -0,0 +1,3 @@
+load(":targets.bzl", "define_common_targets")
+
+define_common_targets()
diff --git a/backends/aoti/slim/c10/core/targets.bzl b/backends/aoti/slim/c10/core/targets.bzl
new file mode 100644
index 00000000000..9b7d1259df0
--- /dev/null
+++ b/backends/aoti/slim/c10/core/targets.bzl
@@ -0,0 +1,52 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Define targets for SlimTensor c10 core module."""
+
+    # Header-only library for DeviceType
+    runtime.cxx_library(
+        name = "device_type",
+        headers = [
+            "DeviceType.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/runtime/platform:platform",
+        ],
+    )
+
+    # Header-only library for Device
+    runtime.cxx_library(
+        name = "device",
+        headers = [
+            "Device.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            ":device_type",
+            "//executorch/runtime/platform:platform",
+        ],
+    )
+
+    # Header-only library for ScalarType
+    runtime.cxx_library(
+        name = "scalar_type",
+        headers = [
+            "ScalarType.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/runtime/platform:platform",
+        ],
+    )
+
+    # Combined c10 core library
+    runtime.cxx_library(
+        name = "core",
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            ":device",
+            ":device_type",
+            ":scalar_type",
+        ],
+    )
diff --git a/backends/aoti/slim/c10/core/test/TARGETS b/backends/aoti/slim/c10/core/test/TARGETS
new file mode 100644
index 00000000000..77871de4469
--- /dev/null
+++ b/backends/aoti/slim/c10/core/test/TARGETS
@@ -0,0 +1,3 @@
+load(":targets.bzl", "define_common_targets")
+
+define_common_targets()
diff --git a/backends/aoti/slim/c10/core/test/targets.bzl b/backends/aoti/slim/c10/core/test/targets.bzl
new file mode 100644
index 00000000000..f7abf59a273
--- /dev/null
+++ b/backends/aoti/slim/c10/core/test/targets.bzl
@@ -0,0 +1,25 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Define test targets for SlimTensor c10 core module."""
+
+    runtime.cxx_test(
+        name = "test_device",
+        srcs = [
+            "test_device.cpp",
+        ],
+        deps = [
+            "//executorch/backends/aoti/slim/c10/core:device",
+            "//executorch/backends/aoti/slim/c10/core:device_type",
+        ],
+    )
+
+    runtime.cxx_test(
+        name = "test_scalar_type",
+        srcs = [
+            "test_scalar_type.cpp",
+        ],
+        deps = [
+            "//executorch/backends/aoti/slim/c10/core:scalar_type",
+        ],
+    )
+ */ + +#include +#include + +#include + +using namespace executorch::backends::aoti::slim::c10; + +class DeviceTypeTest : public ::testing::Test {}; + +TEST_F(DeviceTypeTest, CPUEnumValue) { + // Verify CPU has the correct enum value (0) + EXPECT_EQ(static_cast(DeviceType::CPU), 0); +} + +TEST_F(DeviceTypeTest, DeviceTypeName) { + // Verify DeviceTypeName returns correct strings + EXPECT_EQ(DeviceTypeName(DeviceType::CPU, false), "CPU"); + EXPECT_EQ(DeviceTypeName(DeviceType::CPU, true), "cpu"); +} + +TEST_F(DeviceTypeTest, IsValidDeviceType) { + // Verify isValidDeviceType works correctly + EXPECT_TRUE(isValidDeviceType(DeviceType::CPU)); +} + +TEST_F(DeviceTypeTest, KCPUConstant) { + // Verify kCPU constant + EXPECT_EQ(kCPU, DeviceType::CPU); +} + +class DeviceTest : public ::testing::Test {}; + +TEST_F(DeviceTest, ConstructFromDeviceType) { + // Construct Device from DeviceType + Device cpu_device(DeviceType::CPU); + + EXPECT_TRUE(cpu_device.is_cpu()); + EXPECT_EQ(cpu_device.type(), DeviceType::CPU); + EXPECT_EQ(cpu_device.index(), -1); // Default index + EXPECT_FALSE(cpu_device.has_index()); +} + +TEST_F(DeviceTest, ConstructWithIndex) { + // Construct Device with explicit index + Device cpu_device(DeviceType::CPU, 0); + + EXPECT_TRUE(cpu_device.is_cpu()); + EXPECT_EQ(cpu_device.type(), DeviceType::CPU); + EXPECT_EQ(cpu_device.index(), 0); + EXPECT_TRUE(cpu_device.has_index()); +} + +TEST_F(DeviceTest, ConstructFromString) { + // Construct Device from string + Device cpu1("cpu"); + EXPECT_TRUE(cpu1.is_cpu()); + EXPECT_EQ(cpu1.index(), -1); + + Device cpu2("CPU"); + EXPECT_TRUE(cpu2.is_cpu()); + EXPECT_EQ(cpu2.index(), -1); + + Device cpu3("cpu:0"); + EXPECT_TRUE(cpu3.is_cpu()); + EXPECT_EQ(cpu3.index(), 0); +} + +TEST_F(DeviceTest, Equality) { + Device cpu1(DeviceType::CPU, 0); + Device cpu2(DeviceType::CPU, 0); + Device cpu3(DeviceType::CPU, -1); + + EXPECT_EQ(cpu1, cpu2); + EXPECT_NE(cpu1, cpu3); +} + +TEST_F(DeviceTest, Str) { + Device cpu1(DeviceType::CPU); + EXPECT_EQ(cpu1.str(), "cpu"); + + Device cpu2(DeviceType::CPU, 0); + EXPECT_EQ(cpu2.str(), "cpu:0"); +} + +TEST_F(DeviceTest, SetIndex) { + Device cpu(DeviceType::CPU); + EXPECT_EQ(cpu.index(), -1); + + cpu.set_index(0); + EXPECT_EQ(cpu.index(), 0); + EXPECT_TRUE(cpu.has_index()); +} + +TEST_F(DeviceTest, Hash) { + // Verify Device can be hashed (for use in unordered containers) + Device cpu1(DeviceType::CPU, 0); + Device cpu2(DeviceType::CPU, 0); + Device cpu3(DeviceType::CPU, -1); + + std::hash hasher; + EXPECT_EQ(hasher(cpu1), hasher(cpu2)); + EXPECT_NE(hasher(cpu1), hasher(cpu3)); +} diff --git a/backends/aoti/slim/c10/core/test/test_scalar_type.cpp b/backends/aoti/slim/c10/core/test/test_scalar_type.cpp new file mode 100644 index 00000000000..673641d84c7 --- /dev/null +++ b/backends/aoti/slim/c10/core/test/test_scalar_type.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include + +using namespace executorch::backends::aoti::slim::c10; + +class ScalarTypeTest : public ::testing::Test {}; + +TEST_F(ScalarTypeTest, FloatEnumValue) { + // Verify Float has the correct enum value (6) to match PyTorch + EXPECT_EQ(static_cast(ScalarType::Float), 6); +} + +TEST_F(ScalarTypeTest, KFloatConstant) { + // Verify kFloat constant + EXPECT_EQ(kFloat, ScalarType::Float); +} + +TEST_F(ScalarTypeTest, ElementSizeFloat) { + // Verify elementSize returns correct size for Float (4 bytes) + EXPECT_EQ(elementSize(ScalarType::Float), sizeof(float)); + EXPECT_EQ(elementSize(ScalarType::Float), 4); +} + +TEST_F(ScalarTypeTest, ToStringFloat) { + // Verify toString returns correct string for Float + EXPECT_STREQ(toString(ScalarType::Float), "Float"); +} + +TEST_F(ScalarTypeTest, ToStringUndefined) { + // Verify toString returns correct string for Undefined + EXPECT_STREQ(toString(ScalarType::Undefined), "Undefined"); +} + +TEST_F(ScalarTypeTest, IsFloatingType) { + // Verify isFloatingType works correctly + EXPECT_TRUE(isFloatingType(ScalarType::Float)); +} + +TEST_F(ScalarTypeTest, IsIntegralType) { + // Verify isIntegralType works correctly + // Currently no integral types are supported, so Float should return false + EXPECT_FALSE(isIntegralType(ScalarType::Float, false)); + EXPECT_FALSE(isIntegralType(ScalarType::Float, true)); +} + +TEST_F(ScalarTypeTest, StreamOperator) { + // Verify stream operator works + std::ostringstream oss; + oss << ScalarType::Float; + EXPECT_EQ(oss.str(), "Float"); +} diff --git a/backends/aoti/slim/core/Storage.h b/backends/aoti/slim/core/Storage.h new file mode 100644 index 00000000000..ed8bdf88b49 --- /dev/null +++ b/backends/aoti/slim/core/Storage.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace executorch::backends::aoti::slim { + +/// Type alias for deleter function pointer. +using DeleterFn = void (*)(void*); + +namespace detail { +/// No-op deleter for non-owning storage. +inline void noop(void*) {} +} // namespace detail + +/// Default CPU device constant. +inline const c10::Device CPU_DEVICE = c10::Device(c10::DeviceType::CPU, 0); + +/// DeviceTraits template for device-specific operations. +/// Device-specific implementations provide allocate(), free(), and memcpy(). +template +struct DeviceTraits; + +/// CPU specialization of DeviceTraits. +/// Provides CPU memory allocation and copy operations using malloc/free/memcpy. +template <> +struct DeviceTraits { + /// Allocates CPU memory using malloc. + /// @param nbytes Number of bytes to allocate. + /// @param device The target device (unused for CPU). + /// @return Pointer to allocated memory. + static void* allocate(size_t nbytes, const c10::Device& device = CPU_DEVICE) { + (void)device; + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + return malloc(nbytes); + } + + /// Frees CPU memory using free. + /// @param ptr Pointer to memory to free. + static void free(void* ptr) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + std::free(ptr); + } + + /// Copies memory between CPU locations. + /// @param dst Destination pointer. + /// @param src Source pointer. + /// @param nbytes Number of bytes to copy. + /// @param dst_device Destination device (unused for CPU-to-CPU). 
+
+/**
+ * MaybeOwningStorage - A storage class that manages tensor data memory.
+ *
+ * This class provides owning memory storage for tensor data on CPU.
+ * Owning storage allocates and manages its own memory, freeing it upon
+ * destruction.
+ *
+ * Current limitations:
+ * - CPU device only
+ * - Owning mode only
+ * Future diffs will add support for non-owning storage and other devices.
+ *
+ * Thread Safety: NOT THREAD-SAFE
+ * - Uses the non-atomic SharedPtr for reference counting
+ * - Must only be used in single-threaded contexts
+ */
+class MaybeOwningStorage {
+ public:
+  /// Constructs owning storage with allocated memory.
+  /// @param device The device for storage (must be CPU).
+  /// @param nbytes Number of bytes to allocate.
+  MaybeOwningStorage(const c10::Device& device, size_t nbytes)
+      : device_(device), capacity_(nbytes), is_owning_(true) {
+    ET_CHECK_MSG(
+        device.is_cpu(),
+        "Only CPU device is currently supported, got: %s",
+        device.str().c_str());
+
+    data_ = DeviceTraits<c10::DeviceType::CPU>::allocate(nbytes, device);
+    deleter_ = DeviceTraits<c10::DeviceType::CPU>::free;
+  }
+
+  /// Default constructor is deleted - storage must have a device.
+  MaybeOwningStorage() = delete;
+
+  /// Copy constructor is deleted - use SharedPtr for shared ownership.
+  MaybeOwningStorage(const MaybeOwningStorage&) = delete;
+
+  /// Copy assignment is deleted - use SharedPtr for shared ownership.
+  MaybeOwningStorage& operator=(const MaybeOwningStorage&) = delete;
+
+  /// Move constructor.
+  MaybeOwningStorage(MaybeOwningStorage&& other) noexcept
+      : device_(other.device_),
+        data_(other.data_),
+        capacity_(other.capacity_),
+        deleter_(other.deleter_),
+        is_owning_(other.is_owning_) {
+    other.data_ = nullptr;
+    other.capacity_ = 0;
+    other.deleter_ = detail::noop;
+    other.is_owning_ = false;
+  }
+
+  /// Move assignment operator.
+  MaybeOwningStorage& operator=(MaybeOwningStorage&& other) noexcept {
+    if (this != &other) {
+      free_data();
+
+      device_ = other.device_;
+      data_ = other.data_;
+      capacity_ = other.capacity_;
+      deleter_ = other.deleter_;
+      is_owning_ = other.is_owning_;
+
+      other.data_ = nullptr;
+      other.capacity_ = 0;
+      other.deleter_ = detail::noop;
+      other.is_owning_ = false;
+    }
+    return *this;
+  }
+
+  /// Destructor - frees owned memory.
+  ~MaybeOwningStorage() {
+    free_data();
+  }
+
+  /// Copies data between storage locations.
+  /// @param dst_data_ptr Destination data pointer.
+  /// @param src_data_ptr Source data pointer.
+  /// @param nbytes Number of bytes to copy.
+  /// @param src_device Source device.
+  void copy_(
+      void* dst_data_ptr,
+      void* src_data_ptr,
+      size_t nbytes,
+      const c10::Device& src_device) {
+    ET_CHECK_MSG(
+        dst_data_ptr, "Storage copy failed: dst_data_ptr cannot be nullptr");
+    ET_CHECK_MSG(
+        src_data_ptr, "Storage copy failed: src_data_ptr cannot be nullptr");
+
+    if (dst_data_ptr == src_data_ptr) {
+      return;
+    }
+
+    ET_CHECK_MSG(
+        device_.is_cpu() && src_device.is_cpu(),
+        "Only CPU-to-CPU copy is currently supported");
+
+    DeviceTraits<c10::DeviceType::CPU>::memcpy(
+        dst_data_ptr, src_data_ptr, nbytes, device_, src_device);
+  }
+
+  /// Creates a clone of this storage on the specified device.
+  /// @param device Target device for the clone (must be CPU).
+  /// @return A new MaybeOwningStorage with copied data.
+  MaybeOwningStorage clone(const c10::Device& device) const {
+    ET_CHECK_MSG(data_, "Storage clone failed: source data cannot be nullptr");
+    ET_CHECK_MSG(
+        device.is_cpu(), "Only CPU device is currently supported for clone");
+
+    MaybeOwningStorage cloned_storage(device, capacity_);
+
+    DeviceTraits<c10::DeviceType::CPU>::memcpy(
+        cloned_storage.data_, data_, capacity_, device, device_);
+
+    return cloned_storage;
+  }
+
+  /// Returns the data pointer, or nullptr for zero-sized storage.
+  void* data() const {
+    if (capacity_ == 0) {
+      return nullptr;
+    }
+    return data_;
+  }
+
+  /// Returns the device this storage is on.
+  const c10::Device& device() const {
+    return device_;
+  }
+
+  /// Returns the capacity in bytes.
+  size_t nbytes() const {
+    return capacity_;
+  }
+
+  /// Returns true if this storage owns its memory.
+  bool is_owning() const {
+    return is_owning_;
+  }
+
+  /// Returns true if the storage can be resized (must be owning).
+  bool is_resizable() const {
+    return is_owning_;
+  }
+
+ private:
+  c10::Device device_ = CPU_DEVICE;
+  void* data_ = nullptr;
+  size_t capacity_ = 0;
+  DeleterFn deleter_ = detail::noop;
+  bool is_owning_ = false;
+
+  /// Frees the data if non-null.
+  void free_data() {
+    if (data_ != nullptr) {
+      deleter_(data_);
+      data_ = nullptr;
+    }
+  }
+};
+
+/// Storage is a shared pointer to MaybeOwningStorage.
+/// Multiple tensors can share the same underlying storage.
+using Storage = SharedPtr<MaybeOwningStorage>;
+
+} // namespace executorch::backends::aoti::slim
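A minimal end-to-end sketch of the storage layer (illustrative only, not part of the diff; it relies only on MaybeOwningStorage, Storage, and make_shared as defined above):

#include <executorch/backends/aoti/slim/core/Storage.h>

using namespace executorch::backends::aoti::slim;

int main() {
  // Two handles sharing one 16-float CPU allocation.
  Storage s1 = make_shared<MaybeOwningStorage>(CPU_DEVICE, 16 * sizeof(float));
  Storage s2 = s1; // non-atomic refcount: 1 -> 2
  static_cast<float*>(s1->data())[0] = 42.0f;
  // Both handles observe the write; the buffer is freed when the last
  // handle is destroyed.
  return static_cast<float*>(s2->data())[0] == 42.0f ? 0 : 1;
}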
diff --git a/backends/aoti/slim/core/TARGETS b/backends/aoti/slim/core/TARGETS
new file mode 100644
index 00000000000..77871de4469
--- /dev/null
+++ b/backends/aoti/slim/core/TARGETS
@@ -0,0 +1,3 @@
+load(":targets.bzl", "define_common_targets")
+
+define_common_targets()
diff --git a/backends/aoti/slim/core/targets.bzl b/backends/aoti/slim/core/targets.bzl
new file mode 100644
index 00000000000..12de67bf8b1
--- /dev/null
+++ b/backends/aoti/slim/core/targets.bzl
@@ -0,0 +1,19 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Define targets for SlimTensor core module."""
+
+    # Header-only library for Storage
+    runtime.cxx_library(
+        name = "storage",
+        headers = [
+            "Storage.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/backends/aoti/slim/c10/core:device",
+            "//executorch/backends/aoti/slim/c10/core:scalar_type",
+            "//executorch/backends/aoti/slim/util:shared_ptr",
+            "//executorch/runtime/platform:platform",
+        ],
+    )
diff --git a/backends/aoti/slim/core/test/TARGETS b/backends/aoti/slim/core/test/TARGETS
new file mode 100644
index 00000000000..77871de4469
--- /dev/null
+++ b/backends/aoti/slim/core/test/TARGETS
@@ -0,0 +1,3 @@
+load(":targets.bzl", "define_common_targets")
+
+define_common_targets()
diff --git a/backends/aoti/slim/core/test/targets.bzl b/backends/aoti/slim/core/test/targets.bzl
new file mode 100644
index 00000000000..8e580f5ed0e
--- /dev/null
+++ b/backends/aoti/slim/core/test/targets.bzl
@@ -0,0 +1,14 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Define test targets for SlimTensor core module."""
+
+    runtime.cxx_test(
+        name = "test_storage",
+        srcs = [
+            "test_storage.cpp",
+        ],
+        deps = [
+            "//executorch/backends/aoti/slim/core:storage",
+        ],
+    )
diff --git a/backends/aoti/slim/core/test/test_storage.cpp b/backends/aoti/slim/core/test/test_storage.cpp
new file mode 100644
index 00000000000..42a8678c888
--- /dev/null
+++ b/backends/aoti/slim/core/test/test_storage.cpp
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/aoti/slim/core/Storage.h>
+
+#include <gtest/gtest.h>
+
+namespace executorch::backends::aoti::slim {
+
+// =============================================================================
+// DeviceTraits Tests
+// =============================================================================
+
+TEST(DeviceTraitsCPUTest, AllocateAndFree) {
+  constexpr size_t kSize = 1024;
+  void* ptr = DeviceTraits<c10::DeviceType::CPU>::allocate(kSize);
+  ASSERT_NE(ptr, nullptr);
+
+  DeviceTraits<c10::DeviceType::CPU>::free(ptr);
+}
+
+TEST(DeviceTraitsCPUTest, AllocateZeroBytes) {
+  void* ptr = DeviceTraits<c10::DeviceType::CPU>::allocate(0);
+  DeviceTraits<c10::DeviceType::CPU>::free(ptr);
+}
+
+TEST(DeviceTraitsCPUTest, MemcpyCPUToCPU) {
+  constexpr size_t kSize = 256;
+  float* src = static_cast<float*>(
+      DeviceTraits<c10::DeviceType::CPU>::allocate(kSize * sizeof(float)));
+  float* dst = static_cast<float*>(
+      DeviceTraits<c10::DeviceType::CPU>::allocate(kSize * sizeof(float)));
+
+  for (size_t i = 0; i < kSize; ++i) {
+    src[i] = static_cast<float>(i) * 1.5f;
+  }
+
+  DeviceTraits<c10::DeviceType::CPU>::memcpy(
+      dst, src, kSize * sizeof(float), CPU_DEVICE, CPU_DEVICE);
+
+  for (size_t i = 0; i < kSize; ++i) {
+    EXPECT_FLOAT_EQ(dst[i], static_cast<float>(i) * 1.5f);
+  }
+
+  DeviceTraits<c10::DeviceType::CPU>::free(src);
+  DeviceTraits<c10::DeviceType::CPU>::free(dst);
+}
+
+// =============================================================================
+// MaybeOwningStorage Tests - Owning Mode
+// =============================================================================
+
+TEST(MaybeOwningStorageTest, ConstructOwning) {
+  constexpr size_t kNbytes = 512;
+  MaybeOwningStorage storage(CPU_DEVICE, kNbytes);
+
+  EXPECT_NE(storage.data(), nullptr);
+  EXPECT_EQ(storage.nbytes(), kNbytes);
+  EXPECT_TRUE(storage.device().is_cpu());
+  EXPECT_TRUE(storage.is_owning());
+  EXPECT_TRUE(storage.is_resizable());
+}
+
+TEST(MaybeOwningStorageTest, ConstructOwningZeroBytes) {
+  MaybeOwningStorage storage(CPU_DEVICE, 0);
+
+  EXPECT_EQ(storage.data(), nullptr);
+  EXPECT_EQ(storage.nbytes(), 0);
+  EXPECT_TRUE(storage.device().is_cpu());
+  EXPECT_TRUE(storage.is_owning());
+}
+
+TEST(MaybeOwningStorageTest, DataPersistence) {
+  constexpr size_t kNumFloats = 64;
+  constexpr size_t kNbytes = kNumFloats * sizeof(float);
+  MaybeOwningStorage storage(CPU_DEVICE, kNbytes);
+
+  float* data = static_cast<float*>(storage.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    data[i] = static_cast<float>(i) * 2.0f;
+  }
+
+  float* read_data = static_cast<float*>(storage.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    EXPECT_FLOAT_EQ(read_data[i], static_cast<float>(i) * 2.0f);
+  }
+}
+
+TEST(MaybeOwningStorageTest, MoveConstruct) {
+  constexpr size_t kNbytes = 256;
+  MaybeOwningStorage original(CPU_DEVICE, kNbytes);
+  void* original_data = original.data();
+
+  MaybeOwningStorage moved(std::move(original));
+
+  EXPECT_EQ(moved.data(), original_data);
+  EXPECT_EQ(moved.nbytes(), kNbytes);
+  EXPECT_TRUE(moved.is_owning());
+
+  EXPECT_EQ(original.data(), nullptr);
+  EXPECT_EQ(original.nbytes(), 0);
+  EXPECT_FALSE(original.is_owning());
+}
+
+TEST(MaybeOwningStorageTest, MoveAssign) {
+  constexpr size_t kNbytes1 = 256;
+  constexpr size_t kNbytes2 = 512;
+  MaybeOwningStorage storage1(CPU_DEVICE, kNbytes1);
+  MaybeOwningStorage storage2(CPU_DEVICE, kNbytes2);
+  void* storage2_data = storage2.data();
+
+  storage1 = std::move(storage2);
+
+  EXPECT_EQ(storage1.data(), storage2_data);
+  EXPECT_EQ(storage1.nbytes(), kNbytes2);
+  EXPECT_TRUE(storage1.is_owning());
+
+  EXPECT_EQ(storage2.data(), nullptr);
+  EXPECT_EQ(storage2.nbytes(), 0);
+  EXPECT_FALSE(storage2.is_owning());
+}
+
+TEST(MaybeOwningStorageTest, Clone) {
+  constexpr size_t kNumFloats = 32;
+  constexpr size_t kNbytes = kNumFloats * sizeof(float);
+  MaybeOwningStorage original(CPU_DEVICE, kNbytes);
+
+  float* data = static_cast<float*>(original.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    data[i] = static_cast<float>(i) * 3.0f;
+  }
+
+  MaybeOwningStorage cloned = original.clone(CPU_DEVICE);
+
+  EXPECT_NE(cloned.data(), original.data());
+  EXPECT_EQ(cloned.nbytes(), original.nbytes());
+  EXPECT_TRUE(cloned.is_owning());
+
+  float* cloned_data = static_cast<float*>(cloned.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    EXPECT_FLOAT_EQ(cloned_data[i], static_cast<float>(i) * 3.0f);
+  }
+
+  data[0] = 999.0f;
+  EXPECT_FLOAT_EQ(cloned_data[0], 0.0f);
+}
+
+TEST(MaybeOwningStorageTest, CopyFunction) {
+  constexpr size_t kNumFloats = 16;
+  constexpr size_t kNbytes = kNumFloats * sizeof(float);
+  MaybeOwningStorage src_storage(CPU_DEVICE, kNbytes);
+  MaybeOwningStorage dst_storage(CPU_DEVICE, kNbytes);
+
+  float* src_data = static_cast<float*>(src_storage.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    src_data[i] = static_cast<float>(i) + 0.5f;
+  }
+
+  dst_storage.copy_(
+      dst_storage.data(), src_storage.data(), kNbytes, CPU_DEVICE);
+
+  float* dst_data = static_cast<float*>(dst_storage.data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    EXPECT_FLOAT_EQ(dst_data[i], static_cast<float>(i) + 0.5f);
+  }
+}
+
+// =============================================================================
+// Storage (SharedPtr) Tests
+// =============================================================================
+
+TEST(StorageSharedPtrTest, BasicUsage) {
+  constexpr size_t kNbytes = 128;
+  Storage storage(new MaybeOwningStorage(CPU_DEVICE, kNbytes));
+
+  EXPECT_NE(storage.get(), nullptr);
+  EXPECT_NE(storage->data(), nullptr);
+  EXPECT_EQ(storage->nbytes(), kNbytes);
+  EXPECT_TRUE(storage->device().is_cpu());
+  EXPECT_EQ(storage.use_count(), 1);
+}
+
+TEST(StorageSharedPtrTest, SharedOwnership) {
+  constexpr size_t kNbytes = 128;
+  Storage storage1(new MaybeOwningStorage(CPU_DEVICE, kNbytes));
+  void* data_ptr = storage1->data();
+
+  Storage storage2 = storage1; // Copy, not reference - increments ref count
+
+  EXPECT_EQ(storage1.use_count(), 2);
+  EXPECT_EQ(storage2.use_count(), 2);
+  EXPECT_EQ(storage1->data(), storage2->data());
+  EXPECT_EQ(storage2->data(), data_ptr);
+}
+
+TEST(StorageSharedPtrTest, SharedOwnershipModification) {
+  constexpr size_t kNumFloats = 8;
+  constexpr size_t kNbytes = kNumFloats * sizeof(float);
+  Storage storage1(new MaybeOwningStorage(CPU_DEVICE, kNbytes));
+
+  float* data = static_cast<float*>(storage1->data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    data[i] = 0.0f;
+  }
+
+  const Storage& storage2 = storage1;
+
+  float* data2 = static_cast<float*>(storage2->data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    data2[i] = static_cast<float>(i) * 10.0f;
+  }
+
+  float* data1 = static_cast<float*>(storage1->data());
+  for (size_t i = 0; i < kNumFloats; ++i) {
+    EXPECT_FLOAT_EQ(data1[i], static_cast<float>(i) * 10.0f);
+  }
+}
+
+TEST(StorageSharedPtrTest, ReferenceCountDecrement) {
+  constexpr size_t kNbytes = 64;
+  Storage storage1(new MaybeOwningStorage(CPU_DEVICE, kNbytes));
+  EXPECT_EQ(storage1.use_count(), 1);
+
+  {
+    Storage storage2 = storage1; // Copy increments ref count
+    EXPECT_EQ(storage1.use_count(), 2);
+  } // storage2 destroyed, ref count decrements
+
+  EXPECT_EQ(storage1.use_count(), 1);
+}
+
+TEST(StorageSharedPtrTest, MoveSemantics) {
+  constexpr size_t kNbytes = 64;
+  Storage storage1(new MaybeOwningStorage(CPU_DEVICE, kNbytes));
+  void* data_ptr = storage1->data();
+
+  Storage storage2 = std::move(storage1);
+
+  EXPECT_EQ(storage1.get(), nullptr);
+  EXPECT_EQ(storage2->data(), data_ptr);
+  EXPECT_EQ(storage2.use_count(), 1);
+}
+
+TEST(StorageSharedPtrTest, MakeShared) {
+  constexpr size_t kNbytes = 256;
+  Storage storage = make_shared<MaybeOwningStorage>(CPU_DEVICE, kNbytes);
+
+  EXPECT_NE(storage.get(), nullptr);
+  EXPECT_NE(storage->data(), nullptr);
+  EXPECT_EQ(storage->nbytes(), kNbytes);
+  EXPECT_EQ(storage.use_count(), 1);
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/util/SharedPtr.h b/backends/aoti/slim/util/SharedPtr.h
new file mode 100644
index 00000000000..e4e439ee4cb
--- /dev/null
+++ b/backends/aoti/slim/util/SharedPtr.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <utility>
+
+#include <executorch/runtime/platform/assert.h>
+
+namespace executorch::backends::aoti::slim {
+
+/**
+ * SharedPtr - A lightweight shared pointer implementation optimized for
+ * single-threaded execution contexts.
+ *
+ * This class provides shared ownership semantics similar to std::shared_ptr
+ * but without atomic operations, making it faster in single-threaded
+ * contexts. ExecuTorch AOTI-driven backends operate in a single-threaded
+ * context, so this optimization is safe and provides better performance.
+ *
+ * Primary Use Cases:
+ * 1. Intermediate SlimTensor Storage Management:
+ *    - Manages temporary tensors created during model execution
+ *    - Avoids the overhead of atomic reference counting in std::shared_ptr
+ *
+ * 2. Input/Output Tensor References:
+ *    - Provides reference counting for input/output tensors
+ *    - Uses dummy deleters to prevent premature deallocation when needed
+ */
+template <typename T>
+class SharedPtr {
+ private:
+  struct ControlBlock {
+    int count = 1;
+    T* ptr;
+    using Deleter = void (*)(T*);
+    Deleter deleter;
+
+    ControlBlock(T* p, Deleter d) : ptr(p), deleter(d) {}
+    ControlBlock(const ControlBlock&) = delete;
+    ControlBlock& operator=(const ControlBlock&) = delete;
+    ControlBlock(ControlBlock&&) = delete;
+    ControlBlock& operator=(ControlBlock&&) = delete;
+
+    ~ControlBlock() {
+      if (ptr) {
+        deleter(ptr);
+      }
+    }
+  };
+
+  ControlBlock* cb_;
+
+  static void default_deleter(T* p) {
+    delete p;
+  }
+
+  void cleanup() {
+    if (cb_ && --cb_->count == 0) {
+      delete cb_;
+    }
+    cb_ = nullptr;
+  }
+
+ public:
+  /// Default constructor - creates an empty shared pointer.
+  SharedPtr() noexcept : cb_(nullptr) {}
+
+  /// Constructor from raw pointer.
+  explicit SharedPtr(T* p, typename ControlBlock::Deleter d = default_deleter)
+      : cb_(p ? new ControlBlock(p, d) : nullptr) {}
+
+  /// Copy constructor.
+  SharedPtr(const SharedPtr& other) noexcept : cb_(other.cb_) {
+    if (cb_) {
+      ++cb_->count;
+    }
+  }
+
+  /// Move constructor.
+  SharedPtr(SharedPtr&& other) noexcept : cb_(other.cb_) {
+    other.cb_ = nullptr;
+  }
+
+  /// Destructor.
+  ~SharedPtr() {
+    cleanup();
+  }
+
+  /// Copy assignment.
+  SharedPtr& operator=(const SharedPtr& other) noexcept {
+    if (this != &other) {
+      cleanup();
+      cb_ = other.cb_;
+      if (cb_) {
+        ++cb_->count;
+      }
+    }
+    return *this;
+  }
+
+  /// Move assignment.
+  SharedPtr& operator=(SharedPtr&& other) noexcept {
+    if (this != &other) {
+      cleanup();
+      cb_ = other.cb_;
+      other.cb_ = nullptr;
+    }
+    return *this;
+  }
+
+  /// Resets the shared pointer to manage a new object.
+  void reset(
+      T* p = nullptr,
+      typename ControlBlock::Deleter d = default_deleter) {
+    *this = SharedPtr(p, d);
+  }
+
+  /// Swaps the contents with another shared pointer.
+  void swap(SharedPtr& other) noexcept {
+    std::swap(cb_, other.cb_);
+  }
+
+  /// Returns the managed pointer.
+  T* get() const noexcept {
+    return cb_ ? cb_->ptr : nullptr;
+  }
+
+  /// Dereferences the managed pointer.
+  T& operator*() const {
+    ET_CHECK_MSG(cb_, "Dereferencing null SharedPtr");
+    return *cb_->ptr;
+  }
+
+  /// Accesses members of the managed object.
+  T* operator->() const {
+    ET_CHECK_MSG(cb_, "Accessing member of null SharedPtr");
+    return cb_->ptr;
+  }
+
+  /// Returns the reference count.
+  long use_count() const noexcept {
+    return cb_ ? cb_->count : 0;
+  }
+
+  /// Returns true if the shared pointer is not null.
+  explicit operator bool() const noexcept {
+    return cb_ != nullptr;
+  }
+
+  friend void swap(SharedPtr& a, SharedPtr& b) noexcept {
+    a.swap(b);
+  }
+
+  friend bool operator==(const SharedPtr& lhs, const SharedPtr& rhs) noexcept {
+    return lhs.get() == rhs.get();
+  }
+
+  friend bool operator!=(const SharedPtr& lhs, const SharedPtr& rhs) noexcept {
+    return !(lhs == rhs);
+  }
+
+  friend bool operator==(const SharedPtr& lhs, std::nullptr_t) noexcept {
+    return lhs.get() == nullptr;
+  }
+
+  friend bool operator!=(const SharedPtr& lhs, std::nullptr_t) noexcept {
+    return lhs.get() != nullptr;
+  }
+
+  friend bool operator==(std::nullptr_t, const SharedPtr& rhs) noexcept {
+    return rhs.get() == nullptr;
+  }
+
+  friend bool operator!=(std::nullptr_t, const SharedPtr& rhs) noexcept {
+    return rhs.get() != nullptr;
+  }
+};
+
+/// Creates a SharedPtr managing a new object constructed with the given args.
+template <typename T, typename... Args>
+SharedPtr<T> make_shared(Args&&... args) {
+  return SharedPtr<T>(new T(std::forward<Args>(args)...));
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/util/TARGETS b/backends/aoti/slim/util/TARGETS
new file mode 100644
index 00000000000..77871de4469
--- /dev/null
+++ b/backends/aoti/slim/util/TARGETS
@@ -0,0 +1,3 @@
+load(":targets.bzl", "define_common_targets")
+
+define_common_targets()
diff --git a/backends/aoti/slim/util/targets.bzl b/backends/aoti/slim/util/targets.bzl
new file mode 100644
index 00000000000..13f49168a0f
--- /dev/null
+++ b/backends/aoti/slim/util/targets.bzl
@@ -0,0 +1,16 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Define targets for SlimTensor util module."""
+
+    # Header-only library for SharedPtr
+    runtime.cxx_library(
+        name = "shared_ptr",
+        headers = [
+            "SharedPtr.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/runtime/platform:platform",
+        ],
+    )
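Finally, a short sketch of SharedPtr itself, including the dummy-deleter pattern the class documentation mentions (illustrative only, not part of the diff; noop_deleter is a hypothetical name):

#include <executorch/backends/aoti/slim/util/SharedPtr.h>

using executorch::backends::aoti::slim::SharedPtr;
using executorch::backends::aoti::slim::make_shared;

// A no-op deleter lets a SharedPtr refer to memory it must not free.
static void noop_deleter(int*) {}

int main() {
  SharedPtr<int> owned = make_shared<int>(7); // freed when count reaches 0
  SharedPtr<int> alias = owned; // count == 2, no atomic ops involved
  int stack_value = 3;
  SharedPtr<int> view(&stack_value, noop_deleter); // never frees
  return (*owned + *view == 10 && owned.use_count() == 2) ? 0 : 1;
}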