// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#include <executorch/backends/cuda/runtime/tensor/tensor_maker.h>

#include <algorithm>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
namespace executorch::backends::cuda {

namespace {
#ifndef USE_ATEN_LIB
/**
 * A structure that consolidates the metadata (sizes, dim_order, strides) and
 * the data buffer associated with a Tensor. Since Tensor does not own the
 * memory for these metadata arrays or the data itself, this structure ensures
 * that they are managed together and have the same lifetime as the Tensor.
 * When the Tensor is destroyed, the Storage structure ensures proper cleanup
 * of the associated metadata and data if needed.
 */
struct Storage final {
  executorch::aten::TensorImpl tensor_impl;
  executorch::aten::Tensor tensor;
  std::vector<executorch::aten::SizesType> sizes;
  std::vector<executorch::aten::DimOrderType> dim_order;
  std::vector<executorch::aten::StridesType> strides;
  std::function<void(void*)> deleter;

  Storage(
      executorch::aten::TensorImpl&& tensor_impl,
      std::vector<executorch::aten::SizesType>&& sizes,
      std::vector<executorch::aten::DimOrderType>&& dim_order,
      std::vector<executorch::aten::StridesType>&& strides,
      std::function<void(void*)>&& deleter)
      : tensor_impl(std::move(tensor_impl)),
        tensor(&this->tensor_impl),
        sizes(std::move(sizes)),
        dim_order(std::move(dim_order)),
        strides(std::move(strides)),
        deleter(std::move(deleter)) {}

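  // Runs when the owning shared_ptr releases the Storage: hands the
  // externally owned data buffer back to the caller-provided deleter, if any.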
  ~Storage() {
    if (deleter) {
      deleter(tensor_impl.mutable_data());
    }
  }
};
#endif // USE_ATEN_LIB
} // namespace

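// Illustrative usage (a sketch, not part of this file's API surface): the
// CUDA buffer and cudaFree deleter are assumptions; any allocation whose
// cleanup fits in a std::function<void(void*)> works the same way.
//
//   void* data = nullptr;
//   cudaMalloc(&data, 2 * 3 * sizeof(float));
//   auto tensor = make_tensor(
//       {2, 3},                                 // sizes
//       data,                                   // externally owned buffer
//       {},                                     // dim_order: derived from strides
//       {3, 1},                                 // strides: contiguous row-major
//       executorch::aten::ScalarType::Float,
//       executorch::aten::TensorShapeDynamism::DYNAMIC_BOUND,
//       [](void* ptr) { cudaFree(ptr); });      // runs when `tensor` is destroyed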
TensorPtr make_tensor(
    std::vector<executorch::aten::SizesType> sizes,
    void* data,
    std::vector<executorch::aten::DimOrderType> dim_order,
    std::vector<executorch::aten::StridesType> strides,
    executorch::aten::ScalarType type,
    executorch::aten::TensorShapeDynamism dynamism,
    std::function<void(void*)> deleter) {
  const auto dim = sizes.size();
  ET_CHECK_MSG(
      dim_order.empty() || dim_order.size() == dim,
      "dim_order size must match sizes or be empty.");
  ET_CHECK_MSG(
      strides.empty() || strides.size() == dim,
      "strides size must match sizes or be empty.");

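  // When dim_order is not provided, derive it: default to the natural order
  // {0, 1, ...}, and if strides were given, reorder dimensions from largest
  // to smallest stride (outermost first). E.g., sizes {2, 3} with strides
  // {1, 2} yield dim_order {1, 0}.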
  if (dim_order.empty()) {
    dim_order.resize(dim);
    std::iota(dim_order.begin(), dim_order.end(), 0);
    if (!strides.empty()) {
      std::sort(dim_order.begin(), dim_order.end(), [&](size_t a, size_t b) {
        return strides[a] > strides[b];
      });
    }
  }

  // AOTI backends (like AOTI-CUDA) handle both contiguous and non-contiguous
  // tensors, so we skip stride calculation and contiguity checks; strides are
  // passed through as-is without validation.

#ifndef USE_ATEN_LIB
  executorch::aten::TensorImpl tensor_impl(
      type,
      dim,
      sizes.data(),
      data,
      dim_order.data(),
      strides.data(),
      dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC);
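  // Moving the vectors into Storage transfers their heap buffers intact, so
  // the raw pointers captured by tensor_impl above stay valid.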
  auto storage = std::make_shared<Storage>(
      std::move(tensor_impl),
      std::move(sizes),
      std::move(dim_order),
      std::move(strides),
      std::move(deleter));
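  // Aliasing shared_ptr constructor: the returned pointer exposes
  // storage->tensor while sharing ownership of the whole Storage, so the
  // metadata vectors and deleter live exactly as long as the Tensor.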
  const auto tensor_ptr = &storage->tensor;
  return std::shared_ptr<executorch::aten::Tensor>(
      std::move(storage), tensor_ptr);
#else
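  // ATen build: wrap the external buffer in a c10::Storage whose DataPtr
  // routes destruction through the caller's deleter (adapted by
  // InefficientStdFunctionContext), mirroring the non-ATen path above.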
  auto options = c10::TensorOptions()
                     .dtype(c10::scalarTypeToTypeMeta(type))
                     .device(c10::kCPU);
  auto storage = c10::Storage(
      c10::Storage::use_byte_size_t(),
      at::detail::computeStorageNbytes(
          sizes, strides, options.dtype().itemsize()),
      c10::InefficientStdFunctionContext::makeDataPtr(
          data, std::move(deleter), options.device()),
      nullptr,
      false);
  auto tensor_impl = c10::make_intrusive<executorch::aten::TensorImpl>(
      std::move(storage),
      c10::DispatchKeySet(c10::DispatchKey::CPU),
      options.dtype());
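  // set_sizes_and_strides copies the metadata into the TensorImpl, so the
  // local sizes/strides vectors need not outlive the returned tensor here.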
  tensor_impl->set_sizes_and_strides(sizes, strides);
  return std::make_shared<executorch::aten::Tensor>(std::move(tensor_impl));
#endif // USE_ATEN_LIB
}

} // namespace executorch::backends::cuda