Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,16 @@ endif()

if(EXECUTORCH_BUILD_CORTEX_M)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m)
  # Fix typo: the option, the subdirectory, and therefore the target are all
  # spelled "cortex_m"; appending the misspelled "coretex_m_backend" would add
  # a non-existent target name to the backends list.
  list(APPEND _executorch_backends cortex_m_backend)
endif()

# CUDA delegate: the CUDA backend is layered on top of the backend-agnostic
# AOTI runtime pieces, so both subdirectories must be added.
if(EXECUTORCH_BUILD_CUDA)
  # Build common AOTI functionality (required for CUDA)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/aoti)
  # Build CUDA-specific AOTI functionality
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cuda)
  # Only aoti_cuda needs to be listed here: it links aoti_common PUBLICly in
  # backends/cuda/CMakeLists.txt, so the common library is pulled in
  # transitively.
  list(APPEND _executorch_backends aoti_cuda)
endif()

if(EXECUTORCH_BUILD_EXTENSION_APPLE)
Expand Down Expand Up @@ -1021,6 +1031,11 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
extension_runner_util gflags executorch_backends
)

# Link the flat-tensor extension into the executor runner only when that
# extension is actually being built; otherwise the target would not exist.
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
  list(APPEND _executor_runner_libs extension_flat_tensor)
endif()

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
elseif(EXECUTORCH_BUILD_CADENCE)
Expand Down
2 changes: 2 additions & 0 deletions backends/aoti/aoti_model_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ using executorch::runtime::etensor::Tensor;
extern "C" {

// Type definitions
using AOTITensorHandle = Tensor*;
using AOTIRuntimeError = Error;

// Forward declarations for AOT Inductor model container
Expand Down Expand Up @@ -75,6 +76,7 @@ extern AOTInductorModelContainerRunFunc AOTInductorModelContainerRun;
// Per-model state owned by the AOTI delegate: the loaded shared object, the
// AOT Inductor model container created from it, and the stream used to run it.
struct AOTIDelegateHandle {
  void* so_handle; // opaque handle to the model's shared library (presumably
                   // returned by dlopen — not visible here; confirm in loader)
  AOTInductorModelContainerHandle container_handle; // AOT Inductor container
                                                    // for this model
  void* cuda_stream; // cudaStream_t stored as void* to avoid CUDA header dependency
};

} // namespace aoti
Expand Down
2 changes: 2 additions & 0 deletions backends/aoti/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ inline executorch::aten::ScalarType dtype_to_scalar_type(int32_t dtype) {
// Convert based on known PyTorch dtype codes (without CUDA-specific
// dependency)
switch (dtype) {
case 4: // PyTorch's int64 dtype code
return executorch::aten::ScalarType::Long;
case 6: // PyTorch's float32 dtype code
return executorch::aten::ScalarType::Float;
case 15: // PyTorch's bfloat16 dtype code
Expand Down
74 changes: 74 additions & 0 deletions backends/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Build AOTI CUDA backend for runtime.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Source root directory for executorch. Defaults to two levels up, which is
# correct when this file lives at backends/cuda/.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

# The CUDA toolkit is mandatory for this backend; CUDA::cudart is linked below.
find_package(CUDAToolkit REQUIRED)

# Use ExecutorTorch's standard way to find PyTorch libraries for AOTI. This
# populates TORCH_INCLUDE_DIRS / TORCH_LIBRARIES used below.
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
find_package_torch()

# CUDA-specific AOTI functionality: the backend entry point plus the CUDA
# guard/shim layer it dispatches into.
set(_aoti_cuda_sources
    runtime/cuda_backend.cpp
    runtime/guard.cpp
    runtime/shims/cuda_guard.cpp
    runtime/shims/memory.cpp
    runtime/shims/tensor_attribute.cpp
)
add_library(aoti_cuda STATIC ${_aoti_cuda_sources})
target_include_directories(
  aoti_cuda
  PUBLIC ${CUDAToolkit_INCLUDE_DIRS}
         $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
         $<INSTALL_INTERFACE:include>
         # PyTorch AOTI headers from ExecutorTorch's torch detection
         ${TORCH_INCLUDE_DIRS}
)
# PUBLIC so consumers inherit exceptions/RTTI/PIC when linking this static
# library. NOTE(review): -fPIC could alternatively be expressed via the
# POSITION_INDEPENDENT_CODE property — confirm whether consumers rely on the
# flag propagating before changing.
target_compile_options(aoti_cuda PUBLIC -fexceptions -frtti -fPIC)
# Ensure symbols are exported properly so AOTI-generated code resolved at
# runtime can find them. NOTE(review): --export-dynamic is GNU-ld specific;
# fine for Linux CUDA builds, but would need guarding on other platforms.
target_link_options(aoti_cuda PUBLIC -Wl,--export-dynamic)

# Link against CUDA::cudart, common AOTI library, and PyTorch CUDA libraries.
# PUBLIC so the usage requirements (cudart, torch) propagate to consumers.
target_link_libraries(
  aoti_cuda
  PUBLIC aoti_common CUDA::cudart ${CMAKE_DL_LIBS}
         # Link PyTorch libraries for AOTI CUDA functions
         ${TORCH_LIBRARIES}
)
# If you need other CUDA libraries, link them similarly:
# target_link_libraries(aoti_cuda PUBLIC CUDA::cublas CUDA::cufft ...)
executorch_target_link_options_shared_lib(aoti_cuda)

# Test runner executable. NOTE(review): built unconditionally whenever CUDA is
# enabled, and it requires extension_module_static, extension_flat_tensor, and
# portable_ops_lib to exist — consider guarding behind a test/tools option.
add_executable(voxtral_runner tests/voxtral_runner.cpp)
target_link_libraries(
  voxtral_runner PUBLIC aoti_cuda extension_module_static extension_flat_tensor
                        portable_ops_lib
)

install(
  TARGETS aoti_cuda
  EXPORT ExecuTorchTargets
  DESTINATION lib
)
2 changes: 0 additions & 2 deletions backends/cuda/cuda_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@
# required fallback kernels but not supported
missing_fallback_kernels: Set[str] = set()


class COMPILE_SPEC_KEYS(Enum):
METHOD_NAME = "method_name"


# context manager for non-fallback guarantee
# it will raise exception when generating fallback kernels during aoti compile
@contextlib.contextmanager
Expand Down
6 changes: 5 additions & 1 deletion backends/cuda/runtime/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@ oncall("executorch")
runtime.cxx_library(
name = "runtime_shims",
srcs = [
"guard.cpp",
"shims/cuda_guard.cpp",
"shims/memory.cpp",
"shims/tensor_attribute.cpp",
],
headers = [
"guard.h",
"shims/cuda_guard.h",
"shims/memory.h",
"shims/tensor_attribute.h",
"shims/utils.h",
"utils.h",
],
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
link_whole = True,
Expand Down
Loading