Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .ci/scripts/test_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ build_cmake_executor_runner() {
${COMMON} \
-B${CMAKE_OUTPUT_DIR} .
cmake --build ${CMAKE_OUTPUT_DIR} -j4
elif [[ "$backend_string_select" == "CUDA" ]]; then
echo "Backend $backend_string_select selected"
cmake -DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_CUDA=ON \
${COMMON} \
-B${CMAKE_OUTPUT_DIR} .
cmake --build ${CMAKE_OUTPUT_DIR} -j4
else
cmake -DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
Expand Down Expand Up @@ -323,6 +330,13 @@ test_model_with_mediatek() {
EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
}

# Export MODEL_NAME with the CUDA example export script, build executor_runner
# through the "CUDA" branch of build_cmake_executor_runner, then run the
# exported program.
# Reads: PYTHON_EXECUTABLE, MODEL_NAME, CMAKE_OUTPUT_DIR.
test_model_with_cuda() {
  # Export a basic .pte and .ptd into the current directory.
  "${PYTHON_EXECUTABLE}" -m examples.cuda.scripts.export --model_name="${MODEL_NAME}" --output_dir "./"
  build_cmake_executor_runner "CUDA"
  # The runner path is quoted so that an output directory containing spaces or
  # glob characters is passed as a single word.
  "./${CMAKE_OUTPUT_DIR}/executor_runner" --model_path "./${MODEL_NAME}.pte" --data_path "./aoti_cuda_blob.ptd"
}


if [[ "${BACKEND}" == "portable" ]]; then
echo "Testing ${MODEL_NAME} with portable kernels..."
Expand Down Expand Up @@ -375,6 +389,12 @@ elif [[ "${BACKEND}" == "mediatek" ]]; then
if [[ $? -eq 0 ]]; then
prepare_artifacts_upload
fi
elif [[ "${BACKEND}" == "cuda" ]]; then
echo "Testing ${MODEL_NAME} with cuda..."
test_model_with_cuda
if [[ $? -eq 0 ]]; then
prepare_artifacts_upload
fi
else
set +e
if [[ "${BACKEND}" == *"quantization"* ]]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,27 @@ jobs:
else
echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
fi

# Runs .ci/scripts/test_model.sh with the "cuda" backend once per model in the
# matrix, on an NVIDIA GPU runner.
test-models-cuda:
  name: test-models-cuda
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  permissions:
    id-token: write
    contents: read
  strategy:
    # Let the remaining models keep running when one of them fails.
    fail-fast: false
    matrix:
      model: [linear, add, add_mul, resnet18]
  with:
    timeout: 90
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    # Quoted so YAML keeps the version as the string "12.6" instead of parsing
    # it as a floating-point number.
    gpu-arch-version: "12.6"
    use-custom-docker-registry: false
    submodules: recursive
    # On pull requests test the PR head commit; otherwise test the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    script: |
      set -eux

      PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
      PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
1 change: 1 addition & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ exclude_patterns = [
'**/*.gif',
'extension/llm/tokenizers',
'extension/llm/tokenizers/**',
'examples/cuda',
# File contains @generated
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
Expand Down
14 changes: 11 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,16 @@ endif()

if(EXECUTORCH_BUILD_CORTEX_M)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m)
  # Register the Cortex-M backend with the aggregated backend list. The entry
  # was previously spelled "coretex_m_backend", which does not match the
  # backends/cortex_m directory it comes from.
  # NOTE(review): assumes the target defined in backends/cortex_m is named
  # cortex_m_backend -- confirm against that directory's CMakeLists.txt.
  list(APPEND _executorch_backends cortex_m_backend)
endif()

# CUDA delegate: pulls in the shared AOTI layer and the CUDA-specific layer.
if(EXECUTORCH_BUILD_CUDA)
  # Common AOTI functionality comes first because the CUDA backend requires it.
  add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/backends/aoti")
  # CUDA-specific AOTI functionality.
  add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/backends/cuda")
  # Only aoti_cuda is registered as a backend; it already depends on
  # aoti_common, so that library does not need its own entry.
  list(APPEND _executorch_backends aoti_cuda)
endif()

if(EXECUTORCH_BUILD_EXTENSION_APPLE)
Expand Down Expand Up @@ -1021,9 +1031,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
extension_runner_util gflags executorch_backends
)

if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
list(APPEND _executor_runner_libs extension_flat_tensor)
endif()
list(APPEND _executor_runner_libs ${_executorch_extensions})

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
Expand Down
4 changes: 3 additions & 1 deletion backends/aoti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ set(_aoti_common_sources aoti_model_container.cpp common_shims.cpp)
add_library(aoti_common STATIC ${_aoti_common_sources})
target_include_directories(
aoti_common
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
# PyTorch AOTI headers from ExecuTorch's torch detection
${TORCH_INCLUDE_DIRS}
)
Expand Down
2 changes: 2 additions & 0 deletions backends/aoti/aoti_model_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ using executorch::runtime::etensor::Tensor;
extern "C" {

// Type definitions
using AOTITensorHandle = Tensor*;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can use `Tensor*` directly here; we've already removed the alias in the other places.

using AOTIRuntimeError = Error;

// Forward declarations for AOT Inductor model container
Expand Down Expand Up @@ -74,6 +75,7 @@ extern AOTInductorModelContainerRunFunc AOTInductorModelContainerRun;
// AOTI Delegate Handle structure
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: if this backend can't be instantiated directly, then perhaps rename it with a leading underscore (s/aoti/_aoti)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you say more?

// Groups the per-delegate state for the AOTI backend: a raw library handle,
// a path string, and the model container handle.
struct AOTIDelegateHandle {
// NOTE(review): presumably the handle of the loaded shared object -- confirm
// against the code that fills this struct in.
void* so_handle;
// NOTE(review): presumably the filesystem path of that shared object -- confirm.
std::string so_path;
// Handle to the AOT Inductor model container; the handle type is declared
// outside this excerpt.
AOTInductorModelContainerHandle container_handle;
};

Expand Down
69 changes: 69 additions & 0 deletions backends/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Build AOTI CUDA backend for runtime.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.29)

# Compile both host C++ and CUDA sources as C++17, and fail rather than fall
# back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Write compile_commands.json for editors and other tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# When no ExecuTorch source root has been set already, default to the
# directory two levels above this one.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../..")
endif()

# The CUDA toolkit is mandatory for this backend.
find_package(CUDAToolkit REQUIRED)

# Locate the PyTorch libraries needed for AOTI the standard ExecuTorch way.
# find_package_torch() is expected to be provided by Utils.cmake.
include("${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake")
find_package_torch()

# aoti_cuda: static library containing the CUDA-specific AOTI runtime sources.
add_library(
  aoti_cuda STATIC
  runtime/cuda_backend.cpp
  runtime/shims/memory.cpp
  runtime/shims/tensor_attribute.cpp
)

# Everything below is PUBLIC, so it also applies to targets that link
# aoti_cuda.
target_link_libraries(
  aoti_cuda
  PUBLIC
    # Common AOTI library, the CUDA runtime, and the platform dl library.
    aoti_common
    CUDA::cudart
    ${CMAKE_DL_LIBS}
    # PyTorch libraries that provide the AOTI CUDA functions.
    ${TORCH_LIBRARIES}
)
# Additional CUDA toolkit libraries (for example CUDA::cublas or CUDA::cufft)
# can be appended to the PUBLIC list above in the same way.

target_include_directories(
  aoti_cuda
  PUBLIC
    ${CUDAToolkit_INCLUDE_DIRS}
    $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
    $<INSTALL_INTERFACE:include>
    # PyTorch AOTI headers from ExecuTorch's torch detection.
    ${TORCH_INCLUDE_DIRS}
)

target_compile_options(aoti_cuda PUBLIC -fexceptions -frtti -fPIC)

# Ensure symbols are exported properly.
target_link_options(aoti_cuda PUBLIC -Wl,--export-dynamic)
# NOTE(review): helper defined outside this file; presumably adds the link
# options ExecuTorch applies to libraries that must be linked in whole --
# confirm in tools/cmake/Utils.cmake.
executorch_target_link_options_shared_lib(aoti_cuda)

# Install the library into lib/ and add it to the ExecuTorchTargets export set.
install(
  TARGETS aoti_cuda
  EXPORT ExecuTorchTargets
  DESTINATION lib
)
Loading
Loading