Skip to content

Commit 9b3a448

Browse files
committed
Update on "Use merged data map in module"
Differential Revision: [D83799869](https://our.internmc.facebook.com/intern/diff/D83799869/) [ghstack-poisoned]
2 parents 84686e7 + 9f1330c commit 9b3a448

File tree

112 files changed

+2017
-466
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+2017
-466
lines changed

.ci/scripts/test_backend.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ fi
5959
if [[ "$FLOW" == *arm* ]]; then
6060
# Setup ARM deps.
6161
.ci/scripts/setup-arm-baremetal-tools.sh
62+
source examples/arm/ethos-u-scratch/setup_path.sh
6263

6364
if [[ "$FLOW" == *ethos_u* ]]; then
6465
# Prepare a test runner binary that can run on the Corstone-3x0 FVPs

.ci/scripts/test_model.sh

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,22 +48,33 @@ prepare_artifacts_upload() {
4848
fi
4949
}
5050

51+
5152
build_cmake_executor_runner() {
5253
local backend_string_select="${1:-}"
5354
echo "Building executor_runner"
5455
rm -rf ${CMAKE_OUTPUT_DIR}
5556
mkdir ${CMAKE_OUTPUT_DIR}
57+
# Common options:
58+
COMMON="-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE"
5659
if [[ "$backend_string_select" == "XNNPACK" ]]; then
5760
echo "Backend $backend_string_select selected"
58-
(cd ${CMAKE_OUTPUT_DIR} \
59-
&& cmake -DCMAKE_BUILD_TYPE=Release \
61+
cmake -DCMAKE_BUILD_TYPE=Release \
6062
-DEXECUTORCH_BUILD_XNNPACK=ON \
61-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
63+
${COMMON} \
64+
-B${CMAKE_OUTPUT_DIR} .
65+
cmake --build ${CMAKE_OUTPUT_DIR} -j4
66+
elif [[ "$backend_string_select" == "CUDA" ]]; then
67+
echo "Backend $backend_string_select selected"
68+
cmake -DCMAKE_BUILD_TYPE=Release \
69+
-DEXECUTORCH_BUILD_CUDA=ON \
70+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
71+
${COMMON} \
72+
-B${CMAKE_OUTPUT_DIR} .
6273
cmake --build ${CMAKE_OUTPUT_DIR} -j4
6374
else
6475
cmake -DCMAKE_BUILD_TYPE=Debug \
6576
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
66-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
77+
${COMMON} \
6778
-B${CMAKE_OUTPUT_DIR} .
6879
cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
6980
fi
@@ -320,6 +331,13 @@ test_model_with_mediatek() {
320331
EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
321332
}
322333

334+
test_model_with_cuda() {
335+
# Export a basic .pte and .ptd, then run the model.
336+
"${PYTHON_EXECUTABLE}" -m examples.cuda.scripts.export --model_name="${MODEL_NAME}" --output_dir "./"
337+
build_cmake_executor_runner "CUDA"
338+
./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte" --data_path "./aoti_cuda_blob.ptd"
339+
}
340+
323341

324342
if [[ "${BACKEND}" == "portable" ]]; then
325343
echo "Testing ${MODEL_NAME} with portable kernels..."
@@ -372,6 +390,12 @@ elif [[ "${BACKEND}" == "mediatek" ]]; then
372390
if [[ $? -eq 0 ]]; then
373391
prepare_artifacts_upload
374392
fi
393+
elif [[ "${BACKEND}" == "cuda" ]]; then
394+
echo "Testing ${MODEL_NAME} with cuda..."
395+
test_model_with_cuda
396+
if [[ $? -eq 0 ]]; then
397+
prepare_artifacts_upload
398+
fi
375399
else
376400
set +e
377401
if [[ "${BACKEND}" == *"quantization"* ]]; then

.ci/scripts/utils.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,15 @@ build_executorch_runner_cmake() {
125125
clean_executorch_install_folders
126126
mkdir "${CMAKE_OUTPUT_DIR}"
127127

128-
pushd "${CMAKE_OUTPUT_DIR}" || return
129128
if [[ $1 == "Debug" ]]; then
130129
CXXFLAGS="-fsanitize=address,undefined"
131130
else
132131
CXXFLAGS=""
133132
fi
134-
CXXFLAGS="$CXXFLAGS" retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
135-
popd || return
133+
CXXFLAGS="$CXXFLAGS" retry cmake \
134+
-DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
135+
-DCMAKE_BUILD_TYPE="${1:-Release}" \
136+
-B${CMAKE_OUTPUT_DIR} .
136137

137138
if [ "$(uname)" == "Darwin" ]; then
138139
CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))

.github/workflows/test-cuda-builds.yml renamed to .github/workflows/cuda.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,28 @@ jobs:
6161
else
6262
echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
6363
fi
64+
65+
test-models-cuda:
66+
name: test-models-cuda
67+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
68+
permissions:
69+
id-token: write
70+
contents: read
71+
strategy:
72+
fail-fast: false
73+
matrix:
74+
model: [linear, add, add_mul, resnet18]
75+
with:
76+
timeout: 90
77+
runner: linux.g5.4xlarge.nvidia.gpu
78+
gpu-arch-type: cuda
79+
gpu-arch-version: 12.6
80+
use-custom-docker-registry: false
81+
submodules: recursive
82+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
83+
script: |
84+
set -eux
85+
86+
PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
87+
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
88+
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -909,12 +909,12 @@ jobs:
909909
contents: read
910910
secrets: inherit
911911
with:
912+
secrets-env: SAMSUNG_AI_LITECORE_KEY
912913
runner: linux.2xlarge
913914
docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
914915
submodules: 'recursive'
915916
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
916917
timeout: 90
917-
secrets-env: SAMSUNG_AI_LITECORE_KEY
918918
script: |
919919
set -ex
920920

.github/workflows/test-backend-arm.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ on:
1212
paths:
1313
- .github/workflows/test-backend-arm.yml
1414
- .github/workflows/_test_backend.yml
15+
- .ci/scripts/test_backend.sh
16+
- backends/test/suite/flow.py
17+
- backends/test/suite/flows/arm.py
1518
workflow_dispatch:
1619

1720
concurrency:

.lintrunner.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ exclude_patterns = [
219219
'**/*.gif',
220220
'extension/llm/tokenizers',
221221
'extension/llm/tokenizers/**',
222+
'examples/cuda',
222223
# File contains @generated
223224
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
224225
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',

CMakeLists.txt

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ if(EXECUTORCH_BUILD_CPUINFO)
226226
install(
227227
TARGETS cpuinfo
228228
EXPORT ExecuTorchTargets
229-
DESTINATION lib
229+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
230230
INCLUDES
231231
DESTINATION ${_common_include_directories}
232232
)
@@ -269,7 +269,7 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
269269
install(
270270
TARGETS pthreadpool pthreadpool_interface fxdiv
271271
EXPORT ExecuTorchTargets
272-
DESTINATION lib
272+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
273273
INCLUDES
274274
DESTINATION ${_common_include_directories}
275275
)
@@ -587,6 +587,16 @@ endif()
587587

588588
if(EXECUTORCH_BUILD_CORTEX_M)
589589
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m)
590+
list(APPEND _executorch_backends coretex_m_backend)
591+
endif()
592+
593+
if(EXECUTORCH_BUILD_CUDA)
594+
# Build common AOTI functionality (required for CUDA)
595+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/aoti)
596+
# Build CUDA-specific AOTI functionality
597+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cuda)
598+
# Add aoti_cuda to backends - it already depends on aoti_common
599+
list(APPEND _executorch_backends aoti_cuda)
590600
endif()
591601

592602
if(EXECUTORCH_BUILD_EXTENSION_APPLE)
@@ -713,7 +723,7 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
713723
install(
714724
TARGETS torchao_ops_executorch torchao_kernels_aarch64
715725
EXPORT ExecuTorchTargets
716-
DESTINATION lib
726+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
717727
INCLUDES
718728
DESTINATION ${_common_include_directories}
719729
)
@@ -724,7 +734,7 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
724734
install(
725735
TARGETS kleidiai
726736
EXPORT ExecuTorchTargets
727-
DESTINATION lib
737+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
728738
INCLUDES
729739
DESTINATION ${_common_include_directories}
730740
)
@@ -1004,7 +1014,7 @@ if(NOT EXECUTORCH_SELECT_OPS_YAML STREQUAL ""
10041014
install(
10051015
TARGETS executorch_selected_kernels
10061016
EXPORT ExecuTorchTargets
1007-
DESTINATION lib
1017+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
10081018
)
10091019
else()
10101020
# No selective build - link the full library.
@@ -1026,6 +1036,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
10261036
extension_runner_util gflags executorch_backends
10271037
)
10281038

1039+
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
1040+
list(APPEND _executor_runner_libs extension_flat_tensor)
1041+
endif()
1042+
10291043
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
10301044
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
10311045
elseif(EXECUTORCH_BUILD_CADENCE)

backends/aoti/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ set(_aoti_common_sources aoti_model_container.cpp common_shims.cpp)
3030
add_library(aoti_common STATIC ${_aoti_common_sources})
3131
target_include_directories(
3232
aoti_common
33-
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
33+
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
34+
$<INSTALL_INTERFACE:include>
35+
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
3436
# PyTorch AOTI headers from ExecuTorch's torch detection
3537
${TORCH_INCLUDE_DIRS}
3638
)
@@ -50,5 +52,5 @@ executorch_target_link_options_shared_lib(aoti_common)
5052
install(
5153
TARGETS aoti_common
5254
EXPORT ExecuTorchTargets
53-
DESTINATION lib
55+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
5456
)

backends/aoti/aoti_model_container.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ using executorch::runtime::etensor::Tensor;
2121
extern "C" {
2222

2323
// Type definitions
24+
using AOTITensorHandle = Tensor*;
2425
using AOTIRuntimeError = Error;
2526

2627
// Forward declarations for AOT Inductor model container
@@ -74,6 +75,7 @@ extern AOTInductorModelContainerRunFunc AOTInductorModelContainerRun;
7475
// AOTI Delegate Handle structure
7576
struct AOTIDelegateHandle {
7677
void* so_handle;
78+
std::string so_path;
7779
AOTInductorModelContainerHandle container_handle;
7880
};
7981

0 commit comments

Comments
 (0)