Skip to content

Commit 6ffd0ac

Browse files
author
ssjia
committed
Update base for Update on "[ET-VK][ez] Make pipeline executable properties be controlled by a different macro"
Prevent the following validation layer errors when building with VULKAN_DEBUG ``` Validation Error: [ VUID-VkComputePipelineCreateInfo-None-09497 ] | MessageID = 0xde3918a7 vkCreateComputePipelines(): pCreateInfos[0].flags has VkPipelineCreateFlagBits values (VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR|VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) that requires the extensions VK_KHR_pipeline_executable_properties. The Vulkan spec states: If the pNext chain does not include a VkPipelineCreateFlags2CreateInfo structure, flags must be a valid combination of VkPipelineCreateFlagBits values (https://vulkan.lunarg.com/doc/view/1.4.321.0/mac/antora/spec/latest/chapters/pipelines.html#VUID-VkComputePipelineCreateInfo-None-09497) Validation 0 vkCreateComputePipelines(): pCreateInfos[0].flags has VkPipelineCreateFlagBits values (VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR|VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) that requires the extensions VK_KHR_pipeline_executable_properties. The Vulkan spec states: If the pNext chain does not include a VkPipelineCreateFlags2CreateInfo structure, flags must be a valid combination of VkPipelineCreateFlagBits values (https://vulkan.lunarg.com/doc/view/1.4.321.0/mac/antora/spec/latest/chapters/pipelines.html#VUID-VkComputePipelineCreateInfo-None-09497) Validation Error: [ VUID-VkComputePipelineCreateInfo-None-09497 ] | MessageID = 0xde3918a7 vkCreateComputePipelines(): pCreateInfos[0].flags has VkPipelineCreateFlagBits values (VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR|VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) that requires the extensions VK_KHR_pipeline_executable_properties. 
The Vulkan spec states: If the pNext chain does not include a VkPipelineCreateFlags2CreateInfo structure, flags must be a valid combination of VkPipelineCreateFlagBits values (https://vulkan.lunarg.com/doc/view/1.4.321.0/mac/antora/spec/latest/chapters/pipelines.html#VUID-VkComputePipelineCreateInfo-None-09497) Validation 0 vkCreateComputePipelines(): pCreateInfos[0].flags has VkPipelineCreateFlagBits values (VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR|VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) that requires the extensions VK_KHR_pipeline_executable_properties. The Vulkan spec states: If the pNext chain does not include a VkPipelineCreateFlags2CreateInfo structure, flags must be a valid combination of VkPipelineCreateFlagBits values (https://vulkan.lunarg.com/doc/view/1.4.321.0/mac/antora/spec/latest/chapters/pipelines.html#VUID-VkComputePipelineCreateInfo-None-09497) ``` Differential Revision: [D84716453](https://our.internmc.facebook.com/intern/diff/D84716453/) [ghstack-poisoned]
2 parents 28f9017 + baa41c6 commit 6ffd0ac

File tree

85 files changed

+2965
-1170
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

85 files changed

+2965
-1170
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
44d8d54e38c0258357d4e92e1fefe21e845947a3
1+
09fdbd0a0639b128f712a4f5202ed42ca4c60957
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
53a2908a10f414a2f85caa06703a26a40e873869
1+
e6f766c7d750d40603eee3f66c5915bac606b3ea

.ci/scripts/utils.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,44 @@ install_pip_dependencies() {
4444
popd || return
4545
}
4646

47+
dedupe_macos_loader_path_rpaths() {
48+
if [[ "$(uname)" != "Darwin" ]]; then
49+
return
50+
fi
51+
52+
local torch_lib_dir
53+
pushd ..
54+
torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib
55+
popd
56+
57+
if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then
58+
return
59+
fi
60+
61+
local torch_libs=(
62+
"libtorch_cpu.dylib"
63+
"libtorch.dylib"
64+
"libc10.dylib"
65+
)
66+
67+
for lib_name in "${torch_libs[@]}"; do
68+
local lib_path="${torch_lib_dir}/${lib_name}"
69+
if [[ ! -f "${lib_path}" ]]; then
70+
continue
71+
fi
72+
73+
local removed=0
74+
# Repeatedly remove the @loader_path rpath entries until none remain.
75+
while install_name_tool -delete_rpath @loader_path "${lib_path}" 2>/dev/null; do
76+
removed=1
77+
done
78+
79+
if [[ "${removed}" == "1" ]]; then
80+
install_name_tool -add_rpath @loader_path "${lib_path}" || true
81+
fi
82+
done
83+
}
84+
4785
install_domains() {
4886
echo "Install torchvision and torchaudio"
4987
pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
@@ -101,6 +139,7 @@ install_pytorch_and_domains() {
101139
echo "Use cached wheel at ${cached_torch_wheel}"
102140
fi
103141

142+
dedupe_macos_loader_path_rpaths
104143
# Grab the pinned audio and vision commits from PyTorch
105144
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
106145
export TORCHAUDIO_VERSION

.github/workflows/cuda.yml

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,26 @@ jobs:
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
9090
export-voxtral-cuda-artifact:
91-
name: export-voxtral-cuda-artifact
91+
name: export-voxtral-cuda-${{ matrix.quant.name }}
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
9595
contents: read
9696
secrets: inherit
9797
strategy:
9898
fail-fast: false
99+
matrix:
100+
quant:
101+
- name: "non-quantized"
102+
artifact: "voxtral-cuda-export"
103+
extra_args: ""
104+
- name: "quantized-int4-tile-packed"
105+
artifact: "voxtral-cuda-quantized-int4-tile-packed"
106+
extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
107+
- name: "quantized-int4-weight-only"
108+
artifact: "voxtral-cuda-quantized-int4-weight-only"
109+
# TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
110+
extra_args: "--qlinear_encoder 4w"
99111
with:
100112
timeout: 90
101113
secrets-env: EXECUTORCH_HF_TOKEN
@@ -104,7 +116,7 @@ jobs:
104116
gpu-arch-version: 12.6
105117
use-custom-docker-registry: false
106118
submodules: recursive
107-
upload-artifact: voxtral-cuda-export
119+
upload-artifact: ${{ matrix.quant.artifact }}
108120
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
109121
script: |
110122
set -eux
@@ -122,14 +134,16 @@ jobs:
122134
pip list
123135
echo "::endgroup::"
124136
125-
echo "::group::Export Voxtral"
137+
echo "::group::Export Voxtral (${{ matrix.quant.name }})"
138+
EXTRA_ARGS="${{ matrix.quant.extra_args }}"
126139
optimum-cli export executorch \
127140
--model "mistralai/Voxtral-Mini-3B-2507" \
128141
--task "multimodal-text-to-text" \
129142
--recipe "cuda" \
130143
--dtype bfloat16 \
131144
--device cuda \
132145
--max_seq_len 1024 \
146+
${EXTRA_ARGS} \
133147
--output_dir ./
134148
python -m executorch.extension.audio.mel_spectrogram \
135149
--feature_size 128 \
@@ -142,7 +156,7 @@ jobs:
142156
test -f voxtral_preprocessor.pte
143157
echo "::endgroup::"
144158
145-
echo "::group::Store Voxtral Artifacts"
159+
echo "::group::Store Voxtral Artifacts (${{ matrix.quant.name }})"
146160
mkdir -p "${RUNNER_ARTIFACT_DIR}"
147161
cp model.pte "${RUNNER_ARTIFACT_DIR}/"
148162
cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
@@ -201,22 +215,30 @@ jobs:
201215
echo "::endgroup::"
202216
203217
test-voxtral-cuda-e2e:
204-
name: test-voxtral-cuda-e2e
218+
name: test-voxtral-cuda-e2e-${{ matrix.format.name }}
205219
needs: export-voxtral-cuda-artifact
206220
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
207221
permissions:
208222
id-token: write
209223
contents: read
210224
strategy:
211225
fail-fast: false
226+
matrix:
227+
format:
228+
- name: "non-quantized"
229+
artifact: "voxtral-cuda-export"
230+
- name: "quantized-int4-tile-packed"
231+
artifact: "voxtral-cuda-quantized-int4-tile-packed"
232+
- name: "quantized-int4-weight-only"
233+
artifact: "voxtral-cuda-quantized-int4-weight-only"
212234
with:
213235
timeout: 90
214236
runner: linux.g5.4xlarge.nvidia.gpu
215237
gpu-arch-type: cuda
216238
gpu-arch-version: 12.6
217239
use-custom-docker-registry: false
218240
submodules: recursive
219-
download-artifact: voxtral-cuda-export
241+
download-artifact: ${{ matrix.format.artifact }}
220242
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
221243
script: |
222244
set -eux
@@ -226,7 +248,7 @@ jobs:
226248
pip list
227249
echo "::endgroup::"
228250
229-
echo "::group::Prepare Voxtral Artifacts"
251+
echo "::group::Prepare Voxtral Artifacts (${{ matrix.format.name }})"
230252
cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
231253
cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
232254
cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" .
@@ -255,7 +277,7 @@ jobs:
255277
cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release
256278
echo "::endgroup::"
257279
258-
echo "::group::Run Voxtral Runner"
280+
echo "::group::Run Voxtral Runner (${{ matrix.format.name }})"
259281
set +e
260282
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
261283
OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ jobs:
351351
352352
# reinstall executorch
353353
bash ./install_executorch.sh --minimal
354+
pip list
354355
355356
# run python unittest
356357
python -m unittest examples.models.moshi.mimi.test_mimi

CMakeLists.txt

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -99,28 +99,6 @@ announce_configured_options(CCACHE_PROGRAM)
9999

100100
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
101101

102-
# Setup RPATH. See
103-
# https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
104-
# Use separate rpaths during build and install phases
105-
set(CMAKE_SKIP_BUILD_RPATH OFF)
106-
# Don't use the install-rpath during the build phase
107-
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
108-
# Automatically add all linked folders that are NOT in the build directory to
109-
# the rpath (per library?)
110-
#
111-
# TODO: Doesn't work for us right now because we are not installing .so's into
112-
# the correct locations. For example we have libcustom_ops_aot_lib.so depending
113-
# on _portable_lib.so, which was eventually put under
114-
# <site-packages>/executorch/extension/pybindings/ but this rpath is not
115-
# automatically added because at build time it seems `portable_lib` is being
116-
# built under the same directory, so no extra rpath is being added. To properly
117-
# fix this we need to install `portable_lib` into the correct path.
118-
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
119-
# ------------------------------ OPTIONS -------------------------------------
120-
# WARNING: Please don't add example specific options in this CMakeLists.txt.
121-
# Instead please use `find_package(executorch REQUIRED)` in the example
122-
# directory and add a new executable in the example `CMakeLists.txt`.
123-
124102
if(NOT EXECUTORCH_ENABLE_LOGGING)
125103
# Avoid pulling in the logging strings, which can be large. Note that this
126104
# will set the compiler flag for all targets in this directory, and for all
@@ -909,12 +887,13 @@ if(EXECUTORCH_BUILD_PYBIND)
909887

910888
# Set RPATH to find PyTorch libraries relative to the installation location
911889
# This goes from executorch/extension/pybindings up to site-packages, then to
912-
# torch/lib
890+
# torch/lib. Don't do this to APPLE, as it will error out on the following
891+
# error:
892+
#
913893
if(APPLE)
914-
set_target_properties(
915-
portable_lib PROPERTIES BUILD_RPATH "@loader_path/../../../torch/lib"
916-
INSTALL_RPATH "@loader_path/../../../torch/lib"
917-
)
894+
# Skip setting @loader_path for APPLE, since it causes error like ld:
895+
# duplicate LC_RPATH '@loader_path' in '<site-packages>/torch/lib/
896+
# libtorch_cpu.dylib'
918897
else()
919898
set_target_properties(
920899
portable_lib PROPERTIES BUILD_RPATH "$ORIGIN/../../../torch/lib"

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ For Apple, please refer to the [iOS documentation](docs/source/using-executorch-
2424
executorch
2525
├── <a href="backends">backends</a> - Backend delegate implementations for various hardware targets. Each backend uses partitioner to split the graph into subgraphs that can be executed on specific hardware, quantizer to optimize model precision, and runtime components to execute the graph on target hardware. For details refer to the <a href="docs/source/backend-delegates-integration.md">backend documentation</a> and the <a href="docs/source/using-executorch-export.md">Export and Lowering tutorial</a> for more information.
2626
│ ├── <a href="backends/apple">apple</a> - Apple-specific backends.
27-
│ │ ├── <a href="backends/apple/coreml">coreml</a> - CoreML backend for Apple devices. See <a href="docs/source/backends-coreml.md">doc</a>.
28-
│ │ └── <a href="backends/apple/mps">mps</a> - Metal Performance Shaders backend for Apple devices. See <a href="docs/source/backends-mps.md">doc</a>.
27+
│ │ ├── <a href="backends/apple/coreml">coreml</a> - CoreML backend for Apple devices. See <a href="docs/source/backends/coreml/coreml-overview.md">doc</a>.
28+
│ │ └── <a href="backends/apple/mps">mps</a> - Metal Performance Shaders backend for Apple devices. See <a href="docs/source/backends/mps/mps-overview.md">doc</a>.
2929
│ ├── <a href="backends/arm">arm</a> - ARM architecture backends. See <a href="docs/source/backends-arm-ethos-u.md">doc</a>.
3030
│ ├── <a href="backends/cadence">cadence</a> - Cadence-specific backends. See <a href="docs/source/backends-cadence.md">doc</a>.
3131
│ ├── <a href="backends/example">example</a> - Example backend implementations.

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Copyright (c) 2023 Apple Inc.
99
Copyright (c) 2024 MediaTek Inc.
1010
Copyright 2023 NXP
1111
Copyright (c) 2025 Samsung Electronics Co. LTD
12+
Copyright (c) Intel Corporation
1213

1314
Redistribution and use in source and binary forms, with or without modification,
1415
are permitted provided that the following conditions are met:

README-wheel.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ The prebuilt `executorch.runtime` module included in this package provides a way
1212
to run ExecuTorch `.pte` files, with some restrictions:
1313
* Only [core ATen operators](docs/source/ir-ops-set-definition.md) are linked into the prebuilt module
1414
* Only the [XNNPACK backend delegate](docs/source/backends-xnnpack.md) is linked into the prebuilt module.
15-
* \[macOS only] [Core ML](docs/source/backends-coreml.md) and [MPS](docs/source/backends-mps.md) backend
15+
* \[macOS only] [Core ML](docs/source/backends/coreml/coreml-overview.md) and [MPS](docs/source/backends/mps/mps-overview.md) backend
1616
are also linked into the prebuilt module.
1717

1818
Please visit the [ExecuTorch website](https://pytorch.org/executorch) for

backends/aoti/aoti_delegate_handle.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ using AOTInductorModelContainerGetNumConstantsFunc = AOTIRuntimeError (*)(
7171
AOTInductorModelContainerHandle container_handle,
7272
size_t* num_constants);
7373

74+
// Update the model container with the constant tensors
75+
using AOTInductorModelUpdateConstantsFromBlobFunc = AOTIRuntimeError (*)(
76+
AOTInductorModelContainerHandle container_handle,
77+
const uint8_t* weight_blob_ptr);
78+
7479
} // extern "C"
7580

7681
// AOTI Delegate Handle structure
@@ -87,6 +92,7 @@ struct AOTIDelegateHandle {
8792
AOTInductorModelContainerGetNumInputsFunc get_num_inputs;
8893
AOTInductorModelContainerGetNumOutputsFunc get_num_outputs;
8994
AOTInductorModelContainerRunFunc run;
95+
AOTInductorModelUpdateConstantsFromBlobFunc update_constants_from_blob;
9096
};
9197

9298
} // namespace aoti

0 commit comments

Comments
 (0)