
Commit ea4a7fa

Fix torchao deps (#13107)
This PR:
* Renames EXECUTORCH_BUILD_TORCHAO to EXECUTORCH_BUILD_KERNELS_TORCHAO to be more in line with other kernel options (e.g., EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
* Fixes torchao lowbit kernel dependencies in xcframeworks
* Adds torchao lowbit kernels to the Swift package
1 parent 7963bbb commit ea4a7fa
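For context, the rename changes configure invocations like so (a sketch with all other flags omitted; see the CI script diff below for full invocations):

# Before this commit:
cmake -DEXECUTORCH_BUILD_TORCHAO=ON -Bcmake-out .

# After this commit, matching options like EXECUTORCH_BUILD_KERNELS_OPTIMIZED:
cmake -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON -Bcmake-out .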

File tree

12 files changed (+91 -59 lines changed)


.Package.swift/kernels_torchao/dummy.swift

Whitespace-only changes.

.Package.swift/kernels_torchao_debug/dummy.swift

Whitespace-only changes.

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 5 additions & 10 deletions
@@ -29,27 +29,22 @@ cmake -DPYTHON_EXECUTABLE=python \
   -DEXECUTORCH_ENABLE_LOGGING=1 \
   -DCMAKE_BUILD_TYPE=Release \
   -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
   -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
   -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
   -DEXECUTORCH_BUILD_XNNPACK=OFF \
   -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+  -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
   -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
   -Bcmake-out .
-cmake --build cmake-out -j16 --target install --config Release
+cmake --build cmake-out -j16 --config Release --target install
 
 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
-  -DBUILD_TESTING=OFF \
   -DCMAKE_BUILD_TYPE=Release \
-  -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-  -DEXECUTORCH_BUILD_XNNPACK=OFF \
-  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-  -DEXECUTORCH_BUILD_TORCHAO=ON \
   -Bcmake-out/examples/models/llama \
   examples/models/llama
 cmake --build cmake-out/examples/models/llama -j16 --config Release

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -485,7 +485,7 @@ jobs:
       eval "$(conda shell.bash hook)"
 
       # Install requirements
-      ${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
+      ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
       ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
 
       # Run test

CMakeLists.txt

Lines changed: 59 additions & 23 deletions
@@ -278,29 +278,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
   )
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
-  set(TORCHAO_BUILD_ATEN_OPS OFF)
-  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
-  set(TORCHAO_BUILD_CPU_AARCH64 ON)
-  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
-
-  list(
-    APPEND
-    TORCHAO_INCLUDE_DIRS
-    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
-    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
-    ${EXECUTORCH_ROOT}/third-party/ao
-  )
-
-  set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
-
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
-  )
-  executorch_target_link_options_shared_lib(torchao_ops_executorch)
-  list(APPEND _executorch_kernels torchao_ops_executorch)
-endif()
-
 if(EXECUTORCH_BUILD_TESTS)
   set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
   include(CTest)
@@ -705,6 +682,65 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
 endif()
 
+if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
+  if(NOT TARGET cpuinfo)
+    message(
+      FATAL_ERROR
+        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_CPUINFO be set ON"
+    )
+  endif()
+  if(NOT TARGET pthreadpool)
+    message(
+      FATAL_ERROR
+        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_PTHREADPOOL be set ON"
+    )
+  endif()
+
+  # Configure TorchAO kernels
+  set(TORCHAO_BUILD_ATEN_OPS OFF)
+  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
+  set(TORCHAO_BUILD_CPU_AARCH64 ON)
+  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
+  set(TORCHAO_BUILD_KLEIDIAI ON)
+
+  # TorchAO kernels look for EXECUTORCH_INCLUDE_DIRS
+  if(DEFINED EXECUTORCH_INCLUDE_DIRS)
+    message(FATAL_ERROR "EXECUTORCH_INCLUDE_DIRS is already defined")
+  endif()
+  set(EXECUTORCH_INCLUDE_DIRS
+      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
+      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
+  )
+  add_subdirectory(
+    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
+  )
+  unset(EXECUTORCH_INCLUDE_DIRS)
+
+  executorch_target_link_options_shared_lib(torchao_ops_executorch)
+  list(APPEND _executorch_kernels torchao_ops_executorch)
+
+  install(
+    TARGETS torchao_ops_executorch torchao_kernels_aarch64
+    EXPORT ExecuTorchTargets
+    DESTINATION lib
+    INCLUDES
+    DESTINATION ${_common_include_directories}
+  )
+  # If using KleidiAI and XNNPACK has not installed it already, install it
+  if(TORCHAO_BUILD_KLEIDIAI AND NOT (EXECUTORCH_BUILD_XNNPACK
+                                     AND EXECUTORCH_XNNPACK_ENABLE_KLEIDI)
+  )
+    install(
+      TARGETS kleidiai
+      EXPORT ExecuTorchTargets
+      DESTINATION lib
+      INCLUDES
+      DESTINATION ${_common_include_directories}
+    )
+  endif()
+
+endif()
+
 if(EXECUTORCH_BUILD_PYBIND)
 
   # Add codegen tools subdirectory for selective_build pybind module
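The relocated block also makes the dependency ordering explicit: the torchao kernels are configured only after the cpuinfo and pthreadpool targets exist, and the build fails fast otherwise. A minimal configure that satisfies those checks might look like the following (a hedged sketch; EXECUTORCH_BUILD_CPUINFO and EXECUTORCH_BUILD_PTHREADPOOL may already default to ON in your setup, and any backend or extension flags are up to you):

# Hypothetical configure enabling the torchao kernels with their required deps.
cmake -DCMAKE_BUILD_TYPE=Release \
  -DEXECUTORCH_BUILD_CPUINFO=ON \
  -DEXECUTORCH_BUILD_PTHREADPOOL=ON \
  -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
  -Bcmake-out .
cmake --build cmake-out -j16 --config Release --target install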

Package.swift

Lines changed: 5 additions & 0 deletions
@@ -84,6 +84,11 @@ let products = deliverables([
     ],
   ],
   "kernels_quantized": [:],
+  "kernels_torchao": [
+    "targets": [
+      "threadpool",
+    ],
+  ],
 ])
 
 let targets = deliverables([

docs/source/using-executorch-ios.md

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ The ExecuTorch Runtime for iOS and macOS (ARM64) is distributed as a collection
 * `kernels_llm` - Custom kernels for LLMs
 * `kernels_optimized` - Accelerated generic CPU kernels
 * `kernels_quantized` - Quantized kernels
+* `kernels_torchao` - Quantized CPU kernels from torchao
 
 Link your binary with the ExecuTorch runtime and any backends or kernels used by the exported ML model. It is recommended to link the core runtime to the components that use ExecuTorch directly, and link kernels and backends against the main app target.
 

examples/models/llama/CMakeLists.txt

Lines changed: 7 additions & 12 deletions
@@ -37,7 +37,7 @@ cmake_dependent_option(
   "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
 )
 
-option(EXECUTORCH_BUILD_TORCHAO "Build the torchao kernels" OFF)
+option(EXECUTORCH_BUILD_KERNELS_TORCHAO_MPS "Build the torchao MPS kernels" OFF)
 
 if(NOT PYTHON_EXECUTABLE)
   set(PYTHON_EXECUTABLE python3)
@@ -115,21 +115,16 @@ if(TARGET custom_ops)
   list(APPEND link_libraries custom_ops)
 endif()
 
-if(EXECUTORCH_BUILD_TORCHAO)
+if(TARGET torchao_ops_executorch)
+  executorch_target_link_options_shared_lib(torchao_ops_executorch)
+  list(APPEND link_libraries torchao_ops_executorch)
+endif()
+
+if(EXECUTORCH_BUILD_KERNELS_TORCHAO_MPS)
   # Currently only enable this on Arm-based Macs
   if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL
                                              "arm64"
   )
-    set(TORCHAO_BUILD_ATEN_OPS OFF)
-    set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
-    set(TORCHAO_BUILD_CPU_AARCH64 ON)
-    set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
-    add_subdirectory(
-      ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/ao/torchao/experimental
-      ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/ao/torchao/experimental
-    )
-    executorch_target_link_options_shared_lib(torchao_ops_executorch)
-    list(APPEND link_libraries torchao_ops_executorch)
     if(EXECUTORCH_BUILD_MPS)
       add_subdirectory(
         ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/ao/torchao/experimental/ops/mps
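With this change the llama example no longer builds torchao itself: it links torchao_ops_executorch whenever the top-level build exported that target, and the new EXECUTORCH_BUILD_KERNELS_TORCHAO_MPS option controls only the experimental MPS kernels. A hedged sketch of building the example against such a tree (flag values are illustrative):

# Assumes the main build was configured with -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON
# and installed to cmake-out; the MPS kernels are opt-in and Arm-based Macs only.
cmake -DPYTHON_EXECUTABLE=python \
  -DCMAKE_BUILD_TYPE=Release \
  -DEXECUTORCH_BUILD_KERNELS_TORCHAO_MPS=ON \
  -Bcmake-out/examples/models/llama \
  examples/models/llama
cmake --build cmake-out/examples/models/llama -j16 --config Release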

examples/models/llama/README.md

Lines changed: 8 additions & 8 deletions
@@ -340,11 +340,13 @@ Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-de
 
 ## Running with low-bit kernels
 
-We now give instructions for quantizating and running your model with low-bit kernels. These are still experimental, and require you do development on an Arm-based Mac, and install executorch from source with the environment variable EXECUTORCH_BUILD_TORCHAO=1 defined:
+We now give instructions for quantizating and running your model with low-bit kernels. These are still experimental, and require you do development on an Arm-based Mac, and install executorch from source with the environment variable EXECUTORCH_BUILD_KERNELS_TORCHAO=1 defined:
 ```
-EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
+EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
 ```
 
+(If you'd like lowbit to use KleidiAI when available, you can instead install with `EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_KLEIDIAI=1 python install_executorch.py`.)
+
 Also note that low-bit quantization often requires QAT (quantization-aware training) to give good quality results.
 
 First export your model for lowbit quantization (step 2 above):
@@ -394,21 +396,19 @@ cmake -DPYTHON_EXECUTABLE=python \
   -DEXECUTORCH_BUILD_XNNPACK=OFF \
   -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+  -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
   -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
   -Bcmake-out .
-cmake --build cmake-out -j16 --target install --config Release
+cmake --build cmake-out -j16 --config Release --target install
 ```
 
 Next install the llama runner with torchao kernels enabled (similar to step 3.2 above):
 
 ```
 cmake -DPYTHON_EXECUTABLE=python \
   -DCMAKE_BUILD_TYPE=Release \
-  -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-  -DEXECUTORCH_BUILD_XNNPACK=OFF \
-  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-  -DEXECUTORCH_BUILD_TORCHAO=ON \
   -Bcmake-out/examples/models/llama \
   examples/models/llama
 cmake --build cmake-out/examples/models/llama -j16 --config Release

install_requirements.py

Lines changed: 3 additions & 3 deletions
@@ -118,12 +118,12 @@ def install_requirements(use_pytorch_nightly):
     # Install packages directly from local copy instead of pypi.
     # This is usually not recommended.
     new_env = os.environ.copy()
-    if ("EXECUTORCH_BUILD_TORCHAO" not in new_env) or (
-        new_env["EXECUTORCH_BUILD_TORCHAO"] == "0"
+    if ("EXECUTORCH_BUILD_KERNELS_TORCHAO" not in new_env) or (
+        new_env["EXECUTORCH_BUILD_KERNELS_TORCHAO"] == "0"
     ):
         new_env["USE_CPP"] = "0"
     else:
-        assert new_env["EXECUTORCH_BUILD_TORCHAO"] == "1"
+        assert new_env["EXECUTORCH_BUILD_KERNELS_TORCHAO"] == "1"
         new_env["USE_CPP"] = "1"
     new_env["CMAKE_POLICY_VERSION_MINIMUM"] = "3.5"
     subprocess.run(
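Given that gating, the two install paths look like this (a sketch mirroring the README and CI changes above):

# Default source install: the env var is unset, so USE_CPP stays "0" and the
# torchao lowbit kernels are skipped.
python install_executorch.py

# Opt in to the torchao lowbit kernels; this sets USE_CPP=1 internally.
EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py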
