Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
03b306d
Rebase
hinriksnaer Jun 19, 2025
b4dd92b
Updated CMakeLists for torchao
hinriksnaer May 19, 2025
b6e0518
Fixed parameter name
hinriksnaer May 19, 2025
e0e6e98
added missing flag
hinriksnaer May 19, 2025
f575cbf
updated build steps
hinriksnaer May 20, 2025
9b9aa90
up
metascroy May 21, 2025
9e913c4
Work In Progress: changes to build_apple_framework.sh
hinriksnaer May 27, 2025
c16c779
cleaned up the cmake command
hinriksnaer May 27, 2025
c0c26aa
updated cmake and framework build
hinriksnaer May 29, 2025
70c6ae6
up
metascroy May 30, 2025
21bb1c1
added torchao debug xcconfig
hinriksnaer Jun 18, 2025
82659ec
added temporary build scripts
hinriksnaer Jun 18, 2025
402a7ed
aligned submodules
hinriksnaer Jun 19, 2025
e1f9b8a
fixed accidental changes from rebase
hinriksnaer Jun 19, 2025
3b11e4c
added missing torchao param
hinriksnaer Jun 19, 2025
06c2583
Rebase
hinriksnaer Jun 19, 2025
a8f3b35
removed accidental code inclusion due to rebase
hinriksnaer Jun 26, 2025
ed0ccd3
updated apple framework defaults
hinriksnaer Jun 28, 2025
6eebd8d
updated torchao debug
hinriksnaer Jun 28, 2025
8f80fde
updated tokenizer module
hinriksnaer Jun 28, 2025
c146407
updated debug conf
hinriksnaer Jun 29, 2025
e1f0101
added export script
hinriksnaer Jun 29, 2025
5dd6df8
Update comments in CMakeLists.txt
hinriksnaer Jul 1, 2025
5200d68
updated rebase
hinriksnaer Jul 29, 2025
276232e
updated bash formatting
hinriksnaer Jul 28, 2025
58b24c2
Removed dev scripts
hinriksnaer Jul 28, 2025
6a00da3
reverted to existing changes
hinriksnaer Jul 29, 2025
435ffa1
removed dev script
hinriksnaer Jul 29, 2025
16b8984
fixed submodules
hinriksnaer Jul 29, 2025
3547232
Merge branch 'main' into torchao-build
hinriksnaer Jul 29, 2025
03f584a
updated link name
hinriksnaer Jul 29, 2025
eab0f73
changed dependency reference in torchao build
hinriksnaer Jul 30, 2025
935da89
Merge branch 'main' into torchao-build
hinriksnaer Jul 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,36 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
)
endif()

# Optionally build the TorchAO experimental kernels from the third-party/ao
# checkout and append the resulting torchao_ops_executorch target to _dep_libs.
if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
# Intended for Arm-based Macs only.
# NOTE(review): nothing in this block checks the target architecture; the
# AArch64 / NEON-dot settings below are applied whenever the option is ON.
# These TORCHAO_* variables are presumably read by the TorchAO subproject
# added below - confirm against its CMakeLists.
set(TORCHAO_BUILD_ATEN_OPS OFF)
set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
set(TORCHAO_BUILD_CPU_AARCH64 ON)
set(TORCHAO_ENABLE_ARM_NEON_DOT ON)

# Set ExecuTorch headers and core libraries so TorchAO can register ops
# against them. The include dir is the parent of this source directory.
set(EXECUTORCH_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/..)
set(EXECUTORCH_LIBRARIES executorch pthreadpool extension_threadpool cpuinfo)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove these comments


# Only call this on torchao_ops_executorch - pthreadpool is not a shared lib.
# NOTE(review): confirm the helper name; a reviewer noted it may have been
# renamed to executorch_target_link_options_shared_lib.
target_link_options_shared_lib(torchao_ops_executorch)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this line still needed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally this should be in executorch-config.cmake. Doesn't have to be in this PR though.

Copy link
Contributor

@metascroy metascroy Jul 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hinriksnaer it looks like "target_link_options_shared_lib" was renamed to "executorch_target_link_options_shared_lib"?


# Append the TorchAO ops library to _dep_libs (presumably the list of
# libraries packaged downstream - confirm where _dep_libs is consumed).
# pthreadpool itself is not appended here.
list(APPEND _dep_libs torchao_ops_executorch)
endif()

# Test builds pull in CTest and switch on the flat_tensor extension.
if(EXECUTORCH_BUILD_TESTS)
  include(CTest)
  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
endif()

# Compile options collected in _common_compile_options.
# TODO(dbort): Fix these warnings and remove this flag.
set(_common_compile_options
    -Wno-deprecated-declarations
    -fPIC
)

Expand Down
34 changes: 34 additions & 0 deletions examples/demo-apps/apple_ios/LLaMA/torchao_debug.xcconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
ET_PLATFORM[sdk=iphonesimulator*] = simulator
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shoumikhin can you review these changes to iOS demo app?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be removed. Although including this in the documentation in some form would be a good idea for future workstreams.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is not needed.

// ET_PLATFORM is set per SDK and is used below as the suffix of the static
// library file names (lib<name>_$(ET_PLATFORM).a).
ET_PLATFORM[sdk=iphoneos*] = ios
ET_PLATFORM[sdk=macos*] = macos

// Linker flags: each ExecuTorch static library is passed with -force_load
// (presumably so that statically registered backends/kernels are not dropped
// by the linker - confirm), followed by the system frameworks, sqlite3, and a
// response file of extra flags read from $(TEMP_DIR)/cmake/linker_flags.
// The libkernels_torchao entry is the TorchAO kernels library.
OTHER_LDFLAGS = $(inherited) \
-force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \
-framework Foundation \
-framework CoreML \
-framework Accelerate \
-framework Metal \
-framework MetalPerformanceShaders \
-framework MetalPerformanceShadersGraph \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_torchao_$(ET_PLATFORM).a \
-lsqlite3 \
@$(TEMP_DIR)/cmake/linker_flags

// LLaMARunner requires additional dependencies built with CMake in a custom run script phase.
// Include headers and libraries from $(TEMP_DIR)/cmake for it.
// The remaining entries add the repository root (five levels above SRCROOT)
// and the tokenizer / sentencepiece headers under extension/llm/tokenizers.
HEADER_SEARCH_PATHS = $(inherited) \
$(SRCROOT)/../../../../.. \
$(TEMP_DIR)/cmake/include \
$(SRCROOT)/../../../../extension/llm/tokenizers/include \
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece \
$(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece/src

// Libraries produced by the CMake run script phase are looked up here.
LIBRARY_SEARCH_PATHS = $(inherited) \
$(TEMP_DIR)/cmake/lib

23 changes: 23 additions & 0 deletions export_llama_torchao.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Export Llama 3.2 with TorchAO low-bit quantization (parameters set below)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hinriksnaer this file should be removed. It looks like it is just for testing?

# Set these paths to point to the downloaded files
# (both are relative to the directory the script is run from).
LLAMA_CHECKPOINT=../../../.llama/checkpoints/Llama-3.2-3B-Instruct/original/consolidated.00.pth
LLAMA_PARAMS=../../../.llama/checkpoints/Llama-3.2-3B-Instruct/original/params.json

# Set low-bit quantization parameters
QLINEAR_BITWIDTH=4 # Can be 1-8
QLINEAR_GROUP_SIZE=128 # Must be multiple of 16
QEMBEDDING_BITWIDTH=4 # Can be 1-8
QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16

# Run the export_llm module and write llama3_2.pte.
# - "${VAR:?}" makes the shell abort with an error if VAR is unset or empty.
# - quantization.qmode expands to "torchao:8da<QLINEAR_BITWIDTH>w".
# - quantization.embedding_quantize is passed with literal single quotes
#   (the \' escapes) around "torchao:<bitwidth>,<group size>"; presumably the
#   config parser needs them because of the comma - confirm before changing.
# - base.metadata is passed as a JSON string wrapped in literal double quotes.
python -m extension.llm.export.export_llm \
base.model_class="llama3_2" \
base.checkpoint="${LLAMA_CHECKPOINT:?}" \
base.params="${LLAMA_PARAMS:?}" \
model.use_kv_cache=True \
model.use_sdpa_with_kv_cache=True \
base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"' \
export.output_name="llama3_2.pte" \
quantization.qmode="torchao:8da${QLINEAR_BITWIDTH}w" \
quantization.group_size=${QLINEAR_GROUP_SIZE} \
quantization.embedding_quantize=\'torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}\' \
model.dtype_override="fp32"
25 changes: 25 additions & 0 deletions run_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file looks like it was for your local debugging and shouldn't be merged?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this and all the new .sh are scripts that I used while developing. I'll clean it up unless anyone else beats me to it.


# Local development helper: reinstall ExecuTorch, then configure, build and
# install a Release CMake build (with the TorchAO kernels enabled) into
# ./cmake-out. Run from the repository root.

# Run the installer with --clean first, then again without flags.
bash install_executorch.sh --clean

bash install_executorch.sh

pip install -e .

# Locate the active interpreter's site-packages directory for CMAKE_PREFIX_PATH.
# Uses sysconfig rather than distutils: distutils was removed from the standard
# library in Python 3.12, so the previous distutils-based lookup fails there.
PYTHON_SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"

# Configure. XNNPACK is disabled; the quantized, optimized, custom and TorchAO
# kernel libraries are enabled. The prefix path is quoted so that a
# site-packages path containing spaces is passed as a single argument.
cmake -DPYTHON_EXECUTABLE=python \
  -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DEXECUTORCH_ENABLE_LOGGING=1 \
  -DCMAKE_BUILD_TYPE=Release \
  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
  -DEXECUTORCH_BUILD_XNNPACK=OFF \
  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
  -DCMAKE_PREFIX_PATH="${PYTHON_SITE_PACKAGES}" \
  -Bcmake-out .

# Build and install with 16 parallel jobs.
cmake --build cmake-out -j16 --target install --config Release

15 changes: 15 additions & 0 deletions run_ios_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Local development helper: reinstall ExecuTorch, install the Apple backend
# requirements, then build the Apple frameworks in Release mode.
# Paths are relative, so run this from the repository root.

# Run the installer with --clean first, then again without flags.
bash install_executorch.sh --clean

bash install_executorch.sh

pip install -e .

./install_requirements.sh

# CoreML-only requirements:
./backends/apple/coreml/scripts/install_requirements.sh

# MPS-only requirements:
./backends/apple/mps/install_requirements.sh

# Build the frameworks for the Release configuration only.
./scripts/build_apple_frameworks.sh --Release
15 changes: 15 additions & 0 deletions run_torchao_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Local development helper: same steps as a regular Apple framework build, but
# passes --torchao to build_apple_frameworks.sh to build the TorchAO kernels.
# Paths are relative, so run this from the repository root.

# Run the installer with --clean first, then again without flags.
bash install_executorch.sh --clean

bash install_executorch.sh

pip install -e .

./install_requirements.sh

# CoreML-only requirements:
./backends/apple/coreml/scripts/install_requirements.sh

# MPS-only requirements:
./backends/apple/mps/install_requirements.sh

# Release configuration, with the TorchAO kernels requested explicitly.
./scripts/build_apple_frameworks.sh --Release --torchao
79 changes: 44 additions & 35 deletions scripts/build_apple_frameworks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ libquantized_kernels.a,\
libquantized_ops_lib.a,\
:"

FRAMEWORK_KERNELS_TORCHAO="kernels_torchao:\
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before testing xcframework, let's verify your changes work on macOS desktop first.

libtorchao_ops_executorch.a,\
libtorchao_kernels_aarch64.a,\
:"

usage() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shoumikhin can you review these changes to xcframeworks, and comment on any tests that should be run?

echo "Usage: $0 [OPTIONS]"
echo "Build frameworks for Apple platforms."
Expand All @@ -83,6 +88,7 @@ usage() {
echo " --optimized Only build the Optimized kernels."
echo " --quantized Only build the Quantized kernels."
echo " --xnnpack Only build the XNNPACK backend."
echo " --torchao Build the TorchAO kernels."
echo
exit 0
}
Expand All @@ -100,6 +106,7 @@ set_cmake_options_override() {
"-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=OFF"
"-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF"
"-DEXECUTORCH_BUILD_XNNPACK=OFF"
"-DEXECUTORCH_BUILD_KERNELS_TORCHAO=OFF"
)
fi

Expand All @@ -113,27 +120,28 @@ set_cmake_options_override() {

for arg in "$@"; do
case $arg in
-h|--help) usage ;;
--Release)
if [[ ! " ${MODES[*]:-} " =~ \bRelease\b ]]; then
MODES+=("Release")
fi
;;
--Debug)
if [[ ! " ${MODES[*]:-} " =~ \bDebug\b ]]; then
MODES+=("Debug")
fi
;;
--coreml) set_cmake_options_override "EXECUTORCH_BUILD_COREML";;
--custom) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_CUSTOM" ;;
--mps) set_cmake_options_override "EXECUTORCH_BUILD_MPS" ;;
--optimized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" ;;
--quantized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_QUANTIZED" ;;
--xnnpack) set_cmake_options_override "EXECUTORCH_BUILD_XNNPACK" ;;
*)
echo -e "\033[31m[error] unknown option: ${arg}\033[0m"
exit 1
;;
-h | --help) usage ;;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: need to change the indentation?

--Release)
if [[ ! " ${MODES[*]:-} " =~ \bRelease\b ]]; then
MODES+=("Release")
fi
;;
--Debug)
if [[ ! " ${MODES[*]:-} " =~ \bDebug\b ]]; then
MODES+=("Debug")
fi
;;
--coreml) set_cmake_options_override "EXECUTORCH_BUILD_COREML" ;;
--custom) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_CUSTOM" ;;
--mps) set_cmake_options_override "EXECUTORCH_BUILD_MPS" ;;
--optimized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" ;;
--quantized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_QUANTIZED" ;;
--xnnpack) set_cmake_options_override "EXECUTORCH_BUILD_XNNPACK" ;;
--torchao) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_TORCHAO" ;;
*)
echo -e "\033[31m[error] unknown option: ${arg}\033[0m"
exit 1
;;
esac
done

Expand All @@ -149,15 +157,15 @@ for preset_index in "${!PRESETS[@]}"; do

# Do NOT add options here. Update the respective presets instead.
cmake -S "${SOURCE_ROOT_DIR}" \
-B "${preset_output_dir}" \
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="${preset_output_dir}" \
-DCMAKE_BUILD_TYPE="${mode}" \
${CMAKE_OPTIONS_OVERRIDE[@]:-} \
--preset "${preset}"
-B "${preset_output_dir}" \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: restore indentation?

-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="${preset_output_dir}" \
-DCMAKE_BUILD_TYPE="${mode}" \
${CMAKE_OPTIONS_OVERRIDE[@]:-} \
--preset "${preset}"

cmake --build "${preset_output_dir}" \
--config "${mode}" \
-j$(sysctl -n hw.ncpu)
--config "${mode}" \
-j$(sysctl -n hw.ncpu)
done
done

Expand All @@ -167,8 +175,8 @@ mkdir -p "$HEADERS_ABSOLUTE_PATH"

"$SOURCE_ROOT_DIR"/scripts/print_exported_headers.py --buck2=$(realpath "$BUCK2") --targets \
//extension/module: \
//extension/tensor: \
| rsync -av --files-from=- "$SOURCE_ROOT_DIR" "$HEADERS_ABSOLUTE_PATH/executorch"
//extension/tensor: |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert?

rsync -av --files-from=- "$SOURCE_ROOT_DIR" "$HEADERS_ABSOLUTE_PATH/executorch"

# HACK: XCFrameworks don't appear to support exporting any build
# options, but we need the following:
Expand All @@ -187,7 +195,7 @@ cp -r $HEADERS_ABSOLUTE_PATH/executorch/runtime/core/portable_type/c10/torch "$H

cp "$SOURCE_ROOT_DIR/extension/apple/ExecuTorch/Exported/"*.h "$HEADERS_ABSOLUTE_PATH/executorch"

cat > "$HEADERS_ABSOLUTE_PATH/module.modulemap" << 'EOF'
cat >"$HEADERS_ABSOLUTE_PATH/module.modulemap" <<'EOF'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: restore formatting?

module ExecuTorch {
umbrella header "ExecuTorch/ExecuTorch.h"
export *
Expand All @@ -211,10 +219,10 @@ append_framework_flag() {
fi

if [[ -n "$mode" && "$mode" != "Release" ]]; then
local name spec
name=$(echo "$framework" | cut -d: -f1)
spec=$(echo "$framework" | cut -d: -f2-)
framework="${name}_$(echo "$mode" | tr '[:upper:]' '[:lower:]'):${spec}"
local name spec
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: restore indentation?

name=$(echo "$framework" | cut -d: -f1)
spec=$(echo "$framework" | cut -d: -f2-)
framework="${name}_$(echo "$mode" | tr '[:upper:]' '[:lower:]'):${spec}"
fi
echo "Adding framework: ${framework}"
FRAMEWORK_FLAGS+=("--framework=$framework")
Expand All @@ -235,6 +243,7 @@ for mode in "${MODES[@]}"; do
append_framework_flag "EXECUTORCH_BUILD_KERNELS_CUSTOM" "$FRAMEWORK_KERNELS_CUSTOM" "$mode"
append_framework_flag "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" "$FRAMEWORK_KERNELS_OPTIMIZED" "$mode"
append_framework_flag "EXECUTORCH_BUILD_KERNELS_QUANTIZED" "$FRAMEWORK_KERNELS_QUANTIZED" "$mode"
append_framework_flag "EXECUTORCH_BUILD_KERNELS_TORCHAO" "$FRAMEWORK_KERNELS_TORCHAO" "$mode"

cd "${OUTPUT_DIR}"
"$SOURCE_ROOT_DIR"/scripts/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}"
Expand Down
9 changes: 9 additions & 0 deletions tools/cmake/executorch-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ set(lib_list
quantized_kernels
quantized_ops_lib
quantized_ops_aot_lib
torchao_ops_executorch
torchao_kernels_aarch64
)
foreach(lib ${lib_list})
# Name of the variable which stores result of the find_library search
Expand Down Expand Up @@ -141,6 +143,13 @@ if(TARGET optimized_kernels)
)
endif()

# When the TorchAO ops library target exists, record the libraries that anything
# linking against it must link as well.
if(TARGET torchao_ops_executorch)
  set_property(
    TARGET torchao_ops_executorch
    PROPERTY INTERFACE_LINK_LIBRARIES
             executorch_core
             extension_threadpool
             cpuinfo
             pthreadpool
  )
endif()

if(TARGET coremldelegate)
set_target_properties(
coremldelegate PROPERTIES INTERFACE_LINK_LIBRARIES
Expand Down
1 change: 1 addition & 0 deletions tools/cmake/preset/apple_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
# Kernel libraries switched ON by default for the Apple presets.
set_overridable_option(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
# NOTE(review): the TorchAO block in the root CMakeLists.txt forces the
# AArch64 / NEON-dot kernels ON without an architecture check; confirm that
# every Apple target using this common preset can build them.
set_overridable_option(EXECUTORCH_BUILD_KERNELS_TORCHAO ON)
Loading