-
Couldn't load subscription status.
- Fork 700
Add torchao kernels to xcframework #10963
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 23 commits
03b306d
b4dd92b
b6e0518
e0e6e98
f575cbf
9b9aa90
9e913c4
c16c779
c0c26aa
70c6ae6
21bb1c1
82659ec
402a7ed
e1f9b8a
3b11e4c
06c2583
a8f3b35
ed0ccd3
6eebd8d
8f80fde
c146407
e1f0101
5dd6df8
5200d68
276232e
58b24c2
6a00da3
435ffa1
16b8984
3547232
03f584a
eab0f73
935da89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -252,6 +252,24 @@ if(EXECUTORCH_BUILD_PTHREADPOOL) | |
| ) | ||
| endif() | ||
|
|
||
| # Optionally build the torchao kernels and add them to the dependency list. | ||
| if(EXECUTORCH_BUILD_KERNELS_TORCHAO) | ||
| # Variables set ahead of add_subdirectory() below; presumably read by the | ||
| # torchao build (confirm against third-party/ao/torchao/experimental). | ||
| set(TORCHAO_BUILD_ATEN_OPS OFF) | ||
| set(TORCHAO_BUILD_EXECUTORCH_OPS ON) | ||
| set(TORCHAO_BUILD_CPU_AARCH64 ON) | ||
| set(TORCHAO_ENABLE_ARM_NEON_DOT ON) | ||
| # Quoted so a checkout path containing ';' is not split into a list. | ||
| set(EXECUTORCH_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/..") | ||
| set(EXECUTORCH_LIBRARIES executorch pthreadpool extension_threadpool cpuinfo) | ||
|
|
||
| add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental") | ||
| # Helper defined elsewhere in the project; applied to the torchao ops target. | ||
| target_link_options_shared_lib(torchao_ops_executorch) | ||
|
||
| list(APPEND _dep_libs torchao_ops_executorch) | ||
| endif() | ||
|
|
||
| if(EXECUTORCH_BUILD_TESTS) | ||
| set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) | ||
| include(CTest) | ||
| endif() | ||
|
|
||
| # TODO(dbort): Fix these warnings and remove this flag. | ||
| set(_common_compile_options -Wno-deprecated-declarations -fPIC) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| ET_PLATFORM[sdk=iphonesimulator*] = simulator | ||
|
||
| ET_PLATFORM[sdk=iphoneos*] = ios | ||
| ET_PLATFORM[sdk=macos*] = macos | ||
|
|
||
| OTHER_LDFLAGS = $(inherited) \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \ | ||
| -framework Foundation \ | ||
| -framework CoreML \ | ||
| -framework Accelerate \ | ||
| -framework Metal \ | ||
| -framework MetalPerformanceShaders \ | ||
| -framework MetalPerformanceShadersGraph \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \ | ||
| -force_load $(BUILT_PRODUCTS_DIR)/libkernels_torchao_$(ET_PLATFORM).a \ | ||
| -lsqlite3 \ | ||
| @$(TEMP_DIR)/cmake/linker_flags | ||
|
|
||
| // LLaMARunner requires additional dependencies built with CMake in a custom run script phase. | ||
| // Include headers and libraries from $(TEMP_DIR)/cmake for it. | ||
| HEADER_SEARCH_PATHS = $(inherited) \ | ||
| $(SRCROOT)/../../../../.. \ | ||
| $(TEMP_DIR)/cmake/include \ | ||
| $(SRCROOT)/../../../../extension/llm/tokenizers/include \ | ||
| $(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece \ | ||
| $(SRCROOT)/../../../../extension/llm/tokenizers/third-party/sentencepiece/src | ||
|
|
||
| LIBRARY_SEARCH_PATHS = $(inherited) \ | ||
| $(TEMP_DIR)/cmake/lib | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| # Low-bit quantization with torchao | ||
|
||
| # Set these paths to point to the downloaded files | ||
| LLAMA_CHECKPOINT=../../../.llama/checkpoints/Llama-3.2-3B-Instruct/original/consolidated.00.pth | ||
| LLAMA_PARAMS=../../../.llama/checkpoints/Llama-3.2-3B-Instruct/original/params.json | ||
|
|
||
| # Set low-bit quantization parameters | ||
| QLINEAR_BITWIDTH=4 # Can be 1-8 | ||
| QLINEAR_GROUP_SIZE=128 # Must be multiple of 16 | ||
| QEMBEDDING_BITWIDTH=4 # Can be 1-8 | ||
| QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16 | ||
|
|
||
| python -m extension.llm.export.export_llm \ | ||
| base.model_class="llama3_2" \ | ||
| base.checkpoint="${LLAMA_CHECKPOINT:?}" \ | ||
| base.params="${LLAMA_PARAMS:?}" \ | ||
| model.use_kv_cache=True \ | ||
| model.use_sdpa_with_kv_cache=True \ | ||
| base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"' \ | ||
| export.output_name="llama3_2.pte" \ | ||
| quantization.qmode="torchao:8da${QLINEAR_BITWIDTH}w" \ | ||
| quantization.group_size=${QLINEAR_GROUP_SIZE} \ | ||
| quantization.embedding_quantize=\'torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}\' \ | ||
| model.dtype_override="fp32" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
|
|
||
|
||
|
|
||
| bash install_executorch.sh --clean | ||
|
|
||
| bash install_executorch.sh | ||
|
|
||
| pip install -e . | ||
|
|
||
| # Configure the build into cmake-out with the torchao kernels enabled. | ||
| # CMAKE_PREFIX_PATH is set to the active interpreter's site-packages directory, | ||
| # queried via the stdlib sysconfig module (distutils was removed in Python 3.12). | ||
| cmake -DPYTHON_EXECUTABLE=python \ | ||
| -DCMAKE_INSTALL_PREFIX=cmake-out \ | ||
| -DEXECUTORCH_ENABLE_LOGGING=1 \ | ||
| -DCMAKE_BUILD_TYPE=Release \ | ||
| -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ | ||
| -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ | ||
| -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ | ||
| -DEXECUTORCH_BUILD_XNNPACK=OFF \ | ||
| -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ | ||
| -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ | ||
| -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ | ||
| -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \ | ||
| -DCMAKE_PREFIX_PATH="$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')" \ | ||
| -Bcmake-out . | ||
|
|
||
| cmake --build cmake-out -j16 --target install --config Release | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| bash install_executorch.sh --clean | ||
|
|
||
| bash install_executorch.sh | ||
|
|
||
| pip install -e . | ||
|
|
||
| ./install_requirements.sh | ||
|
|
||
| # CoreML-only requirements: | ||
| ./backends/apple/coreml/scripts/install_requirements.sh | ||
|
|
||
| # MPS-only requirements: | ||
| ./backends/apple/mps/install_requirements.sh | ||
|
|
||
| ./scripts/build_apple_frameworks.sh --Release |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| bash install_executorch.sh --clean | ||
|
|
||
| bash install_executorch.sh | ||
|
|
||
| pip install -e . | ||
|
|
||
| ./install_requirements.sh | ||
|
|
||
| # CoreML-only requirements: | ||
| ./backends/apple/coreml/scripts/install_requirements.sh | ||
|
|
||
| # MPS-only requirements: | ||
| ./backends/apple/mps/install_requirements.sh | ||
|
|
||
| ./scripts/build_apple_frameworks.sh --Release --torchao |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,11 @@ libquantized_kernels.a,\ | |
| libquantized_ops_lib.a,\ | ||
| :" | ||
|
|
||
| FRAMEWORK_KERNELS_TORCHAO="kernels_torchao:\ | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Before testing the xcframework, let's verify your changes work on macOS desktop first. |
||
| libtorchao_ops_executorch.a,\ | ||
| libtorchao_kernels_aarch64.a,\ | ||
| :" | ||
|
|
||
| usage() { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @shoumikhin can you review these changes to xcframeworks, and comment on any tests that should be run? |
||
| echo "Usage: $0 [OPTIONS]" | ||
| echo "Build frameworks for Apple platforms." | ||
|
|
@@ -83,6 +88,7 @@ usage() { | |
| echo " --optimized Only build the Optimized kernels." | ||
| echo " --quantized Only build the Quantized kernels." | ||
| echo " --xnnpack Only build the XNNPACK backend." | ||
| echo " --torchao Build the TorchAO kernels." | ||
| echo | ||
| exit 0 | ||
| } | ||
|
|
@@ -100,6 +106,7 @@ set_cmake_options_override() { | |
| "-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=OFF" | ||
| "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF" | ||
| "-DEXECUTORCH_BUILD_XNNPACK=OFF" | ||
| "-DEXECUTORCH_BUILD_KERNELS_TORCHAO=OFF" | ||
hinriksnaer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
| fi | ||
|
|
||
|
|
@@ -113,27 +120,28 @@ set_cmake_options_override() { | |
|
|
||
| for arg in "$@"; do | ||
| case $arg in | ||
| -h|--help) usage ;; | ||
| --Release) | ||
| if [[ ! " ${MODES[*]:-} " =~ \bRelease\b ]]; then | ||
| MODES+=("Release") | ||
| fi | ||
| ;; | ||
| --Debug) | ||
| if [[ ! " ${MODES[*]:-} " =~ \bDebug\b ]]; then | ||
| MODES+=("Debug") | ||
| fi | ||
| ;; | ||
| --coreml) set_cmake_options_override "EXECUTORCH_BUILD_COREML";; | ||
| --custom) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_CUSTOM" ;; | ||
| --mps) set_cmake_options_override "EXECUTORCH_BUILD_MPS" ;; | ||
| --optimized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" ;; | ||
| --quantized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_QUANTIZED" ;; | ||
| --xnnpack) set_cmake_options_override "EXECUTORCH_BUILD_XNNPACK" ;; | ||
| *) | ||
| echo -e "\033[31m[error] unknown option: ${arg}\033[0m" | ||
| exit 1 | ||
| ;; | ||
| -h | --help) usage ;; | ||
|
||
| --Release) | ||
| if [[ ! " ${MODES[*]:-} " =~ \bRelease\b ]]; then | ||
| MODES+=("Release") | ||
| fi | ||
| ;; | ||
| --Debug) | ||
| if [[ ! " ${MODES[*]:-} " =~ \bDebug\b ]]; then | ||
| MODES+=("Debug") | ||
| fi | ||
| ;; | ||
| --coreml) set_cmake_options_override "EXECUTORCH_BUILD_COREML" ;; | ||
| --custom) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_CUSTOM" ;; | ||
| --mps) set_cmake_options_override "EXECUTORCH_BUILD_MPS" ;; | ||
| --optimized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" ;; | ||
| --quantized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_QUANTIZED" ;; | ||
| --xnnpack) set_cmake_options_override "EXECUTORCH_BUILD_XNNPACK" ;; | ||
| --torchao) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_TORCHAO" ;; | ||
| *) | ||
| echo -e "\033[31m[error] unknown option: ${arg}\033[0m" | ||
| exit 1 | ||
| ;; | ||
| esac | ||
| done | ||
|
|
||
|
|
@@ -149,15 +157,15 @@ for preset_index in "${!PRESETS[@]}"; do | |
|
|
||
| # Do NOT add options here. Update the respective presets instead. | ||
| cmake -S "${SOURCE_ROOT_DIR}" \ | ||
| -B "${preset_output_dir}" \ | ||
| -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="${preset_output_dir}" \ | ||
| -DCMAKE_BUILD_TYPE="${mode}" \ | ||
| ${CMAKE_OPTIONS_OVERRIDE[@]:-} \ | ||
| --preset "${preset}" | ||
| -B "${preset_output_dir}" \ | ||
|
||
| -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY="${preset_output_dir}" \ | ||
| -DCMAKE_BUILD_TYPE="${mode}" \ | ||
| ${CMAKE_OPTIONS_OVERRIDE[@]:-} \ | ||
| --preset "${preset}" | ||
|
|
||
| cmake --build "${preset_output_dir}" \ | ||
| --config "${mode}" \ | ||
| -j$(sysctl -n hw.ncpu) | ||
| --config "${mode}" \ | ||
| -j$(sysctl -n hw.ncpu) | ||
| done | ||
| done | ||
|
|
||
|
|
@@ -167,8 +175,8 @@ mkdir -p "$HEADERS_ABSOLUTE_PATH" | |
|
|
||
| "$SOURCE_ROOT_DIR"/scripts/print_exported_headers.py --buck2=$(realpath "$BUCK2") --targets \ | ||
| //extension/module: \ | ||
| //extension/tensor: \ | ||
| | rsync -av --files-from=- "$SOURCE_ROOT_DIR" "$HEADERS_ABSOLUTE_PATH/executorch" | ||
| //extension/tensor: | | ||
|
||
| rsync -av --files-from=- "$SOURCE_ROOT_DIR" "$HEADERS_ABSOLUTE_PATH/executorch" | ||
|
|
||
| # HACK: XCFrameworks don't appear to support exporting any build | ||
| # options, but we need the following: | ||
|
|
@@ -187,7 +195,7 @@ cp -r $HEADERS_ABSOLUTE_PATH/executorch/runtime/core/portable_type/c10/torch "$H | |
|
|
||
| cp "$SOURCE_ROOT_DIR/extension/apple/ExecuTorch/Exported/"*.h "$HEADERS_ABSOLUTE_PATH/executorch" | ||
|
|
||
| cat > "$HEADERS_ABSOLUTE_PATH/module.modulemap" << 'EOF' | ||
| cat >"$HEADERS_ABSOLUTE_PATH/module.modulemap" <<'EOF' | ||
|
||
| module ExecuTorch { | ||
| umbrella header "ExecuTorch/ExecuTorch.h" | ||
| export * | ||
|
|
@@ -211,10 +219,10 @@ append_framework_flag() { | |
| fi | ||
|
|
||
| if [[ -n "$mode" && "$mode" != "Release" ]]; then | ||
| local name spec | ||
| name=$(echo "$framework" | cut -d: -f1) | ||
| spec=$(echo "$framework" | cut -d: -f2-) | ||
| framework="${name}_$(echo "$mode" | tr '[:upper:]' '[:lower:]'):${spec}" | ||
| local name spec | ||
|
||
| name=$(echo "$framework" | cut -d: -f1) | ||
| spec=$(echo "$framework" | cut -d: -f2-) | ||
| framework="${name}_$(echo "$mode" | tr '[:upper:]' '[:lower:]'):${spec}" | ||
| fi | ||
| echo "Adding framework: ${framework}" | ||
| FRAMEWORK_FLAGS+=("--framework=$framework") | ||
|
|
@@ -235,6 +243,7 @@ for mode in "${MODES[@]}"; do | |
| append_framework_flag "EXECUTORCH_BUILD_KERNELS_CUSTOM" "$FRAMEWORK_KERNELS_CUSTOM" "$mode" | ||
| append_framework_flag "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" "$FRAMEWORK_KERNELS_OPTIMIZED" "$mode" | ||
| append_framework_flag "EXECUTORCH_BUILD_KERNELS_QUANTIZED" "$FRAMEWORK_KERNELS_QUANTIZED" "$mode" | ||
| append_framework_flag "EXECUTORCH_BUILD_KERNELS_TORCHAO" "$FRAMEWORK_KERNELS_TORCHAO" "$mode" | ||
|
|
||
| cd "${OUTPUT_DIR}" | ||
| "$SOURCE_ROOT_DIR"/scripts/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was this line still needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally this should be in
executorch-config.cmake. It doesn't have to be in this PR, though.