diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index f42a20e22..47d1a8d87 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -434,7 +434,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10.11 + python-version: '3.10.11' - name: Setup Xcode if: runner.os == 'macOS' uses: maxim-lobanov/setup-xcode@v1 @@ -577,7 +577,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10.11 + python-version: '3.10.11' - name: Print machine info run: | uname -a @@ -625,6 +625,7 @@ jobs: with: runner: macos-m1-stable # neeps MPS, was macos-m1-stable script: | + export PYTHON_VERSION="3.10" set -x # NS/MC: Remove previous installation of torch and torchao first # as this script does not install anything into conda env but rather as system dep @@ -737,6 +738,7 @@ jobs: with: runner: macos-m1-stable # needs MPS, was macos-m1-stable script: | + export PYTHON_VERSION="3.10" set -x # NS/MC: Remove previous installation of torch and torchao first # as this script does not install anything into conda env but rather as system dep @@ -914,31 +916,19 @@ jobs: continue-on-error: true run: | echo "Installing ExecuTorch" - bash torchchat/utils/scripts/build_native.sh et - - name: Install ET pip + bash torchchat/utils/scripts/install_et.sh + - name: Install ExecuTorch python run: | - echo "ET build directory" - ls et-build | cat - + echo "Install ExecuTorch python" pushd et-build/src/executorch - if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then - echo "Mismatched hash. Make sure branch install_et.sh matches branch from Github cache." - echo "On commit $(git rev-parse HEAD)" - echo "Expected commit ${{env.et-git-hash}}" - exit 1 - fi - pip install . + chmod +x ./install_requirements.sh + chmod +x ./install_requirements.py + ./install_requirements.sh popd - name: Install runner run: | - # Pull submodules (re2, abseil) for Tiktoken - git submodule sync - git submodule update --init - - export TORCHCHAT_ROOT=${PWD} - cmake -S . -B ./cmake-out -G Ninja - cmake --build ./cmake-out --target et_run - + echo "Installing runner" + bash torchchat/utils/scripts/build_native.sh et - name: Run inference run: | python torchchat.py download stories15M @@ -1035,3 +1025,93 @@ jobs: git submodule update --init ./runner/build_android.sh echo "Tests complete." 
+ + test-torchao-experimental: + strategy: + matrix: + runner: [macos-14-xlarge] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.10.11' + - name: Setup Xcode + if: runner.os == 'macOS' + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.3' + - name: Print machine info + run: | + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + - name: Install torchchat + run: | + echo "Installing pip3 packages" + ./install/install_requirements.sh + pip3 list + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' + - name: Install torchao-experimental + id: install-torchao-experimental + run: | + bash torchchat/utils/scripts/build_torchao_experimental.sh + - name: Set git shas + id: setup-hash + run: | + export TORCHCHAT_ROOT=${PWD} + echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV" + - name: Load or install ET + id: install-et + uses: actions/cache@v3 + env: + cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}} + with: + path: ./et-build + key: ${{env.cache-key}} + restore-keys: | + ${{env.cache-key}} + - if: ${{ steps.install-et.outputs.cache-hit != 'true' }} + continue-on-error: true + run: | + echo "Installing ExecuTorch" + bash torchchat/utils/scripts/install_et.sh + - name: Install runner + run: | + echo "Installing runner" + bash torchchat/utils/scripts/build_native.sh et link_torchao + - name: Install runner AOTI + id: install-runner-aoti + run: | + bash torchchat/utils/scripts/build_native.sh aoti link_torchao + - name: Run inference + run: | + python torchchat.py download stories110M + wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + + export PRMT="Once upon a time in a land far away" + + echo "Generate eager" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + + echo "Generate compile" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile + + echo "Export and run ET (C++ runner)" + python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" + + echo "Export and run AOTI (C++ runner)" + python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}" + + echo "Generate AOTI" + python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}" + + echo "Tests complete."
diff --git a/.gitignore b/.gitignore index 3f25b76c0..ee856fcd2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ __pycache__/ # Build directories build/android/* et-build/* +torchao-build/* runner-et/cmake-out/* runner-aoti/cmake-out/* cmake-out/ diff --git a/docs/quantization.md b/docs/quantization.md index 1f619e58e..aea8a8dc6 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -118,6 +118,67 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my name is" ``` +## Experimental TorchAO lowbit kernels + +### Use +The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize. +It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false). +The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true). +Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme. + +You should expect high performance on ARM CPUs if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. On other platforms and with other argument choices, a slow fallback kernel will be used, and you will see warnings about this during quantization. + +### Setup +To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon. + +From the torchchat root directory, run +``` +sh torchchat/utils/scripts/build_torchao_experimental.sh +``` + +This should take about 10 seconds to complete. Once finished, you can use a8wxdq in torchchat. + +Note: if you want to use the new kernels in the AOTI and ExecuTorch C++ runners, you must pass the flag link_torchao when running the scripts that build the runners. + +``` +sh torchchat/utils/scripts/build_native.sh aoti link_torchao +``` + +``` +sh torchchat/utils/scripts/build_native.sh et link_torchao +``` + +### Examples + +#### Eager mode +``` +python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' +``` + +#### torch.compile +``` +python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile +``` + +As with PyTorch in general, you can experiment with performance using a different number of threads by setting OMP_NUM_THREADS. For example, + +``` +OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile +``` + +#### AOTI +``` +python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so +python3 torchchat.py generate llama3 --dso-path llama3.so --prompt "Hello my name is" +``` + +#### ExecuTorch +``` +python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte +``` + +Note: the exported *.pte file can only be run with the torchchat ExecuTorch C++ runner built using the setup instructions above.
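+ +For example, assuming the llama3.pte file has been exported as above and the ExecuTorch C++ runner has been built with link_torchao, the exported model can be run from the command line as shown below (the tokenizer path is illustrative; -z is the tokenizer, -t the temperature, and -i the prompt): + +``` +./cmake-out/et_run llama3.pte -z /path/to/tokenizer.model -t 0 -i "Once upon a time" +```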
+ ## Quantization Profiles Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json` diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index a6f1373dd..af7ef4377 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -91298923a0076c1b41059efb6dad2876426e4b03 +c75711cb329cab3df91fb9083a18373f9a568377 diff --git a/install/.pins/torchao-experimental-pin.txt b/install/.pins/torchao-experimental-pin.txt new file mode 100644 index 000000000..9b101777d --- /dev/null +++ b/install/.pins/torchao-experimental-pin.txt @@ -0,0 +1 @@ +3fa38aaf1276e36845a82fb399e5054718a441c4 diff --git a/install/requirements.txt b/install/requirements.txt index bbb1d56d1..7bb3b74b5 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy < 2.0 +numpy>=1.23.5,<2.0 gguf lm-eval==0.4.2 blobfile diff --git a/runner/aoti.cmake b/runner/aoti.cmake index 156e9bcce..35e4c1329 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -28,3 +28,7 @@ if(Torch_FOUND) target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m) set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17) endif() + +if (LINK_TORCHAO_CUSTOM_OPS) + target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}") +endif() diff --git a/runner/build_android.sh b/runner/build_android.sh index c32185957..c0ad02d7b 100755 --- a/runner/build_android.sh +++ b/runner/build_android.sh @@ -24,8 +24,6 @@ export CMAKE_OUT_DIR="cmake-out-android" export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF" export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON" export CMAKE_OUT_DIR="cmake-out-android" -# export DCMAKE_INSTALL_PREFIX=cmake-out-android -# build_runner_et() { rm -rf cmake-out-android @@ -43,5 +41,5 @@ install_executorch_python_libs $ENABLE_ET_PYBIND export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake export ANDROID_ABI=arm64-v8a export ANDROID_PLATFORM=android-23 -install_executorch +install_executorch_cpp_libs build_runner_et diff --git a/runner/et.cmake b/runner/et.cmake index 7fc16b1f2..12c7fca02 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -62,7 +62,6 @@ if(executorch_FOUND) set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch) set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack) - list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp) list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/cpuinfo/include) @@ -80,7 +79,9 @@ if(executorch_FOUND) et_run PRIVATE executorch extension_module + extension_tensor extension_data_loader + extension_threadpool optimized_kernels quantized_kernels portable_kernels @@ -111,6 +112,15 @@ if(executorch_FOUND) target_link_libraries(et_run PRIVATE log) endif() + if(LINK_TORCHAO_CUSTOM_OPS) + # target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(et_run PRIVATE "$") + target_link_libraries(et_run PRIVATE + "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a" + "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a" + ) + endif() + # Adding target_link_options_shared_lib as commented out below leads to this: # # CMake Error at Utils.cmake:22 (target_link_options): diff --git 
a/runner/run.cpp b/runner/run.cpp index 999ad2fcc..99eb7bfb9 100644 --- a/runner/run.cpp +++ b/runner/run.cpp @@ -39,19 +39,20 @@ torch::Device aoti_device(torch::kCPU); #else // __ET_MODEL__ #include -#include +#include #include #include #include #if defined(ET_USE_ADAPTIVE_THREADS) -#include -#include +#include +#include #endif using exec_aten::ScalarType; using torch::executor::EValue; -using torch::executor::ManagedTensor; +using executorch::extension::TensorPtr; +using executorch::extension::make_tensor_ptr; using torch::executor::Module; using torch::executor::Result; #endif @@ -212,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) { .to(torch::kCPU); auto logits = result[0].data_ptr(); #else // __ET_MODEL__ - ManagedTensor pos_managed(pos_buffer, {1}, ScalarType::Long); - ManagedTensor tokens_managed(token_buffer, {1, 1}, ScalarType::Long); + TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); + TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); std::vector inputs; - auto tmp1 = EValue(tokens_managed.get_aliasing_tensor()); - auto tmp2 = EValue(pos_managed.get_aliasing_tensor()); + auto tmp1 = EValue(tokens_managed); + auto tmp2 = EValue(pos_managed); inputs.push_back(tmp1); inputs.push_back(tmp2); diff --git a/torchchat/export.py b/torchchat/export.py index affb8b871..b28e8023f 100644 --- a/torchchat/export.py +++ b/torchchat/export.py @@ -194,7 +194,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None): return self.wo(output) def replace_attention_with_custom_sdpa_attention(module: nn.Module): - from executorch.examples.models.llama2.custom_ops import ( # noqa + from executorch.extension.llm.custom_ops import ( # noqa sdpa_with_kv_cache, ) @@ -304,7 +304,6 @@ def export_for_et(model, device, output_path) -> str: edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner()) export_program = edge_manager.to_executorch( ExecutorchBackendConfig( - extract_constant_segment=True, extract_delegate_segments=True, passes=[ QuantFusionPass(), diff --git a/torchchat/model.py b/torchchat/model.py index a576d5036..48978b9a0 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -961,7 +961,7 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor: from executorch.extension.pybindings import portable_lib as exec_lib # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately. 
- from executorch.examples.models.llama2.custom_ops import sdpa_with_kv_cache # no-qa + from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # no-qa class PTEModel(nn.Module): def __init__(self, config, path) -> None: diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index a0d9248a9..041f074c2 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -96,10 +96,19 @@ def quantize_model( precision = get_precision() try: - # Easier to ask forgiveness than permission - quant_handler = ao_quantizer_class_dict[quantizer]( - groupsize=q_kwargs["groupsize"], device=device, precision=precision - ) + if quantizer == "linear:a8wxdq": + quant_handler = ao_quantizer_class_dict[quantizer]( + device=device, + precision=precision, + bitwidth=q_kwargs.get("bitwidth", 4), + groupsize=q_kwargs.get("groupsize", 128), + has_weight_zeros=q_kwargs.get("has_weight_zeros", False), + ) + else: + # Easier to ask forgiveness than permission + quant_handler = ao_quantizer_class_dict[quantizer]( + groupsize=q_kwargs["groupsize"], device=device, precision=precision + ) except TypeError as e: if "unexpected keyword argument 'device'" in str(e): quant_handler = ao_quantizer_class_dict[quantizer]( @@ -861,3 +870,33 @@ def quantized_model(self) -> nn.Module: "linear:int4": Int4WeightOnlyQuantizer, "linear:a8w4dq": Int8DynActInt4WeightQuantizer, } + +try: + import importlib.util + import sys + import os + torchao_build_path = f"{os.getcwd()}/torchao-build" + + # Try loading quantizer + torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location( + "torchao_experimental_quant_api", + f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py", + ) + torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec) + sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api + torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api) + from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer + ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer + + # Try loading custom op + try: + import glob + libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*") + libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) + torch.ops.load_library(libs[0]) + except Exception as e: + print("Failed to load torchao custom op library with error: ", e) + print("Slow fallback kernels will be used.") + +except Exception as e: + print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}") diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index aacd97415..3f2984574 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -25,6 +25,8 @@ if [ $# -eq 0 ]; then show_help exit 1 fi + +LINK_TORCHAO=OFF while (( "$#" )); do case "$1" in -h|--help) @@ -41,6 +43,11 @@ while (( "$#" )); do TARGET="et" shift ;; + link_torchao) + echo "Linking with torchao custom ops..."
+ LINK_TORCHAO=ON + shift + ;; *) echo "Invalid option: $1" show_help @@ -49,15 +56,7 @@ while (( "$#" )); do esac done -if [ -z "${TORCHCHAT_ROOT}" ]; then - # Get the absolute path of the current script - SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - # Get the absolute path of the parent directory - TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")" - source "$TORCHCHAT_ROOT/scripts/install_utils.sh" -else - source "$TORCHCHAT_ROOT/torchchat/utils/scripts/install_utils.sh" -fi +source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" if [ -z "${ET_BUILD_DIR}" ]; then ET_BUILD_DIR="et-build" @@ -68,18 +67,31 @@ pushd ${TORCHCHAT_ROOT} git submodule update --init git submodule sync if [[ "$TARGET" == "et" ]]; then + if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" ]; then + echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install does not exist." + echo "Make sure you run install_executorch_libs" + exit 1 + fi + + if [[ "$LINK_TORCHAO" == "ON" ]]; then + if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then + echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist." + echo "Make sure you run clone_torchao" + exit 1 + fi find_cmake_prefix_path - install_pip_dependencies - clone_executorch - install_executorch_libs false + EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" + EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" + install_torchao_custom_executorch_ops + fi fi popd # CMake commands if [[ "$TARGET" == "et" ]]; then - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja fi cmake --build ./cmake-out --target "${TARGET}"_run diff --git a/torchchat/utils/scripts/build_torchao_experimental.sh b/torchchat/utils/scripts/build_torchao_experimental.sh new file mode 100644 index 000000000..1df3e80c6 --- /dev/null +++ b/torchchat/utils/scripts/build_torchao_experimental.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ + + +source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" + +pushd ${TORCHCHAT_ROOT} +find_cmake_prefix_path +clone_torchao +install_torchao_custom_aten_ops +popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index f915402e7..265332861 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -8,8 +8,10 @@ set -ex pipefail if [ -z "$TORCHCHAT_ROOT" ]; then - echo "Defaulting TORCHCHAT_ROOT to $PWD since it is unset." - TORCHCHAT_ROOT=$PWD + # Get the absolute path of the current script + SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + TORCHCHAT_ROOT="$SCRIPT_PATH/../../.." + echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset." fi install_pip_dependencies() { @@ -102,9 +104,10 @@ COMMON_CMAKE_ARGS="\ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON" + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" -install_executorch() { +install_executorch_cpp_libs() { # AOT lib has to be build for model export # So by default it is built, and you can explicitly opt-out EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF @@ -153,12 +156,55 @@ install_executorch() { } install_executorch_libs() { - # Install executorch python and C++ libs - export CMAKE_ARGS="\ - ${COMMON_CMAKE_ARGS} \ - -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" - export CMAKE_BUILD_ARGS="--target install" - + install_executorch_cpp_libs install_executorch_python_libs $1 } + +clone_torchao() { + echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src" + rm -rf ${TORCHCHAT_ROOT}/torchao-build/src + mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src + pushd ${TORCHCHAT_ROOT}/torchao-build/src + pwd + + git clone https://github.com/pytorch/ao.git + cd ao + git checkout $(cat ${TORCHCHAT_ROOT}/install/.pins/torchao-experimental-pin.txt) + + popd +} + +install_torchao_custom_aten_ops() { + echo "Building torchao custom ops for ATen" + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental + + CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out + cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ + -DTORCHAO_OP_TARGET="ATEN" \ + -S . \ + -B ${CMAKE_OUT_DIR} -G Ninja + cmake --build ${CMAKE_OUT_DIR} --target install --config Release + + popd +} + +install_torchao_custom_executorch_ops() { + echo "Building torchao custom ops for ExecuTorch" + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental + + CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" + cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ + -DTORCHAO_OP_TARGET="EXECUTORCH" \ + -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ + -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ + -S . \ + -B ${CMAKE_OUT_DIR} -G Ninja + cmake --build ${CMAKE_OUT_DIR} --target install --config Release + + popd +}