diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 11446030cef..e07cbc1dafd 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -325,7 +325,7 @@ jobs: eval "$(conda shell.bash hook)" # Install requirements - ${CONDA_RUN} python install_executorch.py + ${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py ${CONDA_RUN} sh examples/models/llama/install_requirements.sh # Run test diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md index bbd2107ad74..8605ed3efc7 100644 --- a/examples/models/llama/README.md +++ b/examples/models/llama/README.md @@ -338,7 +338,12 @@ Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-de ## Running with low-bit kernels -We now give instructions for quantizating and running your model with low-bit kernels. These are still experimental, and require you do development on an Arm-based Mac. Also note that low-bit quantization often requires QAT (quantization-aware training) to give good quality results. +We now give instructions for quantizing and running your model with low-bit kernels. These are still experimental, and require that you do development on an Arm-based Mac, and install executorch from source with the environment variable EXECUTORCH_BUILD_TORCHAO=1 defined: +``` +EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py +``` + +Also note that low-bit quantization often requires QAT (quantization-aware training) to give good quality results. First export your model for lowbit quantization (step 2 above): diff --git a/install_requirements.py b/install_requirements.py index 70781b5445a..0600c123dab 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -106,7 +106,14 @@ def install_requirements(use_pytorch_nightly): # Install packages directly from local copy instead of pypi. # This is usually not recommended. 
new_env = os.environ.copy() - new_env["USE_CPP"] = "1" # install torchao kernels + if ("EXECUTORCH_BUILD_TORCHAO" not in new_env) or ( + new_env["EXECUTORCH_BUILD_TORCHAO"] == "0" + ): + new_env["USE_CPP"] = "0" + else: + assert new_env["EXECUTORCH_BUILD_TORCHAO"] == "1" + new_env["USE_CPP"] = "1" + new_env["CMAKE_POLICY_VERSION_MINIMUM"] = "3.5" subprocess.run( [ sys.executable, diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index da1364eb2ad..88148ee6cfd 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -20,6 +20,9 @@ set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON) if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set_overridable_option(EXECUTORCH_BUILD_COREML ON) set_overridable_option(EXECUTORCH_BUILD_MPS ON) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") + set_overridable_option(EXECUTORCH_BUILD_TORCHAO ON) + endif() elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Linux-specific code here elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "WIN32")