Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit cc16970

Browse files
committed
init
1 parent d0d87cb commit cc16970

File tree

9 files changed

+6
-307
lines changed

9 files changed

+6
-307
lines changed

.github/workflows/pull.yml

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,93 +1025,3 @@ jobs:
10251025
git submodule update --init
10261026
./runner/build_android.sh
10271027
echo "Tests complete."
1028-
1029-
test-torchao-experimental:
1030-
strategy:
1031-
matrix:
1032-
runner: [macos-14-xlarge]
1033-
runs-on: ${{matrix.runner}}
1034-
steps:
1035-
- name: Checkout repo
1036-
uses: actions/checkout@v3
1037-
with:
1038-
submodules: true
1039-
- name: Setup Python
1040-
uses: actions/setup-python@v2
1041-
with:
1042-
python-version: 3.10.11
1043-
- name: Setup Xcode
1044-
if: runner.os == 'macOS'
1045-
uses: maxim-lobanov/setup-xcode@v1
1046-
with:
1047-
xcode-version: '15.3'
1048-
- name: Print machine info
1049-
run: |
1050-
uname -a
1051-
if [ $(uname -s) == Darwin ]; then
1052-
sysctl machdep.cpu.brand_string
1053-
sysctl machdep.cpu.core_count
1054-
fi
1055-
- name: Install torchchat
1056-
run: |
1057-
echo "Intalling pip3 packages"
1058-
./install/install_requirements.sh
1059-
pip3 list
1060-
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
1061-
- name: Install torchao-experimental
1062-
id: install-torchao-experimental
1063-
run: |
1064-
bash torchchat/utils/scripts/build_torchao_experimental.sh
1065-
- name: Set git shas
1066-
id: setup-hash
1067-
run: |
1068-
export TORCHCHAT_ROOT=${PWD}
1069-
echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
1070-
- name: Load or install ET
1071-
id: install-et
1072-
uses: actions/cache@v3
1073-
env:
1074-
cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}
1075-
with:
1076-
path: ./et-build
1077-
key: ${{env.cache-key}}
1078-
restore-keys: |
1079-
${{env.cache-key}}
1080-
- if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
1081-
continue-on-error: true
1082-
run: |
1083-
echo "Installing ExecuTorch"
1084-
bash torchchat/utils/scripts/install_et.sh
1085-
- name: Install runner
1086-
run: |
1087-
echo "Installing runner"
1088-
bash torchchat/utils/scripts/build_native.sh et link_torchao
1089-
- name: Install runner AOTI
1090-
id: install-runner-aoti
1091-
run: |
1092-
bash torchchat/utils/scripts/build_native.sh aoti link_torchao
1093-
- name: Run inference
1094-
run: |
1095-
python torchchat.py download stories110M
1096-
wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
1097-
1098-
export PRMT="Once upon a time in a land far away"
1099-
1100-
echo "Generate eager"
1101-
python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1102-
1103-
echo "Generate compile"
1104-
python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
1105-
1106-
echo "Export and run ET (C++ runner)"
1107-
python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1108-
./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
1109-
1110-
echo "Export and run AOTI (C++ runner)"
1111-
python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1112-
./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
1113-
1114-
echo "Generate AOTI"
1115-
python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
1116-
1117-
echo "Tests complete."

docs/quantization.md

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -118,66 +118,6 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr
118118
python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my name is"
119119
```
120120

121-
## Experimental TorchAO lowbit kernels
122-
123-
### Use
124-
The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
125-
It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
126-
The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
127-
Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme.
128-
129-
You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization.
130-
131-
### Setup
132-
To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
133-
134-
From the torchchat root directory, run
135-
```
136-
sh torchchat/utils/scripts/build_torchao_experimental.sh
137-
```
138-
139-
This should take about 10 seconds to complete. Once finished, you can use a8wxdq in torchchat.
140-
141-
Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao when running the scripts that build the runners.
142-
143-
```
144-
sh torchchat/utils/scripts/build_native.sh aoti link_torchao
145-
```
146-
147-
```
148-
sh torchchat/utils/scripts/build_native.sh et link_torchao
149-
```
150-
151-
### Examples
152-
153-
#### Eager mode
154-
```
155-
python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
156-
```
157-
158-
#### torch.compile
159-
```
160-
python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
161-
```
162-
163-
As with PyTorch in general, you can experiment with performance on a different number of threads by setting OMP_NUM_THREADS. For example,
164-
165-
```
166-
OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
167-
```
168-
169-
#### AOTI
170-
```
171-
python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so
172-
python3 torchchat.py generate llama3 --dso-path llama3.so --prompt "Hello my name is"
173-
```
174-
175-
#### ExecuTorch
176-
```
177-
python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte
178-
```
179-
180-
Note: only the ExecuTorch C++ runner in torchchat, when built using the instructions in the setup section above, can run the exported *.pte file.
181121

182122
## Quantization Profiles
183123

install/.pins/torchao-experimental-pin.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

runner/aoti.cmake

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,3 @@ if(Torch_FOUND)
2828
target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
2929
set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
3030
endif()
31-
32-
if (LINK_TORCHAO_CUSTOM_OPS)
33-
target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
34-
endif()

runner/et.cmake

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -112,30 +112,6 @@ if(executorch_FOUND)
112112
target_link_libraries(et_run PRIVATE log)
113113
endif()
114114

115-
if(LINK_TORCHAO_CUSTOM_OPS)
116-
# target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}")
117-
target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
118-
target_link_libraries(et_run PRIVATE
119-
"${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
120-
"${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
121-
)
122-
endif()
123-
124-
# Adding target_link_options_shared_lib as commented out below leads to this:
125-
#
126-
# CMake Error at Utils.cmake:22 (target_link_options):
127-
# Cannot specify link options for target
128-
# "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
129-
# which is not built by this project.
130-
# Call Stack (most recent call first):
131-
# Utils.cmake:30 (macos_kernel_link_options)
132-
# CMakeLists.txt:41 (target_link_options_shared_lib)
133-
#
134-
#target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch
135-
136-
# This works on mac, but appears to run into issues on linux
137-
# It is needed to solve:
138-
# E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
139115
else()
140116
MESSAGE(WARNING "ExecuTorch package not found")
141117
endif()

torchchat/utils/quantize.py

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,10 @@ def quantize_model(
9696
precision = get_precision()
9797

9898
try:
99-
if quantizer == "linear:a8wxdq":
100-
quant_handler = ao_quantizer_class_dict[quantizer](
101-
device=device,
102-
precision=precision,
103-
bitwidth=q_kwargs.get("bitwidth", 4),
104-
groupsize=q_kwargs.get("groupsize", 128),
105-
has_weight_zeros=q_kwargs.get("has_weight_zeros", False),
106-
)
107-
else:
108-
# Easier to ask forgiveness than permission
109-
quant_handler = ao_quantizer_class_dict[quantizer](
110-
groupsize=q_kwargs["groupsize"], device=device, precision=precision
111-
)
99+
# Easier to ask forgiveness than permission
100+
quant_handler = ao_quantizer_class_dict[quantizer](
101+
groupsize=q_kwargs["groupsize"], device=device, precision=precision
102+
)
112103
except TypeError as e:
113104
if "unexpected keyword argument 'device'" in str(e):
114105
quant_handler = ao_quantizer_class_dict[quantizer](
@@ -870,33 +861,3 @@ def quantized_model(self) -> nn.Module:
870861
"linear:int4": Int4WeightOnlyQuantizer,
871862
"linear:a8w4dq": Int8DynActInt4WeightQuantizer,
872863
}
873-
874-
try:
875-
import importlib.util
876-
import sys
877-
import os
878-
torchao_build_path = f"{os.getcwd()}/torchao-build"
879-
880-
# Try loading quantizer
881-
torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location(
882-
"torchao_experimental_quant_api",
883-
f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py",
884-
)
885-
torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec)
886-
sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api
887-
torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api)
888-
from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer
889-
ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer
890-
891-
# Try loading custom op
892-
try:
893-
import glob
894-
libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
895-
libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
896-
torch.ops.load_library(libs[0])
897-
except Exception as e:
898-
print("Failed to torchao custom op library with error: ", e)
899-
print("Slow fallback kernels will be used.")
900-
901-
except Exception as e:
902-
print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}")

torchchat/utils/scripts/build_native.sh

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ if [ $# -eq 0 ]; then
2626
exit 1
2727
fi
2828

29-
LINK_TORCHAO=OFF
3029
while (( "$#" )); do
3130
case "$1" in
3231
-h|--help)
@@ -43,11 +42,6 @@ while (( "$#" )); do
4342
TARGET="et"
4443
shift
4544
;;
46-
link_torchao)
47-
echo "Linking with torchao custom ops..."
48-
LINK_TORCHAO=ON
49-
shift
50-
;;
5145
*)
5246
echo "Invalid option: $1"
5347
show_help
@@ -72,26 +66,14 @@ if [[ "$TARGET" == "et" ]]; then
7266
echo "Make sure you run install_executorch_libs"
7367
exit 1
7468
fi
75-
76-
if [[ "$LINK_TORCHAO" == "ON" ]]; then
77-
if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then
78-
echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist."
79-
echo "Make sure you run clone_torchao"
80-
exit 1
81-
fi
82-
find_cmake_prefix_path
83-
EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
84-
EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
85-
install_torchao_custom_executorch_ops
86-
fi
8769
fi
8870
popd
8971

9072
# CMake commands
9173
if [[ "$TARGET" == "et" ]]; then
92-
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
74+
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
9375
else
94-
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
76+
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
9577
fi
9678
cmake --build ./cmake-out --target "${TARGET}"_run
9779

torchchat/utils/scripts/build_torchao_experimental.sh

Lines changed: 0 additions & 16 deletions
This file was deleted.

torchchat/utils/scripts/install_utils.sh

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -159,52 +159,3 @@ install_executorch_libs() {
159159
install_executorch_cpp_libs
160160
install_executorch_python_libs $1
161161
}
162-
163-
clone_torchao() {
164-
echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src"
165-
rm -rf ${TORCHCHAT_ROOT}/torchao-build/src
166-
mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src
167-
pushd ${TORCHCHAT_ROOT}/torchao-build/src
168-
echo $pwd
169-
170-
cp -R ${HOME}/fbsource/fbcode/pytorch/ao .
171-
# git clone https://github.com/pytorch/ao.git
172-
# cd ao
173-
# git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt)
174-
175-
popd
176-
}
177-
178-
install_torchao_custom_aten_ops() {
179-
echo "Building torchao custom ops for ATen"
180-
pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
181-
182-
CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
183-
cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
184-
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
185-
-DCMAKE_BUILD_TYPE="Release" \
186-
-DTORCHAO_OP_TARGET="ATEN" \
187-
-S . \
188-
-B ${CMAKE_OUT_DIR} -G Ninja
189-
cmake --build ${CMAKE_OUT_DIR} --target install --config Release
190-
191-
popd
192-
}
193-
194-
install_torchao_custom_executorch_ops() {
195-
echo "Building torchao custom ops for ExecuTorch"
196-
pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
197-
198-
CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
199-
cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
200-
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
201-
-DCMAKE_BUILD_TYPE="Release" \
202-
-DTORCHAO_OP_TARGET="EXECUTORCH" \
203-
-DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
204-
-DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
205-
-S . \
206-
-B ${CMAKE_OUT_DIR} -G Ninja
207-
cmake --build ${CMAKE_OUT_DIR} --target install --config Release
208-
209-
popd
210-
}

0 commit comments

Comments
 (0)