Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/quantization.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my n

### Use
The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
It takes arguments bitwidth (1, 2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme.

You should expect high performance on ARM CPUs if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. On other platforms, or with other argument choices, a slow fallback kernel is used, and you will see warnings about this during quantization.
You should expect high performance on ARM CPUs if bitwidth is 1, 2, 3, 4, or 5 and groupsize is divisible by 16. On other platforms, or with other argument choices, a slow fallback kernel is used, and you will see warnings about this during quantization.

### Setup
To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
Expand Down
2 changes: 1 addition & 1 deletion install/.pins/torchao-pin.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
63cb7a9857654784f726fec75c0dc36167094d8a
ae3e7c68eae7085e13241cb3d6b39481868dd162
2 changes: 1 addition & 1 deletion runner/aoti.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ if(Torch_FOUND)
endif()

if (LINK_TORCHAO_OPS)
target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
endif()
3 changes: 1 addition & 2 deletions runner/et.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,9 @@ if(executorch_FOUND)
endif()

if(LINK_TORCHAO_OPS)
target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_executorch.a>")
target_link_libraries(et_run PRIVATE
"${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
"${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
)
endif()

Expand Down
2 changes: 1 addition & 1 deletion torchchat/utils/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ def quantized_model(self) -> nn.Module:
# Try loading custom op
try:
import glob
libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/libtorchao_ops_aten.*")
libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
torch.ops.load_library(libs[0])
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions torchchat/utils/scripts/install_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ install_torchao_aten_ops() {
cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
-DCMAKE_BUILD_TYPE="Release" \
-DTORCHAO_OP_TARGET="ATEN" \
-DTORCHAO_OP_TARGET="aten" \
-S . \
-B ${CMAKE_OUT_DIR} -G Ninja
cmake --build ${CMAKE_OUT_DIR} --target install --config Release
Expand All @@ -207,7 +207,7 @@ install_torchao_executorch_ops() {
cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
-DCMAKE_BUILD_TYPE="Release" \
-DTORCHAO_OP_TARGET="EXECUTORCH" \
-DTORCHAO_OP_TARGET="executorch" \
-DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
-DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
-S . \
Expand Down
Loading