Commit 8c7e688

Bump torchao to commit on 09-30-24 (#1242)
* bump torchao
* remove scripts from cache
1 parent 0ddee99 commit 8c7e688

7 files changed: +9 -10 lines changed

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion

@@ -1103,7 +1103,7 @@ jobs:
         with:
           path: |
             ./et-build
-            ./torchchat/utils/scripts
+            ./torchchat/utils/scripts/install_et.sh
           key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
       - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
         continue-on-error: true

docs/quantization.md

Lines changed: 2 additions & 2 deletions

@@ -122,11 +122,11 @@ python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my n

 ### Use
 The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
-It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
+It takes arguments bitwidth (1, 2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
 The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
 Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme.

-You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization.
+You should expect high performance on ARM CPU if bitwidth is 1, 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization.

 ### Setup
 To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
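
For context, the documented arguments map onto the quantization config that torchchat accepts on the command line. A minimal sketch of a generate run with this scheme, assuming it is selected via the "linear:a8wxdq" key of the --quantize JSON config (check docs/quantization.md in your checkout for the exact key and flags):

    # Hypothetical example: 4-bit groupwise weights, 8-bit dynamic activations,
    # scales only (has_weight_zeros: false); a groupsize divisible by 16 keeps
    # the fast ARM CPU kernels in play per the note above.
    python3 torchchat.py generate llama3 \
      --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' \
      --prompt "Hello my name is"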

install/.pins/torchao-pin.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-63cb7a9857654784f726fec75c0dc36167094d8a
+ae3e7c68eae7085e13241cb3d6b39481868dd162

runner/aoti.cmake

Lines changed: 1 addition & 1 deletion

@@ -30,5 +30,5 @@ if(Torch_FOUND)
 endif()

 if (LINK_TORCHAO_OPS)
-  target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()

runner/et.cmake

Lines changed: 1 addition & 2 deletions

@@ -117,10 +117,9 @@ if(executorch_FOUND)
 endif()

 if(LINK_TORCHAO_OPS)
-  target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
+  target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_executorch.a>")
   target_link_libraries(et_run PRIVATE
     "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
-    "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
   )
 endif()

torchchat/utils/quantize.py

Lines changed: 1 addition & 1 deletion

@@ -898,7 +898,7 @@ def quantized_model(self) -> nn.Module:
 # Try loading custom op
 try:
     import glob
-    libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
+    libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/libtorchao_ops_aten.*")
     libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
     torch.ops.load_library(libs[0])
 except Exception as e:
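
Since the eager path now globs for the renamed shared library, a quick way to sanity-check a build before quantizing is to look for the artifact directly. A sketch, assuming torchao_build_path resolves to the default torchao-build/ directory under the torchchat checkout:

    # Expect libtorchao_ops_aten.so (Linux) or libtorchao_ops_aten.dylib (macOS);
    # no match means the torchao ops install step has not been run yet.
    ls torchao-build/cmake-out/lib/libtorchao_ops_aten.*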

torchchat/utils/scripts/install_utils.sh

Lines changed: 2 additions & 2 deletions

@@ -191,7 +191,7 @@ install_torchao_aten_ops() {
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="ATEN" \
+    -DTORCHAO_OP_TARGET="aten" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR} --target install --config Release
@@ -207,7 +207,7 @@ install_torchao_executorch_ops() {
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="EXECUTORCH" \
+    -DTORCHAO_OP_TARGET="executorch" \
     -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
     -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
     -S . \
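
The lowercase TORCHAO_OP_TARGET values presumably track the bumped torchao pin, so existing setups just need to rerun the helpers. A sketch of driving the two functions by sourcing the script directly (an assumption; CI normally calls them, and the ExecuTorch variant expects EXECUTORCH_INCLUDE_DIRS and EXECUTORCH_LIBRARIES to be set beforehand):

    # Rebuild the torchao custom ops with the renamed aten/executorch targets.
    source torchchat/utils/scripts/install_utils.sh
    install_torchao_aten_ops
    install_torchao_executorch_ops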
