diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 9d3ad63e3..55fe8f11d 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1103,7 +1103,7 @@ jobs:
         with:
           path: |
             ./et-build
-            ./torchchat/utils/scripts
+            ./torchchat/utils/scripts/install_et.sh
           key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
       - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
         continue-on-error: true
diff --git a/docs/quantization.md b/docs/quantization.md
index 2fac20fc1..24a12fcbd 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -122,11 +122,11 @@ python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my n
 
 ### Use
 The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
-It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
+It takes arguments bitwidth (1, 2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
 The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
 Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme.
 
-You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization.
+You should expect high performance on ARM CPU if bitwidth is 1, 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization.
 
 ### Setup
 To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
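
Note on the docs hunk above: a8wxdq is driven through torchchat's --quantize JSON config. Below is a minimal sketch of exercising the newly allowed bitwidth 1; the "linear:a8wxdq" config key and the export flags are assumed from other examples in docs/quantization.md, not from this diff.

    # Sketch under the assumptions above; bitwidth 1 is the value this PR enables.
    # groupsize 32 is divisible by 16, per the fast-path condition in the docs.
    python3 torchchat.py export llama3 \
      --quantize '{"linear:a8wxdq": {"bitwidth": 1, "groupsize": 32, "has_weight_zeros": false}}' \
      --output-pte-path llama3.pte
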
diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt
index b28bd09cd..a6a965960 100644
--- a/install/.pins/torchao-pin.txt
+++ b/install/.pins/torchao-pin.txt
@@ -1 +1 @@
-63cb7a9857654784f726fec75c0dc36167094d8a
+ae3e7c68eae7085e13241cb3d6b39481868dd162
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index 082a6f5ce..ae907b391 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -30,5 +30,5 @@ if(Torch_FOUND)
 endif()
 
 if (LINK_TORCHAO_OPS)
-  target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()
diff --git a/runner/et.cmake b/runner/et.cmake
index c788ead56..916ce9ea8 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -117,10 +117,9 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_OPS)
-    target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a>")
+    target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_executorch.a>")
     target_link_libraries(et_run PRIVATE
       "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
-      "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
     )
   endif()
 
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index abca48d25..1be34a1c5 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -898,7 +898,7 @@ def quantized_model(self) -> nn.Module:
     # Try loading custom op
     try:
         import glob
-        libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
+        libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/libtorchao_ops_aten.*")
         libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
         torch.ops.load_library(libs[0])
     except Exception as e:
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index f5698237f..10405382e 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -191,7 +191,7 @@ install_torchao_aten_ops() {
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="ATEN" \
+    -DTORCHAO_OP_TARGET="aten" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR} --target install --config Release
@@ -207,7 +207,7 @@ install_torchao_executorch_ops() {
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="EXECUTORCH" \
+    -DTORCHAO_OP_TARGET="executorch" \
    -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
    -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
    -S . \
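
Note on the renames above: quantize.py now globs for libtorchao_ops_aten.*, so the library name produced by install_torchao_aten_ops must match. A sketch of an end-to-end check, assuming the script's environment (e.g. TORCHCHAT_ROOT and the torchao checkout) is already set up and install_torchao_aten_ops takes no arguments; none of this is part of the PR:

    # Rebuild the ATen ops with the lowercase TORCHAO_OP_TARGET and confirm
    # the library lands where torchchat/utils/quantize.py now globs for it.
    source torchchat/utils/scripts/install_utils.sh
    install_torchao_aten_ops
    ls torchao-build/cmake-out/lib/libtorchao_ops_aten.*   # expect a .so or .dylib
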