
Commit c2108d6
Merge branch 'main' into new-intx-quantizer
2 parents: 76e8ec5 + f810de3

6 files changed: +35 -15 lines

.github/workflows/pull.yml
Lines changed: 25 additions & 8 deletions

@@ -291,6 +291,16 @@ jobs:
       bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
       echo "::endgroup::"
 
+      echo "::group::Run inference with quantize file"
+      for DEVICE in cpu; do # cuda
+        # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
+        # follow up with torchao as a separate PR
+        echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
+        python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+      done
+      echo "::endgroup::"
+
   test-gpu-aoti-float32:
     permissions:
       id-token: write
@@ -335,6 +345,11 @@ jobs:
       fi
       echo "::endgroup::"
 
+      # echo "::group::Run inference with quantize file"
+      # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+      # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+      # echo "::endgroup::"
+
   test-gpu-aoti-float16:
     permissions:
       id-token: write
@@ -376,10 +391,15 @@ jobs:
       echo "::group::Run inference with quantize file"
       if [ $(uname -s) == Darwin ]; then
         python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-        python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+        python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
       fi
       echo "::endgroup::"
 
+      # echo "::group::Run inference with quantize file"
+      # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+      # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+      # echo "::endgroup::"
+
   test-gpu-eval-sanity-check:
     permissions:
       id-token: write
@@ -495,10 +515,11 @@ jobs:
       python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
       echo "******************************************"
-      echo "*** --quantize torchchat/quant_config/mobile.json ***"
+      echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
+      echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
       echo "******************************************"
-      # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
-      # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+      python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+      python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
 
       echo "******************************************"
@@ -1147,10 +1168,6 @@ jobs:
         run: |
           echo "Installing runner"
           bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
-      - name: Install runner AOTI
-        id: install-runner-aoti
-        run: |
-          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
       - name: Run inference
         run: |
           python torchchat.py download stories110M
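
For context, the new CI step round-trips torchchat's quantized export snapshot: quantize during export, save the artifact with --output-snap, then reload it with --snap for generation instead of re-quantizing. A minimal local sketch of the same flow (the stories15M checkpoint path is illustrative, not part of this commit):

    # Save a bfloat16 quantized snapshot, then reload it for generation.
    # Checkpoint path is hypothetical; substitute your own model.
    python3 torchchat.py export --device cpu --dtype bfloat16 \
        --quantize torchchat/quant_config/cuda-32.json \
        --output-snap model.tc \
        --checkpoint ./checkpoints/stories15M/model.pth
    python3 torchchat.py generate --device cpu --dtype bfloat16 \
        --snap model.tc \
        --checkpoint ./checkpoints/stories15M/model.pth

The loop deliberately covers only cpu for now; on cuda the reload fails with `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`, left for a follow-up with torchao in a separate PR.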

install/.pins/et-pin.txt
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-9836b39fe690e1906f133b4a233863149c30d499
+791472d6706b027552f39f11b28d034e4839c9af

install/install_requirements.sh
Lines changed: 3 additions & 3 deletions

@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20250124
+PYTORCH_NIGHTLY_VERSION=dev20250131
 
 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20250124
+VISION_NIGHTLY_VERSION=dev20250131
 
 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20250124
+TUNE_NIGHTLY_VERSION=dev20250131
 
 # The pip repository that hosts nightly torch packages. cpu by default.
 # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly
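
These pins are consumed when the script installs torch, torchvision, and torchtune from the PyTorch nightly index; the three dev dates are bumped together because nightly wheels are typically only co-installable when built from the same night. Roughly, for the default cpu channel (a sketch; the 2.7.0 version prefix is an assumption, not taken from this commit):

    pip3 install --pre "torch==2.7.0.dev20250131" \
        --index-url https://download.pytorch.org/whl/nightly/cpu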

torchchat/cli/cli.py
Lines changed: 1 addition & 1 deletion

@@ -549,7 +549,7 @@ def arg_init(args):
     precision_handler = args.quantize.get("precision", None)
     if precision_handler:
         if precision_handler["dtype"] != args.dtype:
-            print('overriding json-specified dtype {precision_handler["dtype"]} with cli dtype {args.dtype}')
+            print(f'overriding json-specified dtype {precision_handler["dtype"]} with cli dtype {args.dtype}')
         precision_handler["dtype"] = args.dtype
 
     if getattr(args, "output_pte_path", None):
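
The fix adds the missing f prefix, so the two dtypes are actually interpolated instead of printing the braces literally. A sketch of when the message fires (the config contents and dtype values here are illustrative):

    # Suppose torchchat/quant_config/cuda.json contained {"precision": {"dtype": "fp16"}}
    # and the CLI asks for a different dtype:
    python3 torchchat.py export --dtype bf16 \
        --quantize torchchat/quant_config/cuda.json \
        --checkpoint-path ./checkpoints/stories15M/model.pth \
        --output-aoti-package-path /tmp/model.pt2
    # before: overriding json-specified dtype {precision_handler["dtype"]} with cli dtype {args.dtype}
    # after:  overriding json-specified dtype fp16 with cli dtype bf16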

torchchat/utils/scripts/build_native.sh
Lines changed: 3 additions & 0 deletions

@@ -86,6 +86,9 @@ if [[ "$TARGET" == "et" ]]; then
     EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
     install_torchao_executorch_ops
   fi
+elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then
+  # Install OMP when using AOTI with linked torchao ops
+  brew install libomp
 fi
 popd
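
With this change, an AOTI build that links the torchao ops also installs OpenMP via Homebrew; per the inline comment, OMP is needed for that configuration. The matching invocation, as used by the CI step this commit removes:

    # macOS sketch: build the AOTI runner with torchao ops linked;
    # the script now runs `brew install libomp` on this path.
    bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops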

torchchat/utils/scripts/install_utils.sh
Lines changed: 2 additions & 2 deletions

@@ -88,10 +88,10 @@ install_executorch_python_libs() {
   echo "Building and installing python libraries"
   if [ "${ENABLE_ET_PYBIND}" = false ]; then
     echo "Not installing pybind"
-    bash ./install_requirements.sh --pybind off
+    bash ./install_executorch.sh --pybind off
   else
     echo "Installing pybind"
-    bash ./install_requirements.sh --pybind xnnpack
+    bash ./install_executorch.sh --pybind xnnpack
   fi
 
   # TODO: figure out the root cause of 'AttributeError: module 'evaluate'
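
The call sites are updated, presumably tracking upstream ExecuTorch's move of its setup entry point from install_requirements.sh to install_executorch.sh; the --pybind flag passes through unchanged. Mirroring the two branches above, run from inside the ExecuTorch checkout:

    bash ./install_executorch.sh --pybind off      # skip the pybind bindings
    bash ./install_executorch.sh --pybind xnnpack  # build pybind with XNNPACK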
