This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 6e51132

Merge branch 'main' into patch-43

2 parents 26f629e + 384a728

File tree

5 files changed: 87 additions & 20 deletions

.github/workflows/pull.yml

Lines changed: 78 additions & 14 deletions
@@ -291,6 +291,16 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
           echo "::endgroup::"

+          echo "::group::Run inference with quantize file"
+          for DEVICE in cpu; do # cuda
+            # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
+            # follow up with torchao as a separate PR
+            echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
+            python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+            python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          done
+          echo "::endgroup::"
+
   test-gpu-aoti-float32:
     permissions:
       id-token: write
@@ -335,6 +345,11 @@ jobs:
           fi
           echo "::endgroup::"

+          # echo "::group::Run inference with quantize file"
+          # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # echo "::endgroup::"
+
   test-gpu-aoti-float16:
     permissions:
       id-token: write
@@ -376,10 +391,15 @@ jobs:
           echo "::group::Run inference with quantize file"
           if [ $(uname -s) == Darwin ]; then
             python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-            python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+            python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           fi
           echo "::endgroup::"

+          # echo "::group::Run inference with quantize file"
+          # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # echo "::endgroup::"
+
   test-gpu-eval-sanity-check:
     permissions:
       id-token: write
@@ -495,10 +515,11 @@ jobs:
           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte

           echo "******************************************"
-          echo "*** --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
           echo "******************************************"
-          # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
-          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte


           echo "******************************************"
@@ -1055,7 +1076,59 @@ jobs:
           ./runner/build_android.sh
           echo "Tests complete."

-  test-torchao-experimental:
+  test-torchao-aoti-experimental:
+    strategy:
+      matrix:
+        runner: [macos-14-xlarge]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10.11
+      - name: Setup Xcode
+        if: runner.os == 'macOS'
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: '15.3'
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ $(uname -s) == Darwin ]; then
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Install torchchat
+        run: |
+          echo "Installing pip3 packages"
+          ./install/install_requirements.sh
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+      - name: Install torchao-ops
+        id: install-torchao-ops
+        run: |
+          bash torchchat/utils/scripts/build_torchao_ops.sh
+      - name: Install runner AOTI
+        id: install-runner-aoti
+        run: |
+          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
+      - name: Run inference
+        run: |
+          python torchchat.py download stories110M
+          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+          export PRMT="Once upon a time in a land far away"
+          echo "Export and run AOTI (C++ runner)"
+          python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
+          ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}"
+          echo "Generate AOTI"
+          python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
+          echo "Tests complete."
+
+  test-torchao-et-experimental:
     strategy:
       matrix:
         runner: [macos-14-xlarge]
@@ -1100,10 +1173,6 @@ jobs:
         run: |
           echo "Installing runner"
           bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
-      - name: Install runner AOTI
-        id: install-runner-aoti
-        run: |
-          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
       - name: Run inference
         run: |
           python torchchat.py download stories110M
@@ -1116,11 +1185,6 @@ jobs:
           echo "Export and run ET (C++ runner)"
           python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
-          echo "Export and run AOTI (C++ runner)"
-          python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
-          ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}"
-          echo "Generate AOTI"
-          python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
           echo "Tests complete."

   test-torchao-experimental-mps:
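A note on the --quantize flag these jobs exercise: it accepts either a JSON config file from torchchat/quant_config/ or an inline JSON string, and both forms appear in the hunks above. A minimal sketch of the two invocation styles, using the stories110M model the jobs already download (the output paths here are illustrative):

    # Quantize from a bundled config file, as the CUDA/AOTI jobs do.
    python3 torchchat.py export stories110M \
        --output-aoti-package-path ./model.pt2 \
        --quantize torchchat/quant_config/cuda-32.json

    # Or pass the config inline, as the torchao-experimental jobs do.
    python3 torchchat.py export stories110M \
        --output-pte-path ./model.pte --dtype float32 \
        --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}}'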

install/.pins/et-pin.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-9c043290ad3944268290e015c3063bc411e6ef6b
+791472d6706b027552f39f11b28d034e4839c9af

install/install_requirements.sh

Lines changed: 3 additions & 3 deletions

@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20250124
+PYTORCH_NIGHTLY_VERSION=dev20250131

 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20250124
+VISION_NIGHTLY_VERSION=dev20250131

 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20250124
+TUNE_NIGHTLY_VERSION=dev20250131

 # The pip repository that hosts nightly torch packages. cpu by default.
 # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly
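For orientation, these *_NIGHTLY_VERSION date stamps are later expanded into pip requirement strings resolved against the nightly index mentioned in the trailing comment. A rough sketch of that pattern (the "2.7.0" base version and the cpu index choice are illustrative assumptions, not taken from this diff):

    # Illustrative only: install the torch nightly matching the pinned date.
    PYTORCH_NIGHTLY_VERSION=dev20250131
    pip3 install --pre "torch==2.7.0.${PYTORCH_NIGHTLY_VERSION}" \
        --index-url https://download.pytorch.org/whl/nightly/cpu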

torchchat/utils/scripts/build_native.sh

Lines changed: 3 additions & 0 deletions

@@ -86,6 +86,9 @@ if [[ "$TARGET" == "et" ]]; then
     EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
     install_torchao_executorch_ops
   fi
+elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then
+  # Install OMP when using AOTI with linked torchao ops
+  brew install libomp
 fi
 popd
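To make the control flow explicit: the fi directly above the new branch closes an inner conditional inside the if [[ "$TARGET" == "et" ]] block, so the added elif fires only for the AOTI target when torchao ops are linked. A simplified sketch of the resulting structure (the inner test is inferred from the hunk context and abbreviated):

    if [[ "$TARGET" == "et" ]]; then
      if [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then   # inner test abbreviated/inferred
        install_torchao_executorch_ops              # plus the EXECUTORCH_LIBRARIES setup
      fi
    elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then
      # AOTI with linked torchao ops: OpenMP is supplied by Homebrew on macOS
      brew install libomp
    fi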

torchchat/utils/scripts/install_utils.sh

Lines changed: 2 additions & 2 deletions

@@ -88,10 +88,10 @@ install_executorch_python_libs() {
   echo "Building and installing python libraries"
   if [ "${ENABLE_ET_PYBIND}" = false ]; then
     echo "Not installing pybind"
-    bash ./install_requirements.sh --pybind off
+    bash ./install_executorch.sh --pybind off
   else
     echo "Installing pybind"
-    bash ./install_requirements.sh --pybind xnnpack
+    bash ./install_executorch.sh --pybind xnnpack
   fi

   # TODO: figure out the root cause of 'AttributeError: module 'evaluate'
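The substantive change here is only the name of the script invoked inside the ExecuTorch checkout: install_requirements.sh became install_executorch.sh (apparently tracking a rename upstream in ExecuTorch), while the --pybind flag values stay the same. A hypothetical standalone invocation mirroring the pybind-enabled path (the checkout path is an assumption about the et build layout, not shown in this diff):

    cd "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch"  # assumed ExecuTorch checkout location
    bash ./install_executorch.sh --pybind xnnpack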
