Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 5d098bd

Browse files
authored
Update pull.yml
add DEVICE specification for snapshot and use device cpu
1 parent e733606 commit 5d098bd

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

.github/workflows/pull.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,13 @@ jobs:
292292
echo "::endgroup::"
293293
294294
echo "::group::Run inference with quantize file"
295-
python3 torchchat.py export --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
296-
python3 torchchat.py generate --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
295+
for DEVICE in cpu; do # cuda
296+
# cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
297+
# follow up with torchao as a separate PR
298+
echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
299+
python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
300+
python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
301+
done
297302
echo "::endgroup::"
298303
299304
test-gpu-aoti-float32:

0 commit comments

Comments
 (0)