This repository was archived by the owner on Sep 10, 2025. It is now read-only.
File tree: 1 file changed, +7 −2 lines changed
Expand file tree Collapse file tree 1 file changed +7
-2
lines changed Original file line number Diff line number Diff line change @@ -292,8 +292,13 @@ jobs:
echo "::endgroup::"

echo "::group::Run inference with quantize file"
# Only CPU for now. cuda fails because
# `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`;
# follow up with torchao as a separate PR before re-enabling it in the list.
for DEVICE in cpu; do # cuda
  echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
  # Quote "${DEVICE}" so an empty/odd value cannot word-split the command line.
  python3 torchchat.py export --device "${DEVICE}" --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
  python3 torchchat.py generate --device "${DEVICE}" --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
done
echo "::endgroup::"
299304 test-gpu-aoti-float32:
You can’t perform that action at this time.
0 commit comments