Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 3ee6a0f

Browse files
authored
Update pull.yml
Run AOTI on GPU with all data types. (The transition from the Python code to the AOTI code might need some tweaking.)
1 parent 7cbf2a3 commit 3ee6a0f

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

.github/workflows/pull.yml

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,16 @@ jobs:
291291
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
292292
echo "::endgroup::"
293293
294+
echo "::group::Run inference with quantize file"
295+
if [ $(uname -s) != Darwin ]; then
296+
python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
297+
298+
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
299+
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
300+
301+
fi
302+
echo "::endgroup::"
303+
294304
test-gpu-aoti-float32:
295305
permissions:
296306
id-token: write
@@ -331,7 +341,11 @@ jobs:
331341
332342
echo "::group::Run inference with quantize file"
333343
if [ $(uname -s) != Darwin ]; then
334-
python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
344+
python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
345+
346+
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
347+
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
348+
335349
fi
336350
echo "::endgroup::"
337351
@@ -374,9 +388,9 @@ jobs:
374388
echo "::endgroup::"
375389
376390
echo "::group::Run inference with quantize file"
377-
if [ $(uname -s) == Darwin ]; then
378-
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
379-
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
391+
if [ $(uname -s) != Darwin ]; then
392+
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
393+
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
380394
fi
381395
echo "::endgroup::"
382396

0 commit comments

Comments
 (0)