@@ -291,6 +291,16 @@ jobs:
291291 bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
292292 echo "::endgroup::"
293293
294+ echo "::group::Run inference with quantize file"
295+ if [ $(uname -s) != Darwin ]; then
296+ python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
297+
298+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
299+ python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
300+
301+ fi
302+ echo "::endgroup::"
303+
294304 test-gpu-aoti-float32:
295305 permissions:
296306 id-token: write
@@ -331,7 +341,11 @@ jobs:
331341
332342 echo "::group::Run inference with quantize file"
333343 if [ $(uname -s) != Darwin ]; then
334- python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
344+ python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
345+
346+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
347+ python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
348+
335349 fi
336350 echo "::endgroup::"
337351
@@ -374,9 +388,9 @@ jobs:
374388 echo "::endgroup::"
375389
376390 echo "::group::Run inference with quantize file"
377- if [ $(uname -s) == Darwin ]; then
378- python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
379- python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
391+ if [ $(uname -s) != Darwin ]; then
392+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
393+ python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
380394 fi
381395 echo "::endgroup::"
382396
0 commit comments