@@ -291,6 +291,16 @@ jobs:
291291        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" 
292292        echo "::endgroup::" 
293293
294+         echo "::group::Run inference with quantize file" 
295+         if [ $(uname -s) != Darwin ]; then 
296+           python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
297+            
298+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
299+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
300+ 
301+         fi 
302+         echo "::endgroup::" 
303+          
294304test-gpu-aoti-float32:
295305    permissions:
296306      id-token: write 
@@ -331,7 +341,11 @@ jobs:
331341
332342        echo "::group::Run inference with quantize file" 
333343        if [ $(uname -s) != Darwin ]; then 
334-           python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
344+           python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
345+            
346+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
347+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
348+ 
335349        fi 
336350        echo "::endgroup::" 
337351
@@ -374,9 +388,9 @@ jobs:
374388        echo "::endgroup::" 
375389
376390        echo "::group::Run inference with quantize file" 
377-         if [ $(uname -s) == Darwin ]; then 
378-           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
379-               python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
391+         if [ $(uname -s) != Darwin ]; then 
392+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
393+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16  --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
380394        fi 
381395        echo "::endgroup::" 
382396
0 commit comments