Update pull.yml

mikekgfb · web-flow · commit 3ee6a0f92c2c · 2025-01-31T00:32:54.000-08:00
Test AOTI on GPU with all data types. (The transition from Python to AOTI code might need some tweaking.)
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -291,6 +291,16 @@ jobs:
         bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
         echo "::endgroup::"
 
+        echo "::group::Run inference with quantize file"
+        if [ $(uname -s) != Darwin ]; then
+          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # Export a quantized bfloat16 AOTI package, then run generation from the exported package.
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+
+        fi
+        echo "::endgroup::"
+        
   test-gpu-aoti-float32:
     permissions:
       id-token: write
@@ -331,7 +341,11 @@ jobs:
 
         echo "::group::Run inference with quantize file"
         if [ $(uname -s) != Darwin ]; then
-          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # Export a quantized float32 AOTI package, then run generation from the exported package.
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+
         fi
         echo "::endgroup::"
 
@@ -374,9 +388,9 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Run inference with quantize file"
-        if [ $(uname -s) == Darwin ]; then
-          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-             python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+        if [ $(uname -s) != Darwin ]; then  # skip on macOS; export a quantized float16 AOTI package, then generate from it
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
         fi
         echo "::endgroup::"