Update pull.yml

mikekgfb · web-flow · commit 38e69e84f1bb · 2025-01-31T09:50:57.000-08:00
Fixed typos
Use gs=32 (padding was apparently disabled, so users will have to get everything "just right". Not the UX I recommend.)
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -295,8 +295,8 @@ jobs:
         if [ $(uname -s) != Darwin ]; then
           python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           
-          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
 
         fi
         echo "::endgroup::"
@@ -341,9 +341,9 @@ jobs:
 
         echo "::group::Run inference with quantize file"
         if [ $(uname -s) != Darwin ]; then
-          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           
-          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
 
         fi
@@ -389,7 +389,7 @@ jobs:
 
         echo "::group::Run inference with quantize file"
         if [ $(uname -s) != Darwin ]; then
-          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
         fi
         echo "::endgroup::"