Update pull.yml

mikekgfb · web-flow · commit e73360645b24 · 2025-02-08T13:10:11.000-08:00
Remove fp16 and fp32 int4 quantized models for now. @jerryzh168 Not sure why these dtypes are not compatible with int4 quantization?
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -340,10 +340,10 @@ jobs:
         fi
         echo "::endgroup::"
 
-        echo "::group::Run inference with quantize file"
-        python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-        python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-        echo "::endgroup::"
+        # echo "::group::Run inference with quantize file"
+        # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # echo "::endgroup::"
         
   test-gpu-aoti-float16:
     permissions:
@@ -390,10 +390,10 @@ jobs:
         fi
         echo "::endgroup::"
 
-        echo "::group::Run inference with quantize file"
-        python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-        python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-        echo "::endgroup::"
+        # echo "::group::Run inference with quantize file"
+        # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # echo "::endgroup::"
 
   test-gpu-eval-sanity-check:
     permissions: