@@ -295,8 +295,8 @@ jobs:
295295        if [ $(uname -s) != Darwin ]; then 
296296          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
297297           
298-           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
299-           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
298+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
299+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16  --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
300300
301301        fi 
302302        echo "::endgroup::" 
@@ -341,9 +341,9 @@ jobs:
341341
342342        echo "::group::Run inference with quantize file" 
343343        if [ $(uname -s) != Darwin ]; then 
344-           python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
344+           python3 torchchat.py generate --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
345345           
346-           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
346+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
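347-           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
347+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 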
348348
349349        fi 
@@ -389,7 +389,7 @@ jobs:
389389
390390        echo "::group::Run inference with quantize file" 
391391        if [ $(uname -s) != Darwin ]; then 
392-           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16--checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
392+           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float16  --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
393393          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
394394        fi 
395395        echo "::endgroup::" 
0 commit comments