@@ -295,8 +295,8 @@ jobs:
295295 if [ $(uname -s) != Darwin ]; then
296296 python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
297297
298- python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
299- python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
298+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
299+ python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
300300
301301 fi
302302 echo "::endgroup::"
@@ -341,9 +341,9 @@ jobs:
341341
342342 echo "::group::Run inference with quantize file"
343343 if [ $(uname -s) != Darwin ]; then
344- python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
344+ python3 torchchat.py generate --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
345345
346- python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
346+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
347347 python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
348348
349349 fi
@@ -389,7 +389,7 @@ jobs:
389389
390390 echo "::group::Run inference with quantize file"
391391 if [ $(uname -s) != Darwin ]; then
392- python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"
392+ python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
393393 python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
394394 fi
395395 echo "::endgroup::"
0 commit comments