@@ -291,6 +291,11 @@ jobs:
291291        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" 
292292        echo "::endgroup::" 
293293
294+         echo "::group::Run inference with quantize file" 
295+         python3 torchchat.py export --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
296+         python3 torchchat.py generate --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
297+         echo "::endgroup::" 
298+ 
294299test-gpu-aoti-float32 :
295300    permissions :
296301      id-token : write 
@@ -335,6 +340,11 @@ jobs:
335340        fi 
336341        echo "::endgroup::" 
337342
343+         echo "::group::Run inference with quantize file" 
344+         python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
345+         python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
346+         echo "::endgroup::" 
347+ 
338348test-gpu-aoti-float16 :
339349    permissions :
340350      id-token : write 
@@ -376,10 +386,15 @@ jobs:
376386        echo "::group::Run inference with quantize file" 
377387        if [ $(uname -s) == Darwin ]; then 
378388          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
379-               python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
389+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
380390        fi 
381391        echo "::endgroup::" 
382392
393+         echo "::group::Run inference with quantize file" 
394+         python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
395+         python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
396+         echo "::endgroup::" 
397+ 
383398test-gpu-eval-sanity-check :
384399    permissions :
385400      id-token : write 
@@ -495,10 +510,11 @@ jobs:
495510          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
496511
497512          echo "******************************************" 
498-           echo "*** --quantize torchchat/quant_config/mobile.json ***" 
513+           echo "*** can't test --quantize torchchat/quant_config/mobile.json ***" 
514+           echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***" 
499515          echo "******************************************" 
500-           #  python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
501-           #  python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
516+           python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
517+           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
502518
503519
504520          echo "******************************************" 