@@ -291,6 +291,16 @@ jobs:
291291        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" 
292292        echo "::endgroup::" 
293293
294+         echo "::group::Run inference with quantize file" 
295+         for DEVICE in cpu; do # cuda  
296+           # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'` 
297+           # follow up with torchao as a separate PR 
298+           echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot" 
299+           python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
300+           python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
301+         done 
302+         echo "::endgroup::" 
303+ 
294304test-gpu-aoti-float32 :
295305    permissions :
296306      id-token : write 
@@ -335,6 +345,11 @@ jobs:
335345        fi 
336346        echo "::endgroup::" 
337347
348+         # echo "::group::Run inference with quantize file" 
349+         # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
350+         # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
351+         # echo "::endgroup::" 
352+          
338353test-gpu-aoti-float16 :
339354    permissions :
340355      id-token : write 
@@ -376,10 +391,15 @@ jobs:
376391        echo "::group::Run inference with quantize file" 
377392        if [ $(uname -s) == Darwin ]; then 
378393          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
379-               python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
394+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
380395        fi 
381396        echo "::endgroup::" 
382397
398+         # echo "::group::Run inference with quantize file" 
399+         # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
400+         # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
401+         # echo "::endgroup::" 
402+ 
383403test-gpu-eval-sanity-check :
384404    permissions :
385405      id-token : write 
@@ -495,10 +515,11 @@ jobs:
495515          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
496516
497517          echo "******************************************" 
498-           echo "*** --quantize torchchat/quant_config/mobile.json ***" 
518+           echo "*** can't test --quantize torchchat/quant_config/mobile.json ***" 
519+           echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***" 
499520          echo "******************************************" 
500-           #  python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
501-           #  python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
521+           python torchchat.py export --quantize torchchat/quant_config/mobile-32 .json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
522+           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
502523
503524
504525          echo "******************************************" 
@@ -1147,10 +1168,6 @@ jobs:
11471168        run : | 
11481169          echo "Installing runner" 
11491170          bash torchchat/utils/scripts/build_native.sh et link_torchao_ops 
1150- name : Install runner AOTI 
1151-         id : install-runner-aoti 
1152-         run : | 
1153-           bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops 
11541171name : Run inference 
11551172        run : | 
11561173          python torchchat.py download stories110M 
0 commit comments