@@ -291,6 +291,16 @@ jobs:
291291        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" 
292292        echo "::endgroup::" 
293293
294+         echo "::group::Run inference with quantize file" 
295+           for DEVICE in cpu; do # only cpu is exercised; cuda is left out for now (see note below)
296+           # cuda is excluded from the loop: it currently fails with `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
297+           # TODO: follow up with torchao in a separate PR, then add cuda back to the device list
298+           echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot" 
299+           python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
300+           python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
301+         done 
302+         echo "::endgroup::" 
303+ 
294304test-gpu-aoti-float32 :
295305    permissions :
296306      id-token : write 
@@ -335,6 +345,11 @@ jobs:
335345        fi 
336346        echo "::endgroup::" 
337347
348+         # echo "::group::Run inference with quantize file" 
349+         # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
350+         # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
351+         # echo "::endgroup::" 
352+          
338353test-gpu-aoti-float16 :
339354    permissions :
340355      id-token : write 
@@ -376,10 +391,15 @@ jobs:
376391        echo "::group::Run inference with quantize file" 
377392        if [ $(uname -s) == Darwin ]; then 
378393          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
379-               python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
394+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
380395        fi 
381396        echo "::endgroup::" 
382397
398+         # echo "::group::Run inference with quantize file" 
399+         # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
400+         # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
401+         # echo "::endgroup::" 
402+ 
383403test-gpu-eval-sanity-check :
384404    permissions :
385405      id-token : write 
@@ -495,10 +515,11 @@ jobs:
495515          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
496516
497517          echo "******************************************" 
498-           echo "*** --quantize torchchat/quant_config/mobile.json ***" 
518+           echo "*** can't test --quantize torchchat/quant_config/mobile.json ***" 
519+           echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***" 
499520          echo "******************************************" 
500-           #  python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
501-           #  python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
521+           python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
522+           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
502523
503524
504525          echo "******************************************" 
@@ -1055,7 +1076,59 @@ jobs:
10551076          ./runner/build_android.sh 
10561077          echo "Tests complete." 
10571078
1058- test-torchao-experimental :
1079+ test-torchao-aoti-experimental : # AOTI half of the former test-torchao-experimental job
1080+     strategy :
1081+       matrix :
1082+         runner : [macos-14-xlarge] 
1083+     runs-on : ${{matrix.runner}} 
1084+     steps :
1085+       - name : Checkout repo 
1086+         uses : actions/checkout@v3 
1087+         with :
1088+           submodules : true 
1089+       - name : Setup Python 
1090+         uses : actions/setup-python@v2 
1091+         with :
1092+           python-version : 3.10.11 
1093+       - name : Setup Xcode 
1094+         if : runner.os == 'macOS' 
1095+         uses : maxim-lobanov/setup-xcode@v1 
1096+         with :
1097+           xcode-version : '15.3' 
1098+       - name : Print machine info 
1099+         run : | 
1100+           uname -a 
1101+           if [ $(uname -s) == Darwin ]; then 
1102+             sysctl machdep.cpu.brand_string 
1103+             sysctl machdep.cpu.core_count 
1104+           fi 
1105+       - name : Install torchchat 
1106+         run : | 
1107+           echo "Installing pip3 packages" 
1108+           ./install/install_requirements.sh 
1109+           pip3 list 
1110+           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' 
1111+       - name : Install torchao-ops 
1112+         id : install-torchao-ops 
1113+         run : | 
1114+           bash torchchat/utils/scripts/build_torchao_ops.sh 
1115+       - name : Install runner AOTI # builds the runner invoked below as ./cmake-out/aoti_run
1116+         id : install-runner-aoti 
1117+         run : | 
1118+           bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops 
1119+       - name : Run inference # export once, then run the package through both the C++ runner and torchchat generate
1120+         run : | 
1121+           python torchchat.py download stories110M 
1122+           wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model 
1123+           export PRMT="Once upon a time in a land far away" 
1124+           echo "Export and run AOTI (C++ runner)" 
1125+           python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' 
1126+           ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" 
1127+           echo "Generate AOTI" 
1128+           python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}" 
1129+           echo "Tests complete." 
1130+ 
1131+ test-torchao-et-experimental :
10591132    strategy :
10601133      matrix :
10611134        runner : [macos-14-xlarge] 
@@ -1100,10 +1173,6 @@ jobs:
11001173        run : | 
11011174          echo "Installing runner" 
11021175          bash torchchat/utils/scripts/build_native.sh et link_torchao_ops 
1103- name : Install runner AOTI 
1104-         id : install-runner-aoti 
1105-         run : | 
1106-           bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops 
11071176name : Run inference 
11081177        run : | 
11091178          python torchchat.py download stories110M 
@@ -1116,11 +1185,6 @@ jobs:
11161185          echo "Export and run ET (C++ runner)" 
11171186          python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' 
11181187          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" 
1119-           echo "Export and run AOTI (C++ runner)" 
1120-           python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' 
1121-           ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" 
1122-           echo "Generate AOTI" 
1123-           python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}" 
11241188          echo "Tests complete." 
11251189
11261190test-torchao-experimental-mps :
0 commit comments