Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 343c94d

Browse files
authored
Update pull.yml to test snapshot saving and loading
test snapshot saving and loading
1 parent 7cbf2a3 commit 343c94d

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

.github/workflows/pull.yml

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,11 @@ jobs:
291291
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
292292
echo "::endgroup::"
293293
294+
echo "::group::Run inference with quantize file"
295+
python3 torchchat.py export --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
296+
python3 torchchat.py generate --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
297+
echo "::endgroup::"
298+
294299
test-gpu-aoti-float32:
295300
permissions:
296301
id-token: write
@@ -335,6 +340,11 @@ jobs:
335340
fi
336341
echo "::endgroup::"
337342
343+
echo "::group::Run inference with quantize file"
344+
python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
345+
python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
346+
echo "::endgroup::"
347+
338348
test-gpu-aoti-float16:
339349
permissions:
340350
id-token: write
@@ -376,10 +386,15 @@ jobs:
376386
echo "::group::Run inference with quantize file"
377387
if [ $(uname -s) == Darwin ]; then
378388
python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
379-
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
389+
python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
380390
fi
381391
echo "::endgroup::"
382392
393+
echo "::group::Run inference with quantize file"
394+
python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
395+
python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
396+
echo "::endgroup::"
397+
383398
test-gpu-eval-sanity-check:
384399
permissions:
385400
id-token: write
@@ -495,10 +510,11 @@ jobs:
495510
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
496511
497512
echo "******************************************"
498-
echo "*** --quantize torchchat/quant_config/mobile.json ***"
513+
echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
514+
echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
499515
echo "******************************************"
500-
# python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
501-
# python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
516+
python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
517+
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
502518
503519
504520
echo "******************************************"

0 commit comments

Comments
 (0)