@@ -133,60 +133,60 @@ function generate_aoti_model_output() {
133133 echo " ******************************************"
134134 echo " ************** non-quantized *************"
135135 echo " ******************************************"
136- python3 -W ignore torchchat.py export --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path " ${MODEL_DIR} /${MODEL_NAME} .so " --device " $TARGET_DEVICE " || exit 1
137- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso- path " $MODEL_DIR /${MODEL_NAME} .so " --prompt " $PROMPT " --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
136+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path " ${MODEL_DIR} /${MODEL_NAME} .pt2 " --device " $TARGET_DEVICE " || exit 1
137+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --aoti-package- path " $MODEL_DIR /${MODEL_NAME} .pt2 " --prompt " $PROMPT " --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
138138 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
139139
140140 echo " ******************************************"
141141 echo " ******* Emb: channel-wise quantized ******"
142142 echo " ******************************************"
143- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
144- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
143+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
144+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
145145 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
146146
147147 echo " ******************************************"
148148 echo " ******** Emb: group-wise quantized *******"
149149 echo " ******************************************"
150- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
151- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
150+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
151+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
152152 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
153153
154154 echo " ***********************************************"
155155 echo " ******* Emb: 4bit channel-wise quantized ******"
156156 echo " ***********************************************"
157- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
158- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
157+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
158+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
159159 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
160160
161161 echo " ***********************************************"
162162 echo " ******** Emb: 4bit group-wise quantized *******"
163163 echo " ***********************************************"
164- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
165- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
164+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
165+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
166166 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
167167
168168 if [ " ${EXCLUDE_INT8_QUANT:- false} " == false ]; then
169169 echo " ******************************************"
170170 echo " ******* INT8 channel-wise quantized ******"
171171 echo " ******************************************"
172- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
173- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
172+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
173+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
174174 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
175175
176176 echo " ******************************************"
177177 echo " ******** INT8 group-wise quantized *******"
178178 echo " ******************************************"
179- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
180- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
179+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
180+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
181181 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
182182 fi
183183 echo " ******************************************"
184184 echo " ******** INT4 group-wise quantized *******"
185185 echo " ******************************************"
186186 if [[ " $TARGET_DEVICE " != " cuda" || " $DTYPE " == " bfloat16" ]]; then
187187 # For CUDA, only bfloat16 makes sense for int4 mm kernel
188- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
189- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
188+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
189+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
190190 .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
191191 fi
192192 done
@@ -285,8 +285,8 @@ function eval_model_sanity_check() {
285285 echo " ******** INT4 group-wise quantized (AOTI) *******"
286286 echo " *************************************************"
287287 if [ " $DTYPE " != " float16" ]; then
288- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --dynamic-shapes --device " $TARGET_DEVICE " || exit 1
289- python3 -W ignore torchchat.py eval --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
288+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --dynamic-shapes --device " $TARGET_DEVICE " || exit 1
289+ python3 -W ignore torchchat.py eval --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
290290 cat " $MODEL_DIR /output_eval_aoti"
291291 fi ;
292292 fi ;
0 commit comments