Skip to content

Commit b552d60

Browse files
committed
Update
[ghstack-poisoned]
2 parents f3016c0 + 82f7090 commit b552d60

File tree

5 files changed

+18
-18
lines changed

5 files changed

+18
-18
lines changed

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ if [[ "${QNN}" == "ON" ]]; then
241241
EXPORT_ARGS="${EXPORT_ARGS} backend.qnn.enabled=true model.enable_dynamic_shape=false debug.verbose=true"
242242
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
243243
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
244-
EXPORT_ARGS+=" base.tokenizer_path=tokenizer.model quantization.pt2e_quantize=qnn_16a16w quantization.calibration_tasks=\"[wikitext]\" quantization.calibration_limit=1 quantization.calibration_seq_length=128 quantization.calibration_data=\"Once\""
244+
EXPORT_ARGS+=" base.tokenizer_path=tokenizer.model quantization.pt2e_quantize=qnn_16a16w quantization.calibration_tasks=[\"wikitext\"] quantization.calibration_limit=1 quantization.calibration_seq_length=128 quantization.calibration_data=\"Once\""
245245
fi
246246
fi
247247
if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ ${PYTHON_EXECUTABLE} -m extension.llm.export.export_llm \
7878
export.output_name="${MODEL_OUT}" \
7979
quantization.qmode="torchao:8da${QLINEAR_BITWIDTH}w" \
8080
quantization.group_size=${QLINEAR_GROUP_SIZE} \
81-
quantization.embedding_quantize="torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
81+
quantization.embedding_quantize=\"torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}\" \
8282
model.dtype_override=fp32
8383

8484
# Test run

.github/workflows/android-perf.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ jobs:
230230
model.dtype_override=fp32 \
231231
base.preq_embedding_quantize="8,0" \
232232
quantization.use_spin_quant=native \
233-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"
233+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}"
234234
ls -lh "${OUT_ET_MODEL_NAME}.pte"
235235
elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
236236
# QAT + LoRA
@@ -258,7 +258,7 @@ jobs:
258258
export.max_seq_length=2048 \
259259
export.max_context_length=2048 \
260260
export.output_name="${OUT_ET_MODEL_NAME}.pte" \
261-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"
261+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}"
262262
ls -lh "${OUT_ET_MODEL_NAME}.pte"
263263
elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
264264
# Original BF16 version, without any quantization
@@ -271,7 +271,7 @@ jobs:
271271
model.use_sdpa_with_kv_cache=true \
272272
backend.xnnpack.enabled=true \
273273
model.dtype_override=bf16 \
274-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}" \
274+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}" \
275275
export.output_name="${OUT_ET_MODEL_NAME}.pte"
276276
ls -lh "${OUT_ET_MODEL_NAME}.pte"
277277
elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
@@ -288,7 +288,7 @@ jobs:
288288
quantization.qmode=8da4w \
289289
quantization.group_size=32 \
290290
quantization.embedding_quantize="8,0" \
291-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}" \
291+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}" \
292292
export.output_name="${OUT_ET_MODEL_NAME}.pte"
293293
ls -lh "${OUT_ET_MODEL_NAME}.pte"
294294
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
@@ -326,7 +326,7 @@ jobs:
326326
quantization.qmode=8da4w \
327327
quantization.group_size=32 \
328328
quantization.embedding_quantize="8,0" \
329-
base.metadata="{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}" \
329+
base.metadata="\{\"get_bos_id\":151644,\"get_eos_ids\":[151645]\}" \
330330
export.output_name="${OUT_ET_MODEL_NAME}.pte"
331331
ls -lh "${OUT_ET_MODEL_NAME}.pte"
332332
fi

.github/workflows/apple-perf.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ jobs:
239239
model.dtype_override=fp32 \
240240
base.preq_embedding_quantize="8,0" \
241241
quantization.use_spin_quant=native \
242-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"
242+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}"
243243
ls -lh "${OUT_ET_MODEL_NAME}.pte"
244244
elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
245245
# QAT + LoRA
@@ -267,7 +267,7 @@ jobs:
267267
export.max_seq_length=2048 \
268268
export.max_context_length=2048 \
269269
export.output_name="${OUT_ET_MODEL_NAME}.pte" \
270-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"
270+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}"
271271
ls -lh "${OUT_ET_MODEL_NAME}.pte"
272272
elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
273273
# Original BF16 version, without any quantization
@@ -280,7 +280,7 @@ jobs:
280280
model.use_sdpa_with_kv_cache=true \
281281
backend.xnnpack.enabled=true \
282282
model.dtype_override=bf16 \
283-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}" \
283+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}" \
284284
export.output_name="${OUT_ET_MODEL_NAME}.pte"
285285
ls -lh "${OUT_ET_MODEL_NAME}.pte"
286286
elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
@@ -297,7 +297,7 @@ jobs:
297297
quantization.qmode=8da4w \
298298
quantization.group_size=32 \
299299
quantization.embedding_quantize="8,0" \
300-
base.metadata="{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}" \
300+
base.metadata="\{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]\}" \
301301
export.output_name="${OUT_ET_MODEL_NAME}.pte"
302302
ls -lh "${OUT_ET_MODEL_NAME}.pte"
303303
elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then
@@ -331,7 +331,7 @@ jobs:
331331
quantization.qmode=8da4w \
332332
quantization.group_size=32 \
333333
quantization.embedding_quantize="8,0" \
334-
base.metadata="{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}" \
334+
base.metadata="\{\"get_bos_id\":151644,\"get_eos_ids\":[151645]\}" \
335335
export.output_name="${OUT_ET_MODEL_NAME}.pte"
336336
ls -lh "${OUT_ET_MODEL_NAME}.pte"
337337
fi

extension/llm/export/test/test_export_llm.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ def test_with_config(self, mock_export_llama: MagicMock) -> None:
7474
called_config = mock_export_llama.call_args[0][0]
7575
self.assertEqual(called_config["base"]["tokenizer_path"], "/path/to/tokenizer.json")
7676
self.assertEqual(called_config["base"]["model_class"], "llama2")
77-
self.assertEqual(called_config["base"]["preq_mode"], "preq_8da4w")
78-
self.assertEqual(called_config["model"]["dtype_override"], "fp16")
77+
self.assertEqual(called_config["base"]["preq_mode"].value, "8da4w")
78+
self.assertEqual(called_config["model"]["dtype_override"].value, "fp16")
7979
self.assertEqual(called_config["export"]["max_seq_length"], 256)
80-
self.assertEqual(called_config["quantization"]["pt2e_quantize"], "xnnpack_dynamic")
81-
self.assertEqual(called_config["quantization"]["use_spin_quant"], "cuda")
82-
self.assertEqual(called_config["backend"]["coreml"]["quantize"], "c4w")
83-
self.assertEqual(called_config["backend"]["coreml"]["compute_units"], "cpu_and_gpu")
80+
self.assertEqual(called_config["quantization"]["pt2e_quantize"].value, "xnnpack_dynamic")
81+
self.assertEqual(called_config["quantization"]["use_spin_quant"].value, "cuda")
82+
self.assertEqual(called_config["backend"]["coreml"]["quantize"].value, "c4w")
83+
self.assertEqual(called_config["backend"]["coreml"]["compute_units"].value, "cpu_and_gpu")
8484
finally:
8585
os.unlink(config_file)
8686

0 commit comments

Comments (0)