We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 672acf7 commit c947f97 (Copy full SHA for c947f97)
model_export_script.sh
@@ -13,7 +13,7 @@ export MODEL_OUT_DECODE=${MODEL_OUT_DIR}/decode_model_${STATIC_SEQ_LENGTH}.pte
13
14
# python -m examples.models.llama.export_llama -c $MODEL_IN -p $PARAMS --output_name=$MODEL_OUT_DECODE -E "4,32" -kv --coreml --coreml-ios 18 --coreml-quantize c4w --coreml-compute-units cpu_and_ne --max_seq_length 1024 --verbose -d "fp16" --static_seq_length $STATIC_SEQ_LENGTH
15
# python -m examples.models.llama.export_llama -c $MODEL_IN -p $PARAMS --output_name=$MODEL_OUT_DECODE_KV_IO -E "4,32" -kv --coreml --coreml-ios 18 --coreml-quantize c4w --coreml-compute-units cpu_and_ne --max_seq_length 1024 --verbose -d "fp16" --static_seq_length $STATIC_SEQ_LENGTH --decode_kv_cache_as_io
16
-python -m examples.models.llama.export_llama -c $MODEL_IN -p $PARAMS --output_name=$MODEL_OUT_DECODE_KV_IO_ADDITIVE -E "4,32" -kv --coreml --coreml-ios 18 --coreml-quantize c4w --coreml-compute-units cpu_and_ne --max_seq_length 1024 --verbose -d "fp16" --static_seq_length $STATIC_SEQ_LENGTH --decode_kv_cache_as_io --use_additive_kv_cache_update --disable_dynamic_shape
+python -m examples.models.llama.export_llama -c $MODEL_IN -p $PARAMS --output_name=$MODEL_OUT_DECODE_KV_IO_ADDITIVE -E "4,32" -kv --coreml --coreml-ios 18 --coreml-quantize c4w --coreml-compute-units cpu_and_ne --max_seq_length 1024 --verbose -d "fp16" --static_seq_length $STATIC_SEQ_LENGTH --decode_kv_cache_as_io --use_additive_kv_cache_update
17
18
# python examples/apple/coreml/scripts/extract_coreml_models.py -m $MODEL_OUT_DECODE -o "${MODEL_OUT_DIR}/decode_${STATIC_SEQ_LENGTH}"
19
# python examples/apple/coreml/scripts/extract_coreml_models.py -m $MODEL_OUT_DECODE_KV_IO -o "${MODEL_OUT_DIR}/decode_kv_io${STATIC_SEQ_LENGTH}"
0 commit comments