
Commit 482b9fc

Update with +
1 parent 61d458d commit 482b9fc
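The commit prefixes each command-line override with `+`. In Hydra-style override grammar (which this export CLI appears to follow), a bare `key=value` may only change a key that already exists in the loaded config, while `+key=value` appends a key that is absent. As a rough sketch of that semantics (hypothetical helper, not the real `export_llm` parser):

```python
# Hedged sketch: mimics Hydra-style override semantics, where `key=value`
# may only override an existing key and `+key=value` appends a new one.
# `apply_overrides` and the sample config are illustrative, not the real API.

def apply_overrides(config: dict, overrides: list[str]) -> dict:
    """Apply dotted-key overrides to a nested config dict."""
    for item in overrides:
        append = item.startswith("+")
        key, _, value = item.lstrip("+").partition("=")
        parts = key.split(".")
        node = config
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        leaf = parts[-1]
        if not append and leaf not in node:
            # Without `+`, overriding a missing key is an error --
            # the likely reason this commit adds the `+` prefix.
            raise KeyError(f"unknown key {key!r}; use +{key}={value} to add it")
        node[leaf] = value
    return config

config = {"export": {"output_name": "model.pte"}}
apply_overrides(config, ["export.output_name=llama.pte",
                         "+base.params=params.json"])
```

Under this assumption, keys such as `base.checkpoint` are not present in the base YAML configs, so they must be appended with `+` rather than overridden.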

5 files changed: +31 −31 lines changed

examples/models/deepseek-r1-distill-llama-8B/README.md

Lines changed: 3 additions & 3 deletions

````diff
@@ -54,9 +54,9 @@ torch.save(sd, "/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth")
 ```
 python -m extension.llm.export.export_llm \
 --config examples/models/deepseek-r1-distill-llama-8B/config/deepseek-r1-distill-llama-8B
-base.checkpoint=/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \
-base.params=params.json \
-export.output_name="DeepSeek-R1-Distill-Llama-8B.pte"
++base.checkpoint=/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \
++base.params=params.json \
++export.output_name="DeepSeek-R1-Distill-Llama-8B.pte"
 ```
 
 6. Run the model on your desktop for validation or integrate with iOS/Android apps. Instructions for these are available in the Llama [README](../llama/README.md) starting at Step 3.
````

examples/models/llama/README.md

Lines changed: 12 additions & 12 deletions

````diff
@@ -169,9 +169,9 @@ LLAMA_PARAMS=path/to/params.json
 
 python -m extension.llm.export.export_llm \
 --config examples/models/llamaconfig/llama_bf16.yaml
-base.model_class="llama3_2" \
-base.checkpoint="${LLAMA_CHECKPOINT:?}" \
-base.params="${LLAMA_PARAMS:?}" \
++base.model_class="llama3_2" \
++base.checkpoint="${LLAMA_CHECKPOINT:?}" \
++base.params="${LLAMA_PARAMS:?}" \
 ```
 For convenience, an [exported ExecuTorch bf16 model](https://huggingface.co/executorch-community/Llama-3.2-1B-ET/blob/main/llama3_2-1B.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-ET/blob/main/ExportRecipe_1B.ipynb).
 
@@ -187,9 +187,9 @@ LLAMA_PARAMS=path/to/spinquant/params.json
 
 python -m extension.llm.export.export_llm \
 --config examples/models/llama/config/llama_xnnpack_spinquant.yaml
-base.model_class="llama3_2" \
-base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
-base.params="${LLAMA_PARAMS:?}" \
++base.model_class="llama3_2" \
++base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
++base.params="${LLAMA_PARAMS:?}" \
 ```
 For convenience, an [exported ExecuTorch SpinQuant model](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/blob/main/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/blob/main/Export_Recipe_Llama_3_2_1B_Instruct_SpinQuant_INT4_EO8.ipynb).
 
@@ -204,9 +204,9 @@ LLAMA_PARAMS=path/to/qlora/params.json
 
 python -m extension.llm.export.export_llm \
 --config examples/models/llama/config/llama_xnnpack_qat.yaml
-base.model_class="llama3_2" \
-base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
-base.params="${LLAMA_PARAMS:?}" \
++base.model_class="llama3_2" \
++base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
++base.params="${LLAMA_PARAMS:?}" \
 ```
 For convenience, an [exported ExecuTorch QAT+LoRA model](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-QLORA_INT4_EO8-ET/blob/main/Llama-3.2-1B-Instruct-QLORA_INT4_EO8.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-QLORA_INT4_EO8-ET/blob/main/Export_Recipe_Llama_3_2_1B_Instruct_QLORA_INT4_EO8.ipynb).
 
@@ -220,9 +220,9 @@ You can export and run the original Llama 3 8B instruct model.
 ```
 python -m extension.llm.export.export_llm \
 --config examples/models/llama/config/llama_q8da4w.yaml
-base.model_clas="llama3"
-base.checkpoint=<consolidated.00.pth.pth> \
-base.params=<params.json> \
++base.model_clas="llama3"
++base.checkpoint=<consolidated.00.pth.pth> \
++base.params=<params.json> \
 ```
 Due to the larger vocabulary size of Llama 3, we recommend quantizing the embeddings with `quantization.embedding_quantize=\'4,32\'` as shown above to further reduce the model size.
````
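The `'4,32'` argument in the embedding-quantization note above denotes 4-bit weights with a group size of 32. As a rough illustration only (this is not the quantizer ExecuTorch actually uses, and the helper names are hypothetical), symmetric groupwise 4-bit quantization of an embedding row can be sketched as:

```python
# Illustrative sketch of symmetric groupwise 4-bit quantization, as implied
# by `quantization.embedding_quantize='4,32'` (4 bits, group size 32).
# Hypothetical helpers; not the ExecuTorch implementation.

def quantize_group(values, bits=4):
    """Symmetrically quantize one group of floats to signed `bits`-bit ints."""
    qmax = 2 ** (bits - 1) - 1                      # 7 for 4-bit signed
    scale = max(abs(v) for v in values) / qmax or 1.0
    q = [max(-qmax - 1, min(qmax, round(v / scale))) for v in values]
    return q, scale

def dequantize_group(q, scale):
    return [v * scale for v in q]

row = [0.05 * i - 0.8 for i in range(64)]           # one fake embedding row
group_size = 32
out = []
for start in range(0, len(row), group_size):
    q, scale = quantize_group(row[start:start + group_size])
    out.extend(dequantize_group(q, scale))

# Per-group scales keep the rounding error proportional to each group's range.
max_err = max(abs(a - b) for a, b in zip(row, out))
```

Smaller group sizes cost more scale storage but bound the error more tightly per group, which matters for a vocabulary-sized embedding table.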

examples/models/phi_4_mini/README.md

Lines changed: 3 additions & 3 deletions

````diff
@@ -34,9 +34,9 @@ PHI_CHECKPOINT=path/to/checkpoint.pth
 
 python -m extension.llm.export.export_llm \
 --config config/phi_4_mini_xnnpack.yaml
-base.checkpoint="${PHI_CHECKPOINT=path/to/checkpoint.pth:?}" \
-base.params="examples/models/phi-4-mini/config/config.json" \
-export.output_name="phi-4-mini.pte" \
++base.checkpoint="${PHI_CHECKPOINT=path/to/checkpoint.pth:?}" \
++base.params="examples/models/phi-4-mini/config/config.json" \
++export.output_name="phi-4-mini.pte" \
 ```
 
 Run using the executor runner:
````

examples/models/qwen2_5/README.md

Lines changed: 4 additions & 4 deletions

````diff
@@ -34,10 +34,10 @@ QWEN_CHECKPOINT=path/to/checkpoint.pth
 
 python -m extension.llm.export.export_llm \
 --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml
-base.model_class="qwen2_5" \
-base.checkpoint="${QWEN_CHECKPOINT:?}" \
-base.params="examples/models/qwen2_5/1_5b_config.json" \
-export.output_name="qwen2_5-1_5b.pte" \
++base.model_class="qwen2_5" \
++base.checkpoint="${QWEN_CHECKPOINT:?}" \
++base.params="examples/models/qwen2_5/1_5b_config.json" \
++export.output_name="qwen2_5-1_5b.pte" \
 ```
 
 Run using the executor runner:
````

examples/models/qwen3/README.md

Lines changed: 9 additions & 9 deletions

````diff
@@ -18,28 +18,28 @@ Export 0.6b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
 --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
-base.model_class="qwen3_0_6b" \
-base.params="examples/models/qwen3/config/0_6b_config.json" \
-export.output_name="qwen3_0_6b.pte" \
++base.model_class="qwen3_0_6b" \
++base.params="examples/models/qwen3/config/0_6b_config.json" \
++export.output_name="qwen3_0_6b.pte" \
 
 ```
 
 Export 1.7b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
 --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
-base.model_class="qwen3_1_7b" \
-base.params="examples/models/qwen3/config/1_7b_config.json" \
-export.output_name="qwen3_1_7b.pte" \
++base.model_class="qwen3_1_7b" \
++base.params="examples/models/qwen3/config/1_7b_config.json" \
++export.output_name="qwen3_1_7b.pte" \
 ```
 
 Export 4b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
 --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
-base.model_class="qwen3_4b" \
-base.params="examples/models/qwen3/config/4b_config.json" \
-export.output_name="qwen3_4b.pte" \
++base.model_class="qwen3_4b" \
++base.params="examples/models/qwen3/config/4b_config.json" \
++export.output_name="qwen3_4b.pte" \
 ```
 
 ### Example run
````
