diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index bbf879295ae..bc9bbb8bae0 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -102,7 +102,7 @@ test_model() { bash examples/models/llama/install_requirements.sh # Test export_llm script: python3 -m extension.llm.export.export_llm. # Use Llama random checkpoint with Qwen 2.5 1.5b model configuration. - "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/qwen2_5/1_5b_config.json + "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/qwen2_5/config/1_5b_config.json rm "./${MODEL_NAME}.pte" return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears. fi @@ -110,7 +110,7 @@ test_model() { # Install requirements for export_llama bash examples/models/llama/install_requirements.sh # Test export_llm script: python3 -m extension.llm.export.export_llm. - "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/phi_4_mini/config.json + "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/phi_4_mini/config/config.json run_portable_executor_runner rm "./${MODEL_NAME}.pte" return diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index a79a900b2d8..2eab69eb88b 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -317,7 +317,7 @@ jobs: DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") python -m extension.llm.export.export_llm \ base.model_class=qwen3_0_6b \ - base.params=examples/models/qwen3/0_6b_config.json \ + base.params=examples/models/qwen3/config/0_6b_config.json \ model.use_kv_cache=true \ model.use_sdpa_with_kv_cache=true \ model.dtype_override=fp32 \ diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 6b1666da642..3db5abbefbd 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -322,7 +322,7 @@ jobs: DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json") ${CONDA_RUN} python -m extension.llm.export.export_llm \ base.model_class=qwen3_0_6b \ - base.params=examples/models/qwen3/0_6b_config.json \ + base.params=examples/models/qwen3/config/0_6b_config.json \ model.use_kv_cache=true \ model.use_sdpa_with_kv_cache=true \ model.dtype_override=fp32 \ diff --git a/examples/models/deepseek-r1-distill-llama-8B/README.md b/examples/models/deepseek-r1-distill-llama-8B/README.md index f05dd9990a2..00397e9f60f 100644 --- a/examples/models/deepseek-r1-distill-llama-8B/README.md +++ b/examples/models/deepseek-r1-distill-llama-8B/README.md @@ -53,17 +53,10 @@ torch.save(sd, "/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth") 5. Generate a PTE file for use with the Llama runner. 
 ```
 python -m extension.llm.export.export_llm \
-    base.checkpoint=/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \
-    base.params=params.json \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    backend.xnnpack.enabled=True \
-    quantization.qmode="8da4w" \
-    quantization.group_size=128 \
-    model.dtype_override="fp16" \
-    base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"' \
-    quantization.embedding_quantize=\'4,32\' \
-    export.output_name="DeepSeek-R1-Distill-Llama-8B.pte"
+    --config examples/models/deepseek-r1-distill-llama-8B/config/deepseek_xnnpack_q8da4w.yaml \
+    +base.checkpoint=/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \
+    +base.params=params.json \
+    +export.output_name="DeepSeek-R1-Distill-Llama-8B.pte"
 ```
 
 6. Run the model on your desktop for validation or integrate with iOS/Android apps. Instructions for these are available in the Llama [README](../llama/README.md) starting at Step 3.
diff --git a/examples/models/deepseek-r1-distill-llama-8B/config/deepseek_xnnpack_q8da4w.yaml b/examples/models/deepseek-r1-distill-llama-8B/config/deepseek_xnnpack_q8da4w.yaml
new file mode 100644
index 00000000000..1da7c253d92
--- /dev/null
+++ b/examples/models/deepseek-r1-distill-llama-8B/config/deepseek_xnnpack_q8da4w.yaml
@@ -0,0 +1,16 @@
+base:
+  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+
+model:
+  use_kv_cache: True
+  use_sdpa_with_kv_cache: True
+  dtype_override: fp16
+
+backend:
+  xnnpack:
+    enabled: True
+
+quantization:
+  qmode: 8da4w
+  group_size: 128
+  embedding_quantize: 4,32
diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md
index 3e6869e5c49..bbd2107ad74 100644
--- a/examples/models/llama/README.md
+++ b/examples/models/llama/README.md
@@ -168,14 +168,10 @@ LLAMA_CHECKPOINT=path/to/consolidated.00.pth
 LLAMA_PARAMS=path/to/params.json
 
 python -m extension.llm.export.export_llm \
-    base.model_class="llama3_2" \
-    base.checkpoint="${LLAMA_CHECKPOINT:?}" \
-    base.params="${LLAMA_PARAMS:?}" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="bf16" \
-    base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"' \
-    export.output_name="llama3_2.pte"
+    --config examples/models/llama/config/llama_bf16.yaml \
+    +base.model_class="llama3_2" \
+    +base.checkpoint="${LLAMA_CHECKPOINT:?}" \
+    +base.params="${LLAMA_PARAMS:?}"
 ```
 
 For convenience, an [exported ExecuTorch bf16 model](https://huggingface.co/executorch-community/Llama-3.2-1B-ET/blob/main/llama3_2-1B.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-ET/blob/main/ExportRecipe_1B.ipynb).
@@ -190,22 +186,10 @@ LLAMA_QUANTIZED_CHECKPOINT=path/to/spinquant/consolidated.00.pth.pth
 LLAMA_PARAMS=path/to/spinquant/params.json
 
 python -m extension.llm.export.export_llm \
-    base.model_class="llama3_2" \
-    base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
-    base.params="${LLAMA_PARAMS:?}" \
-    model.use_sdpa_with_kv_cache=True \
-    backend.xnnpack.enabled=True \
-    backend.xnnpack.extended_ops=True \
-    base.preq_mode="preq_8da4w_out_8da8w" \
-    base.preq_group_size=32 \
-    export.max_seq_length=2048 \
-    export.max_context_length=2048 \
-    export.output_name="llama3_2.pte" \
-    model.use_kv_cache=True \
-    model.dtype_override="fp32" \
-    base.preq_embedding_quantize=\'8,0\' \
-    quantization.use_spin_quant="native" \
-    base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
+    --config examples/models/llama/config/llama_xnnpack_spinquant.yaml \
+    +base.model_class="llama3_2" \
+    +base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
+    +base.params="${LLAMA_PARAMS:?}"
 ```
 
 For convenience, an [exported ExecuTorch SpinQuant model](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/blob/main/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/blob/main/Export_Recipe_Llama_3_2_1B_Instruct_SpinQuant_INT4_EO8.ipynb).
@@ -219,23 +203,10 @@ LLAMA_QUANTIZED_CHECKPOINT=path/to/qlora/consolidated.00.pth.pth
 LLAMA_PARAMS=path/to/qlora/params.json
 
 python -m extension.llm.export.export_llm \
-    base.model_class="llama3_2" \
-    base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
-    base.params="${LLAMA_PARAMS:?}" \
-    quantization.use_qat=True \
-    base.use_lora=16 \
-    base.preq_mode="preq_8da4w_out_8da8w" \
-    base.preq_group_size=32 \
-    base.preq_embedding_quantize=\'8,0\' \
-    model.use_sdpa_with_kv_cache=True \
-    model.use_kv_cache=True \
-    backend.xnnpack.enabled=True \
-    backend.xnnpack.extended_ops=True \
-    model.dtype_override="fp32" \
-    export.max_seq_length=2048 \
-    export.max_context_length=2048 \
-    export.output_name="llama3_2.pte" \
-    base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
+    --config examples/models/llama/config/llama_xnnpack_qat.yaml \
+    +base.model_class="llama3_2" \
+    +base.checkpoint="${LLAMA_QUANTIZED_CHECKPOINT:?}" \
+    +base.params="${LLAMA_PARAMS:?}"
 ```
 
 For convenience, an [exported ExecuTorch QAT+LoRA model](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-QLORA_INT4_EO8-ET/blob/main/Llama-3.2-1B-Instruct-QLORA_INT4_EO8.pte) is available on Hugging Face. The export was created using [this detailed recipe notebook](https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-QLORA_INT4_EO8-ET/blob/main/Export_Recipe_Llama_3_2_1B_Instruct_QLORA_INT4_EO8.ipynb).
@@ -246,20 +217,13 @@ You can export and run the original Llama 3 8B instruct model.
 
 1. Llama 3 pretrained parameters can be downloaded from [Meta's official Llama 3 repository](https://github.com/meta-llama/llama3/).
 
 2. Export model and generate `.pte` file
-    ```
-    python -m extension.llm.export.export_llm \
-    base.checkpoint= \
-    base.params= \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    backend.xnnpack.enabled=True \
-    quantization.qmode="8da4w" \
-    quantization.group_size=128 \
-    model.dtype_override="fp32" \
-    base.metadata='"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"' \
-    quantization.embedding_quantize=\'4,32\' \
-    export.output_name="llama3_kv_sdpa_xnn_qe_4_32.pte"
-    ```
+```
+python -m extension.llm.export.export_llm \
+    --config examples/models/llama/config/llama_q8da4w.yaml \
+    +base.model_class="llama3" \
+    +base.checkpoint= \
+    +base.params=
+```
 
 Due to the larger vocabulary size of Llama 3, we recommend quantizing the embeddings with `quantization.embedding_quantize=\'4,32\'` as shown above to further reduce the model size.
@@ -276,20 +240,20 @@ You can export and run the original Llama 3 8B instruct model.
 
     Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the section of Common Issues and Mitigations below for solutions.
 
 2. Build llama runner.
-    ```
-    cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DBUILD_TESTING=OFF \
-        -DCMAKE_BUILD_TYPE=Release \
-        -Bcmake-out/examples/models/llama \
-        examples/models/llama
+```
+cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DBUILD_TESTING=OFF \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
 
-    cmake --build cmake-out/examples/models/llama -j16 --config Release
-    ```
+cmake --build cmake-out/examples/models/llama -j16 --config Release
+```
 
 3. Run model. Run options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/llama/main.cpp#L18-L40).
-    ```
-    cmake-out/examples/models/llama/llama_main --model_path= --tokenizer_path= --prompt=
-    ```
+```
+cmake-out/examples/models/llama/llama_main --model_path= --tokenizer_path= --prompt=
+```
 
 To build for CoreML backend and validate on Mac, replace `-DEXECUTORCH_BUILD_XNNPACK=ON` with `-DEXECUTORCH_BUILD_COREML=ON`
diff --git a/examples/models/llama/config/llama_bf16.yaml b/examples/models/llama/config/llama_bf16.yaml
new file mode 100644
index 00000000000..8e89e8aa437
--- /dev/null
+++ b/examples/models/llama/config/llama_bf16.yaml
@@ -0,0 +1,7 @@
+base:
+  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+
+model:
+  use_kv_cache: True
+  use_sdpa_with_kv_cache: True
+  dtype_override: bf16
\ No newline at end of file
diff --git a/examples/models/llama/config/llama_q8da4w.yaml b/examples/models/llama/config/llama_q8da4w.yaml
new file mode 100644
index 00000000000..476ae928c60
--- /dev/null
+++ b/examples/models/llama/config/llama_q8da4w.yaml
@@ -0,0 +1,11 @@
+base:
+  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+
+model:
+  dtype_override: fp32
+
+quantization:
+  qmode: 8da4w
+  group_size: 128
+  embedding_quantize: 4,32
+
\ No newline at end of file
diff --git a/examples/models/llama/config/llama_xnnpack_qat.yaml b/examples/models/llama/config/llama_xnnpack_qat.yaml
new file mode 100644
index 00000000000..2369ff1d279
--- /dev/null
+++ b/examples/models/llama/config/llama_xnnpack_qat.yaml
@@ -0,0 +1,23 @@
+base:
+  preq_mode: preq_8da4w_out_8da8w
+  preq_group_size: 32
+  preq_embedding_quantize: 8,0
+  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+  use_lora: 16
+
+model:
+  use_sdpa_with_kv_cache: True
+  use_kv_cache: True
+  dtype_override: fp32
+
+export:
+  max_seq_length: 2048
+  max_context_length: 2048
+
+quantization:
+  use_qat: True
+
+backend:
+  xnnpack:
+    enabled: True
+    extended_ops: True
\ No newline at end of file
diff --git a/examples/models/llama/config/llama_xnnpack_spinquant.yaml b/examples/models/llama/config/llama_xnnpack_spinquant.yaml
new file mode 100644
index 00000000000..441086d6f73
--- /dev/null
+++ b/examples/models/llama/config/llama_xnnpack_spinquant.yaml
@@ -0,0 +1,22 @@
+base:
+  preq_mode: preq_8da4w_out_8da8w
+  preq_group_size: 32
+  preq_embedding_quantize: 8,0
+  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+
+model:
+  use_sdpa_with_kv_cache: True
+  use_kv_cache: True
+  dtype_override: fp32
+
+export:
+  max_seq_length: 2048
+  max_context_length: 2048
+
+quantization:
+  use_spin_quant: native
+
+backend:
+  xnnpack:
+    enabled: True
+    extended_ops: True
\ No newline at end of file
diff --git a/examples/models/phi_4_mini/README.md b/examples/models/phi_4_mini/README.md
index d168d54226e..8fb2f03ac4c 100644
--- a/examples/models/phi_4_mini/README.md
+++ b/examples/models/phi_4_mini/README.md
@@ -8,7 +8,7 @@ Phi-4-mini uses the same example code as Llama, while the checkpoint, model para
 All commands for exporting and running Llama on various backends should also be applicable to Phi-4-mini, by swapping the following args:
 ```
 base.model_class="phi_4_mini"
-base.params="examples/models/phi-4-mini/config.json"
+base.params="examples/models/phi_4_mini/config/config.json"
 base.checkpoint=
 ```
 
@@ -33,16 +33,10 @@ Export to XNNPack, no quantization:
 PHI_CHECKPOINT=path/to/checkpoint.pth
 
 python -m extension.llm.export.export_llm \
-    base.model_class="phi_4_mini" \
-    base.checkpoint="${PHI_CHECKPOINT=path/to/checkpoint.pth:?}" \
-    base.params="examples/models/phi-4-mini/config.json" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="fp32" \
-    backend.xnnpack.enabled=True \
-    base.metadata='"{\"get_bos_id\":151643, \"get_eos_ids\":[151643]}"' \
-    export.output_name="phi-4-mini.pte" \
-    debug.verbose=True
+    --config examples/models/phi_4_mini/config/phi_4_mini_xnnpack.yaml \
+    +base.checkpoint="${PHI_CHECKPOINT:?}" \
+    +base.params="examples/models/phi_4_mini/config/config.json" \
+    +export.output_name="phi-4-mini.pte"
 ```
 
 Run using the executor runner:
diff --git a/examples/models/phi_4_mini/config.json b/examples/models/phi_4_mini/config/config.json
similarity index 100%
rename from examples/models/phi_4_mini/config.json
rename to examples/models/phi_4_mini/config/config.json
diff --git a/examples/models/phi_4_mini/config/phi_4_mini_xnnpack.yaml b/examples/models/phi_4_mini/config/phi_4_mini_xnnpack.yaml
new file mode 100644
index 00000000000..9355bd99f64
--- /dev/null
+++ b/examples/models/phi_4_mini/config/phi_4_mini_xnnpack.yaml
@@ -0,0 +1,12 @@
+base:
+  model_class: phi_4_mini
+  metadata: '{"get_bos_id":151643, "get_eos_ids":[151643]}'
+
+model:
+  use_kv_cache: True
+  use_sdpa_with_kv_cache: True
+  dtype_override: fp32
+
+backend:
+  xnnpack:
+    enabled: True
\ No newline at end of file
diff --git a/examples/models/qwen2_5/README.md b/examples/models/qwen2_5/README.md
index 57784169ece..566a7a5c30b 100644
--- a/examples/models/qwen2_5/README.md
+++ b/examples/models/qwen2_5/README.md
@@ -8,7 +8,7 @@ Qwen 2.5 uses the same example code as Llama, while the checkpoint, model params
 All commands for exporting and running Llama on various backends should also be applicable to Qwen 2.5, by swapping the following args:
 ```
 base.model_class="qwen2_5"
-base.params="examples/models/qwen2_5/1_5b_config.json"
+base.params="examples/models/qwen2_5/config/1_5b_config.json"
 base.checkpoint=
 ```
 
@@ -33,16 +33,11 @@ Export to XNNPack, no quantization:
 QWEN_CHECKPOINT=path/to/checkpoint.pth
 
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen2_5" \
-    base.checkpoint="${QWEN_CHECKPOINT:?}" \
-    base.params="examples/models/qwen2_5/1_5b_config.json" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="fp32" \
-    backend.xnnpack.enabled=True \
-    base.metadata='"{\"get_bos_id\":151643, \"get_eos_ids\":[151643]}"' \
-    export.output_name="qwen2_5-1_5b.pte" \
-    debug.verbose=True
+    --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml \
+    +base.model_class="qwen2_5" \
+    +base.checkpoint="${QWEN_CHECKPOINT:?}" \
+    +base.params="examples/models/qwen2_5/config/1_5b_config.json" \
+    +export.output_name="qwen2_5-1_5b.pte"
 ```
 
 Run using the executor runner:
diff --git a/examples/models/qwen2_5/1_5b_config.json b/examples/models/qwen2_5/config/1_5b_config.json
similarity index 100%
rename from examples/models/qwen2_5/1_5b_config.json
rename to examples/models/qwen2_5/config/1_5b_config.json
diff --git a/examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml b/examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml
new file mode 100644
index 00000000000..0e5c6f7624e
--- /dev/null
+++ b/examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml
@@ -0,0 +1,11 @@
+base:
+  metadata: '{"get_bos_id":151643, "get_eos_ids":[151643]}'
+
+model:
+  use_kv_cache: True
+  use_sdpa_with_kv_cache: True
+  dtype_override: fp32
+
+backend:
+  xnnpack:
+    enabled: True
\ No newline at end of file
diff --git a/examples/models/qwen3/README.md b/examples/models/qwen3/README.md
index e24d8da2637..d2d89db93c2 100644
--- a/examples/models/qwen3/README.md
+++ b/examples/models/qwen3/README.md
@@ -8,7 +8,7 @@ Qwen 3 uses the same example code as our optimized Llama model, while the checkp
 All commands for exporting and running Llama on various backends should also be applicable to Qwen 3, by swapping the following args:
 ```
 base.model_class=[qwen3_0_6b,qwen3_1_7b,qwen3_4b]
-base.params=[examples/models/qwen3/0_6b_config.json,examples/models/qwen3/1_7b_config.json,examples/models/qwen3/4b_config.json]
+base.params=[examples/models/qwen3/config/0_6b_config.json,examples/models/qwen3/config/1_7b_config.json,examples/models/qwen3/config/4b_config.json]
 ```
 
 ### Example export
@@ -17,49 +17,29 @@ Here is a basic example for exporting Qwen 3, although please refer to the Llama
 Export 0.6b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3_0_6b" \
-    base.params="examples/models/qwen3/0_6b_config.json" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="fp32" \
-    backend.xnnpack.enabled=True \
-    backend.xnnpack.extended_ops=True \
-    quantization.qmode="8da4w" \
-    base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3_0_6b.pte" \
-    debug.verbose=True
+    --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml \
+    +base.model_class="qwen3_0_6b" \
+    +base.params="examples/models/qwen3/config/0_6b_config.json" \
+    +export.output_name="qwen3_0_6b.pte"
+
 ```
 
 Export 1.7b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3_1_7b" \
-    base.params="examples/models/qwen3/1_7b_config.json" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="fp32" \
-    backend.xnnpack.enabled=True \
-    backend.xnnpack.extended_ops=True \
-    quantization.qmode="8da4w" \
-    base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3_1_7b.pte" \
-    debug.verbose=True
+    --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml \
+    +base.model_class="qwen3_1_7b" \
+    +base.params="examples/models/qwen3/config/1_7b_config.json" \
+    +export.output_name="qwen3_1_7b.pte"
 ```
 
 Export 4b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3_4b" \
-    base.params="examples/models/qwen3/4b_config.json" \
-    model.use_kv_cache=True \
-    model.use_sdpa_with_kv_cache=True \
-    model.dtype_override="fp32" \
-    backend.xnnpack.enabled=True \
-    backend.xnnpack.extended_ops=True \
-    quantization.qmode="8da4w" \
-    base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3_4b.pte" \
-    debug.verbose=True
+    --config examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml \
+    +base.model_class="qwen3_4b" \
+    +base.params="examples/models/qwen3/config/4b_config.json" \
+    +export.output_name="qwen3_4b.pte"
 ```
 
 ### Example run
diff --git a/examples/models/qwen3/0_6b_config.json b/examples/models/qwen3/config/0_6b_config.json
similarity index 100%
rename from examples/models/qwen3/0_6b_config.json
rename to examples/models/qwen3/config/0_6b_config.json
diff --git a/examples/models/qwen3/1_7b_config.json b/examples/models/qwen3/config/1_7b_config.json
similarity index 100%
rename from examples/models/qwen3/1_7b_config.json
rename to examples/models/qwen3/config/1_7b_config.json
diff --git a/examples/models/qwen3/4b_config.json b/examples/models/qwen3/config/4b_config.json
similarity index 100%
rename from examples/models/qwen3/4b_config.json
rename to examples/models/qwen3/config/4b_config.json
diff --git a/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml b/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
new file mode 100644
index 00000000000..60292b1ecdc
--- /dev/null
+++ b/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
@@ -0,0 +1,15 @@
+base:
+  metadata: '{"get_bos_id": 151644, "get_eos_ids":[151645]}'
+
+model:
+  use_kv_cache: True
+  use_sdpa_with_kv_cache: True
+  dtype_override: fp32
+
+quantization:
+  qmode: 8da4w
+
+backend:
+  xnnpack:
+    enabled: True
+    extended_ops: True
\ No newline at end of file
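
The YAML files added in this patch all draw from the same `export_llm` option tree, with per-model values (checkpoint, params, output name) layered on top from the command line as `+section.key=value` arguments, as the README snippets above do. Below is a consolidated sketch of the sections and keys used across these configs; the keys are taken from the files in this patch, the values are purely illustrative, and each model's config only sets the subset it actually needs.

```yaml
# Consolidated sketch of the option groups used by the configs in this patch.
# Values are illustrative; each model's YAML sets only the keys it needs, and
# per-model values are supplied on the CLI as +section.key=value, as the
# README examples above show.
base:
  model_class: llama3_2                 # usually passed on the CLI instead
  metadata: '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
  preq_mode: preq_8da4w_out_8da8w       # pre-quantized checkpoints only (SpinQuant / QAT+LoRA)
  preq_group_size: 32
  preq_embedding_quantize: 8,0
  use_lora: 16                          # QAT+LoRA checkpoints only

model:
  use_kv_cache: True
  use_sdpa_with_kv_cache: True
  dtype_override: fp32                  # bf16 / fp16 / fp32

export:
  max_seq_length: 2048
  max_context_length: 2048
  output_name: llama3_2.pte             # usually passed on the CLI instead

quantization:
  qmode: 8da4w
  group_size: 128
  embedding_quantize: 4,32
  use_qat: True                         # QAT+LoRA recipe only
  use_spin_quant: native                # SpinQuant recipe only

backend:
  xnnpack:
    enabled: True
    extended_ops: True
```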