From 6afd1c0d04ec0167c2ad8c4ce6e5bb6eaea90029 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 06:08:55 -0700 Subject: [PATCH 1/5] Rename phi-4-mini to phi_4_mini --- .ci/scripts/gather_test_models.py | 2 +- .ci/scripts/test_model.sh | 4 ++-- .github/workflows/pull.yml | 2 +- .github/workflows/trunk.yml | 2 +- examples/models/__init__.py | 4 ++-- examples/models/llama/export_llama_lib.py | 2 +- examples/models/{phi-4-mini => phi_4_mini}/__init__.py | 0 examples/models/{phi-4-mini => phi_4_mini}/config.json | 0 examples/models/{phi-4-mini => phi_4_mini}/convert_weights.py | 0 9 files changed, 8 insertions(+), 8 deletions(-) rename examples/models/{phi-4-mini => phi_4_mini}/__init__.py (100%) rename examples/models/{phi-4-mini => phi_4_mini}/config.json (100%) rename examples/models/{phi-4-mini => phi_4_mini}/convert_weights.py (100%) diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py index b32a052026a..3f22d7699de 100755 --- a/.ci/scripts/gather_test_models.py +++ b/.ci/scripts/gather_test_models.py @@ -33,7 +33,7 @@ "dl3": "linux.4xlarge.memory", "emformer_join": "linux.4xlarge.memory", "emformer_predict": "linux.4xlarge.memory", - "phi-4-mini": "linux.4xlarge.memory", + "phi_4_mini": "linux.4xlarge.memory", } } diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 51e81e62a9f..cd543ff1424 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -100,11 +100,11 @@ test_model() { rm "./${MODEL_NAME}.pte" return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears. fi - if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then + if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then # Install requirements for export_llama bash examples/models/llama/install_requirements.sh # Test export_llama script: python3 -m examples.models.llama.export_llama. 
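      # As the flags below suggest, -c points at a randomly initialized demo
      # checkpoint and -p at the model's config, so this step only checks that
      # export and the portable runner work, not model output quality.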
- "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json + "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json run_portable_executor_runner rm "./${MODEL_NAME}.pte" return diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 81948e4e827..9a2221b3aac 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -106,7 +106,7 @@ jobs: - model: emformer_join backend: xnnpack-quantization-delegation runner: linux.4xlarge.memory - - model: phi-4-mini + - model: phi_4_mini backend: portable runner: linux.4xlarge.memory - model: llama3_2_vision_encoder diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 097a272d0fe..b83e4a65bac 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -72,7 +72,7 @@ jobs: backend: portable - model: softmax backend: portable - - model: phi-4-mini + - model: phi_4_mini backend: portable - model: qwen2_5 backend: portable diff --git a/examples/models/__init__.py b/examples/models/__init__.py index 80ba6801a6c..41ec5c3aac6 100644 --- a/examples/models/__init__.py +++ b/examples/models/__init__.py @@ -36,7 +36,7 @@ class Model(str, Enum): Llava = "llava" EfficientSam = "efficient_sam" Qwen25 = "qwen2_5" - Phi4Mini = "phi-4-mini" + Phi4Mini = "phi_4_mini" def __str__(self) -> str: return self.value @@ -80,7 +80,7 @@ def __str__(self) -> str: str(Model.Llava): ("llava", "LlavaModel"), str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"), str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"), - str(Model.Phi4Mini): ("phi-4-mini", "Phi4MiniModel"), + str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"), } __all__ = [ diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 37a4e6952d8..c9a85c89ce2 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -95,7 +95,7 @@ "llama3_2", "static_llama", "qwen2_5", - "phi-4-mini", + "phi_4_mini", ] TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"] diff --git a/examples/models/phi-4-mini/__init__.py b/examples/models/phi_4_mini/__init__.py similarity index 100% rename from examples/models/phi-4-mini/__init__.py rename to examples/models/phi_4_mini/__init__.py diff --git a/examples/models/phi-4-mini/config.json b/examples/models/phi_4_mini/config.json similarity index 100% rename from examples/models/phi-4-mini/config.json rename to examples/models/phi_4_mini/config.json diff --git a/examples/models/phi-4-mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py similarity index 100% rename from examples/models/phi-4-mini/convert_weights.py rename to examples/models/phi_4_mini/convert_weights.py From 3de41ce80034ac7766c511b2cddba80de8109ca8 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 06:05:27 -0700 Subject: [PATCH 2/5] Download checkpoint from HuggingFace --- examples/models/llama/export_llama_lib.py | 50 +++++++++++++++++++ examples/models/llama/install_requirements.sh | 2 +- examples/models/phi_4_mini/__init__.py | 2 + examples/models/phi_4_mini/convert_weights.py | 39 ++++++++------- examples/models/qwen2_5/__init__.py | 4 +- examples/models/qwen2_5/convert_weights.py | 36 ++++++------- 6 files changed, 96 insertions(+), 37 
deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index c9a85c89ce2..7500dcc5955 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -98,6 +98,10 @@ "phi_4_mini", ] TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"] +HUGGING_FACE_REPO_IDS = { + "qwen2_5": "Qwen/Qwen2.5-1.5B", + "phi_4_mini": "microsoft/Phi-4-mini-instruct", +} class WeightType(Enum): @@ -519,7 +523,53 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str: return return_val +def download_and_convert_hf_checkpoint(modelname: str) -> str: + """ + Downloads and converts to Meta format a HuggingFace checkpoint. + """ + # Build cache path. + cache_subdir = "meta_checkpoints" + cache_dir = Path.home() / ".cache" / cache_subdir + cache_dir.mkdir(parents=True, exist_ok=True) + + # Use repo name to name the converted file. + repo_id = HUGGING_FACE_REPO_IDS[modelname] + model_name = repo_id.replace( + "/", "_" + ) + converted_path = cache_dir / f"{model_name}.pth" + + if converted_path.exists(): + print(f"✔ Using cached converted model: {converted_path}") + return converted_path + + # 1. Download weights from Hugging Face. + print("⬇ Downloading and converting checkpoint...") + from huggingface_hub import snapshot_download + + checkpoint_path = snapshot_download( + repo_id=repo_id, + ) + + # 2. Convert weights to Meta format. + if modelname == "qwen2_5": + from executorch.examples.models.qwen2_5 import convert_weights + + convert_weights(checkpoint_path, converted_path) + elif modelname == "phi_4_mini": + from executorch.examples.models.phi_4_mini import convert_weights + + convert_weights(checkpoint_path, converted_path) + elif modelname == "smollm2": + pass + + return converted_path + + def export_llama(args) -> str: + if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS: + args.checkpoint = download_and_convert_hf_checkpoint(args.model) + if args.profile_path is not None: try: from executorch.util.python_profiler import CProfilerFlameGraph diff --git a/examples/models/llama/install_requirements.sh b/examples/models/llama/install_requirements.sh index cca6ede1d79..254379e9e78 100755 --- a/examples/models/llama/install_requirements.sh +++ b/examples/models/llama/install_requirements.sh @@ -10,7 +10,7 @@ # Install tokenizers for hf .json tokenizer. # Install snakeviz for cProfile flamegraph # Install lm-eval for Model Evaluation with lm-evalution-harness. -pip install tiktoken sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile +pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile # Call the install helper for further setup python examples/models/llama/install_requirement_helper.py diff --git a/examples/models/phi_4_mini/__init__.py b/examples/models/phi_4_mini/__init__.py index 056f2c26314..eec1086580a 100644 --- a/examples/models/phi_4_mini/__init__.py +++ b/examples/models/phi_4_mini/__init__.py @@ -2,6 +2,7 @@ # LICENSE file in the root directory of this source tree. 
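# (The convert_weights re-export added below is what allows
# `from executorch.examples.models.phi_4_mini import convert_weights`
# in export_llama_lib.py when a downloaded HuggingFace checkpoint
# needs converting to Meta format.)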
from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.phi_4_mini.convert_weights import convert_weights class Phi4MiniModel(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "Phi4MiniModel", + "convert_weights", ] diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index c29231d2e4d..18f82957f94 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -51,37 +51,40 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T return converted_state_dict -def main(): - parser = argparse.ArgumentParser( - description="Convert Phi-4-mini weights to Meta format." - ) - parser.add_argument( - "input_dir", - type=str, - help="Path to directory containing checkpoint files", - ) - parser.add_argument("output", type=str, help="Path to the output checkpoint") - - args = parser.parse_args() - +def convert_weights(input_dir: str, output_file: str) -> None: + # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, + checkpoint_dir=input_dir, checkpoint_files=[ "model-00001-of-00002.safetensors", "model-00002-of-00002.safetensors", ], output_dir=".", - model_type="PHI3_MINI", + model_type="PHI4", ) print("Loading checkpoint...") sd = checkpointer.load_checkpoint() - print("Converting checkpoint...") sd = phi_4_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print("Done.") - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + +def main(): + parser = argparse.ArgumentParser( + description="Convert Phi-4-mini weights to Meta format." + ) + parser.add_argument( + "input_dir", + type=str, + help="Path to directory containing checkpoint files", + ) + parser.add_argument("output", type=str, help="Path to the output checkpoint") + + args = parser.parse_args() + convert_weights(args.input_dir, args.output) if __name__ == "__main__": diff --git a/examples/models/qwen2_5/__init__.py b/examples/models/qwen2_5/__init__.py index d86a97a114d..ec2af1edd78 100644 --- a/examples/models/qwen2_5/__init__.py +++ b/examples/models/qwen2_5/__init__.py @@ -1,7 +1,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.example.models.llama.model import Llama2Model +from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.qwen2_5.convert_weights import convert_weights class Qwen2_5Model(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "Qwen2_5Model", + "convert_weights", ] diff --git a/examples/models/qwen2_5/convert_weights.py b/examples/models/qwen2_5/convert_weights.py index 9aada5b3e90..9df1e07a8b8 100644 --- a/examples/models/qwen2_5/convert_weights.py +++ b/examples/models/qwen2_5/convert_weights.py @@ -53,22 +53,10 @@ def qwen_2_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch. return converted_state_dict -def main(): - parser = argparse.ArgumentParser( - description="Convert Qwen2 weights to Meta format." 
- ) - parser.add_argument( - "input_dir", - type=str, - help="Path to directory containing checkpoint files", - ) - parser.add_argument("output", type=str, help="Path to the output checkpoint") - - args = parser.parse_args() - +def convert_weights(input_dir: str, output_file: str) -> None: # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, + checkpoint_dir=input_dir, checkpoint_files=["model.safetensors"], output_dir=".", model_type="QWEN2", @@ -76,12 +64,26 @@ def main(): print("Loading checkpoint...") sd = checkpointer.load_checkpoint() - print("Converting checkpoint...") sd = qwen_2_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print("Done.") - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + +def main(): + parser = argparse.ArgumentParser( + description="Convert Qwen2 weights to Meta format." + ) + parser.add_argument( + "input_dir", + type=str, + help="Path to directory containing checkpoint files", + ) + parser.add_argument("output", type=str, help="Path to the output checkpoint") + + args = parser.parse_args() + convert_weights(args.input_dir, args.output) if __name__ == "__main__": From 2c6609a589f7b004123f9d4d2fbf55d567353f25 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:59:20 -0700 Subject: [PATCH 3/5] Tarun pr review --- examples/models/llama/export_llama_lib.py | 59 +++++-------------- examples/models/llama/hf_download.py | 50 ++++++++++++++++ examples/models/llama/install_requirements.sh | 2 +- 3 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 examples/models/llama/hf_download.py diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 7500dcc5955..b54d01dcbec 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -28,6 +28,9 @@ from executorch.devtools.backend_debug import print_delegation_info from executorch.devtools.etrecord import generate_etrecord +from executorch.examples.models.llama.hf_download import ( + download_and_convert_hf_checkpoint, +) from executorch.exir.passes.init_mutable_pass import InitializedMutableBufferPass from executorch.extension.llm.export.builder import DType, LLMEdgeManager @@ -523,52 +526,20 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str: return return_val -def download_and_convert_hf_checkpoint(modelname: str) -> str: - """ - Downloads and converts to Meta format a HuggingFace checkpoint. - """ - # Build cache path. - cache_subdir = "meta_checkpoints" - cache_dir = Path.home() / ".cache" / cache_subdir - cache_dir.mkdir(parents=True, exist_ok=True) - - # Use repo name to name the converted file. - repo_id = HUGGING_FACE_REPO_IDS[modelname] - model_name = repo_id.replace( - "/", "_" - ) - converted_path = cache_dir / f"{model_name}.pth" - - if converted_path.exists(): - print(f"✔ Using cached converted model: {converted_path}") - return converted_path - - # 1. Download weights from Hugging Face. - print("⬇ Downloading and converting checkpoint...") - from huggingface_hub import snapshot_download - - checkpoint_path = snapshot_download( - repo_id=repo_id, - ) - - # 2. Convert weights to Meta format. 
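    # (This model-specific dispatch is exactly what the patch lifts out of the
    # helper: the import choice moves up into export_llama, and the download
    # logic moves to hf_download.py, which takes a generic convert_weights
    # callback instead.)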
- if modelname == "qwen2_5": - from executorch.examples.models.qwen2_5 import convert_weights - - convert_weights(checkpoint_path, converted_path) - elif modelname == "phi_4_mini": - from executorch.examples.models.phi_4_mini import convert_weights - - convert_weights(checkpoint_path, converted_path) - elif modelname == "smollm2": - pass - - return converted_path - - def export_llama(args) -> str: + # If a checkpoint isn't provided for an HF OSS model, download and convert the + # weights first. if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS: - args.checkpoint = download_and_convert_hf_checkpoint(args.model) + repo_id = HUGGING_FACE_REPO_IDS[args.model] + if args.model == "qwen2_5": + from executorch.examples.models.qwen2_5 import convert_weights + elif args.model == "phi_4_mini": + from executorch.examples.models.phi_4_mini import convert_weights + else: + raise ValueError( + f"Converting weights to meta format for {args.model} is not yet supported" + ) + args.checkpoint = download_and_convert_hf_checkpoint(repo_id, convert_weights) if args.profile_path is not None: try: diff --git a/examples/models/llama/hf_download.py b/examples/models/llama/hf_download.py new file mode 100644 index 00000000000..b09dc5125d9 --- /dev/null +++ b/examples/models/llama/hf_download.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from pathlib import Path +from typing import Callable + +from huggingface_hub import snapshot_download + + +def download_and_convert_hf_checkpoint( + repo_id: str, convert_weights: Callable[[str, str], None] +) -> str: + """ + Downloads and converts to Meta format a HuggingFace checkpoint. + + Args: + repo_id: Id of the HuggingFace repo, e.g. "Qwen/Qwen2.5-1.5B". + convert_weights: Weight conversion function taking in path to the downloaded HuggingFace + files and the desired output path. + + Returns: + The output path of the Meta checkpoint converted from HuggingFace. + """ + + # Build cache path. + cache_subdir = "meta_checkpoints" + cache_dir = Path.home() / ".cache" / cache_subdir + cache_dir.mkdir(parents=True, exist_ok=True) + + # Use repo name to name the converted file. + model_name = repo_id.replace("/", "_") + converted_path = cache_dir / f"{model_name}.pth" + + if converted_path.exists(): + print(f"✔ Using cached converted model: {converted_path}") + return converted_path + + # 1. Download weights from Hugging Face. + print("⬇ Downloading and converting checkpoint...") + checkpoint_path = snapshot_download( + repo_id=repo_id, + ) + + # 2. Convert weights to Meta format. + convert_weights(checkpoint_path, converted_path) + return converted_path diff --git a/examples/models/llama/install_requirements.sh b/examples/models/llama/install_requirements.sh index 254379e9e78..b9e0f9210c5 100755 --- a/examples/models/llama/install_requirements.sh +++ b/examples/models/llama/install_requirements.sh @@ -10,7 +10,7 @@ # Install tokenizers for hf .json tokenizer. # Install snakeviz for cProfile flamegraph # Install lm-eval for Model Evaluation with lm-evalution-harness. 
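# Install huggingface_hub for snapshot_download, used by hf_download.py.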
-pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile +pip install huggingface_hub tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile # Call the install helper for further setup python examples/models/llama/install_requirement_helper.py From 4db8f072fbb1e98c228de7a5ce74e79f2e13c51d Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 16:59:00 -0700 Subject: [PATCH 4/5] Support smollm2 --- examples/models/llama/export_llama_lib.py | 3 ++ .../models/smollm2/{__init__ => __init__.py} | 4 +- examples/models/smollm2/convert_weights.py | 41 +++++++++++-------- 3 files changed, 29 insertions(+), 19 deletions(-) rename examples/models/smollm2/{__init__ => __init__.py} (62%) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index aedc37054de..1bd0c5ac68c 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -105,6 +105,7 @@ HUGGING_FACE_REPO_IDS = { "qwen2_5": "Qwen/Qwen2.5-1.5B", "phi_4_mini": "microsoft/Phi-4-mini-instruct", + "smollm2": "HuggingFaceTB/SmolLM-135M", } @@ -541,6 +542,8 @@ def export_llama(args) -> str: from executorch.examples.models.qwen2_5 import convert_weights elif args.model == "phi_4_mini": from executorch.examples.models.phi_4_mini import convert_weights + elif args.model == "smollm2": + from executorch.examples.models.smollm2 import convert_weights else: raise ValueError( f"Converting weights to meta format for {args.model} is not yet supported" diff --git a/examples/models/smollm2/__init__ b/examples/models/smollm2/__init__.py similarity index 62% rename from examples/models/smollm2/__init__ rename to examples/models/smollm2/__init__.py index 3d01bf9eb42..3f973e2c786 100644 --- a/examples/models/smollm2/__init__ +++ b/examples/models/smollm2/__init__.py @@ -1,7 +1,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.example.models.llama.model import Llama2Model +from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.smollm2.convert_weights import convert_weights class SmolLM2Model(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "SmolLM2Model", + "convert_weights", ] diff --git a/examples/models/smollm2/convert_weights.py b/examples/models/smollm2/convert_weights.py index db80bd47b8c..c1a1bf08d11 100644 --- a/examples/models/smollm2/convert_weights.py +++ b/examples/models/smollm2/convert_weights.py @@ -11,7 +11,6 @@ _SMOLLM_FROM_META = { "tok_embeddings.weight": "tok_embeddings.weight", "norm.weight": "norm.scale", - "output.weight": "output.weight", "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight", "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight", "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight", @@ -41,10 +40,32 @@ def smollm_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch. for key, value in state_dict.items(): new_key = get_mapped_key(key, inverted_mapping_dict) converted_state_dict[new_key] = value + converted_state_dict["output.weight"] = converted_state_dict[ + "tok_embeddings.weight" + ] return converted_state_dict +def convert_weights(input_dir: str, output_file: str) -> None: + # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. 
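    # (model_type tells torchtune's checkpointer which HF-to-tune key mapping
    # to apply; LLAMA3 is used here presumably because SmolLM2 follows the
    # Llama architecture, leaving smollm_tune_to_meta only the tune-to-Meta
    # renames plus the tied output.weight handled above.)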
+ checkpointer = FullModelHFCheckpointer( + checkpoint_dir=input_dir, + checkpoint_files=["model.safetensors"], + output_dir=".", + model_type="LLAMA3", + ) + + print("Loading checkpoint...") + sd = checkpointer.load_checkpoint() + print("Converting checkpoint...") + breakpoint() + sd = smollm_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print(f"Done.") + + def main(): parser = argparse.ArgumentParser( description="Convert SmolLM weights to Meta format." @@ -57,23 +78,7 @@ def main(): parser.add_argument("output", type=str, help="Path to the output checkpoint") args = parser.parse_args() - - # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. - checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, - checkpoint_files=["model.safetensors"], - output_dir=".", - model_type="LLAMA", - ) - - print("Loading checkpoint...") - sd = checkpointer.load_checkpoint() - - print("Converting checkpoint...") - sd = smollm_tune_to_meta(sd["model"]) - - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + convert_weights(args.input_dir, args.output) if __name__ == "__main__": From 19cda10cf84463fb2d5cdf12f9cef034304cc985 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 17:27:14 -0700 Subject: [PATCH 5/5] Lint --- examples/models/smollm2/convert_weights.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/models/smollm2/convert_weights.py b/examples/models/smollm2/convert_weights.py index c1a1bf08d11..59b83d3e3a3 100644 --- a/examples/models/smollm2/convert_weights.py +++ b/examples/models/smollm2/convert_weights.py @@ -59,11 +59,10 @@ def convert_weights(input_dir: str, output_file: str) -> None: print("Loading checkpoint...") sd = checkpointer.load_checkpoint() print("Converting checkpoint...") - breakpoint() sd = smollm_tune_to_meta(sd["model"]) print("Saving checkpoint...") torch.save(sd, output_file) - print(f"Done.") + print("Done.") def main():
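
Usage note: with the full series applied, exporting any model mapped in
HUGGING_FACE_REPO_IDS no longer requires a manually supplied --checkpoint.
Below is a minimal sketch of the resulting flow as a standalone driver; the
driver itself is illustrative and not part of the series, but the imports,
function signature, and cache location all come from the patches above.

    from executorch.examples.models.llama.hf_download import (
        download_and_convert_hf_checkpoint,
    )
    from executorch.examples.models.smollm2 import convert_weights

    # Repo id as mapped for "smollm2" in HUGGING_FACE_REPO_IDS.
    repo_id = "HuggingFaceTB/SmolLM-135M"

    # First run: snapshot_download fetches the HF files, then convert_weights
    # writes ~/.cache/meta_checkpoints/HuggingFaceTB_SmolLM-135M.pth.
    # Subsequent runs: the cached .pth is returned without re-downloading.
    checkpoint = download_and_convert_hf_checkpoint(repo_id, convert_weights)
    print(checkpoint)

Passing convert_weights as a callback is the design point of patch 3:
hf_download.py stays model-agnostic, and each model package owns its own
HF-to-Meta key mapping.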