2 changes: 1 addition & 1 deletion .ci/scripts/gather_test_models.py
@@ -33,7 +33,7 @@
"dl3": "linux.4xlarge.memory",
"emformer_join": "linux.4xlarge.memory",
"emformer_predict": "linux.4xlarge.memory",
"phi-4-mini": "linux.4xlarge.memory",
"phi_4_mini": "linux.4xlarge.memory",
}
}

4 changes: 2 additions & 2 deletions .ci/scripts/test_model.sh
@@ -100,11 +100,11 @@ test_model() {
rm "./${MODEL_NAME}.pte"
return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
fi
if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then
# Install requirements for export_llama
bash examples/models/llama/install_requirements.sh
# Test export_llama script: python3 -m examples.models.llama.export_llama.
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json
run_portable_executor_runner
rm "./${MODEL_NAME}.pte"
return
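The same check can be reproduced locally outside CI. A minimal sketch, assuming an ExecuTorch checkout and python3 standing in for ${PYTHON_EXECUTABLE}:

# Install export_llama requirements, then export with the random demo weights used in CI.
bash examples/models/llama/install_requirements.sh
python3 -m examples.models.llama.export_llama --model phi_4_mini \
  -c examples/models/llama/params/demo_rand_params.pth \
  -p examples/models/phi_4_mini/config.json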
2 changes: 1 addition & 1 deletion .github/workflows/pull.yml
@@ -106,7 +106,7 @@ jobs:
- model: emformer_join
backend: xnnpack-quantization-delegation
runner: linux.4xlarge.memory
- - model: phi-4-mini
+ - model: phi_4_mini
backend: portable
runner: linux.4xlarge.memory
- model: llama3_2_vision_encoder
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -72,7 +72,7 @@ jobs:
backend: portable
- model: softmax
backend: portable
- - model: phi-4-mini
+ - model: phi_4_mini
backend: portable
- model: qwen2_5
backend: portable
4 changes: 2 additions & 2 deletions examples/models/__init__.py
@@ -36,7 +36,7 @@ class Model(str, Enum):
Llava = "llava"
EfficientSam = "efficient_sam"
Qwen25 = "qwen2_5"
Phi4Mini = "phi-4-mini"
Phi4Mini = "phi_4_mini"

def __str__(self) -> str:
return self.value
@@ -80,7 +80,7 @@ def __str__(self) -> str:
str(Model.Llava): ("llava", "LlavaModel"),
str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
- str(Model.Phi4Mini): ("phi-4-mini", "Phi4MiniModel"),
+ str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
}

__all__ = [
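The enum value is also the registry key in the second hunk, so the rename has to land in both places together. A minimal sketch of how a model name resolves to its class, assuming the registry dict is named MODEL_NAME_TO_MODEL as in examples/models:

import importlib

from executorch.examples.models import MODEL_NAME_TO_MODEL

# "phi_4_mini" maps to the (module, class) pair ("phi_4_mini", "Phi4MiniModel").
module_name, class_name = MODEL_NAME_TO_MODEL["phi_4_mini"]
module = importlib.import_module(f"executorch.examples.models.{module_name}")
model_class = getattr(module, class_name)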
52 changes: 51 additions & 1 deletion examples/models/llama/export_llama_lib.py
@@ -95,9 +95,13 @@
"llama3_2",
"static_llama",
"qwen2_5",
"phi-4-mini",
"phi_4_mini",
]
TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]
+ HUGGING_FACE_REPO_IDS = {
+     "qwen2_5": "Qwen/Qwen2.5-1.5B",
+     "phi_4_mini": "microsoft/Phi-4-mini-instruct",
+ }


class WeightType(Enum):
@@ -519,7 +523,53 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str:
return return_val


+ def download_and_convert_hf_checkpoint(modelname: str) -> str:
Review comment (Contributor): Put this in a separate util file? I think it'll be useful elsewhere too, and this file is getting quite bloated.
"""
Downloads and converts to Meta format a HuggingFace checkpoint.
"""
# Build cache path.
cache_subdir = "meta_checkpoints"
cache_dir = Path.home() / ".cache" / cache_subdir
cache_dir.mkdir(parents=True, exist_ok=True)

# Use repo name to name the converted file.
repo_id = HUGGING_FACE_REPO_IDS[modelname]
model_name = repo_id.replace(
"/", "_"
)
converted_path = cache_dir / f"{model_name}.pth"

if converted_path.exists():
print(f"✔ Using cached converted model: {converted_path}")
return converted_path

# 1. Download weights from Hugging Face.
print("⬇ Downloading and converting checkpoint...")
from huggingface_hub import snapshot_download

checkpoint_path = snapshot_download(
repo_id=repo_id,
)

# 2. Convert weights to Meta format.
if modelname == "qwen2_5":
from executorch.examples.models.qwen2_5 import convert_weights

convert_weights(checkpoint_path, converted_path)
elif modelname == "phi_4_mini":
from executorch.examples.models.phi_4_mini import convert_weights

convert_weights(checkpoint_path, converted_path)
elif modelname == "smollm2":
pass
Review comment (Contributor, author): The PR for this is still getting in.


+     return converted_path


def export_llama(args) -> str:
+     if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS:
+         args.checkpoint = download_and_convert_hf_checkpoint(args.model)

if args.profile_path is not None:
try:
from executorch.util.python_profiler import CProfilerFlameGraph
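With this change a missing --checkpoint is no longer fatal for models with a known repo id: export_llama downloads and converts the weights first. A sketch of the expected flow, assuming the default cache directory:

# First run: downloads microsoft/Phi-4-mini-instruct and caches the converted
# checkpoint at ~/.cache/meta_checkpoints/microsoft_Phi-4-mini-instruct.pth.
python3 -m examples.models.llama.export_llama --model phi_4_mini \
  -p examples/models/phi_4_mini/config.json
# Subsequent runs skip the download and reuse the cached .pth.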
2 changes: 1 addition & 1 deletion examples/models/llama/install_requirements.sh
@@ -10,7 +10,7 @@
# Install tokenizers for hf .json tokenizer.
# Install snakeviz for cProfile flamegraph
# Install lm-eval for Model Evaluation with lm-evaluation-harness.
- pip install tiktoken sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile
+ pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile

# Call the install helper for further setup
python examples/models/llama/install_requirement_helper.py
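torchtune is added here because the convert_weights modules lean on its FullModelHFCheckpointer to read Hugging Face safetensors shards. A quick post-install sanity check (a sketch; torchtune's public import surface varies by version):

python3 -c "import torchtune; print('torchtune import OK')"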
examples/models/phi_4_mini/__init__.py
@@ -2,6 +2,7 @@
# LICENSE file in the root directory of this source tree.

from executorch.examples.models.llama.model import Llama2Model
+ from executorch.examples.models.phi_4_mini.convert_weights import convert_weights


class Phi4MiniModel(Llama2Model):
@@ -11,4 +12,5 @@ def __init__(self, **kwargs):

__all__ = [
"Phi4MiniModel",
"convert_weights",
]
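Re-exporting convert_weights from the package __init__ is what lets export_llama_lib.py import it straight off the phi_4_mini package. A usage sketch, with a hypothetical checkpoint directory:

from executorch.examples.models.phi_4_mini import convert_weights

# Convert an already-downloaded HF snapshot into a single Meta-format .pth file.
convert_weights("/path/to/Phi-4-mini-instruct", "/tmp/phi_4_mini.pth")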
examples/models/phi_4_mini/convert_weights.py
@@ -51,37 +51,40 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
return converted_state_dict


- def main():
-     parser = argparse.ArgumentParser(
-         description="Convert Phi-4-mini weights to Meta format."
-     )
-     parser.add_argument(
-         "input_dir",
-         type=str,
-         help="Path to directory containing checkpoint files",
-     )
-     parser.add_argument("output", type=str, help="Path to the output checkpoint")
-
-     args = parser.parse_args()

+ def convert_weights(input_dir: str, output_file: str) -> None:
# Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
checkpointer = FullModelHFCheckpointer(
-         checkpoint_dir=args.input_dir,
+         checkpoint_dir=input_dir,
checkpoint_files=[
"model-00001-of-00002.safetensors",
"model-00002-of-00002.safetensors",
],
output_dir=".",
model_type="PHI3_MINI",
model_type="PHI4",
)

print("Loading checkpoint...")
sd = checkpointer.load_checkpoint()

print("Converting checkpoint...")
sd = phi_4_tune_to_meta(sd["model"])
print("Saving checkpoint...")
torch.save(sd, output_file)
print("Done.")

-     torch.save(sd, args.output)
-     print(f"Checkpoint saved to {args.output}")

+ def main():
+     parser = argparse.ArgumentParser(
+         description="Convert Phi-4-mini weights to Meta format."
+     )
+     parser.add_argument(
+         "input_dir",
+         type=str,
+         help="Path to directory containing checkpoint files",
+     )
+     parser.add_argument("output", type=str, help="Path to the output checkpoint")
+
+     args = parser.parse_args()
+     convert_weights(args.input_dir, args.output)


if __name__ == "__main__":
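The CLI behavior is unchanged; the conversion logic just moved into an importable function. Example invocation (a sketch, paths hypothetical):

# Convert a local HF checkpoint directory into a Meta-format checkpoint.
python3 examples/models/phi_4_mini/convert_weights.py \
  /path/to/Phi-4-mini-instruct /tmp/phi_4_mini.pth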
4 changes: 3 additions & 1 deletion examples/models/qwen2_5/__init__.py
@@ -1,7 +1,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

- from executorch.example.models.llama.model import Llama2Model
+ from executorch.examples.models.llama.model import Llama2Model
+ from executorch.examples.models.qwen2_5.convert_weights import convert_weights


class Qwen2_5Model(Llama2Model):
@@ -11,4 +12,5 @@ def __init__(self, **kwargs):

__all__ = [
"Qwen2_5Model",
"convert_weights",
]
36 changes: 19 additions & 17 deletions examples/models/qwen2_5/convert_weights.py
@@ -53,35 +53,37 @@ def qwen_2_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
return converted_state_dict


- def main():
-     parser = argparse.ArgumentParser(
-         description="Convert Qwen2 weights to Meta format."
-     )
-     parser.add_argument(
-         "input_dir",
-         type=str,
-         help="Path to directory containing checkpoint files",
-     )
-     parser.add_argument("output", type=str, help="Path to the output checkpoint")
-
-     args = parser.parse_args()

+ def convert_weights(input_dir: str, output_file: str) -> None:
# Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
checkpointer = FullModelHFCheckpointer(
-         checkpoint_dir=args.input_dir,
+         checkpoint_dir=input_dir,
checkpoint_files=["model.safetensors"],
output_dir=".",
model_type="QWEN2",
)

print("Loading checkpoint...")
sd = checkpointer.load_checkpoint()

print("Converting checkpoint...")
sd = qwen_2_tune_to_meta(sd["model"])
print("Saving checkpoint...")
torch.save(sd, output_file)
print("Done.")

-     torch.save(sd, args.output)
-     print(f"Checkpoint saved to {args.output}")

+ def main():
+     parser = argparse.ArgumentParser(
+         description="Convert Qwen2 weights to Meta format."
+     )
+     parser.add_argument(
+         "input_dir",
+         type=str,
+         help="Path to directory containing checkpoint files",
+     )
+     parser.add_argument("output", type=str, help="Path to the output checkpoint")
+
+     args = parser.parse_args()
+     convert_weights(args.input_dir, args.output)


if __name__ == "__main__":
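End to end, the new pieces compose the same way download_and_convert_hf_checkpoint does. A condensed sketch for Qwen2.5, with a hypothetical output path:

from huggingface_hub import snapshot_download

from executorch.examples.models.qwen2_5 import convert_weights

# Download the HF snapshot, then convert it to a Meta-format checkpoint.
checkpoint_path = snapshot_download(repo_id="Qwen/Qwen2.5-1.5B")
convert_weights(checkpoint_path, "/tmp/qwen2_5.pth")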