From 6afd1c0d04ec0167c2ad8c4ce6e5bb6eaea90029 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 06:08:55 -0700 Subject: [PATCH 1/5] Rename phi-4-mini to phi_4_mini --- .ci/scripts/gather_test_models.py | 2 +- .ci/scripts/test_model.sh | 4 ++-- .github/workflows/pull.yml | 2 +- .github/workflows/trunk.yml | 2 +- examples/models/__init__.py | 4 ++-- examples/models/llama/export_llama_lib.py | 2 +- examples/models/{phi-4-mini => phi_4_mini}/__init__.py | 0 examples/models/{phi-4-mini => phi_4_mini}/config.json | 0 examples/models/{phi-4-mini => phi_4_mini}/convert_weights.py | 0 9 files changed, 8 insertions(+), 8 deletions(-) rename examples/models/{phi-4-mini => phi_4_mini}/__init__.py (100%) rename examples/models/{phi-4-mini => phi_4_mini}/config.json (100%) rename examples/models/{phi-4-mini => phi_4_mini}/convert_weights.py (100%) diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py index b32a052026a..3f22d7699de 100755 --- a/.ci/scripts/gather_test_models.py +++ b/.ci/scripts/gather_test_models.py @@ -33,7 +33,7 @@ "dl3": "linux.4xlarge.memory", "emformer_join": "linux.4xlarge.memory", "emformer_predict": "linux.4xlarge.memory", - "phi-4-mini": "linux.4xlarge.memory", + "phi_4_mini": "linux.4xlarge.memory", } } diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 51e81e62a9f..cd543ff1424 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -100,11 +100,11 @@ test_model() { rm "./${MODEL_NAME}.pte" return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears. fi - if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then + if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then # Install requirements for export_llama bash examples/models/llama/install_requirements.sh # Test export_llama script: python3 -m examples.models.llama.export_llama. 
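      # As the flags below suggest, -c points at a randomly initialized demo
      # checkpoint and -p at the model's config, so this step only checks that
      # export and the portable runner work, not model output quality.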
- "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json + "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json run_portable_executor_runner rm "./${MODEL_NAME}.pte" return diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 81948e4e827..9a2221b3aac 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -106,7 +106,7 @@ jobs: - model: emformer_join backend: xnnpack-quantization-delegation runner: linux.4xlarge.memory - - model: phi-4-mini + - model: phi_4_mini backend: portable runner: linux.4xlarge.memory - model: llama3_2_vision_encoder diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 097a272d0fe..b83e4a65bac 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -72,7 +72,7 @@ jobs: backend: portable - model: softmax backend: portable - - model: phi-4-mini + - model: phi_4_mini backend: portable - model: qwen2_5 backend: portable diff --git a/examples/models/__init__.py b/examples/models/__init__.py index 80ba6801a6c..41ec5c3aac6 100644 --- a/examples/models/__init__.py +++ b/examples/models/__init__.py @@ -36,7 +36,7 @@ class Model(str, Enum): Llava = "llava" EfficientSam = "efficient_sam" Qwen25 = "qwen2_5" - Phi4Mini = "phi-4-mini" + Phi4Mini = "phi_4_mini" def __str__(self) -> str: return self.value @@ -80,7 +80,7 @@ def __str__(self) -> str: str(Model.Llava): ("llava", "LlavaModel"), str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"), str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"), - str(Model.Phi4Mini): ("phi-4-mini", "Phi4MiniModel"), + str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"), } __all__ = [ diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 37a4e6952d8..c9a85c89ce2 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -95,7 +95,7 @@ "llama3_2", "static_llama", "qwen2_5", - "phi-4-mini", + "phi_4_mini", ] TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"] diff --git a/examples/models/phi-4-mini/__init__.py b/examples/models/phi_4_mini/__init__.py similarity index 100% rename from examples/models/phi-4-mini/__init__.py rename to examples/models/phi_4_mini/__init__.py diff --git a/examples/models/phi-4-mini/config.json b/examples/models/phi_4_mini/config.json similarity index 100% rename from examples/models/phi-4-mini/config.json rename to examples/models/phi_4_mini/config.json diff --git a/examples/models/phi-4-mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py similarity index 100% rename from examples/models/phi-4-mini/convert_weights.py rename to examples/models/phi_4_mini/convert_weights.py From 3de41ce80034ac7766c511b2cddba80de8109ca8 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 06:05:27 -0700 Subject: [PATCH 2/5] Download checkpoint from HuggingFace --- examples/models/llama/export_llama_lib.py | 50 +++++++++++++++++++ examples/models/llama/install_requirements.sh | 2 +- examples/models/phi_4_mini/__init__.py | 2 + examples/models/phi_4_mini/convert_weights.py | 39 ++++++++------- examples/models/qwen2_5/__init__.py | 4 +- examples/models/qwen2_5/convert_weights.py | 36 ++++++------- 6 files changed, 96 insertions(+), 37 
deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index c9a85c89ce2..7500dcc5955 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -98,6 +98,10 @@ "phi_4_mini", ] TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"] +HUGGING_FACE_REPO_IDS = { + "qwen2_5": "Qwen/Qwen2.5-1.5B", + "phi_4_mini": "microsoft/Phi-4-mini-instruct", +} class WeightType(Enum): @@ -519,7 +523,53 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str: return return_val +def download_and_convert_hf_checkpoint(modelname: str) -> str: + """ + Downloads and converts to Meta format a HuggingFace checkpoint. + """ + # Build cache path. + cache_subdir = "meta_checkpoints" + cache_dir = Path.home() / ".cache" / cache_subdir + cache_dir.mkdir(parents=True, exist_ok=True) + + # Use repo name to name the converted file. + repo_id = HUGGING_FACE_REPO_IDS[modelname] + model_name = repo_id.replace( + "/", "_" + ) + converted_path = cache_dir / f"{model_name}.pth" + + if converted_path.exists(): + print(f"✔ Using cached converted model: {converted_path}") + return converted_path + + # 1. Download weights from Hugging Face. + print("⬇ Downloading and converting checkpoint...") + from huggingface_hub import snapshot_download + + checkpoint_path = snapshot_download( + repo_id=repo_id, + ) + + # 2. Convert weights to Meta format. + if modelname == "qwen2_5": + from executorch.examples.models.qwen2_5 import convert_weights + + convert_weights(checkpoint_path, converted_path) + elif modelname == "phi_4_mini": + from executorch.examples.models.phi_4_mini import convert_weights + + convert_weights(checkpoint_path, converted_path) + elif modelname == "smollm2": + pass + + return converted_path + + def export_llama(args) -> str: + if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS: + args.checkpoint = download_and_convert_hf_checkpoint(args.model) + if args.profile_path is not None: try: from executorch.util.python_profiler import CProfilerFlameGraph diff --git a/examples/models/llama/install_requirements.sh b/examples/models/llama/install_requirements.sh index cca6ede1d79..254379e9e78 100755 --- a/examples/models/llama/install_requirements.sh +++ b/examples/models/llama/install_requirements.sh @@ -10,7 +10,7 @@ # Install tokenizers for hf .json tokenizer. # Install snakeviz for cProfile flamegraph # Install lm-eval for Model Evaluation with lm-evalution-harness. -pip install tiktoken sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile +pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile # Call the install helper for further setup python examples/models/llama/install_requirement_helper.py diff --git a/examples/models/phi_4_mini/__init__.py b/examples/models/phi_4_mini/__init__.py index 056f2c26314..eec1086580a 100644 --- a/examples/models/phi_4_mini/__init__.py +++ b/examples/models/phi_4_mini/__init__.py @@ -2,6 +2,7 @@ # LICENSE file in the root directory of this source tree. 
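# (The convert_weights re-export added below is what allows
# `from executorch.examples.models.phi_4_mini import convert_weights`
# in export_llama_lib.py when a downloaded HuggingFace checkpoint
# needs converting to Meta format.)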
from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.phi_4_mini.convert_weights import convert_weights class Phi4MiniModel(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "Phi4MiniModel", + "convert_weights", ] diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index c29231d2e4d..18f82957f94 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -51,37 +51,40 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T return converted_state_dict -def main(): - parser = argparse.ArgumentParser( - description="Convert Phi-4-mini weights to Meta format." - ) - parser.add_argument( - "input_dir", - type=str, - help="Path to directory containing checkpoint files", - ) - parser.add_argument("output", type=str, help="Path to the output checkpoint") - - args = parser.parse_args() - +def convert_weights(input_dir: str, output_file: str) -> None: + # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, + checkpoint_dir=input_dir, checkpoint_files=[ "model-00001-of-00002.safetensors", "model-00002-of-00002.safetensors", ], output_dir=".", - model_type="PHI3_MINI", + model_type="PHI4", ) print("Loading checkpoint...") sd = checkpointer.load_checkpoint() - print("Converting checkpoint...") sd = phi_4_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print("Done.") - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + +def main(): + parser = argparse.ArgumentParser( + description="Convert Phi-4-mini weights to Meta format." + ) + parser.add_argument( + "input_dir", + type=str, + help="Path to directory containing checkpoint files", + ) + parser.add_argument("output", type=str, help="Path to the output checkpoint") + + args = parser.parse_args() + convert_weights(args.input_dir, args.output) if __name__ == "__main__": diff --git a/examples/models/qwen2_5/__init__.py b/examples/models/qwen2_5/__init__.py index d86a97a114d..ec2af1edd78 100644 --- a/examples/models/qwen2_5/__init__.py +++ b/examples/models/qwen2_5/__init__.py @@ -1,7 +1,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.example.models.llama.model import Llama2Model +from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.qwen2_5.convert_weights import convert_weights class Qwen2_5Model(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "Qwen2_5Model", + "convert_weights", ] diff --git a/examples/models/qwen2_5/convert_weights.py b/examples/models/qwen2_5/convert_weights.py index 9aada5b3e90..9df1e07a8b8 100644 --- a/examples/models/qwen2_5/convert_weights.py +++ b/examples/models/qwen2_5/convert_weights.py @@ -53,22 +53,10 @@ def qwen_2_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch. return converted_state_dict -def main(): - parser = argparse.ArgumentParser( - description="Convert Qwen2 weights to Meta format." 
- ) - parser.add_argument( - "input_dir", - type=str, - help="Path to directory containing checkpoint files", - ) - parser.add_argument("output", type=str, help="Path to the output checkpoint") - - args = parser.parse_args() - +def convert_weights(input_dir: str, output_file: str) -> None: # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, + checkpoint_dir=input_dir, checkpoint_files=["model.safetensors"], output_dir=".", model_type="QWEN2", @@ -76,12 +64,26 @@ def main(): print("Loading checkpoint...") sd = checkpointer.load_checkpoint() - print("Converting checkpoint...") sd = qwen_2_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print("Done.") - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + +def main(): + parser = argparse.ArgumentParser( + description="Convert Qwen2 weights to Meta format." + ) + parser.add_argument( + "input_dir", + type=str, + help="Path to directory containing checkpoint files", + ) + parser.add_argument("output", type=str, help="Path to the output checkpoint") + + args = parser.parse_args() + convert_weights(args.input_dir, args.output) if __name__ == "__main__": From 2c6609a589f7b004123f9d4d2fbf55d567353f25 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:59:20 -0700 Subject: [PATCH 3/5] Tarun pr review --- examples/models/llama/export_llama_lib.py | 59 +++++-------------- examples/models/llama/hf_download.py | 50 ++++++++++++++++ examples/models/llama/install_requirements.sh | 2 +- 3 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 examples/models/llama/hf_download.py diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 7500dcc5955..b54d01dcbec 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -28,6 +28,9 @@ from executorch.devtools.backend_debug import print_delegation_info from executorch.devtools.etrecord import generate_etrecord +from executorch.examples.models.llama.hf_download import ( + download_and_convert_hf_checkpoint, +) from executorch.exir.passes.init_mutable_pass import InitializedMutableBufferPass from executorch.extension.llm.export.builder import DType, LLMEdgeManager @@ -523,52 +526,20 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str: return return_val -def download_and_convert_hf_checkpoint(modelname: str) -> str: - """ - Downloads and converts to Meta format a HuggingFace checkpoint. - """ - # Build cache path. - cache_subdir = "meta_checkpoints" - cache_dir = Path.home() / ".cache" / cache_subdir - cache_dir.mkdir(parents=True, exist_ok=True) - - # Use repo name to name the converted file. - repo_id = HUGGING_FACE_REPO_IDS[modelname] - model_name = repo_id.replace( - "/", "_" - ) - converted_path = cache_dir / f"{model_name}.pth" - - if converted_path.exists(): - print(f"✔ Using cached converted model: {converted_path}") - return converted_path - - # 1. Download weights from Hugging Face. - print("⬇ Downloading and converting checkpoint...") - from huggingface_hub import snapshot_download - - checkpoint_path = snapshot_download( - repo_id=repo_id, - ) - - # 2. Convert weights to Meta format. 
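    # (This model-specific dispatch is exactly what the patch lifts out of the
    # helper: the import choice moves up into export_llama, and the download
    # logic moves to hf_download.py, which takes a generic convert_weights
    # callback instead.)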
- if modelname == "qwen2_5": - from executorch.examples.models.qwen2_5 import convert_weights - - convert_weights(checkpoint_path, converted_path) - elif modelname == "phi_4_mini": - from executorch.examples.models.phi_4_mini import convert_weights - - convert_weights(checkpoint_path, converted_path) - elif modelname == "smollm2": - pass - - return converted_path - - def export_llama(args) -> str: + # If a checkpoint isn't provided for an HF OSS model, download and convert the + # weights first. if not args.checkpoint and args.model in HUGGING_FACE_REPO_IDS: - args.checkpoint = download_and_convert_hf_checkpoint(args.model) + repo_id = HUGGING_FACE_REPO_IDS[args.model] + if args.model == "qwen2_5": + from executorch.examples.models.qwen2_5 import convert_weights + elif args.model == "phi_4_mini": + from executorch.examples.models.phi_4_mini import convert_weights + else: + raise ValueError( + f"Converting weights to meta format for {args.model} is not yet supported" + ) + args.checkpoint = download_and_convert_hf_checkpoint(repo_id, convert_weights) if args.profile_path is not None: try: diff --git a/examples/models/llama/hf_download.py b/examples/models/llama/hf_download.py new file mode 100644 index 00000000000..b09dc5125d9 --- /dev/null +++ b/examples/models/llama/hf_download.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from pathlib import Path +from typing import Callable + +from huggingface_hub import snapshot_download + + +def download_and_convert_hf_checkpoint( + repo_id: str, convert_weights: Callable[[str, str], None] +) -> str: + """ + Downloads and converts to Meta format a HuggingFace checkpoint. + + Args: + repo_id: Id of the HuggingFace repo, e.g. "Qwen/Qwen2.5-1.5B". + convert_weights: Weight conversion function taking in path to the downloaded HuggingFace + files and the desired output path. + + Returns: + The output path of the Meta checkpoint converted from HuggingFace. + """ + + # Build cache path. + cache_subdir = "meta_checkpoints" + cache_dir = Path.home() / ".cache" / cache_subdir + cache_dir.mkdir(parents=True, exist_ok=True) + + # Use repo name to name the converted file. + model_name = repo_id.replace("/", "_") + converted_path = cache_dir / f"{model_name}.pth" + + if converted_path.exists(): + print(f"✔ Using cached converted model: {converted_path}") + return converted_path + + # 1. Download weights from Hugging Face. + print("⬇ Downloading and converting checkpoint...") + checkpoint_path = snapshot_download( + repo_id=repo_id, + ) + + # 2. Convert weights to Meta format. + convert_weights(checkpoint_path, converted_path) + return converted_path diff --git a/examples/models/llama/install_requirements.sh b/examples/models/llama/install_requirements.sh index 254379e9e78..b9e0f9210c5 100755 --- a/examples/models/llama/install_requirements.sh +++ b/examples/models/llama/install_requirements.sh @@ -10,7 +10,7 @@ # Install tokenizers for hf .json tokenizer. # Install snakeviz for cProfile flamegraph # Install lm-eval for Model Evaluation with lm-evalution-harness. 
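# Install huggingface_hub for snapshot_download, used by hf_download.py.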
-pip install tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile +pip install huggingface_hub tiktoken torchtune sentencepiece tokenizers snakeviz lm_eval==0.4.5 blobfile # Call the install helper for further setup python examples/models/llama/install_requirement_helper.py From 4db8f072fbb1e98c228de7a5ce74e79f2e13c51d Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 16:59:00 -0700 Subject: [PATCH 4/5] Support smollm2 --- examples/models/llama/export_llama_lib.py | 3 ++ .../models/smollm2/{__init__ => __init__.py} | 4 +- examples/models/smollm2/convert_weights.py | 41 +++++++++++-------- 3 files changed, 29 insertions(+), 19 deletions(-) rename examples/models/smollm2/{__init__ => __init__.py} (62%) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index aedc37054de..1bd0c5ac68c 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -105,6 +105,7 @@ HUGGING_FACE_REPO_IDS = { "qwen2_5": "Qwen/Qwen2.5-1.5B", "phi_4_mini": "microsoft/Phi-4-mini-instruct", + "smollm2": "HuggingFaceTB/SmolLM-135M", } @@ -541,6 +542,8 @@ def export_llama(args) -> str: from executorch.examples.models.qwen2_5 import convert_weights elif args.model == "phi_4_mini": from executorch.examples.models.phi_4_mini import convert_weights + elif args.model == "smollm2": + from executorch.examples.models.smollm2 import convert_weights else: raise ValueError( f"Converting weights to meta format for {args.model} is not yet supported" diff --git a/examples/models/smollm2/__init__ b/examples/models/smollm2/__init__.py similarity index 62% rename from examples/models/smollm2/__init__ rename to examples/models/smollm2/__init__.py index 3d01bf9eb42..3f973e2c786 100644 --- a/examples/models/smollm2/__init__ +++ b/examples/models/smollm2/__init__.py @@ -1,7 +1,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.example.models.llama.model import Llama2Model +from executorch.examples.models.llama.model import Llama2Model +from executorch.examples.models.smollm2.convert_weights import convert_weights class SmolLM2Model(Llama2Model): @@ -11,4 +12,5 @@ def __init__(self, **kwargs): __all__ = [ "SmolLM2Model", + "convert_weights", ] diff --git a/examples/models/smollm2/convert_weights.py b/examples/models/smollm2/convert_weights.py index db80bd47b8c..c1a1bf08d11 100644 --- a/examples/models/smollm2/convert_weights.py +++ b/examples/models/smollm2/convert_weights.py @@ -11,7 +11,6 @@ _SMOLLM_FROM_META = { "tok_embeddings.weight": "tok_embeddings.weight", "norm.weight": "norm.scale", - "output.weight": "output.weight", "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight", "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight", "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight", @@ -41,10 +40,32 @@ def smollm_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch. for key, value in state_dict.items(): new_key = get_mapped_key(key, inverted_mapping_dict) converted_state_dict[new_key] = value + converted_state_dict["output.weight"] = converted_state_dict[ + "tok_embeddings.weight" + ] return converted_state_dict +def convert_weights(input_dir: str, output_file: str) -> None: + # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. 
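    # (model_type tells torchtune's checkpointer which HF-to-tune key mapping
    # to apply; LLAMA3 is used here presumably because SmolLM2 follows the
    # Llama architecture, leaving smollm_tune_to_meta only the tune-to-Meta
    # renames plus the tied output.weight handled above.)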
+ checkpointer = FullModelHFCheckpointer( + checkpoint_dir=input_dir, + checkpoint_files=["model.safetensors"], + output_dir=".", + model_type="LLAMA3", + ) + + print("Loading checkpoint...") + sd = checkpointer.load_checkpoint() + print("Converting checkpoint...") + breakpoint() + sd = smollm_tune_to_meta(sd["model"]) + print("Saving checkpoint...") + torch.save(sd, output_file) + print(f"Done.") + + def main(): parser = argparse.ArgumentParser( description="Convert SmolLM weights to Meta format." @@ -57,23 +78,7 @@ def main(): parser.add_argument("output", type=str, help="Path to the output checkpoint") args = parser.parse_args() - - # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves. - checkpointer = FullModelHFCheckpointer( - checkpoint_dir=args.input_dir, - checkpoint_files=["model.safetensors"], - output_dir=".", - model_type="LLAMA", - ) - - print("Loading checkpoint...") - sd = checkpointer.load_checkpoint() - - print("Converting checkpoint...") - sd = smollm_tune_to_meta(sd["model"]) - - torch.save(sd, args.output) - print(f"Checkpoint saved to {args.output}") + convert_weights(args.input_dir, args.output) if __name__ == "__main__": From 19cda10cf84463fb2d5cdf12f9cef034304cc985 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 17:27:14 -0700 Subject: [PATCH 5/5] Lint --- examples/models/smollm2/convert_weights.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/models/smollm2/convert_weights.py b/examples/models/smollm2/convert_weights.py index c1a1bf08d11..59b83d3e3a3 100644 --- a/examples/models/smollm2/convert_weights.py +++ b/examples/models/smollm2/convert_weights.py @@ -59,11 +59,10 @@ def convert_weights(input_dir: str, output_file: str) -> None: print("Loading checkpoint...") sd = checkpointer.load_checkpoint() print("Converting checkpoint...") - breakpoint() sd = smollm_tune_to_meta(sd["model"]) print("Saving checkpoint...") torch.save(sd, output_file) - print(f"Done.") + print("Done.") def main():
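
Usage note: with the full series applied, exporting any model mapped in
HUGGING_FACE_REPO_IDS no longer requires a manually supplied --checkpoint.
Below is a minimal sketch of the resulting flow as a standalone driver; the
driver itself is illustrative and not part of the series, but the imports,
function signature, and cache location all come from the patches above.

    from executorch.examples.models.llama.hf_download import (
        download_and_convert_hf_checkpoint,
    )
    from executorch.examples.models.smollm2 import convert_weights

    # Repo id as mapped for "smollm2" in HUGGING_FACE_REPO_IDS.
    repo_id = "HuggingFaceTB/SmolLM-135M"

    # First run: snapshot_download fetches the HF files, then convert_weights
    # writes ~/.cache/meta_checkpoints/HuggingFaceTB_SmolLM-135M.pth.
    # Subsequent runs: the cached .pth is returned without re-downloading.
    checkpoint = download_and_convert_hf_checkpoint(repo_id, convert_weights)
    print(checkpoint)

Passing convert_weights as a callback is the design point of patch 3:
hf_download.py stays model-agnostic, and each model package owns its own
HF-to-Meta key mapping.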