From a2118b69b50e3f0838275a6b073f8f9ea9bb4559 Mon Sep 17 00:00:00 2001
From: Aaron Zheng
Date: Sun, 2 Nov 2025 22:11:18 +0000
Subject: [PATCH 1/3] deepspeed

---
 .../scripts/convert_deepspeed_to_hf.py        | 154 ++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 skyrl-train/scripts/convert_deepspeed_to_hf.py

diff --git a/skyrl-train/scripts/convert_deepspeed_to_hf.py b/skyrl-train/scripts/convert_deepspeed_to_hf.py
new file mode 100644
index 000000000..79b23bab6
--- /dev/null
+++ b/skyrl-train/scripts/convert_deepspeed_to_hf.py
@@ -0,0 +1,154 @@
+"""
+Systematic converter: DeepSpeed ZeRO checkpoint → Hugging Face safetensors model.
+
+Assumptions:
+- You have a structure like:
+  data.pt
+  trainer_state.pt
+  policy/
+  ├── global_step_x/
+  │   ├── zero_pp_rank_0_mp_rank_00_model_states.pt
+  │   └── zero_pp_rank_0_mp_rank_00_optim_states.pt
+  ├── huggingface/
+  │   └── config.json, tokenizer.json, etc.
+  ├── zero_to_fp32.py
+  └── latest
+
+
+Output:
+  policy/huggingface_converted/model.safetensors (+ copied config/tokenizer)
+
+For DeepSpeed model shards, the output directory will be created with the following structure:
+.
+├── added_tokens.json
+├── chat_template.jinja (optional: this file is for chat-specific tasks)
+├── config.json
+├── generation_config.json (optional: default decoding parameters)
+├── merges.txt
+├── model.safetensors
+├── special_tokens_map.json
+├── tokenizer.json
+├── tokenizer_config.json
+└── vocab.json
+
+Example usage:
+uv run --isolated --frozen --extra vllm scripts/convert_deepspeed_to_hf.py --ckpt-dir [local_checkpoint] --out-dir [output_directory]
+"""
+
+import json
+import shutil
+import os
+import subprocess
+import argparse
+import torch
+from pathlib import Path
+from safetensors.torch import save_model
+from transformers import AutoModelForCausalLM, AutoConfig, AutoModelForSeq2SeqLM, AutoModel
+
+# === Directories ===
+def main(deepspeed_model_path: Path, out_dir:Path = None) -> Path:
+    ROOT = deepspeed_model_path
+    POLICY_DIR = ROOT / "policy"
+    HF_BASE = POLICY_DIR / "huggingface"
+    OUT_DIR = POLICY_DIR / "huggingface_converted" if not out_dir else out_dir
+    MERGED_FP32 = OUT_DIR / "merged_model" # directory that will store the merged FP32 PyTorch weights.
+
+    OUT_DIR.mkdir(exist_ok=True, parents=True)
+
+    # === 1. Merge ZeRO shards into single FP32 checkpoint ===
+    zero2fp32_script = POLICY_DIR / "zero_to_fp32.py"
+
+    if not MERGED_FP32.exists():
+        print(f"[1/5] Merging ZeRO shards from {POLICY_DIR} ...")
+        cmd = f"python {zero2fp32_script} {POLICY_DIR} {MERGED_FP32}"
+        result = subprocess.run(cmd)
+        if result.returncode != 0:
+            raise RuntimeError("zero_to_fp32.py merge failed.")
+    else:
+        print(f"[1/5] Merged model already exists → {MERGED_FP32}")
+
+    # === 2. Load merged state dict ===
+    print("[2/5] Loading merged model ...")
+    state = torch.load(MERGED_FP32 / "pytorch_model.bin", map_location="cpu")
+
+    # Handle possible wrapper keys
+    if isinstance(state, dict):
+        for key in ["module", "model_state_dict", "state_dict"]:
+            if key in state:
+                state = state[key]
+                break
+
+    merged_bin = MERGED_FP32 / "pytorch_model.bin"
+    hf_model_bin = HF_BASE / "pytorch_model.bin"
+    shutil.copy2(merged_bin, hf_model_bin)
+    print(f"   Copied to: {hf_model_bin}")
+
+    # === 3. Load HF config and initialize model ===
+    print("[3/5] Initializing Hugging Face model ...")
+    model = AutoModelForCausalLM.from_pretrained(HF_BASE, torch_dtype=torch.float16)
+    missing, unexpected = model.load_state_dict(state, strict=False)
+    print(f"   → Missing keys: {len(missing)}, Unexpected keys: {len(unexpected)}")
+
+    # === 4. Save to safetensors ===
+    print("[4/5] Saving model.safetensors ...")
+    save_model(model, str(OUT_DIR / "model.safetensors"), metadata={"format": "pt"})
+
+    # === 5. Copy tokenizer + config files ===
+    print("[5/5] Copying tokenizer/config files ...")
+    for fname in os.listdir(HF_BASE):
+        if fname.endswith((".json", ".txt", ".jinja")):
+            shutil.copy(HF_BASE / fname, OUT_DIR / fname)
+
+    # === Summary ===
+    print("\n✅ Conversion complete!")
+    print(f"→ Hugging Face safetensors model located at: {OUT_DIR.resolve()}")
+    print(f"→ Load it via:\n\n"
+        f"from transformers import AutoModelForCausalLM, AutoTokenizer\n"
+        f"model = AutoModelForCausalLM.from_pretrained('{OUT_DIR}')\n"
+        f"tokenizer = AutoTokenizer.from_pretrained('{OUT_DIR}')\n")
+    return Path(OUT_DIR)

+def guess_hf_class(cfg: AutoConfig):
+    """
+    Tries to find a reasonable HF class from the config.
+    Falls back to AutoModel if an LM head can't be detected.
+    """
+    if getattr(cfg, "is_encoder_decoder", False):
+        return AutoModelForSeq2SeqLM
+    archs = getattr(cfg, "architectures", []) or []
+    if any(a.endswith("ForCausalLM") for a in archs):
+        return AutoModelForCausalLM
+    decoders = {"gpt2", "gpt_bigcode", "llama", "mistral", "qwen", "qwen2", "internlm", "mpt", "phi", "falcon"}
+    if getattr(cfg, "model_type", "") in decoders:
+        return AutoModelForCausalLM
+    return AutoModel

+def validate_load(out_dir: Path):
+    """
+    Optional: sanity-load with HF to ensure the saved safetensors is consumable.
+    Loads on the CPU to avoid device/dtype quirks that can surface when loading directly onto a GPU.
+    """
+    try:
+        cfg = AutoConfig.from_pretrained(out_dir, local_files_only=True, trust_remote_code=True)
+        HFClass = guess_hf_class(cfg)
+        _ = HFClass.from_pretrained(
+            out_dir, local_files_only=True, device_map=None, dtype="auto", trust_remote_code=True
+        )
+        print("[validate] HF Load OK")
+    except Exception as e:
+        print(f"[validate][error] HF Load failed: {e}")
+        raise RuntimeError("HF Load failed")

+if __name__ == "__main__":
+    ap = argparse.ArgumentParser(description="Convert DeepSpeed checkpoint shards to a Hugging Face safetensors model.")
+    ap.add_argument(
+        "--ckpt-dir", type=str, required=True, help="Path to the checkpoint directory containing the trainer_state.pt file"
+    )
+    ap.add_argument("--out-dir", type=str, default=None, help="Output directory for the HF model")
+    ap.add_argument("--validate-load", action="store_true", help="Try loading the saved model with Transformers after saving")
+    args = ap.parse_args()
+    ckpt_dir = Path(args.ckpt_dir).resolve()
+    output_dir = Path(args.out_dir).resolve()
+    out_path = main(ckpt_dir, output_dir)
+    if args.validate_load:
+        validate_load(out_path)
\ No newline at end of file
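
Note on PATCH 1/3, step 2: the loop over ["module", "model_state_dict", "state_dict"] unwraps checkpoints that nest the weights under a container key, but some ZeRO merges instead prefix every tensor name (e.g. "module.model.embed_tokens.weight"). A minimal sketch of that extra normalization, assuming such a checkpoint; the helper name and the "module." prefix are illustrative and not part of the patch:

import torch

def strip_wrapper_prefix(state: dict, prefix: str = "module.") -> dict:
    # Drop the wrapper prefix from every key so names line up with the HF
    # module tree; a no-op when no key carries the prefix.
    if not any(k.startswith(prefix) for k in state):
        return state
    return {k[len(prefix):]: v for k, v in state.items()}

state = torch.load("merged_model/pytorch_model.bin", map_location="cpu")
state = strip_wrapper_prefix(state)

Normalizing keys this way keeps the missing/unexpected counts reported by model.load_state_dict(state, strict=False) meaningful.
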
From d9c16edc39e47ff10cbbd3e95d8f45cb33453871 Mon Sep 17 00:00:00 2001
From: Aaron Zheng
Date: Sun, 2 Nov 2025 22:12:18 +0000
Subject: [PATCH 2/3] formatted

---
 .../scripts/convert_deepspeed_to_hf.py        | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/skyrl-train/scripts/convert_deepspeed_to_hf.py b/skyrl-train/scripts/convert_deepspeed_to_hf.py
index 79b23bab6..7a2a4cdc0 100644
--- a/skyrl-train/scripts/convert_deepspeed_to_hf.py
+++ b/skyrl-train/scripts/convert_deepspeed_to_hf.py
@@ -35,7 +35,6 @@
 uv run --isolated --frozen --extra vllm scripts/convert_deepspeed_to_hf.py --ckpt-dir [local_checkpoint] --out-dir [output_directory]
 """
 
-import json
 import shutil
 import os
 import subprocess
@@ -45,13 +44,14 @@
 from safetensors.torch import save_model
 from transformers import AutoModelForCausalLM, AutoConfig, AutoModelForSeq2SeqLM, AutoModel
 
+
 # === Directories ===
-def main(deepspeed_model_path: Path, out_dir:Path = None) -> Path:
+def main(deepspeed_model_path: Path, out_dir: Path = None) -> Path:
     ROOT = deepspeed_model_path
     POLICY_DIR = ROOT / "policy"
     HF_BASE = POLICY_DIR / "huggingface"
     OUT_DIR = POLICY_DIR / "huggingface_converted" if not out_dir else out_dir
-    MERGED_FP32 = OUT_DIR / "merged_model" # directory that will store the merged FP32 PyTorch weights.
+    MERGED_FP32 = OUT_DIR / "merged_model"  # directory that will store the merged FP32 PyTorch weights.
 
     OUT_DIR.mkdir(exist_ok=True, parents=True)
 
@@ -102,12 +102,15 @@ def main(deepspeed_model_path: Path, out_dir: Path = None) -> Path:
     # === Summary ===
     print("\n✅ Conversion complete!")
     print(f"→ Hugging Face safetensors model located at: {OUT_DIR.resolve()}")
-    print(f"→ Load it via:\n\n"
+    print(
+        f"→ Load it via:\n\n"
         f"from transformers import AutoModelForCausalLM, AutoTokenizer\n"
         f"model = AutoModelForCausalLM.from_pretrained('{OUT_DIR}')\n"
-        f"tokenizer = AutoTokenizer.from_pretrained('{OUT_DIR}')\n")
+        f"tokenizer = AutoTokenizer.from_pretrained('{OUT_DIR}')\n"
+    )
     return Path(OUT_DIR)
 
+
 def guess_hf_class(cfg: AutoConfig):
     """
     Tries to find a reasonable HF class from the config.
@@ -123,6 +126,7 @@ def guess_hf_class(cfg: AutoConfig):
         return AutoModelForCausalLM
     return AutoModel
 
+
 def validate_load(out_dir: Path):
     """
     Optional: sanity-load with HF to ensure the saved safetensors is consumable.
@@ -138,17 +142,23 @@ def validate_load(out_dir: Path):
     except Exception as e:
         print(f"[validate][error] HF Load failed: {e}")
         raise RuntimeError("HF Load failed")
-    
+
+
 if __name__ == "__main__":
     ap = argparse.ArgumentParser(description="Convert DeepSpeed checkpoint shards to a Hugging Face safetensors model.")
     ap.add_argument(
-        "--ckpt-dir", type=str, required=True, help="Path to the checkpoint directory containing the trainer_state.pt file"
+        "--ckpt-dir",
+        type=str,
+        required=True,
+        help="Path to the checkpoint directory containing the trainer_state.pt file",
     )
     ap.add_argument("--out-dir", type=str, default=None, help="Output directory for the HF model")
-    ap.add_argument("--validate-load", action="store_true", help="Try loading the saved model with Transformers after saving")
+    ap.add_argument(
+        "--validate-load", action="store_true", help="Try loading the saved model with Transformers after saving"
+    )
     args = ap.parse_args()
     ckpt_dir = Path(args.ckpt_dir).resolve()
     output_dir = Path(args.out_dir).resolve()
     out_path = main(ckpt_dir, output_dir)
     if args.validate_load:
-        validate_load(out_path)
\ No newline at end of file
+        validate_load(out_path)
From 7b0334d26e71ca196afb03379961cc7b6d43635a Mon Sep 17 00:00:00 2001
From: Sumanth R Hegde <39546518+SumanthRH@users.noreply.github.com>
Date: Thu, 6 Nov 2025 22:04:52 -0800
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 skyrl-train/scripts/convert_deepspeed_to_hf.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/skyrl-train/scripts/convert_deepspeed_to_hf.py b/skyrl-train/scripts/convert_deepspeed_to_hf.py
index 7a2a4cdc0..2fedf4dfc 100644
--- a/skyrl-train/scripts/convert_deepspeed_to_hf.py
+++ b/skyrl-train/scripts/convert_deepspeed_to_hf.py
@@ -57,12 +57,15 @@ def main(deepspeed_model_path: Path, out_dir: Path = None) -> Path:
 
     # === 1. Merge ZeRO shards into single FP32 checkpoint ===
     zero2fp32_script = POLICY_DIR / "zero_to_fp32.py"
+    if not zero2fp32_script.exists():
+        raise FileNotFoundError(f"Conversion script not found at {zero2fp32_script}")
 
     if not MERGED_FP32.exists():
         print(f"[1/5] Merging ZeRO shards from {POLICY_DIR} ...")
-        cmd = f"python {zero2fp32_script} {POLICY_DIR} {MERGED_FP32}"
-        result = subprocess.run(cmd)
+        cmd = ["python", str(zero2fp32_script), str(POLICY_DIR), str(MERGED_FP32)]
+        result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
+            print(f"Error running zero_to_fp32.py:\n{result.stderr}")
             raise RuntimeError("zero_to_fp32.py merge failed.")
     else:
         print(f"[1/5] Merged model already exists → {MERGED_FP32}")
@@ -85,7 +88,7 @@ def main(deepspeed_model_path: Path, out_dir: Path = None) -> Path:
 
     # === 3. Load HF config and initialize model ===
     print("[3/5] Initializing Hugging Face model ...")
-    model = AutoModelForCausalLM.from_pretrained(HF_BASE, torch_dtype=torch.float16)
+    model = AutoModelForCausalLM.from_pretrained(HF_BASE, torch_dtype=torch.bfloat16)
     missing, unexpected = model.load_state_dict(state, strict=False)
     print(f"   → Missing keys: {len(missing)}, Unexpected keys: {len(unexpected)}")
 
@@ -158,7 +161,7 @@ def validate_load(out_dir: Path):
     )
     args = ap.parse_args()
     ckpt_dir = Path(args.ckpt_dir).resolve()
-    output_dir = Path(args.out_dir).resolve()
+    output_dir = Path(args.out_dir).resolve() if args.out_dir is not None else None
     out_path = main(ckpt_dir, output_dir)
     if args.validate_load:
         validate_load(out_path)
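
Note on PATCH 3/3: the review hardens the subprocess call into zero_to_fp32.py. When deepspeed itself is importable in the conversion environment, the same ZeRO merge can also run in-process — a minimal sketch, assuming a DeepSpeed release that ships this helper under deepspeed.utils.zero_to_fp32:

from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# POLICY_DIR is the same checkpoint directory the script hands to
# zero_to_fp32.py; the tag (e.g. "global_step_x") is read from the
# `latest` file when not passed explicitly.
state = get_fp32_state_dict_from_zero_checkpoint(str(POLICY_DIR))

This yields the merged FP32 state dict directly, so the intermediate pytorch_model.bin never needs to touch disk before step 3.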
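
Independent of --validate-load, a converted export can be spot-checked against the merged FP32 weights without instantiating the model — a small sketch, with paths written out for illustration (they follow the layout in the module docstring):

import torch
from safetensors import safe_open

merged = torch.load("policy/huggingface_converted/merged_model/pytorch_model.bin", map_location="cpu")
if isinstance(merged, dict) and "module" in merged:
    merged = merged["module"]  # unwrap a container key, as the script does

with safe_open("policy/huggingface_converted/model.safetensors", framework="pt", device="cpu") as f:
    for name in list(f.keys())[:5]:
        if name in merged:  # keys flagged missing/unexpected are skipped
            # The export is bf16 and the merge fp32, so compare in fp32 with a
            # tolerance on the order of bf16 rounding error.
            assert torch.allclose(f.get_tensor(name).float(), merged[name].float(), atol=1e-2), name
print("spot check passed")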