pytorch
diff --git a/tests/unit_tests/test_checkpoint.py b/tests/unit_tests/test_checkpoint.py
Lines changed: 84 additions & 2 deletions
diff --git a/tests/unit_tests/test_model_converter.py b/tests/unit_tests/test_model_converter.py
Lines changed: 29 additions & 0 deletions
@@ -14,6 +14,7 @@
 
 import torch
 import torch.nn as nn
+from torch.distributed.checkpoint.default_planner import DefaultLoadPlanner
 from torch.distributed.checkpoint.state_dict_saver import AsyncSaveResponse
 from torch.utils.data import DataLoader
 from torchtitan.components.checkpoint import CheckpointManager
@@ -165,7 +166,7 @@ def fake_save(self, state_dict: dict, checkpoint_id: str, storage_writer=None):
                 sd_to_save[key] = val
         torch.save(sd_to_save, os.path.join(checkpoint_id, "state_dict.pt"))
 
-    def fake_load(self, states: dict, checkpoint_id=None):
+    def fake_load(self, states: dict, checkpoint_id=None, **kwargs):
         path = os.path.join(checkpoint_id, "state_dict.pt")
         loaded = torch.load(path, weights_only="False")
         for key, val in loaded.items():
@@ -748,7 +749,7 @@ def fake_save(state_dict: dict, checkpoint_id: str, storage_writer=None):
                 self.assertNotIn("optimizer", state_dict)
             return
 
-        def fake_load(state_dict: dict, checkpoint_id=None):
+        def fake_load(state_dict: dict, checkpoint_id=None, **kwargs):
             self.assertIn("bias", state_dict)
             self.assertIn("weight", state_dict)
             # No model prefix
@@ -776,5 +777,86 @@ def fake_load(state_dict: dict, checkpoint_id=None):
         manager.load(step=1)
 
 
+class TestModelWrapperConverterKeys(unittest.TestCase):
+    """Tests for ModelWrapper.has_converter_keys() and its effect on load planner."""
+
+    def _create_manager(self, mock_save, mock_load, model, temp_dir):
+        """Create a CheckpointManager with mocked dcp.save/load."""
+        mock_save.side_effect = lambda *a, **kw: os.makedirs(  # fake save only creates the checkpoint directory
+            kw.get("checkpoint_id", a[1] if len(a) > 1 else ""), exist_ok=True  # id from the kwarg, else the 2nd positional arg
+        )
+        mock_load.side_effect = lambda *a, **kw: None  # fake load is a no-op; the tests only inspect how it was called
+
+        cfg = CheckpointManager.Config(
+            enable=True,
+            async_mode="disabled",
+            folder="",
+            interval=1,
+            keep_latest_k=0,
+            last_save_model_only=False,
+            export_dtype="float32",
+            exclude_from_loading=[],
+            initial_load_path=None,
+            initial_load_model_only=False,
+        )
+        with mock.patch("torch.distributed.new_group", return_value="pg"):  # new_group is stubbed while the manager is constructed
+            return CheckpointManager(
+                dataloader=FakeDataLoader(),
+                model_parts=[model],  # the single model under test is the only model part
+                optimizers=FakeOptimizersContainer(),
+                lr_schedulers=FakeLRSchedulersContainer(),
+                states={},
+                config=cfg,
+                sd_adapter=None,
+                base_folder=temp_dir,  # caller-owned temp dir; the tests remove it afterwards
+                ft_manager=DummyFTManager(),
+            )
+
+    @mock.patch("torch.distributed.get_rank", return_value=0)  # outermost patch -> last mock argument (mock_rank)
+    @mock.patch("torchtitan.components.checkpoint.dcp.load")
+    @mock.patch("torchtitan.components.checkpoint.dcp.save")  # innermost patch -> first mock argument (mock_save)
+    def test_load_uses_strict_planner_without_converter(
+        self, mock_save, mock_load, mock_rank
+    ):
+        """Without converter keys, dcp.load is called with allow_partial_load=False."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            model = nn.Linear(2, 2)  # plain module with no converter_key_filter attribute set
+            manager = self._create_manager(mock_save, mock_load, model, temp_dir)
+            manager.save(curr_step=1)  # mocked dcp.save just creates the directory it is given
+            manager.load(step=1)  # expected to reach the mocked dcp.load inspected below
+
+            _, kwargs = mock_load.call_args  # (args, kwargs) of the most recent dcp.load call
+            planner = kwargs.get("planner")  # None when no planner kwarg was passed, which fails the isinstance check
+            self.assertIsInstance(planner, DefaultLoadPlanner)
+            self.assertFalse(planner.allow_partial_load)
+        finally:
+            shutil.rmtree(temp_dir)  # always remove the temp dir, even when an assertion fails
+
+    @mock.patch("torch.distributed.get_rank", return_value=0)  # outermost patch -> last mock argument (mock_rank)
+    @mock.patch("torchtitan.components.checkpoint.dcp.load")
+    @mock.patch("torchtitan.components.checkpoint.dcp.save")  # innermost patch -> first mock argument (mock_save)
+    def test_load_uses_partial_planner_with_converter(
+        self, mock_save, mock_load, mock_rank
+    ):
+        """With converter keys on the model, dcp.load is called with allow_partial_load=True."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            model = nn.Linear(2, 2)
+            object.__setattr__(  # set the attribute directly, bypassing nn.Module.__setattr__
+                model, "converter_key_filter", lambda key: ".lora_a." in key  # presumably what has_converter_keys() detects -- confirm against ModelWrapper
+            )
+            manager = self._create_manager(mock_save, mock_load, model, temp_dir)
+            manager.save(curr_step=1)  # mocked dcp.save just creates the directory it is given
+            manager.load(step=1)  # expected to reach the mocked dcp.load inspected below
+
+            _, kwargs = mock_load.call_args  # (args, kwargs) of the most recent dcp.load call
+            planner = kwargs.get("planner")  # None when no planner kwarg was passed, which fails the isinstance check
+            self.assertIsInstance(planner, DefaultLoadPlanner)
+            self.assertTrue(planner.allow_partial_load)
+        finally:
+            shutil.rmtree(temp_dir)  # always remove the temp dir, even when an assertion fails
+
+
 if __name__ == "__main__":
     unittest.main()
@@ -173,3 +173,32 @@ def test_lora_trains_base_frozen():
         if name in lora_before
     )
     assert any_lora_changed, "No LoRA param changed after 5 training steps"
+
+
+def test_lora_key_remap_roundtrip():
+    """Remap torchtitan LoRA keys to HF and back, verify roundtrip."""
+    from torchtitan.components.lora import (
+        remap_lora_keys_from_hf,
+        remap_lora_keys_to_hf,
+    )
+
+    from_hf_map = {  # key templates, HF name -> torchtitan name ("{}" presumably stands for the layer index -- confirm)
+        "model.layers.{}.self_attn.q_proj.weight": "layers.{}.attention.wq.weight",
+        "model.layers.{}.mlp.gate_proj.weight": "layers.{}.feed_forward.w1.weight",
+    }
+
+    tt_sd = {  # torchtitan-style LoRA keys for layers 0 and 2, with random tensors as payload
+        "layers.0.attention.wq.lora_a.weight": torch.randn(8, 64),
+        "layers.0.attention.wq.lora_b.weight": torch.randn(64, 8),
+        "layers.2.feed_forward.w1.lora_a.weight": torch.randn(8, 64),
+    }
+
+    hf_sd = remap_lora_keys_to_hf(tt_sd, from_hf_map)  # forward direction: torchtitan -> HF
+    assert "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight" in hf_sd  # expects "base_model.model." prefix and lora_a -> lora_A
+    assert "base_model.model.model.layers.0.self_attn.q_proj.lora_B.weight" in hf_sd  # likewise lora_b -> lora_B
+    assert "base_model.model.model.layers.2.mlp.gate_proj.lora_A.weight" in hf_sd  # second template, layer index 2 carried through
+
+    rt_sd = remap_lora_keys_from_hf(hf_sd, from_hf_map)  # reverse direction, using the same map
+    assert set(rt_sd.keys()) == set(tt_sd.keys())  # the roundtrip must restore exactly the original key set
+    for k in tt_sd:
+        assert torch.equal(rt_sd[k], tt_sd[k])  # and each tensor must come back unchanged