Commit c5b9adb

JKSenthil authored and facebook-github-bot committed
replace _init_optim_state w/ tnt's util
Summary: The `_init_optim_state()` util from DCP is identical to the implementation in TorchTNT. However, since the DCP version is only available in PyTorch >= 2.3 / nightlies, it isn't compatible with stable PyTorch users. This diff swaps the implementations.

Reviewed By: galrotem

Differential Revision: D56446429

fbshipit-source-id: 7ba85410f80994fff73e7dc053968a8b59f44990
1 parent fcd8b22 commit c5b9adb

File tree: 1 file changed (+3, -4 lines)


torchtnt/framework/callbacks/dcp_saver.py

Lines changed: 3 additions & 4 deletions
@@ -16,8 +16,6 @@
 from torch.distributed import checkpoint as dcp
 
 from torch.distributed.checkpoint._fsspec_filesystem import FsspecReader, FsspecWriter
-from torch.distributed.checkpoint.state_dict import _init_optim_state
-from torch.distributed.checkpoint.stateful import Stateful
 from torchtnt.framework.callbacks._checkpoint_utils import (
     _prepare_app_state_for_checkpoint,
     _prepare_app_state_for_restore,
@@ -39,8 +37,9 @@
     TTrainUnit,
 )
 from torchtnt.framework.utils import get_timing_context
+from torchtnt.utils.optimizer import init_optim_state
 from torchtnt.utils.rank_zero_log import rank_zero_info, rank_zero_warn
-from torchtnt.utils.stateful import MultiStateful
+from torchtnt.utils.stateful import MultiStateful, Stateful
 
 
 logger: logging.Logger = logging.getLogger(__name__)
@@ -249,7 +248,7 @@ def restore(
         # `torchtnt.utils.prepare_module.FSDPOptimizerWrapper`, this handles that case.
         optimizer = getattr(obj, "optimizer", obj)
         if isinstance(optimizer, torch.optim.Optimizer):
-            _init_optim_state(optimizer)
+            init_optim_state(optimizer)
 
         dcp.load(
             {"app_state": MultiStateful(app_state)},
