
Commit 0ab2ade

Revert "[Dev] Optimizer State and Master Weight Offloading (NVIDIA#2760)"
This reverts commit 6b157e0.
1 parent: 6b157e0

File tree

6 files changed: +1 -725 lines changed

megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py

Lines changed: 0 additions & 315 deletions
This file was deleted.
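The deleted optimizer_state_offloader.py is not reproduced in this commit view. As a rough sketch only, the interface below is reconstructed from the call sites removed from distrib_optimizer.py in the next file's diff: the method names match those call sites, while the attributes, bodies, and buffer handling are assumptions, not the original implementation.

# Hypothetical reconstruction, not the deleted file: method names mirror the call
# sites removed from distrib_optimizer.py below; everything else is an assumption.
from typing import Dict

import torch


class OptimizerStateOffloader:
    """Sketch: park optimizer state tensors in pinned CPU memory between steps."""

    def __init__(self, distrib_optimizer) -> None:
        # `distrib_optimizer` is the DistributedOptimizer; it wraps the inner torch
        # optimizer as `.optimizer` (see the diff context below).
        self.distrib_optimizer = distrib_optimizer
        self._initialized = False
        self._cpu_buffers: Dict[int, Dict[str, torch.Tensor]] = {}

    def mark_optimizer_states_initialized(self) -> None:
        # Called after the first step, once exp_avg / exp_avg_sq actually exist.
        self._initialized = True

    def sync_before_step(self) -> None:
        # Make sure any in-flight H2D reload has finished before optimizer.step().
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    def offload(self) -> None:
        # Device-to-host copy of every tensor-valued optimizer state entry.
        if not self._initialized:
            return
        for i, state in enumerate(self.distrib_optimizer.optimizer.state.values()):
            bufs = self._cpu_buffers.setdefault(i, {})
            for key, val in state.items():
                if torch.is_tensor(val) and val.is_cuda:
                    if key not in bufs:
                        bufs[key] = torch.empty(
                            val.shape, dtype=val.dtype, device="cpu", pin_memory=True
                        )
                    bufs[key].copy_(val, non_blocking=True)

    def release_gpu_memory(self) -> None:
        # After the D2H copies finish, point the state dict at the CPU copies so the
        # GPU tensors become unreferenced and can be freed.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        for i, state in enumerate(self.distrib_optimizer.optimizer.state.values()):
            for key, buf in self._cpu_buffers.get(i, {}).items():
                state[key] = buf

    def reload(self) -> None:
        # Host-to-device copy back before the next optimizer step.
        for state in self.distrib_optimizer.optimizer.state.values():
            for key, val in list(state.items()):
                if torch.is_tensor(val) and not val.is_cuda:
                    state[key] = val.cuda(non_blocking=True)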

megatron/core/optimizer/distrib_optimizer.py

Lines changed: 0 additions & 25 deletions
@@ -49,7 +49,6 @@
 from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard
 from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict
 from ..transformer.module import MegatronModule
-from .cpu_offloading.optimizer_state_offloader import OptimizerStateOffloader
 from .grad_scaler import MegatronGradScaler
 from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys
 from .optimizer_config import OptimizerConfig
@@ -605,10 +604,6 @@ def __init__(
         self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges]
         self.optimizer.load_state_dict(self.optimizer.state_dict())
 
-        self._state_offloader: Optional[OptimizerStateOffloader] = None
-        if self.config.offload_optimizer_states:
-            self._state_offloader = OptimizerStateOffloader(self)
-
     def _get_model_param_range_map(self, param: torch.nn.Parameter):
         """
         Given a model param, get the index sub-range of the param that this
@@ -2585,8 +2580,6 @@ def step_with_ready_grads(self) -> bool:
         Under the hood, either launch synchronous param all-gathers or get ready to launch
         asynchorous all-gathers that get overlapped with the next forward pass.
         """
-        if self._state_offloader is not None:
-            self._state_offloader.sync_before_step()
         update_successful = super().step_with_ready_grads()
 
         timers = self.config.timers
@@ -2607,22 +2600,4 @@ def step_with_ready_grads(self) -> bool:
         if timers is not None:
             timers('params-all-gather').stop()
 
-        if self._state_offloader is not None:
-            self._state_offloader.mark_optimizer_states_initialized()
-
         return update_successful
-
-    def offload_states(self):
-        """Offload states to CPU."""
-        if self._state_offloader is not None:
-            self._state_offloader.offload()
-
-    def reload_offloaded_states(self):
-        """Start async reload of offloaded states."""
-        if self._state_offloader is not None:
-            self._state_offloader.reload()
-
-    def release_offloaded_gpu_states(self):
-        """Release GPU memory after D2H completes. For delayed release case."""
-        if self._state_offloader is not None:
-            self._state_offloader.release_gpu_memory()
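The three removed public methods (offload_states, reload_offloaded_states, release_offloaded_gpu_states) gave the training loop explicit control over when optimizer states leave and return to the GPU, matching the removed config docstring: offload after each optimizer step, reload before the next. A hypothetical driver loop is sketched below; train_step, model, data_iterator, and num_iterations are placeholders, not Megatron-LM APIs.

# Hypothetical driver only: train_step, model, data_iterator, and num_iterations are
# placeholders; `optimizer` is the DistributedOptimizer from the diff above.
def run_training(optimizer, model, data_iterator, train_step, num_iterations):
    losses = []
    for _ in range(num_iterations):
        loss = train_step(model, data_iterator)   # fwd/bwd while states sit on CPU
        optimizer.reload_offloaded_states()       # start the async H2D reload
        optimizer.step_with_ready_grads()         # sync_before_step() waits on that reload
        optimizer.offload_states()                # D2H copy of the freshly updated states
        optimizer.release_offloaded_gpu_states()  # free GPU copies once D2H has completed
        losses.append(loss)
    return losses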

megatron/core/optimizer/optimizer_config.py

Lines changed: 0 additions & 6 deletions
@@ -266,12 +266,6 @@ class OptimizerConfig:
     pin_cpu_params: bool = True
     """If True, pin the optimizer parameters to CPU memory."""
 
-    offload_optimizer_states: bool = False
-    """
-    If True, offload optimizer states to CPU after each optimizer step and
-    reload them before the next optimizer step.
-    """
-
     ################
     # Miscellaneous
     ################
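For reference, the removed field was an ordinary OptimizerConfig dataclass flag. On a tree that still carries the reverted change it would be enabled like any other option; the sketch below assumes such a tree (after this commit the keyword no longer exists and the call would raise a TypeError).

from megatron.core.optimizer import OptimizerConfig

# Only meaningful before this revert: offload_optimizer_states is removed here.
config = OptimizerConfig(
    optimizer="adam",
    lr=1e-4,
    use_distributed_optimizer=True,   # the offloading hooks lived on DistributedOptimizer
    offload_optimizer_states=True,    # the flag deleted by this commit
)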
