Commit ed8f618

update pytorch deepspeed precision
1 parent 467b935 commit ed8f618

File tree

2 files changed: 97 additions & 637 deletions

src/lightning/pytorch/plugins/precision/deepspeed.py

Lines changed: 22 additions & 60 deletions
@@ -11,30 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from contextlib import AbstractContextManager, nullcontext
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from contextlib import AbstractContextManager
+from typing import Any, Callable, Optional, Union
 
-import torch
-from lightning_utilities import apply_to_collection
 from torch import Tensor
 from torch.nn import Module
-from torch.optim import LBFGS, Optimizer
-from typing_extensions import get_args, override
+from torch.optim import Optimizer
+from typing_extensions import override
 
 import lightning.pytorch as pl
 from lightning.fabric.plugins.precision.deepspeed import _PRECISION_INPUT
-from lightning.fabric.plugins.precision.utils import _convert_fp_tensor, _DtypeContextManager
+from lightning.fabric.utilities.imports import _raise_enterprise_not_available
 from lightning.fabric.utilities.types import Steppable
 from lightning.pytorch.plugins.precision.precision import Precision
 from lightning.pytorch.utilities import GradClipAlgorithmType
-from lightning.pytorch.utilities.exceptions import MisconfigurationException
-from lightning.pytorch.utilities.model_helpers import is_overridden
-from lightning.pytorch.utilities.rank_zero import WarningCache
-
-if TYPE_CHECKING:
-    import deepspeed
-
-warning_cache = WarningCache()
 
 
 class DeepSpeedPrecision(Precision):
@@ -53,41 +43,29 @@ class DeepSpeedPrecision(Precision):
     """
 
     def __init__(self, precision: _PRECISION_INPUT) -> None:
-        supported_precision = get_args(_PRECISION_INPUT)
-        if precision not in supported_precision:
-            raise ValueError(
-                f"`Trainer(strategy='deepspeed', precision={precision!r})` is not supported."
-                f" `precision` must be one of: {supported_precision}."
-            )
-        self.precision = precision
-        precision_to_type = {
-            "bf16-mixed": torch.bfloat16,
-            "16-mixed": torch.float16,
-            "bf16-true": torch.bfloat16,
-            "16-true": torch.float16,
-            "32-true": torch.float32,
-        }
-        self._desired_dtype = precision_to_type[self.precision]
+        super().__init__(precision)
+        _raise_enterprise_not_available()
+        from pytorch_lightning_enterprise.plugins.precision.deepspeed import (
+            DeepSpeedPrecisionTrainer as EnterpriseDeepSpeedPrecision,
+        )
+
+        self.deepspeed_precision_impl = EnterpriseDeepSpeedPrecision(outer_object=self, precision=precision)
 
     @override
     def convert_module(self, module: Module) -> Module:
-        if "true" in self.precision:
-            return module.to(dtype=self._desired_dtype)
-        return module
+        return self.deepspeed_precision_impl.convert_module(module=module)
 
     @override
     def convert_input(self, data: Any) -> Any:
-        return apply_to_collection(data, function=_convert_fp_tensor, dtype=Tensor, dst_type=self._desired_dtype)
+        return self.deepspeed_precision_impl.convert_input(data=data)
 
     @override
     def tensor_init_context(self) -> AbstractContextManager:
-        if "true" not in self.precision:
-            return nullcontext()
-        return _DtypeContextManager(self._desired_dtype)
+        return self.deepspeed_precision_impl.tensor_init_context()
 
     @override
     def module_init_context(self) -> AbstractContextManager:
-        return self.tensor_init_context()
+        return self.deepspeed_precision_impl.module_init_context()
 
     @override
     def backward(  # type: ignore[override]
@@ -98,7 +76,7 @@ def backward(  # type: ignore[override]
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        r"""Performs back-propagation using DeepSpeed's engine.
+        r"""Performs back-propagation.
 
         Args:
             tensor: the loss tensor
@@ -108,13 +86,7 @@ def backward(  # type: ignore[override]
             \**kwargs: additional keyword arguments for the :meth:`deepspeed.DeepSpeedEngine.backward` call
 
         """
-        if is_overridden("backward", model):
-            warning_cache.warn(
-                "You have overridden the `LightningModule.backward` hook but it will be ignored since DeepSpeed handles"
-                " the backward logic internally."
-            )
-        deepspeed_engine: deepspeed.DeepSpeedEngine = model.trainer.model
-        deepspeed_engine.backward(tensor, *args, **kwargs)
+        return self.deepspeed_precision_impl.backward(tensor=tensor, model=model, optimizer=optimizer, *args, **kwargs)
 
     @override
     def optimizer_step(  # type: ignore[override]
@@ -124,19 +96,7 @@ def optimizer_step(  # type: ignore[override]
        closure: Callable[[], Any],
         **kwargs: Any,
     ) -> Any:
-        if isinstance(optimizer, LBFGS):
-            raise MisconfigurationException("DeepSpeed and the LBFGS optimizer are not compatible.")
-        closure_result = closure()
-        self._after_closure(model, optimizer)
-        skipped_backward = closure_result is None
-        # in manual optimization, the closure does not return a value
-        if model.automatic_optimization and skipped_backward:
-            raise MisconfigurationException(
-                "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`"
-            )
-        # DeepSpeed handles the optimizer step internally
-        deepspeed_engine: deepspeed.DeepSpeedEngine = model.trainer.model
-        return deepspeed_engine.step(**kwargs)
+        return self.deepspeed_precision_impl.optimizer_step(optimizer=optimizer, model=model, closure=closure, **kwargs)
 
     @override
     def clip_gradients(
@@ -145,4 +105,6 @@ def clip_gradients(
         clip_val: Union[int, float] = 0.0,
         gradient_clip_algorithm: GradClipAlgorithmType = GradClipAlgorithmType.NORM,
     ) -> None:
-        """DeepSpeed handles gradient clipping internally."""
+        return self.deepspeed_precision_impl.clip_gradients(
+            optimizer=optimizer, clip_val=clip_val, gradient_clip_algorithm=gradient_clip_algorithm
+        )
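
With this change, `DeepSpeedPrecision` keeps its public interface but forwards all precision handling to the optional `pytorch_lightning_enterprise` package: the constructor calls `_raise_enterprise_not_available()` and then wraps `DeepSpeedPrecisionTrainer`. A minimal usage sketch follows, assuming that package is installed; the accelerator, device count, and strategy alias are illustrative assumptions, not part of this commit. The valid precision strings are the ones listed in the removed `precision_to_type` mapping: "bf16-mixed", "16-mixed", "bf16-true", "16-true", "32-true".

from lightning.pytorch import Trainer
from lightning.pytorch.plugins.precision.deepspeed import DeepSpeedPrecision

# Build the plugin explicitly; after this commit the constructor raises if the
# optional pytorch_lightning_enterprise package is not available, and all
# precision behavior is delegated to its DeepSpeedPrecisionTrainer.
precision_plugin = DeepSpeedPrecision(precision="16-mixed")

# Illustrative Trainer setup (assumed, not introduced by this commit).
trainer = Trainer(
    accelerator="gpu",
    devices=2,
    strategy="deepspeed_stage_2",
    plugins=[precision_plugin],
)

The removed validation message referenced `Trainer(strategy='deepspeed', precision=...)`, so the more common path of passing `precision` directly to the Trainer and letting it construct this plugin should be unaffected.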

0 commit comments
