
Commit 1f7eaba

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 781b1c8 commit 1f7eaba

7 files changed: +37 additions, −42 deletions

src/lightning/pytorch/utilities/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -25,6 +25,8 @@
 )
 from lightning.pytorch.utilities.combined_loader import CombinedLoader
 from lightning.pytorch.utilities.enums import GradClipAlgorithmType
+from lightning.pytorch.utilities.fp8_training_handler import Float8TrainingHandler, FP8Config
+from lightning.pytorch.utilities.fsdp2_handler import FSDP2Config, FSDP2Handler
 from lightning.pytorch.utilities.grads import grad_norm
 from lightning.pytorch.utilities.parameter_tying import find_shared_parameters, set_shared_parameters
 from lightning.pytorch.utilities.parsing import AttributeDict, is_picklable
@@ -34,8 +36,6 @@
     rank_zero_only,
     rank_zero_warn,
 )
-from lightning.pytorch.utilities.fp8_training_handler import FP8Config, Float8TrainingHandler
-from lightning.pytorch.utilities.fsdp2_handler import FSDP2Config, FSDP2Handler
 from lightning.pytorch.utilities.torch_compile_handler import TorchCompileHandler

 __all__ = [
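
Note: the two hunks above only move the new handler imports into sorted position; nothing is added or removed functionally. A minimal, hypothetical check (not part of this commit) that the classes remain importable from the package:

    from lightning.pytorch.utilities import FP8Config, FSDP2Config, FSDP2Handler, Float8TrainingHandler

    # The module-level imports re-export the handlers, so this works
    # regardless of whether the names are also listed in __all__.
    print(Float8TrainingHandler.__module__)  # lightning.pytorch.utilities.fp8_training_handler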

src/lightning/pytorch/utilities/fp8_training_handler.py

Lines changed: 8 additions & 10 deletions
@@ -1,8 +1,8 @@
 # the script is modified based on https://github.com/pytorch/torchtitan/blob/main/torchtitan/float8.py
 import logging
-from typing import Dict, List, Union
-from dataclasses import dataclass
 import operator
+from dataclasses import dataclass
+from typing import Dict, List, Union

 import torch
 import torch.nn as nn
@@ -44,13 +44,10 @@ class FP8Config:


 class Float8TrainingHandler:
-    """
-    Handler for configuring models for FP8 training using torchao.
-    """
+    """Handler for configuring models for FP8 training using torchao."""

     def __init__(self, args: FP8Config, model_path: str, parallel_dims: Dict[str, bool]):
-        """
-        Initializes the handler for FP8 training and configuration.
+        """Initializes the handler for FP8 training and configuration.

         Args:
             args (FP8Config): Configuration object for FP8 training, including settings for scaling, amax initialization, and torch compile.
@@ -74,6 +71,7 @@ def __init__(self, args: FP8Config, model_path: str, parallel_dims: Dict[str, bo

             parallel_dims = {"dp_shard_enabled": False}
             handler = Float8TrainingHandler(fp8_config, "path/to/model", parallel_dims)
+
         """
         self.model_path = model_path
         self.args = args
@@ -132,14 +130,14 @@ def __init__(self, args: FP8Config, model_path: str, parallel_dims: Dict[str, bo
         log.info("Float8 training active")

     def convert_to_float8_training(self, model: nn.Module, module_filter_fn: callable = None):
-        """
-        Converts the linear layers of `model` to `Float8Linear` based on a module filter function.
-        Mutates the model in place.
+        """Converts the linear layers of `model` to `Float8Linear` based on a module filter function. Mutates the model
+        in place.

         Args:
             model (nn.Module): The model whose layers should be converted.
             module_filter_fn (callable, optional): A function to filter which modules should be replaced.
                 Defaults to a model-specific filter based on `model_path`.
+
         """
         if not self.enable_fp8:
             log.warning("FP8 is disabled, so layers will not be replaced.")

src/lightning/pytorch/utilities/fsdp2_handler.py

Lines changed: 6 additions & 6 deletions
@@ -1,10 +1,10 @@
 import logging
+import operator
+from dataclasses import dataclass
 from typing import TYPE_CHECKING

 import torch
 import torch.nn as nn
-import operator
-from dataclasses import dataclass
 from lightning_utilities.core.imports import compare_version

 if TYPE_CHECKING:
@@ -20,8 +20,7 @@ class FSDP2Config:


 class FSDP2Handler:
-    """
-    Handler for wrapping the model layers with FSDP2.
+    """Handler for wrapping the model layers with FSDP2.

     Args:
         args (FSDP2Config): Configuration for FSDP2, including options for CPU offload and gradient checkpointing.
@@ -30,6 +29,7 @@ class FSDP2Handler:
     Attributes:
         args (FSDP2Config): Stores the FSDP2 configuration.
         device_mesh (DeviceMesh): Stores the device mesh configuration.
+
     """

     def __init__(self, args: FSDP2Config, device_mesh: "DeviceMesh"):
@@ -63,14 +63,14 @@ def __init__(self, args: FSDP2Config, device_mesh: "DeviceMesh"):
             raise

     def wrap_model(self, model: nn.Module):
-        """
-        Wraps the model layers with FSDP configurations.
+        """Wraps the model layers with FSDP configurations.

         Args:
             model (nn.Module): The model to wrap.

         Returns:
             nn.Module: The wrapped model.
+
         """
         dp_mesh = self.device_mesh["data_parallel"]
         assert dp_mesh.size() > 1, "FSDP requires at least two devices."
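
Based on the docstring and the `wrap_model` assertion above, the handler expects a device mesh with a "data_parallel" dimension of size greater than one. A hypothetical sketch, assuming a multi-rank process group is already initialized and that FSDP2Config's remaining fields have defaults:

    import torch.distributed as dist
    from torch.distributed.device_mesh import init_device_mesh

    from lightning.pytorch.demos import Transformer
    from lightning.pytorch.utilities.fsdp2_handler import FSDP2Config, FSDP2Handler

    # Hypothetical sketch; requires torch.distributed to be initialized with >= 2 ranks.
    mesh = init_device_mesh("cuda", (dist.get_world_size(),), mesh_dim_names=("data_parallel",))

    config = FSDP2Config(enable_gradient_checkpointing=True)  # field name taken from the test later in this diff
    handler = FSDP2Handler(config, mesh)
    wrapped = handler.wrap_model(Transformer())  # returns the FSDP2-wrapped model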

src/lightning/pytorch/utilities/torch_compile_handler.py

Lines changed: 10 additions & 10 deletions
@@ -1,22 +1,22 @@
 import logging
+import operator
+
 import torch
 import torch.nn as nn
-import operator
 from lightning_utilities.core.imports import compare_version

-
 log = logging.getLogger(__name__)


 class TorchCompileHandler:
-    """
-    Handler for compiling specific layers of the model using torch.compile.
+    """Handler for compiling specific layers of the model using torch.compile.

     Args:
         enable_compile (bool): Whether to enable compilation.
         model_path (str): Path to the model, used to determine default compilable layers.
         compile_layers (List[str], optional): List of layer class names to compile. If None, defaults are used.
         compile_args (dict, optional): Additional arguments to pass to torch.compile.
+
     """

     # Default mapping of model names to compilable layer class names
@@ -54,23 +54,23 @@ def __init__(
         )

     def _get_default_compile_layers(self):
-        """
-        Determines the default layers to compile based on the model name.
+        """Determines the default layers to compile based on the model name.

         Returns:
             List[str]: List of layer class names to compile.
+
         """
         for model_name, layers in self.DEFAULT_COMPILABLE_LAYERS.items():
             if model_name in self.model_path:
                 return layers
         return []

     def compile_model(self, model: nn.Module):
-        """
-        Compiles specified layers in the model.
+        """Compiles specified layers in the model.

         Args:
             model (nn.Module): The model to compile.
+
         """
         if not self.enable_compile:
             return
@@ -84,11 +84,11 @@ def compile_model(self, model: nn.Module):
         self._compile_layers(model)

     def _compile_layers(self, module: nn.Module):
-        """
-        Recursively compiles specified layers in the module.
+        """Recursively compiles specified layers in the module.

         Args:
             module (nn.Module): The module to process.
+
         """
         for name, child in module.named_children():
             child_class_name = type(child).__name__
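
The constructor arguments listed in the docstring above suggest the following usage. This is a hypothetical sketch in which the model path, layer list, and torch.compile arguments are illustrative choices, not values taken from this commit:

    import torch.nn as nn

    from lightning.pytorch.utilities.torch_compile_handler import TorchCompileHandler

    # Hypothetical sketch: compile only the Linear submodules of a toy model.
    model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 16))

    handler = TorchCompileHandler(
        enable_compile=True,
        model_path="my_custom_model",              # assumed name; not in the default layer mapping
        compile_layers=["Linear"],                 # compile modules whose class name matches
        compile_args={"mode": "reduce-overhead"},  # forwarded to torch.compile
    )
    handler.compile_model(model)  # appears to replace matching submodules in place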

tests/tests_pytorch/utilities/test_fp8_training_handler.py

Lines changed: 2 additions & 4 deletions
@@ -2,14 +2,12 @@
 from unittest.mock import patch

 import torch.nn as nn
-from torchao.float8 import Float8Linear
 from lightning.pytorch.demos import Transformer
-
-from lightning.pytorch.utilities.fp8_training_handler import FP8Config, Float8TrainingHandler
+from lightning.pytorch.utilities.fp8_training_handler import Float8TrainingHandler, FP8Config
+from torchao.float8 import Float8Linear


 class TestFloat8TrainingHandler(unittest.TestCase):
-
     def setUp(self):
         self.args = FP8Config(
             enable_fp8=True,

tests/tests_pytorch/utilities/test_fsdp2_handler.py

Lines changed: 6 additions & 6 deletions
@@ -2,29 +2,29 @@
 from unittest.mock import MagicMock, patch

 import torch.nn as nn
-from lightning.pytorch.demos import Transformer
 from lightning.pytorch.utilities.fsdp2_handler import FSDP2Config, FSDP2Handler


 # Define mock functions
 def mock_fully_shard(module, **kwargs):
-    """
-    Mock for torch.distributed._composable.fsdp.fully_shard.
+    """Mock for torch.distributed._composable.fsdp.fully_shard.
+
     Returns the module unchanged to simulate sharding without actual processing.
+
     """
     return module


 def mock_checkpoint_wrapper(module):
-    """
-    Mock for torch.distributed.algorithms._checkpoint.checkpoint_wrapper.
+    """Mock for torch.distributed.algorithms._checkpoint.checkpoint_wrapper.
+
     Returns the module unchanged to simulate checkpoint wrapping without actual processing.
+
     """
     return module


 class TestFSDP2Handler(unittest.TestCase):
-
     def setUp(self):
         self.args = FSDP2Config(
             enable_gradient_checkpointing=True,

tests/tests_pytorch/utilities/test_torch_compile_handler.py

Lines changed: 3 additions & 4 deletions
@@ -5,20 +5,19 @@

 import torch.nn as nn
 from lightning.pytorch.demos import Transformer
-
 from lightning.pytorch.utilities.torch_compile_handler import TorchCompileHandler


 def mock_torch_compile(module, **kwargs):
-    """
-    Mock function for torch.compile that returns the module unchanged.
+    """Mock function for torch.compile that returns the module unchanged.
+
     This avoids actual compilation during testing.
+
     """
     return module


 class TestTorchCompileHandler(unittest.TestCase):
-
     def setUp(self):
         self.enable_compile = True
         self.model_path = "test_custom_transformer_model"
