Commit a9364bd

layerwise_upcasting -> layerwise_casting

1 parent: 1d306b8

File tree: 10 files changed (+65, -65 lines)

docs/source/en/api/utilities.md

Lines changed: 2 additions & 2 deletions
@@ -42,6 +42,6 @@ Utility and helper functions for working with 🤗 Diffusers.
 
 [[autodoc]] utils.torch_utils.randn_tensor
 
-## apply_layerwise_upcasting
+## apply_layerwise_casting
 
-[[autodoc]] hooks.layerwise_upcasting.apply_layerwise_upcasting
+[[autodoc]] hooks.layerwise_casting.apply_layerwise_casting

docs/source/en/optimization/memory.md

Lines changed: 4 additions & 4 deletions
@@ -171,9 +171,9 @@ from diffusers.utils import export_to_video
 
 model_id = "THUDM/CogVideoX-5b"
 
-# Load the model in bfloat16 and enable layerwise upcasting
+# Load the model in bfloat16 and enable layerwise casting
 transformer = CogVideoXTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
-transformer.enable_layerwise_upcasting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
+transformer.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
 
 # Load the pipeline
 pipe = CogVideoXPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.bfloat16)
@@ -191,9 +191,9 @@ video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
 export_to_video(video, "output.mp4", fps=8)
 ```
 
-In the above example, layerwise upcasting is enabled on the transformer component of the pipeline. By default, certain layers are skipped from the FP8 weight casting because it can lead to significant degradation of generation quality. The normalization and modulation related weight parameters are also skipped by default.
+In the above example, layerwise casting is enabled on the transformer component of the pipeline. By default, certain layers are skipped from the FP8 weight casting because it can lead to significant degradation of generation quality. The normalization and modulation related weight parameters are also skipped by default.
 
-However, you gain more control and flexibility by directly utilizing the [`~hooks.layerwise_upcasting.apply_layerwise_upcasting`] function instead of [`~ModelMixin.enable_layerwise_upcasting`].
+However, you gain more control and flexibility by directly utilizing the [`~hooks.layerwise_casting.apply_layerwise_casting`] function instead of [`~ModelMixin.enable_layerwise_casting`].
 
 ## Channels-last memory format
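The paragraph changed above points readers at [`~hooks.layerwise_casting.apply_layerwise_casting`] for finer-grained control. Below is a minimal sketch (not part of this commit) of what that direct call can look like with the renamed API; the skip patterns are illustrative placeholders, not the library defaults.

```python
import torch
from diffusers import CogVideoXTransformer3DModel
from diffusers.hooks import apply_layerwise_casting

transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)

# Store weights in FP8 between forward passes, but compute in bfloat16.
# Illustrative skip patterns: pick patterns that match the precision-critical
# layers of your model (the defaults already cover normalization/modulation weights).
apply_layerwise_casting(
    transformer,
    storage_dtype=torch.float8_e4m3fn,
    compute_dtype=torch.bfloat16,
    skip_modules_pattern=("norm", "patch_embed"),
)
```

Unlike [`~ModelMixin.enable_layerwise_casting`], only the patterns and classes you pass here are skipped.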

src/diffusers/hooks/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,4 +2,4 @@
 
 
 if is_torch_available():
-    from .layerwise_upcasting import apply_layerwise_upcasting, apply_layerwise_upcasting_hook
+    from .layerwise_casting import apply_layerwise_casting, apply_layerwise_casting_hook

src/diffusers/hooks/layerwise_upcasting.py renamed to src/diffusers/hooks/layerwise_casting.py

Lines changed: 19 additions & 19 deletions
@@ -35,7 +35,7 @@
 # fmt: on
 
 
-class LayerwiseUpcastingHook(ModelHook):
+class LayerwiseCastingHook(ModelHook):
     r"""
     A hook that casts the weights of a module to a high precision dtype for computation, and to a low precision dtype
     for storage. This process may lead to quality loss in the output, but can significantly reduce the memory
@@ -55,7 +55,7 @@ def initialize_hook(self, module: torch.nn.Module):
 
     def deinitalize_hook(self, module: torch.nn.Module):
         raise NotImplementedError(
-            "LayerwiseUpcastingHook does not support deinitalization. A model once enabled with layerwise upcasting will "
+            "LayerwiseCastingHook does not support deinitalization. A model once enabled with layerwise casting will "
             "have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype "
             "will lead to precision loss, which might have an impact on the model's generation quality. The model should "
             "be re-initialized and loaded in the original dtype."
@@ -70,7 +70,7 @@ def post_forward(self, module: torch.nn.Module, output):
         return output
 
 
-def apply_layerwise_upcasting(
+def apply_layerwise_casting(
     module: torch.nn.Module,
     storage_dtype: torch.dtype,
     compute_dtype: torch.dtype,
@@ -79,7 +79,7 @@ def apply_layerwise_upcasting(
     non_blocking: bool = False,
 ) -> None:
     r"""
-    Applies layerwise upcasting to a given module. The module expected here is a Diffusers ModelMixin but it can be any
+    Applies layerwise casting to a given module. The module expected here is a Diffusers ModelMixin but it can be any
     nn.Module using diffusers layers or pytorch primitives.
 
     Example:
@@ -92,7 +92,7 @@ def apply_layerwise_upcasting(
     ...     model_id, subfolder="transformer", torch_dtype=torch.bfloat16
     ... )
 
-    >>> apply_layerwise_upcasting(
+    >>> apply_layerwise_casting(
     ...     transformer,
     ...     storage_dtype=torch.float8_e4m3fn,
     ...     compute_dtype=torch.bfloat16,
@@ -110,23 +110,23 @@ def apply_layerwise_upcasting(
         compute_dtype (`torch.dtype`):
             The dtype to cast the module to during the forward pass for computation.
         skip_modules_pattern (`Tuple[str, ...]`, defaults to `"default"`):
-            A list of patterns to match the names of the modules to skip during the layerwise upcasting process. If set
+            A list of patterns to match the names of the modules to skip during the layerwise casting process. If set
             to `"default"`, the default patterns are used. If set to `None`, no modules are skipped. If set to `None`
-            alongside `skip_modules_classes` being `None`, the layerwise upcasting is applied directly to the module
+            alongside `skip_modules_classes` being `None`, the layerwise casting is applied directly to the module
             instead of its internal submodules.
         skip_modules_classes (`Tuple[Type[torch.nn.Module], ...]`, defaults to `None`):
-            A list of module classes to skip during the layerwise upcasting process.
+            A list of module classes to skip during the layerwise casting process.
         non_blocking (`bool`, defaults to `False`):
            If `True`, the weight casting operations are non-blocking.
     """
     if skip_modules_pattern == "default":
         skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN
 
     if skip_modules_classes is None and skip_modules_pattern is None:
-        apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
+        apply_layerwise_casting_hook(module, storage_dtype, compute_dtype, non_blocking)
         return
 
-    _apply_layerwise_upcasting(
+    _apply_layerwise_casting(
         module,
         storage_dtype,
         compute_dtype,
@@ -136,7 +136,7 @@ def apply_layerwise_upcasting(
     )
 
 
-def _apply_layerwise_upcasting(
+def _apply_layerwise_casting(
     module: torch.nn.Module,
     storage_dtype: torch.dtype,
     compute_dtype: torch.dtype,
@@ -149,17 +149,17 @@ def _apply_layerwise_upcasting(
         skip_modules_pattern is not None and any(re.search(pattern, _prefix) for pattern in skip_modules_pattern)
     )
     if should_skip:
-        logger.debug(f'Skipping layerwise upcasting for layer "{_prefix}"')
+        logger.debug(f'Skipping layerwise casting for layer "{_prefix}"')
         return
 
     if isinstance(module, SUPPORTED_PYTORCH_LAYERS):
-        logger.debug(f'Applying layerwise upcasting to layer "{_prefix}"')
-        apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
+        logger.debug(f'Applying layerwise casting to layer "{_prefix}"')
+        apply_layerwise_casting_hook(module, storage_dtype, compute_dtype, non_blocking)
         return
 
     for name, submodule in module.named_children():
         layer_name = f"{_prefix}.{name}" if _prefix else name
-        _apply_layerwise_upcasting(
+        _apply_layerwise_casting(
             submodule,
             storage_dtype,
             compute_dtype,
@@ -170,11 +170,11 @@ def _apply_layerwise_upcasting(
         )
 
 
-def apply_layerwise_upcasting_hook(
+def apply_layerwise_casting_hook(
     module: torch.nn.Module, storage_dtype: torch.dtype, compute_dtype: torch.dtype, non_blocking: bool
 ) -> None:
     r"""
-    Applies a `LayerwiseUpcastingHook` to a given module.
+    Applies a `LayerwiseCastingHook` to a given module.
 
     Args:
         module (`torch.nn.Module`):
@@ -187,5 +187,5 @@ def apply_layerwise_upcasting_hook(
             If `True`, the weight casting operations are non-blocking.
     """
     registry = HookRegistry.check_if_exists_or_initialize(module)
-    hook = LayerwiseUpcastingHook(storage_dtype, compute_dtype, non_blocking)
-    registry.register_hook(hook, "layerwise_upcasting")
+    hook = LayerwiseCastingHook(storage_dtype, compute_dtype, non_blocking)
+    registry.register_hook(hook, "layerwise_casting")
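For context (not part of the diff): the renamed `apply_layerwise_casting_hook` registers a `LayerwiseCastingHook` on a single module under the `"layerwise_casting"` key. A minimal sketch on a bare linear layer, assuming a PyTorch build with `torch.float8_e4m3fn` support:

```python
import torch
from diffusers.hooks import apply_layerwise_casting_hook

linear = torch.nn.Linear(64, 64).to(torch.bfloat16)

# After this call the hook keeps the weights in FP8 for storage and casts them
# to bfloat16 around each forward pass.
apply_layerwise_casting_hook(
    linear, storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16, non_blocking=False
)

with torch.no_grad():
    out = linear(torch.randn(2, 64, dtype=torch.bfloat16))

print(linear.weight.dtype)  # expected: torch.float8_e4m3fn (storage dtype)
print(out.dtype)            # expected: torch.bfloat16 (compute dtype)
```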

src/diffusers/models/modeling_utils.py

Lines changed: 17 additions & 17 deletions
@@ -32,7 +32,7 @@
 from torch import Tensor, nn
 
 from .. import __version__
-from ..hooks import apply_layerwise_upcasting
+from ..hooks import apply_layerwise_casting
 from ..quantizers import DiffusersAutoQuantizer, DiffusersQuantizer
 from ..quantizers.quantization_config import QuantizationMethod
 from ..utils import (
@@ -104,13 +104,13 @@ def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype:
     """
     Returns the first found floating dtype in parameters if there is one, otherwise returns the last dtype it found.
     """
-    # 1. Check if we have attached any dtype modifying hooks (eg. layerwise upcasting)
+    # 1. Check if we have attached any dtype modifying hooks (eg. layerwise casting)
     if isinstance(parameter, nn.Module):
         for name, submodule in parameter.named_modules():
             if not hasattr(submodule, "_diffusers_hook"):
                 continue
             registry = submodule._diffusers_hook
-            hook = registry.get_hook("layerwise_upcasting")
+            hook = registry.get_hook("layerwise_casting")
             if hook is not None:
                 return hook.compute_dtype
 
@@ -328,7 +328,7 @@ def disable_xformers_memory_efficient_attention(self) -> None:
         """
         self.set_use_memory_efficient_attention_xformers(False)
 
-    def enable_layerwise_upcasting(
+    def enable_layerwise_casting(
         self,
         storage_dtype: torch.dtype = torch.float8_e4m3fn,
         compute_dtype: Optional[torch.dtype] = None,
@@ -337,9 +337,9 @@ def enable_layerwise_upcasting(
         non_blocking: bool = False,
     ) -> None:
         r"""
-        Activates layerwise upcasting for the current model.
+        Activates layerwise casting for the current model.
 
-        Layerwise upcasting is a technique that casts the model weights to a lower precision dtype for storage but
+        Layerwise casting is a technique that casts the model weights to a lower precision dtype for storage but
         upcasts them on-the-fly to a higher precision dtype for computation. This process can significantly reduce the
         memory footprint from model weights, but may lead to some quality degradation in the outputs. Most degradations
         are negligible, mostly stemming from weight casting in normalization and modulation layers.
@@ -348,10 +348,10 @@ def enable_layerwise_upcasting(
         embedding, positional embedding and normalization layers. This is because these layers are most likely
         precision-critical for quality. If you wish to change this behavior, you can set the
         `_skip_layerwise_casting_patterns` attribute to `None`, or call
-        [`~hooks.layerwise_upcasting.apply_layerwise_upcasting`] with custom arguments.
+        [`~hooks.layerwise_casting.apply_layerwise_casting`] with custom arguments.
 
         Example:
-            Using [`~models.ModelMixin.enable_layerwise_upcasting`]:
+            Using [`~models.ModelMixin.enable_layerwise_casting`]:
 
             ```python
             >>> from diffusers import CogVideoXTransformer3DModel
@@ -360,8 +360,8 @@ def enable_layerwise_upcasting(
             ...     "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
             ... )
 
-            >>> # Enable layerwise upcasting via the model, which ignores certain modules by default
-            >>> transformer.enable_layerwise_upcasting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
+            >>> # Enable layerwise casting via the model, which ignores certain modules by default
+            >>> transformer.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
             ```
 
         Args:
@@ -370,18 +370,18 @@ def enable_layerwise_upcasting(
             compute_dtype (`torch.dtype`):
                 The dtype to which the model weights should be cast during the forward pass.
             skip_modules_pattern (`Tuple[str, ...]`, *optional*):
-                A list of patterns to match the names of the modules to skip during the layerwise upcasting process. If
+                A list of patterns to match the names of the modules to skip during the layerwise casting process. If
                 set to `None`, default skip patterns are used to ignore certain internal layers of modules and PEFT
                 layers.
             skip_modules_classes (`Tuple[Type[torch.nn.Module], ...]`, *optional*):
-                A list of module classes to skip during the layerwise upcasting process.
+                A list of module classes to skip during the layerwise casting process.
             non_blocking (`bool`, *optional*, defaults to `False`):
                 If `True`, the weight casting operations are non-blocking.
         """
 
         user_provided_patterns = True
         if skip_modules_pattern is None:
-            from ..hooks.layerwise_upcasting import DEFAULT_SKIP_MODULES_PATTERN
+            from ..hooks.layerwise_casting import DEFAULT_SKIP_MODULES_PATTERN
 
             skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN
             user_provided_patterns = False
@@ -393,8 +393,8 @@ def enable_layerwise_upcasting(
 
         if is_peft_available() and not user_provided_patterns:
             # By default, we want to skip all peft layers because they have a very low memory footprint.
-            # If users want to apply layerwise upcasting on peft layers as well, they can utilize the
-            # `~diffusers.hooks.layerwise_upcasting.apply_layerwise_upcasting` function which provides
+            # If users want to apply layerwise casting on peft layers as well, they can utilize the
+            # `~diffusers.hooks.layerwise_casting.apply_layerwise_casting` function which provides
             # them with more flexibility and control.
 
             from peft.tuners.loha.layer import LoHaLayer
@@ -405,10 +405,10 @@ def enable_layerwise_upcasting(
                 skip_modules_pattern += tuple(layer.adapter_layer_names)
 
         if compute_dtype is None:
-            logger.info("`compute_dtype` not provided when enabling layerwise upcasting. Using dtype of the model.")
+            logger.info("`compute_dtype` not provided when enabling layerwise casting. Using dtype of the model.")
            compute_dtype = self.dtype
 
-        apply_layerwise_upcasting(
+        apply_layerwise_casting(
            self, storage_dtype, compute_dtype, skip_modules_pattern, skip_modules_classes, non_blocking
        )
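For context (not part of the diff): the `get_parameter_dtype` and `enable_layerwise_casting` hunks above interact: the hook registered under the `"layerwise_casting"` key is what lets the model keep reporting its compute dtype even though the weights are stored in FP8. A rough sketch of that behaviour with the renamed API:

```python
import torch
from diffusers import CogVideoXTransformer3DModel

transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)

# Normalization/embedding layers and PEFT layers are skipped by default.
transformer.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)

# get_parameter_dtype finds the dtype-modifying hook and returns its compute dtype.
print(transformer.dtype)  # expected: torch.bfloat16, not torch.float8_e4m3fn
```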

tests/lora/utils.py

Lines changed: 3 additions & 3 deletions
@@ -2100,8 +2100,8 @@ def test_correct_lora_configs_with_different_ranks(self):
         self.assertTrue(not np.allclose(original_output, lora_output_diff_alpha, atol=1e-3, rtol=1e-3))
         self.assertTrue(not np.allclose(lora_output_diff_alpha, lora_output_same_rank, atol=1e-3, rtol=1e-3))
 
-    def test_layerwise_upcasting_inference_denoiser(self):
-        from diffusers.hooks.layerwise_upcasting import DEFAULT_SKIP_MODULES_PATTERN, SUPPORTED_PYTORCH_LAYERS
+    def test_layerwise_casting_inference_denoiser(self):
+        from diffusers.hooks.layerwise_casting import DEFAULT_SKIP_MODULES_PATTERN, SUPPORTED_PYTORCH_LAYERS
 
         def check_linear_dtype(module, storage_dtype, compute_dtype):
             patterns_to_check = DEFAULT_SKIP_MODULES_PATTERN
@@ -2142,7 +2142,7 @@ def initialize_pipeline(storage_dtype=None, compute_dtype=torch.float32):
             )
 
             if storage_dtype is not None:
-                denoiser.enable_layerwise_upcasting(storage_dtype=storage_dtype, compute_dtype=compute_dtype)
+                denoiser.enable_layerwise_casting(storage_dtype=storage_dtype, compute_dtype=compute_dtype)
                 check_linear_dtype(denoiser, storage_dtype, compute_dtype)
 
             return pipe
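For context (not part of the diff): the renamed test uses a `check_linear_dtype` helper that is only partially visible in this hunk. The sketch below shows the kind of check it performs, under the assumption that supported layers should hold their weights in the storage dtype while layers matching the default skip patterns keep the compute dtype; the helper name and exact assertions are illustrative, not the test's actual code.

```python
import re

import torch
from diffusers.hooks.layerwise_casting import DEFAULT_SKIP_MODULES_PATTERN, SUPPORTED_PYTORCH_LAYERS


def assert_storage_dtypes(module: torch.nn.Module, storage_dtype, compute_dtype):
    # Illustrative helper, not the test's implementation.
    for name, submodule in module.named_modules():
        if not isinstance(submodule, SUPPORTED_PYTORCH_LAYERS):
            continue
        if any(re.search(pattern, name) for pattern in DEFAULT_SKIP_MODULES_PATTERN):
            # Layers matching the skip patterns are left in the original (compute) dtype.
            assert submodule.weight.dtype == compute_dtype
        else:
            # Casted layers should be stored in the low-precision storage dtype.
            assert submodule.weight.dtype == storage_dtype
```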

tests/models/autoencoders/test_models_autoencoder_oobleck.py

Lines changed: 2 additions & 2 deletions
@@ -120,7 +120,7 @@ def test_set_attn_processor_for_determinism(self):
         "1. Make sure `nn::Module::to` works with `torch.nn.utils.weight_norm` wrapped convolution layer.\n"
         "2. Unskip this test."
     )
-    def test_layerwise_upcasting_inference(self):
+    def test_layerwise_casting_inference(self):
         pass
 
     @unittest.skip(
@@ -129,7 +129,7 @@ def test_layerwise_upcasting_inference(self):
         "1. Make sure `nn::Module::to` works with `torch.nn.utils.weight_norm` wrapped convolution layer.\n"
         "2. Unskip this test."
     )
-    def test_layerwise_upcasting_memory(self):
+    def test_layerwise_casting_memory(self):
         pass
 
 

tests/models/autoencoders/test_models_autoencoder_tiny.py

Lines changed: 2 additions & 2 deletions
@@ -178,15 +178,15 @@ def test_effective_gradient_checkpointing(self):
         "1. Change the forward pass to be dtype agnostic.\n"
         "2. Unskip this test."
     )
-    def test_layerwise_upcasting_inference(self):
+    def test_layerwise_casting_inference(self):
         pass
 
     @unittest.skip(
         "The forward pass of AutoencoderTiny creates a torch.float32 tensor. This causes inference in compute_dtype=torch.bfloat16 to fail. To fix:\n"
         "1. Change the forward pass to be dtype agnostic.\n"
         "2. Unskip this test."
     )
-    def test_layerwise_upcasting_memory(self):
+    def test_layerwise_casting_memory(self):
         pass
 
 
