update

a-r-r-o-w · a-r-r-o-w · commit 7c31bb03f34b · 2025-01-02T21:00:31.000+01:00
diff --git a/src/diffusers/models/hooks.py b/src/diffusers/models/hooks.py
@@ -272,19 +272,8 @@ class LayerwiseUpcastingGranularity(str, Enum):
     An enumeration class that defines the granularity of the layerwise upcasting process.
 
     Granularity can be one of the following:
-        - `DIFFUSERS_MODEL`:
-            Applies layerwise upcasting to the entire model at the highest diffusers modeling level. This will cast all
-            the layers of model to the specified storage dtype. This results in the lowest memory usage for storing the
-            model in memory, but may incur significant loss in quality because layers that perform normalization with
-            learned parameters (e.g., RMSNorm with elementwise affinity) are cast to a lower dtype, but this is known
-            to cause quality issues. This method will not reduce the memory required for the forward pass (which
-            comprises of intermediate activations and gradients) of a given modeling component, but may be useful in
-            cases like lowering the memory footprint of text encoders in a pipeline.
-        - `DIFFUSERS_BLOCK`:
-            TODO???
         - `DIFFUSERS_LAYER`:
-            Applies layerwise upcasting to the lower-level diffusers layers of the model. This is more granular than
-            the `DIFFUSERS_MODEL` level, but less granular than the `PYTORCH_LAYER` level. This method is applied to
+            Applies layerwise upcasting to the lower-level diffusers layers of the model. This method is applied to
             only those layers that are a group of linear layers, while excluding precision-critical layers like
             modulation and normalization layers.
         - `PYTORCH_LAYER`:
@@ -300,7 +289,6 @@ class LayerwiseUpcastingGranularity(str, Enum):
         lower precision, as this may lead to significant quality loss.
     """
 
-    DIFFUSERS_MODEL = "diffusers_model"
     DIFFUSERS_LAYER = "diffusers_layer"
     PYTORCH_LAYER = "pytorch_layer"
 
@@ -353,8 +341,6 @@ def apply_layerwise_upcasting(
     skip_modules_pattern: List[str] = [],
     skip_modules_classes: List[Type[torch.nn.Module]] = [],
 ) -> torch.nn.Module:
-    if granularity == LayerwiseUpcastingGranularity.DIFFUSERS_MODEL:
-        return _apply_layerwise_upcasting_diffusers_model(module, storage_dtype, compute_dtype)
     if granularity == LayerwiseUpcastingGranularity.DIFFUSERS_LAYER:
         return _apply_layerwise_upcasting_diffusers_layer(
             module, storage_dtype, compute_dtype, skip_modules_pattern, skip_modules_classes
@@ -365,21 +351,6 @@ def apply_layerwise_upcasting(
         )
 
 
-def _apply_layerwise_upcasting_diffusers_model(
-    module: torch.nn.Module,
-    storage_dtype: torch.dtype,
-    compute_dtype: torch.dtype,
-) -> torch.nn.Module:
-    from .modeling_utils import ModelMixin
-
-    if not isinstance(module, ModelMixin):
-        raise ValueError("The input module must be an instance of ModelMixin")
-
-    logger.debug(f'Applying layerwise upcasting to model "{module.__class__.__name__}"')
-    apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype)
-    return module
-
-
 def _apply_layerwise_upcasting_diffusers_layer(
     module: torch.nn.Module,
     storage_dtype: torch.dtype,