@@ -272,19 +272,8 @@ class LayerwiseUpcastingGranularity(str, Enum):
272272 An enumeration class that defines the granularity of the layerwise upcasting process.
273273
274274 Granularity can be one of the following:
275- - `DIFFUSERS_MODEL`:
276- Applies layerwise upcasting to the entire model at the highest diffusers modeling level. This will cast all
277- the layers of model to the specified storage dtype. This results in the lowest memory usage for storing the
278- model in memory, but may incur significant loss in quality because layers that perform normalization with
279- learned parameters (e.g., RMSNorm with elementwise affinity) are cast to a lower dtype, but this is known
280- to cause quality issues. This method will not reduce the memory required for the forward pass (which
281- comprises of intermediate activations and gradients) of a given modeling component, but may be useful in
282- cases like lowering the memory footprint of text encoders in a pipeline.
283- - `DIFFUSERS_BLOCK`:
284- TODO???
285275 - `DIFFUSERS_LAYER`:
286- Applies layerwise upcasting to the lower-level diffusers layers of the model. This is more granular than
287- the `DIFFUSERS_MODEL` level, but less granular than the `PYTORCH_LAYER` level. This method is applied to
276+ Applies layerwise upcasting to the lower-level diffusers layers of the model. This method is applied to
288277 only those layers that are a group of linear layers, while excluding precision-critical layers like
289278 modulation and normalization layers.
290279 - `PYTORCH_LAYER`:
@@ -300,7 +289,6 @@ class LayerwiseUpcastingGranularity(str, Enum):
300289 lower precision, as this may lead to significant quality loss.
301290 """
302291
303- DIFFUSERS_MODEL = "diffusers_model"
304292 DIFFUSERS_LAYER = "diffusers_layer"
305293 PYTORCH_LAYER = "pytorch_layer"
306294
@@ -353,8 +341,6 @@ def apply_layerwise_upcasting(
353341 skip_modules_pattern : List [str ] = [],
354342 skip_modules_classes : List [Type [torch .nn .Module ]] = [],
355343) -> torch .nn .Module :
356- if granularity == LayerwiseUpcastingGranularity .DIFFUSERS_MODEL :
357- return _apply_layerwise_upcasting_diffusers_model (module , storage_dtype , compute_dtype )
358344 if granularity == LayerwiseUpcastingGranularity .DIFFUSERS_LAYER :
359345 return _apply_layerwise_upcasting_diffusers_layer (
360346 module , storage_dtype , compute_dtype , skip_modules_pattern , skip_modules_classes
@@ -365,21 +351,6 @@ def apply_layerwise_upcasting(
365351 )
366352
367353
368- def _apply_layerwise_upcasting_diffusers_model (
369- module : torch .nn .Module ,
370- storage_dtype : torch .dtype ,
371- compute_dtype : torch .dtype ,
372- ) -> torch .nn .Module :
373- from .modeling_utils import ModelMixin
374-
375- if not isinstance (module , ModelMixin ):
376- raise ValueError ("The input module must be an instance of ModelMixin" )
377-
378- logger .debug (f'Applying layerwise upcasting to model "{ module .__class__ .__name__ } "' )
379- apply_layerwise_upcasting_hook (module , storage_dtype , compute_dtype )
380- return module
381-
382-
383354def _apply_layerwise_upcasting_diffusers_layer (
384355 module : torch .nn .Module ,
385356 storage_dtype : torch .dtype ,
0 commit comments