@@ -49,7 +49,7 @@ For Ada and higher-series GPUs, we recommend changing `torch_dtype` to `torch.bf
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

- from diffusers import FluxTransformer2DModel
+ from diffusers import AutoModel
from transformers import T5EncoderModel

quant_config = TransformersBitsAndBytesConfig(load_in_8bit=True,)
@@ -63,7 +63,7 @@ text_encoder_2_8bit = T5EncoderModel.from_pretrained(

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True,)

- transformer_8bit = FluxTransformer2DModel.from_pretrained(
+ transformer_8bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
@@ -74,7 +74,7 @@ transformer_8bit = FluxTransformer2DModel.from_pretrained(
By default, all the other modules such as `torch.nn.LayerNorm` are converted to `torch.float16`. You can change the data type of these modules with the `torch_dtype` parameter.

```diff
- transformer_8bit = FluxTransformer2DModel.from_pretrained(
+ transformer_8bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
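For reference, here is a minimal standalone sketch of what the finished call looks like once the `torch_dtype` override is added, using `torch.bfloat16` as recommended above for Ada and newer GPUs (the exact dtype is your choice):

```py
import torch
from diffusers import AutoModel, BitsAndBytesConfig

# Quantize the transformer weights to 8-bit; the remaining modules
# (e.g. LayerNorm) are kept in bfloat16 via torch_dtype.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

transformer_8bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
```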
@@ -133,7 +133,7 @@ For Ada and higher-series GPUs, we recommend changing `torch_dtype` to `torch.bf
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

- from diffusers import FluxTransformer2DModel
+ from diffusers import AutoModel
from transformers import T5EncoderModel

quant_config = TransformersBitsAndBytesConfig(load_in_4bit=True,)
@@ -147,7 +147,7 @@ text_encoder_2_4bit = T5EncoderModel.from_pretrained(

quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True,)

- transformer_4bit = FluxTransformer2DModel.from_pretrained(
+ transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
@@ -158,7 +158,7 @@ transformer_4bit = FluxTransformer2DModel.from_pretrained(
By default, all the other modules such as `torch.nn.LayerNorm` are converted to `torch.float16`. You can change the data type of these modules with the `torch_dtype` parameter.

```diff
- transformer_4bit = FluxTransformer2DModel.from_pretrained(
+ transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
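Once the text encoder and transformer are quantized, they are typically passed into the pipeline. A minimal sketch, assuming the `text_encoder_2_4bit` and `transformer_4bit` objects from the snippets above; `FluxPipeline` and the CPU-offload call are standard diffusers APIs, while the prompt and output handling are just placeholders:

```py
import torch
from diffusers import FluxPipeline

# Build the pipeline around the pre-loaded quantized components.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer_4bit,
    text_encoder_2=text_encoder_2_4bit,
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()

image = pipe("a photo of a cat holding a sign that says hello world").images[0]
image.save("flux-4bit.png")
```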
@@ -217,11 +217,11 @@ print(model.get_memory_footprint())
Quantized models can be loaded with the [`~ModelMixin.from_pretrained`] method without needing to specify the `quantization_config` parameters:

```py
- from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
+ from diffusers import AutoModel, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_4bit=True)

- model_4bit = FluxTransformer2DModel.from_pretrained(
+ model_4bit = AutoModel.from_pretrained(
    "hf-internal-testing/flux.1-dev-nf4-pkg", subfolder="transformer"
)
```
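A quick way to confirm that the prequantized checkpoint really loaded in 4-bit is to check its memory footprint (`get_memory_footprint` is the helper used earlier in this doc) and the storage dtype of a few parameters; this is only a sketch, relying on the fact that bitsandbytes packs 4-bit weights into `uint8` tensors:

```py
# Footprint should be roughly a quarter of the fp16 size for the quantized layers.
print(model_4bit.get_memory_footprint())

# Quantized weights show up with a packed uint8 storage dtype, while
# non-quantized modules (norms, etc.) keep their floating-point dtype.
for name, param in list(model_4bit.named_parameters())[:5]:
    print(name, param.dtype)
```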
@@ -243,13 +243,13 @@ An "outlier" is a hidden state value greater than a certain threshold, and these
To find the best threshold for your model, we recommend experimenting with the `llm_int8_threshold` parameter in [`BitsAndBytesConfig`]:

```py
- from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
+ from diffusers import AutoModel, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_8bit=True, llm_int8_threshold=10,
)

- model_8bit = FluxTransformer2DModel.from_pretrained(
+ model_8bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quantization_config,
@@ -305,7 +305,7 @@ NF4 is a 4-bit data type from the [QLoRA](https://hf.co/papers/2305.14314) paper
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

- from diffusers import FluxTransformer2DModel
+ from diffusers import AutoModel
from transformers import T5EncoderModel

quant_config = TransformersBitsAndBytesConfig(
@@ -325,7 +325,7 @@ quant_config = DiffusersBitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
)

- transformer_4bit = FluxTransformer2DModel.from_pretrained(
+ transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
@@ -343,7 +343,7 @@ Nested quantization is a technique that can save additional memory at no additio
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

- from diffusers import FluxTransformer2DModel
+ from diffusers import AutoModel
from transformers import T5EncoderModel

quant_config = TransformersBitsAndBytesConfig(
@@ -363,7 +363,7 @@ quant_config = DiffusersBitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
)

- transformer_4bit = FluxTransformer2DModel.from_pretrained(
+ transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
@@ -379,7 +379,7 @@ Once quantized, you can dequantize a model to its original precision, but this m
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

- from diffusers import FluxTransformer2DModel
+ from diffusers import AutoModel
from transformers import T5EncoderModel

quant_config = TransformersBitsAndBytesConfig(
@@ -399,7 +399,7 @@ quant_config = DiffusersBitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
)

- transformer_4bit = FluxTransformer2DModel.from_pretrained(
+ transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
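The call that actually performs the dequantization sits outside this hunk; as a rough sketch of how the section above is typically completed (assuming the `dequantize()` method this section describes, and enough memory to hold the full-precision weights):

```py
# Convert the 4-bit weights back to the original precision in place.
# This may cost a small amount of quality and requires enough memory
# for the dequantized model.
transformer_4bit.dequantize()
```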