@@ -80,27 +80,6 @@ By default, all the other modules such as `torch.nn.LayerNorm` are converted to
80 80 You can change the data type of these modules with the `torch_dtype` parameter.
8181
8282``` py
83- from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
84- from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
85-
86- from diffusers import FluxTransformer2DModel
87- from transformers import T5EncoderModel
88-
89- quant_config = TransformersBitsAndBytesConfig(
90- load_in_8bit = True ,
91- )
92-
93- text_encoder_2_8bit = T5EncoderModel.from_pretrained(
94- " black-forest-labs/FLUX.1-dev" ,
95- subfolder = " text_encoder_2" ,
96- quantization_config = quant_config,
97- torch_dtype = torch.float32,
98- )
99-
100- quant_config = DiffusersBitsAndBytesConfig(
101- load_in_8bit = True ,
102- )
103-
10483transformer_8bit = FluxTransformer2DModel.from_pretrained(
10584 " black-forest-labs/FLUX.1-dev" ,
10685 subfolder = " transformer" ,
@@ -111,6 +90,9 @@ transformer_8bit = FluxTransformer2DModel.from_pretrained(
11190
112 91 Let's generate an image using our quantized models.
11392
93+ Setting ` device_map="auto" ` automatically fills all available space on the GPU(s) first, then the
94+ CPU, and finally, the hard drive (the absolute slowest option) if there is still not enough memory.
95+
11496``` py
11597pipe = FluxPipeline.from_pretrained(
11698 " black-forest-labs/FLUX.1-dev" ,
@@ -187,27 +169,6 @@ By default, all the other modules such as `torch.nn.LayerNorm` are converted to
187 169 You can change the data type of these modules with the `torch_dtype` parameter.
188170
189171``` py
190- from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
191- from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
192-
193- from diffusers import FluxTransformer2DModel
194- from transformers import T5EncoderModel
195-
196- quant_config = TransformersBitsAndBytesConfig(
197- load_in_4bit = True ,
198- )
199-
200- text_encoder_2_4bit = T5EncoderModel.from_pretrained(
201- " black-forest-labs/FLUX.1-dev" ,
202- subfolder = " text_encoder_2" ,
203- quantization_config = quant_config,
204- torch_dtype = torch.float32,
205- )
206-
207- quant_config = DiffusersBitsAndBytesConfig(
208- load_in_4bit = True ,
209- )
210-
211172transformer_4bit = FluxTransformer2DModel.from_pretrained(
212173 " black-forest-labs/FLUX.1-dev" ,
213174 subfolder = " transformer" ,
@@ -218,6 +179,9 @@ transformer_4bit = FluxTransformer2DModel.from_pretrained(
218179
219 180 Let's generate an image using our quantized models.
220181
182+ Setting ` device_map="auto" ` automatically fills all available space on the GPU(s) first, then the
183+ CPU, and finally, the hard drive (the absolute slowest option) if there is still not enough memory.
184+
221185``` py
222186pipe = FluxPipeline.from_pretrained(
223187 " black-forest-labs/FLUX.1-dev" ,
0 commit comments