Commit 7b09d62

bitsandbytes
1 parent 61287de commit 7b09d62

1 file changed: +16 -16 lines changed


docs/source/en/quantization/bitsandbytes.md

Lines changed: 16 additions & 16 deletions
@@ -49,7 +49,7 @@ For Ada and higher-series GPUs. we recommend changing `torch_dtype` to `torch.bf
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

-from diffusers import FluxTransformer2DModel
+from diffusers import AutoModel
 from transformers import T5EncoderModel

 quant_config = TransformersBitsAndBytesConfig(load_in_8bit=True,)
@@ -63,7 +63,7 @@ text_encoder_2_8bit = T5EncoderModel.from_pretrained(

 quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True,)

-transformer_8bit = FluxTransformer2DModel.from_pretrained(
+transformer_8bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
@@ -74,7 +74,7 @@ transformer_8bit = FluxTransformer2DModel.from_pretrained(
 By default, all the other modules such as `torch.nn.LayerNorm` are converted to `torch.float16`. You can change the data type of these modules with the `torch_dtype` parameter.

 ```diff
-transformer_8bit = FluxTransformer2DModel.from_pretrained(
+transformer_8bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
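
Not part of this commit: on the docs page, these 8-bit components are then assembled into a pipeline. A minimal sketch of that step, assuming the `transformer_8bit` and `text_encoder_2_8bit` objects from the hunks above:

```py
import torch
from diffusers import FluxPipeline

# Sketch: plug the 8-bit quantized components from the hunks above into the
# full pipeline; the remaining modules load in half precision.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer_8bit,
    text_encoder_2=text_encoder_2_8bit,
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()
image = pipe("a photo of an astronaut riding a horse on the moon").images[0]
```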
@@ -133,7 +133,7 @@ For Ada and higher-series GPUs. we recommend changing `torch_dtype` to `torch.bf
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

-from diffusers import FluxTransformer2DModel
+from diffusers import AutoModel
 from transformers import T5EncoderModel

 quant_config = TransformersBitsAndBytesConfig(load_in_4bit=True,)
@@ -147,7 +147,7 @@ text_encoder_2_4bit = T5EncoderModel.from_pretrained(

 quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True,)

-transformer_4bit = FluxTransformer2DModel.from_pretrained(
+transformer_4bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
@@ -158,7 +158,7 @@ transformer_4bit = FluxTransformer2DModel.from_pretrained(
 By default, all the other modules such as `torch.nn.LayerNorm` are converted to `torch.float16`. You can change the data type of these modules with the `torch_dtype` parameter.

 ```diff
-transformer_4bit = FluxTransformer2DModel.from_pretrained(
+transformer_4bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
@@ -217,11 +217,11 @@ print(model.get_memory_footprint())
 Quantized models can be loaded from the [`~ModelMixin.from_pretrained`] method without needing to specify the `quantization_config` parameters:

 ```py
-from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
+from diffusers import AutoModel, BitsAndBytesConfig

 quantization_config = BitsAndBytesConfig(load_in_4bit=True)

-model_4bit = FluxTransformer2DModel.from_pretrained(
+model_4bit = AutoModel.from_pretrained(
     "hf-internal-testing/flux.1-dev-nf4-pkg", subfolder="transformer"
 )
 ```
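
As a quick check (also not part of this commit), the quantization settings travel with the pre-quantized checkpoint, so the `get_memory_footprint` call from the hunk header applies here too; a minimal sketch:

```py
from diffusers import AutoModel

# Sketch: reload the pre-quantized NF4 checkpoint from the hunk above and
# confirm the 4-bit weights shrink the memory footprint.
model_4bit = AutoModel.from_pretrained(
    "hf-internal-testing/flux.1-dev-nf4-pkg", subfolder="transformer"
)
print(model_4bit.get_memory_footprint())
```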
@@ -243,13 +243,13 @@ An "outlier" is a hidden state value greater than a certain threshold, and these
 To find the best threshold for your model, we recommend experimenting with the `llm_int8_threshold` parameter in [`BitsAndBytesConfig`]:

 ```py
-from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
+from diffusers import AutoModel, BitsAndBytesConfig

 quantization_config = BitsAndBytesConfig(
     load_in_8bit=True, llm_int8_threshold=10,
 )

-model_8bit = FluxTransformer2DModel.from_pretrained(
+model_8bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quantization_config,
@@ -305,7 +305,7 @@ NF4 is a 4-bit data type from the [QLoRA](https://hf.co/papers/2305.14314) paper
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

-from diffusers import FluxTransformer2DModel
+from diffusers import AutoModel
 from transformers import T5EncoderModel

 quant_config = TransformersBitsAndBytesConfig(
@@ -325,7 +325,7 @@ quant_config = DiffusersBitsAndBytesConfig(
     bnb_4bit_quant_type="nf4",
 )

-transformer_4bit = FluxTransformer2DModel.from_pretrained(
+transformer_4bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
@@ -343,7 +343,7 @@ Nested quantization is a technique that can save additional memory at no additio
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

-from diffusers import FluxTransformer2DModel
+from diffusers import AutoModel
 from transformers import T5EncoderModel

 quant_config = TransformersBitsAndBytesConfig(
@@ -363,7 +363,7 @@ quant_config = DiffusersBitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
 )

-transformer_4bit = FluxTransformer2DModel.from_pretrained(
+transformer_4bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
@@ -379,7 +379,7 @@ Once quantized, you can dequantize a model to its original precision, but this m
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

-from diffusers import FluxTransformer2DModel
+from diffusers import AutoModel
 from transformers import T5EncoderModel

 quant_config = TransformersBitsAndBytesConfig(
@@ -399,7 +399,7 @@ quant_config = DiffusersBitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
 )

-transformer_4bit = FluxTransformer2DModel.from_pretrained(
+transformer_4bit = AutoModel.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     subfolder="transformer",
     quantization_config=quant_config,
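
Taken together, every hunk makes the same substitution: the concrete `FluxTransformer2DModel` class is swapped for the generic `AutoModel` loader, which resolves the concrete class from the checkpoint's config. A minimal sketch of the resulting pattern, reusing the 4-bit config from the hunks above:

```py
import torch
from diffusers import AutoModel, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_4bit=True)

# Before this commit: from diffusers import FluxTransformer2DModel
# After: AutoModel infers the concrete class from the checkpoint, so the
# same loading code works across model families.
transformer_4bit = AutoModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
```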
