@@ -40,22 +40,25 @@ The quantized HunyuanVideo model below requires ~14GB of VRAM.
 
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # model-offloading and tiling
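In the new API above, `from_pretrained` applies the `bitsandbytes_4bit` backend only to the components named in `components_to_quantize`, so the text encoders and VAE keep their `torch_dtype` weights. A minimal sketch of how the loaded pipeline is then used, continuing from the `# model-offloading and tiling` comment; the prompt is a placeholder, not taken from this diff:

```py
# Continuation sketch, assuming the pipeline built in the hunk above.
pipeline.enable_model_cpu_offload()  # keep only the active component on the GPU
pipeline.vae.enable_tiling()         # decode video latents in tiles to cap VRAM

video = pipeline(
    prompt="A cat walks on the grass, realistic style",  # placeholder prompt
    num_frames=61,
).frames[0]
export_to_video(video, "output.mp4", fps=15)  # fps=15 matches the third hunk's context
```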
@@ -74,22 +77,25 @@ Compilation is slow the first time but subsequent calls to the pipeline are fast
 
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # model-offloading and tiling
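This hunk makes the same config swap inside the `torch.compile` example. A hedged sketch of the compile step that the surrounding example presumably performs on the quantized transformer; the `mode` and `fullgraph` settings are assumptions, not taken from this diff:

```py
# Sketch: compile the quantized transformer once after loading. The first call
# pays the compilation cost; subsequent pipeline calls reuse the compiled graph.
pipeline.transformer = torch.compile(
    pipeline.transformer, mode="max-autotune", fullgraph=True  # assumed settings
)
```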
@@ -116,22 +122,25 @@ export_to_video(video, "output.mp4", fps=15)
 
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # load LoRA weights
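The final hunk ends at the `# load LoRA weights` step. A hypothetical sketch of that step via the standard `load_lora_weights` API; the repo id and adapter name are placeholders, since the diff does not show them:

```py
# Hypothetical: attach a LoRA adapter to the quantized pipeline.
# The repo id and adapter_name below are placeholders, not taken from this diff.
pipeline.load_lora_weights("some-user/a-hunyuanvideo-lora", adapter_name="example")
```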