## Using `torch.compile` with Quanto

Currently the Quanto backend supports `torch.compile` for the following quantization types:

- `int8` weights

```python
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel, QuantoConfig

model_id = "black-forest-labs/FLUX.1-dev"
quantization_config = QuantoConfig(weights="int8")

# Quantize the transformer to int8 weights at load time, then compile it.
transformer = FluxTransformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
)
transformer = torch.compile(transformer, mode="max-autotune", fullgraph=True)

pipe = FluxPipeline.from_pretrained(
    model_id, transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.to("cuda")
images = pipe("A cat holding a sign that says hello").images[0]
images.save("flux-quanto.png")
```
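
The first call to the compiled pipeline is expected to be much slower than later calls, because it pays the one-time cost of graph capture and `max-autotune` kernel selection. Below is a minimal timing sketch, assuming the `pipe` object from the snippet above; the prompt and `num_inference_steps` value are illustrative.

```python
import time

prompt = "A cat holding a sign that says hello"

# The first run pays the one-time torch.compile cost (graph capture and
# kernel autotuning); the remaining runs reflect steady-state latency.
for i in range(3):
    start = time.perf_counter()
    pipe(prompt, num_inference_steps=4)
    print(f"run {i}: {time.perf_counter() - start:.1f}s")
```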
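
To confirm that the transformer is actually running with Quanto-quantized linear layers, you can inspect module class names. This is a sketch that assumes optimum-quanto names its quantized linear module `QLinear`, which is an internal detail rather than a documented guarantee.

```python
# Count modules whose class name marks them as Quanto-quantized linears.
# NOTE: matching on the name "QLinear" is an assumption about
# optimum-quanto internals, not a public API.
qlayers = [
    name
    for name, module in pipe.transformer.named_modules()
    if type(module).__name__ == "QLinear"
]
print(f"Found {len(qlayers)} quantized linear layers")
```
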
## Supported Quantization Types