# coding=utf-8
# Copyright 2025 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import gc
import tempfile
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from diffusers import (
    AutoencoderKL,
    ComponentSpec,
    ComponentsManager,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler,
    HeunDiscreteScheduler,
    LCMScheduler,
    ModularPipeline,
    StableDiffusionXLImg2ImgPipeline,
    StableDiffusionXLPipeline,
    UNet2DConditionModel,
    UniPCMultistepScheduler,
)
from diffusers.utils.testing_utils import (
    backend_empty_cache,
    enable_full_determinism,
    load_image,
    numpy_cosine_similarity_distance,
    require_torch_accelerator,
    slow,
    torch_device,
)

from ..pipeline_params import (
    IMAGE_INPAINTING_BATCH_PARAMS,
    IMAGE_INPAINTING_PARAMS,
    TEXT_TO_IMAGE_BATCH_PARAMS,
    TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS,
    TEXT_TO_IMAGE_IMAGE_PARAMS,
    TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
    IPAdapterTesterMixin,
    PipelineLatentTesterMixin,
    PipelineTesterMixin,
    SDFunctionTesterMixin,
)


enable_full_determinism()


class StableDiffusionXLModularPipelineFastTests(
    SDFunctionTesterMixin,
    IPAdapterTesterMixin,
    PipelineLatentTesterMixin,
    PipelineTesterMixin,
    unittest.TestCase,
):
    pipeline_class = StableDiffusionXLPipeline
    params = (TEXT_TO_IMAGE_PARAMS | IMAGE_INPAINTING_PARAMS) - {"guidance_scale"}
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS | IMAGE_INPAINTING_BATCH_PARAMS
    image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    test_layerwise_casting = False
    test_group_offloading = False

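    # Deterministic dummy inputs shared by the fast tests below; a fresh, seeded
    # generator is created per call so every forward pass starts from the same noise.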
    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": generator,
            "num_inference_steps": 2,
            "output_type": "np",
        }
        return inputs

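    # Smoke test: run the tiny SDXL modular pipeline end to end with its default
    # scheduler and compare a corner slice of the output against reference values.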
    def test_stable_diffusion_xl_euler(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.47])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

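    # Same pipeline, but with the scheduler hot-swapped to LCMScheduler through
    # update_components() before running inference.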
    def test_stable_diffusion_xl_euler_lcm(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe.update_components(scheduler=LCMScheduler.from_config(sd_pipe.scheduler.config))
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

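    # LCM again, but stepping through explicitly supplied `timesteps` instead of
    # `num_inference_steps`; the result should match the two-step schedule above.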
    def test_stable_diffusion_xl_euler_lcm_custom_timesteps(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe.update_components(scheduler=LCMScheduler.from_config(sd_pipe.scheduler.config))
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        del inputs["num_inference_steps"]
        inputs["timesteps"] = [999, 499]
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

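    # Auto CPU offload via a ComponentsManager should be numerically transparent:
    # the offloaded pipeline must reproduce the plain on-device pipeline's output.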
    @require_torch_accelerator
    def test_stable_diffusion_xl_offloads(self):
        pipes = []
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)
        pipes.append(sd_pipe)

        cm = ComponentsManager()
        cm.enable_auto_cpu_offload(device=torch_device)
        # no .to() here: device placement is left to the ComponentsManager's offload hooks
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe", components_manager=cm)
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs, output="images")

            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3

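    # SDXL has two text encoders; `prompt_2` / `negative_prompt_2` feed the second one.
    # Duplicating the first prompt must be a no-op, while a different prompt must not be.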
    def test_stable_diffusion_xl_multi_prompts(self):
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)

        # forward with single prompt
        inputs = self.get_dummy_inputs(torch_device)
        image = sd_pipe(**inputs, output="images")
        image_slice_1 = image[0, -3:, -3:, -1]

        # forward with same prompt duplicated
        inputs = self.get_dummy_inputs(torch_device)
        inputs["prompt_2"] = inputs["prompt"]
        image = sd_pipe(**inputs, output="images")
        image_slice_2 = image[0, -3:, -3:, -1]

        # ensure the results are equal
        assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4

        # forward with different prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["prompt_2"] = "different prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_3 = image[0, -3:, -3:, -1]

        # ensure the results are not equal
        assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4

        # manually set a negative_prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_1 = image[0, -3:, -3:, -1]

        # forward with same negative_prompt duplicated
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        inputs["negative_prompt_2"] = inputs["negative_prompt"]
        image = sd_pipe(**inputs, output="images")
        image_slice_2 = image[0, -3:, -3:, -1]

        # ensure the results are equal
        assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4

        # forward with different negative_prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        inputs["negative_prompt_2"] = "different negative prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_3 = image[0, -3:, -3:, -1]

        # ensure the results are not equal
        assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4

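    # SDXL micro-conditioning: supplying negative size/crop conditions should
    # measurably change the generated image.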
    def test_stable_diffusion_xl_negative_conditions(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice_with_no_neg_cond = image[0, -3:, -3:, -1]

        image = sd_pipe(
            **inputs,
            negative_original_size=(512, 512),
            negative_crops_coords_top_left=(0, 0),
            negative_target_size=(1024, 1024),
            output="images",
        )
        image_slice_with_neg_cond = image[0, -3:, -3:, -1]

        self.assertTrue(np.abs(image_slice_with_no_neg_cond - image_slice_with_neg_cond).max() > 1e-2)

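    # Round-trip the pipeline through save_pretrained()/from_pretrained() and check
    # that the reloaded pipeline reproduces the original output.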
    def test_stable_diffusion_xl_save_from_pretrained(self):
        pipes = []
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)
        pipes.append(sd_pipe)

        with tempfile.TemporaryDirectory() as tmpdirname:
            sd_pipe.save_pretrained(tmpdirname)
            sd_pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device)
            pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            pipe.unet.set_default_attn_processor()

            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs, output="images")

            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3