Skip to content

Commit b165cf3

Browse files
committed
Rearrange the params into groups: default params / image params / batch params / callback params
1 parent 6398fbc commit b165cf3

File tree

2 files changed

+287
-26
lines changed

2 files changed

+287
-26
lines changed

tests/pipelines/pipeline_params.py

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,6 @@
2020
]
2121
)
2222

23-
TEXT_TO_IMAGE_BATCH_PARAMS = frozenset(["prompt", "negative_prompt"])
24-
25-
TEXT_TO_IMAGE_IMAGE_PARAMS = frozenset([])
26-
27-
IMAGE_TO_IMAGE_IMAGE_PARAMS = frozenset(["image"])
28-
2923
IMAGE_VARIATION_PARAMS = frozenset(
3024
[
3125
"image",
@@ -35,8 +29,6 @@
3529
]
3630
)
3731

38-
IMAGE_VARIATION_BATCH_PARAMS = frozenset(["image"])
39-
4032
TEXT_GUIDED_IMAGE_VARIATION_PARAMS = frozenset(
4133
[
4234
"prompt",
@@ -50,8 +42,6 @@
5042
]
5143
)
5244

53-
TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS = frozenset(["prompt", "image", "negative_prompt"])
54-
5545
TEXT_GUIDED_IMAGE_INPAINTING_PARAMS = frozenset(
5646
[
5747
# Text guided image variation with an image mask
@@ -67,8 +57,6 @@
6757
]
6858
)
6959

70-
TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["prompt", "image", "mask_image", "negative_prompt"])
71-
7260
IMAGE_INPAINTING_PARAMS = frozenset(
7361
[
7462
# image variation with an image mask
@@ -80,8 +68,6 @@
8068
]
8169
)
8270

83-
IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["image", "mask_image"])
84-
8571
IMAGE_GUIDED_IMAGE_INPAINTING_PARAMS = frozenset(
8672
[
8773
"example_image",
@@ -93,20 +79,12 @@
9379
]
9480
)
9581

96-
IMAGE_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["example_image", "image", "mask_image"])
82+
UNCONDITIONAL_IMAGE_GENERATION_PARAMS = frozenset(["batch_size"])
9783

9884
CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS = frozenset(["class_labels"])
9985

10086
CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS = frozenset(["class_labels"])
10187

102-
UNCONDITIONAL_IMAGE_GENERATION_PARAMS = frozenset(["batch_size"])
103-
104-
UNCONDITIONAL_IMAGE_GENERATION_BATCH_PARAMS = frozenset([])
105-
106-
UNCONDITIONAL_AUDIO_GENERATION_PARAMS = frozenset(["batch_size"])
107-
108-
UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS = frozenset([])
109-
11088
TEXT_TO_AUDIO_PARAMS = frozenset(
11189
[
11290
"prompt",
@@ -119,11 +97,38 @@
11997
]
12098
)
12199

122-
TEXT_TO_AUDIO_BATCH_PARAMS = frozenset(["prompt", "negative_prompt"])
123100
TOKENS_TO_AUDIO_GENERATION_PARAMS = frozenset(["input_tokens"])
124101

125-
TOKENS_TO_AUDIO_GENERATION_BATCH_PARAMS = frozenset(["input_tokens"])
102+
UNCONDITIONAL_AUDIO_GENERATION_PARAMS = frozenset(["batch_size"])
103+
104+
# image params
105+
TEXT_TO_IMAGE_IMAGE_PARAMS = frozenset([])
106+
107+
IMAGE_TO_IMAGE_IMAGE_PARAMS = frozenset(["image"])
108+
109+
110+
# batch params
111+
TEXT_TO_IMAGE_BATCH_PARAMS = frozenset(["prompt", "negative_prompt"])
112+
113+
IMAGE_VARIATION_BATCH_PARAMS = frozenset(["image"])
114+
115+
TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS = frozenset(["prompt", "image", "negative_prompt"])
116+
117+
TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["prompt", "image", "mask_image", "negative_prompt"])
118+
119+
IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["image", "mask_image"])
126120

127-
TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS = frozenset(["prompt_embeds"])
121+
IMAGE_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset(["example_image", "image", "mask_image"])
122+
123+
UNCONDITIONAL_IMAGE_GENERATION_BATCH_PARAMS = frozenset([])
124+
125+
UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS = frozenset([])
126+
127+
TEXT_TO_AUDIO_BATCH_PARAMS = frozenset(["prompt", "negative_prompt"])
128+
129+
TOKENS_TO_AUDIO_GENERATION_BATCH_PARAMS = frozenset(["input_tokens"])
128130

129131
VIDEO_TO_VIDEO_BATCH_PARAMS = frozenset(["prompt", "negative_prompt", "video"])
132+
133+
# callback params
134+
TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS = frozenset(["prompt_embeds"])
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
# coding=utf-8
2+
# Copyright 2025 HuggingFace Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import copy
import gc
import tempfile
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import (
    AutoencoderKL,
    ComponentsManager,
    ComponentSpec,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler,
    HeunDiscreteScheduler,
    LCMScheduler,
    ModularPipeline,
    StableDiffusionXLImg2ImgPipeline,
    StableDiffusionXLPipeline,
    UNet2DConditionModel,
    UniPCMultistepScheduler,
)
from diffusers.utils.testing_utils import (
    backend_empty_cache,
    enable_full_determinism,
    load_image,
    numpy_cosine_similarity_distance,
    require_torch_accelerator,
    slow,
    torch_device,
)

from ..pipeline_params import (
    IMAGE_INPAINTING_BATCH_PARAMS,
    IMAGE_INPAINTING_PARAMS,
    TEXT_TO_IMAGE_BATCH_PARAMS,
    TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS,
    TEXT_TO_IMAGE_IMAGE_PARAMS,
    TEXT_TO_IMAGE_PARAMS,
)
from ..test_pipelines_common import (
    IPAdapterTesterMixin,
    PipelineLatentTesterMixin,
    PipelineTesterMixin,
    SDFunctionTesterMixin,
)
62+
63+
64+
# Runs once at import time, before any test class below is defined.
# NOTE(review): presumably switches torch/CUDA into deterministic mode so the hard-coded
# expected slices below are reproducible — confirm in diffusers.utils.testing_utils.
enable_full_determinism()
65+
66+
67+
class StableDiffusionXLModularPipelineFastTests(
    SDFunctionTesterMixin,
    IPAdapterTesterMixin,
    PipelineLatentTesterMixin,
    PipelineTesterMixin,
    unittest.TestCase,
):
    """Fast tests that run the `hf-internal-testing/tiny-sd-pipe` checkpoint through `ModularPipeline`.

    Every test loads the pipeline with `ModularPipeline.from_pretrained`, calls it with
    `output="images"` and inspects the bottom-right 3x3 patch of the last channel of the
    first returned image.
    """

    # NOTE(review): the tests below instantiate `ModularPipeline`, while `pipeline_class`
    # points at `StableDiffusionXLPipeline`, and no `get_dummy_components` is defined here.
    # The inherited mixin tests presumably rely on both — confirm before enabling them.
    pipeline_class = StableDiffusionXLPipeline
    params = (TEXT_TO_IMAGE_PARAMS | IMAGE_INPAINTING_PARAMS) - {"guidance_scale"}
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS | IMAGE_INPAINTING_BATCH_PARAMS
    image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    test_layerwise_casting = False
    test_group_offloading = False

    def get_dummy_inputs(self, device, seed=0):
        """Return the minimal keyword arguments for one pipeline call.

        Args:
            device: Device the random generator is created on. For devices whose string
                form starts with "mps" the global generator returned by
                `torch.manual_seed` is used instead of a device-bound `torch.Generator`.
            seed: Seed applied to the generator.

        Returns:
            A dict with `prompt`, `generator`, `num_inference_steps` and `output_type`.
        """
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": generator,
            "num_inference_steps": 2,
            "output_type": "np",
        }
        return inputs

    def test_stable_diffusion_xl_euler(self):
        """Check the output of the checkpoint's own scheduler against a reference slice."""
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.47])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_xl_euler_lcm(self):
        """Swap in an `LCMScheduler` built from the current scheduler config and check the output."""
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe.update_components(scheduler=LCMScheduler.from_config(sd_pipe.scheduler.config))
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_xl_euler_lcm_custom_timesteps(self):
        """Same as the LCM test, but passes explicit `timesteps` instead of `num_inference_steps`."""
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe.update_components(scheduler=LCMScheduler.from_config(sd_pipe.scheduler.config))
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        # The step count is replaced by an explicit two-entry schedule.
        del inputs["num_inference_steps"]
        inputs["timesteps"] = [999, 499]
        image = sd_pipe(**inputs, output="images")
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    @require_torch_accelerator
    def test_stable_diffusion_xl_offloads(self):
        """Compare a plain pipeline with one loaded through an auto-offloading `ComponentsManager`."""
        pipes = []
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)
        pipes.append(sd_pipe)

        cm = ComponentsManager()
        cm.enable_auto_cpu_offload(device=torch_device)
        sd_pipe = ModularPipeline.from_pretrained(
            "hf-internal-testing/tiny-sd-pipe", components_manager=cm
        ).to(torch_device)
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs, output="images")

            image_slices.append(image[0, -3:, -3:, -1].flatten())

        # Compare every variant against the baseline. Only two pipelines are built above,
        # so indexing a fixed third slice would raise IndexError.
        baseline = image_slices[0]
        for candidate in image_slices[1:]:
            assert np.abs(baseline - candidate).max() < 1e-3

    def test_stable_diffusion_xl_multi_prompts(self):
        """Check how `prompt_2` / `negative_prompt_2` affect the output relative to the primary prompts."""
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)

        # forward with single prompt
        inputs = self.get_dummy_inputs(torch_device)
        # With `output="images"` the call result is indexed directly, as in the other tests
        # of this class (no `.images` attribute access).
        image = sd_pipe(**inputs, output="images")
        image_slice_1 = image[0, -3:, -3:, -1]

        # forward with same prompt duplicated
        inputs = self.get_dummy_inputs(torch_device)
        inputs["prompt_2"] = inputs["prompt"]
        image = sd_pipe(**inputs, output="images")
        image_slice_2 = image[0, -3:, -3:, -1]

        # ensure the results are equal
        assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4

        # forward with different prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["prompt_2"] = "different prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_3 = image[0, -3:, -3:, -1]

        # ensure the results are not equal
        assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4

        # manually set a negative_prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_1 = image[0, -3:, -3:, -1]

        # forward with same negative_prompt duplicated
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        inputs["negative_prompt_2"] = inputs["negative_prompt"]
        image = sd_pipe(**inputs, output="images")
        image_slice_2 = image[0, -3:, -3:, -1]

        # ensure the results are equal
        assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4

        # forward with different negative_prompt
        inputs = self.get_dummy_inputs(torch_device)
        inputs["negative_prompt"] = "negative prompt"
        inputs["negative_prompt_2"] = "different negative prompt"
        image = sd_pipe(**inputs, output="images")
        image_slice_3 = image[0, -3:, -3:, -1]

        # ensure the results are not equal
        assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4

    def test_stable_diffusion_xl_negative_conditions(self):
        """Check that passing negative size/crop conditions changes the generated image."""
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        # Load straight onto the CPU; a prior move to `torch_device` would be undone immediately.
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe")
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs, output="images")
        image_slice_with_no_neg_cond = image[0, -3:, -3:, -1]

        # NOTE(review): `inputs` (including its generator) is reused for this second call, so
        # the generator state has presumably advanced. Confirm the difference asserted below
        # comes from the negative conditions rather than from different initial noise.
        image = sd_pipe(
            **inputs,
            negative_original_size=(512, 512),
            negative_crops_coords_top_left=(0, 0),
            negative_target_size=(1024, 1024),
            output="images",
        )
        image_slice_with_neg_cond = image[0, -3:, -3:, -1]

        self.assertTrue(np.abs(image_slice_with_no_neg_cond - image_slice_with_neg_cond).max() > 1e-2)

    def test_stable_diffusion_xl_save_from_pretrained(self):
        """Check that a pipeline saved to disk and reloaded reproduces the original output."""
        pipes = []
        sd_pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-sd-pipe").to(torch_device)
        pipes.append(sd_pipe)

        with tempfile.TemporaryDirectory() as tmpdirname:
            sd_pipe.save_pretrained(tmpdirname)
            # Reload while the temporary directory still exists.
            sd_pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device)
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            pipe.unet.set_default_attn_processor()

            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs, output="images")

            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3

0 commit comments

Comments
 (0)