@@ -126,6 +126,8 @@ class StableDiffusionPipelineFastTests(
126
126
callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS
127
127
128
128
def get_dummy_components (self , time_cond_proj_dim = None ):
129
+ cross_attention_dim = 8
130
+
129
131
torch .manual_seed (0 )
130
132
unet = UNet2DConditionModel (
131
133
block_out_channels = (4 , 8 ),
@@ -136,7 +138,7 @@ def get_dummy_components(self, time_cond_proj_dim=None):
136
138
out_channels = 4 ,
137
139
down_block_types = ("DownBlock2D" , "CrossAttnDownBlock2D" ),
138
140
up_block_types = ("CrossAttnUpBlock2D" , "UpBlock2D" ),
139
- cross_attention_dim = 32 ,
141
+ cross_attention_dim = cross_attention_dim ,
140
142
norm_num_groups = 2 ,
141
143
)
142
144
scheduler = DDIMScheduler (
@@ -160,11 +162,11 @@ def get_dummy_components(self, time_cond_proj_dim=None):
160
162
text_encoder_config = CLIPTextConfig (
161
163
bos_token_id = 0 ,
162
164
eos_token_id = 2 ,
163
- hidden_size = 32 ,
164
- intermediate_size = 64 ,
165
+ hidden_size = cross_attention_dim ,
166
+ intermediate_size = 16 ,
165
167
layer_norm_eps = 1e-05 ,
166
- num_attention_heads = 8 ,
167
- num_hidden_layers = 3 ,
168
+ num_attention_heads = 2 ,
169
+ num_hidden_layers = 2 ,
168
170
pad_token_id = 1 ,
169
171
vocab_size = 1000 ,
170
172
)
@@ -212,7 +214,7 @@ def test_stable_diffusion_ddim(self):
212
214
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
213
215
214
216
assert image .shape == (1 , 64 , 64 , 3 )
215
- expected_slice = np .array ([0.3203 , 0.4555 , 0.4711 , 0.3505 , 0.3973 , 0.4650 , 0.5137 , 0.3392 , 0.4045 ])
217
+ expected_slice = np .array ([0.1763 , 0.4776 , 0.4986 , 0.2566 , 0.3802 , 0.4596 , 0.5363 , 0.3277 , 0.3949 ])
216
218
217
219
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
218
220
@@ -232,7 +234,7 @@ def test_stable_diffusion_lcm(self):
232
234
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
233
235
234
236
assert image .shape == (1 , 64 , 64 , 3 )
235
- expected_slice = np .array ([0.3454 , 0.5349 , 0.5185 , 0.2808 , 0.4509 , 0.4612 , 0.4655 , 0.3601 , 0.4315 ])
237
+ expected_slice = np .array ([0.2368 , 0.4900 , 0.5019 , 0.2723 , 0.4473 , 0.4578 , 0.4551 , 0.3532 , 0.4133 ])
236
238
237
239
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
238
240
@@ -254,7 +256,7 @@ def test_stable_diffusion_lcm_custom_timesteps(self):
254
256
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
255
257
256
258
assert image .shape == (1 , 64 , 64 , 3 )
257
- expected_slice = np .array ([0.3454 , 0.5349 , 0.5185 , 0.2808 , 0.4509 , 0.4612 , 0.4655 , 0.3601 , 0.4315 ])
259
+ expected_slice = np .array ([0.2368 , 0.4900 , 0.5019 , 0.2723 , 0.4473 , 0.4578 , 0.4551 , 0.3532 , 0.4133 ])
258
260
259
261
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
260
262
@@ -373,12 +375,6 @@ def test_stable_diffusion_prompt_embeds_with_plain_negative_prompt_list(self):
373
375
374
376
assert np .abs (image_slice_1 .flatten () - image_slice_2 .flatten ()).max () < 1e-4
375
377
376
- def test_ip_adapter_single (self ):
377
- expected_pipe_slice = None
378
- if torch_device == "cpu" :
379
- expected_pipe_slice = np .array ([0.3203 , 0.4555 , 0.4711 , 0.3505 , 0.3973 , 0.4650 , 0.5137 , 0.3392 , 0.4045 ])
380
- return super ().test_ip_adapter_single (expected_pipe_slice = expected_pipe_slice )
381
-
382
378
def test_stable_diffusion_ddim_factor_8 (self ):
383
379
device = "cpu" # ensure determinism for the device-dependent torch.Generator
384
380
@@ -394,7 +390,7 @@ def test_stable_diffusion_ddim_factor_8(self):
394
390
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
395
391
396
392
assert image .shape == (1 , 136 , 136 , 3 )
397
- expected_slice = np .array ([0.4346 , 0.5621 , 0.5016 , 0.3926 , 0.4533 , 0.4134 , 0.5625 , 0.5632 , 0.5265 ])
393
+ expected_slice = np .array ([0.4720 , 0.5426 , 0.5160 , 0.3961 , 0.4696 , 0.4296 , 0.5738 , 0.5888 , 0.5481 ])
398
394
399
395
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
400
396
@@ -412,7 +408,7 @@ def test_stable_diffusion_pndm(self):
412
408
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
413
409
414
410
assert image .shape == (1 , 64 , 64 , 3 )
415
- expected_slice = np .array ([0.3411 , 0.5032 , 0.4704 , 0.3135 , 0.4323 , 0.4740 , 0.5150 , 0.3498 , 0.4022 ])
411
+ expected_slice = np .array ([0.1941 , 0.4748 , 0.4880 , 0.2222 , 0.4221 , 0.4545 , 0.5604 , 0.3488 , 0.3902 ])
416
412
417
413
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
418
414
@@ -452,7 +448,7 @@ def test_stable_diffusion_k_lms(self):
452
448
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
453
449
454
450
assert image .shape == (1 , 64 , 64 , 3 )
455
- expected_slice = np .array ([0.3149 , 0.5246 , 0.4796 , 0.3218 , 0.4469 , 0.4729 , 0.5151 , 0.3597 , 0.3954 ])
451
+ expected_slice = np .array ([0.2681 , 0.4785 , 0.4857 , 0.2426 , 0.4473 , 0.4481 , 0.5610 , 0.3676 , 0.3855 ])
456
452
457
453
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
458
454
@@ -471,7 +467,7 @@ def test_stable_diffusion_k_euler_ancestral(self):
471
467
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
472
468
473
469
assert image .shape == (1 , 64 , 64 , 3 )
474
- expected_slice = np .array ([0.3151 , 0.5243 , 0.4794 , 0.3217 , 0.4468 , 0.4728 , 0.5152 , 0.3598 , 0.3954 ])
470
+ expected_slice = np .array ([0.2682 , 0.4782 , 0.4855 , 0.2424 , 0.4472 , 0.4479 , 0.5612 , 0.3676 , 0.3854 ])
475
471
476
472
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
477
473
@@ -490,7 +486,7 @@ def test_stable_diffusion_k_euler(self):
490
486
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
491
487
492
488
assert image .shape == (1 , 64 , 64 , 3 )
493
- expected_slice = np .array ([0.3149 , 0.5246 , 0.4796 , 0.3218 , 0.4469 , 0.4729 , 0.5151 , 0.3597 , 0.3954 ])
489
+ expected_slice = np .array ([0.2681 , 0.4785 , 0.4857 , 0.2426 , 0.4473 , 0.4481 , 0.5610 , 0.3676 , 0.3855 ])
494
490
495
491
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
496
492
@@ -562,7 +558,7 @@ def test_stable_diffusion_negative_prompt(self):
562
558
image_slice = image [0 , - 3 :, - 3 :, - 1 ]
563
559
564
560
assert image .shape == (1 , 64 , 64 , 3 )
565
- expected_slice = np .array ([0.3458 , 0.5120 , 0.4800 , 0.3116 , 0.4348 , 0.4802 , 0.5237 , 0.3467 , 0.3991 ])
561
+ expected_slice = np .array ([0.1907 , 0.4709 , 0.4858 , 0.2224 , 0.4223 , 0.4539 , 0.5606 , 0.3489 , 0.3900 ])
566
562
567
563
assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
568
564
0 commit comments