11
11
AutoencoderKL ,
12
12
DDIMScheduler ,
13
13
MotionAdapter ,
14
+ StableDiffusionPipeline ,
14
15
UNet2DConditionModel ,
15
16
UNetMotionModel ,
16
17
)
@@ -51,16 +52,19 @@ class AnimateDiffPipelineFastTests(
51
52
)
52
53
53
54
def get_dummy_components (self ):
55
+ cross_attention_dim = 8
56
+ block_out_channels = (8 , 8 )
57
+
54
58
torch .manual_seed (0 )
55
59
unet = UNet2DConditionModel (
56
- block_out_channels = ( 32 , 64 ) ,
60
+ block_out_channels = block_out_channels ,
57
61
layers_per_block = 2 ,
58
- sample_size = 32 ,
62
+ sample_size = 8 ,
59
63
in_channels = 4 ,
60
64
out_channels = 4 ,
61
65
down_block_types = ("CrossAttnDownBlock2D" , "DownBlock2D" ),
62
66
up_block_types = ("CrossAttnUpBlock2D" , "UpBlock2D" ),
63
- cross_attention_dim = 32 ,
67
+ cross_attention_dim = cross_attention_dim ,
64
68
norm_num_groups = 2 ,
65
69
)
66
70
scheduler = DDIMScheduler (
@@ -71,18 +75,19 @@ def get_dummy_components(self):
71
75
)
72
76
torch .manual_seed (0 )
73
77
vae = AutoencoderKL (
74
- block_out_channels = [ 32 , 64 ] ,
78
+ block_out_channels = block_out_channels ,
75
79
in_channels = 3 ,
76
80
out_channels = 3 ,
77
81
down_block_types = ["DownEncoderBlock2D" , "DownEncoderBlock2D" ],
78
82
up_block_types = ["UpDecoderBlock2D" , "UpDecoderBlock2D" ],
79
83
latent_channels = 4 ,
84
+ norm_num_groups = 2 ,
80
85
)
81
86
torch .manual_seed (0 )
82
87
text_encoder_config = CLIPTextConfig (
83
88
bos_token_id = 0 ,
84
89
eos_token_id = 2 ,
85
- hidden_size = 32 ,
90
+ hidden_size = cross_attention_dim ,
86
91
intermediate_size = 37 ,
87
92
layer_norm_eps = 1e-05 ,
88
93
num_attention_heads = 4 ,
@@ -92,8 +97,9 @@ def get_dummy_components(self):
92
97
)
93
98
text_encoder = CLIPTextModel (text_encoder_config )
94
99
tokenizer = CLIPTokenizer .from_pretrained ("hf-internal-testing/tiny-random-clip" )
100
+ torch .manual_seed (0 )
95
101
motion_adapter = MotionAdapter (
96
- block_out_channels = ( 32 , 64 ) ,
102
+ block_out_channels = block_out_channels ,
97
103
motion_layers_per_block = 2 ,
98
104
motion_norm_num_groups = 2 ,
99
105
motion_num_attention_heads = 4 ,
@@ -126,6 +132,36 @@ def get_dummy_inputs(self, device, seed=0):
126
132
}
127
133
return inputs
128
134
135
+ def test_from_pipe_consistent_config (self ):
136
+ assert self .original_pipeline_class == StableDiffusionPipeline
137
+ original_repo = "hf-internal-testing/tinier-stable-diffusion-pipe"
138
+ original_kwargs = {"requires_safety_checker" : False }
139
+
140
+ # create original_pipeline_class(sd)
141
+ pipe_original = self .original_pipeline_class .from_pretrained (original_repo , ** original_kwargs )
142
+
143
+ # original_pipeline_class(sd) -> pipeline_class
144
+ pipe_components = self .get_dummy_components ()
145
+ pipe_additional_components = {}
146
+ for name , component in pipe_components .items ():
147
+ if name not in pipe_original .components :
148
+ pipe_additional_components [name ] = component
149
+
150
+ pipe = self .pipeline_class .from_pipe (pipe_original , ** pipe_additional_components )
151
+
152
+ # pipeline_class -> original_pipeline_class(sd)
153
+ original_pipe_additional_components = {}
154
+ for name , component in pipe_original .components .items ():
155
+ if name not in pipe .components or not isinstance (component , pipe .components [name ].__class__ ):
156
+ original_pipe_additional_components [name ] = component
157
+
158
+ pipe_original_2 = self .original_pipeline_class .from_pipe (pipe , ** original_pipe_additional_components )
159
+
160
+ # compare the config
161
+ original_config = {k : v for k , v in pipe_original .config .items () if not k .startswith ("_" )}
162
+ original_config_2 = {k : v for k , v in pipe_original_2 .config .items () if not k .startswith ("_" )}
163
+ assert original_config_2 == original_config
164
+
129
165
def test_motion_unet_loading (self ):
130
166
components = self .get_dummy_components ()
131
167
pipe = AnimateDiffPipeline (** components )
@@ -141,41 +177,41 @@ def test_ip_adapter_single(self):
141
177
if torch_device == "cpu" :
142
178
expected_pipe_slice = np .array (
143
179
[
144
- 0.5541 ,
145
- 0.5802 ,
146
- 0.5074 ,
147
- 0.4583 ,
148
- 0.4729 ,
149
- 0.5374 ,
150
- 0.4051 ,
151
- 0.4495 ,
152
- 0.4480 ,
153
- 0.5292 ,
154
- 0.6322 ,
155
- 0.6265 ,
156
- 0.5455 ,
157
- 0.4771 ,
158
- 0.5795 ,
159
- 0.5845 ,
160
- 0.4172 ,
161
- 0.6066 ,
162
- 0.6535 ,
163
- 0.4113 ,
164
- 0.6833 ,
165
- 0.5736 ,
166
- 0.3589 ,
167
- 0.5730 ,
168
- 0.4205 ,
169
- 0.3786 ,
170
- 0.5323 ,
180
+ 0.5216 ,
181
+ 0.5620 ,
182
+ 0.4927 ,
183
+ 0.5082 ,
184
+ 0.4786 ,
185
+ 0.5932 ,
186
+ 0.5125 ,
187
+ 0.4514 ,
188
+ 0.5315 ,
189
+ 0.4694 ,
190
+ 0.3276 ,
191
+ 0.4863 ,
192
+ 0.3920 ,
193
+ 0.3684 ,
194
+ 0.5745 ,
195
+ 0.4499 ,
196
+ 0.5081 ,
197
+ 0.5414 ,
198
+ 0.6014 ,
199
+ 0.5062 ,
200
+ 0.3630 ,
201
+ 0.5296 ,
202
+ 0.6018 ,
203
+ 0.5098 ,
204
+ 0.4948 ,
205
+ 0.5101 ,
206
+ 0.5620 ,
171
207
]
172
208
)
173
209
return super ().test_ip_adapter_single (expected_pipe_slice = expected_pipe_slice )
174
210
175
211
def test_dict_tuple_outputs_equivalent (self ):
176
212
expected_slice = None
177
213
if torch_device == "cpu" :
178
- expected_slice = np .array ([0.4051 , 0.4495 , 0.4480 , 0.5845 , 0.4172 , 0.6066 , 0.4205 , 0.3786 , 0.5323 ])
214
+ expected_slice = np .array ([0.5125 , 0.4514 , 0.5315 , 0.4499 , 0.5081 , 0.5414 , 0.4948 , 0.5101 , 0.5620 ])
179
215
return super ().test_dict_tuple_outputs_equivalent (expected_slice = expected_slice )
180
216
181
217
def test_inference_batch_single_identical (
@@ -279,7 +315,7 @@ def test_prompt_embeds(self):
279
315
280
316
inputs = self .get_dummy_inputs (torch_device )
281
317
inputs .pop ("prompt" )
282
- inputs ["prompt_embeds" ] = torch .randn ((1 , 4 , 32 ), device = torch_device )
318
+ inputs ["prompt_embeds" ] = torch .randn ((1 , 4 , pipe . text_encoder . config . hidden_size ), device = torch_device )
283
319
pipe (** inputs )
284
320
285
321
def test_free_init (self ):
0 commit comments