
Commit f2aa727

fast tests
1 parent: 8b2670d

File tree

2 files changed: +149 −0 lines changed


tests/pipelines/hidream/__init__.py

Whitespace-only changes.
Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    CLIPTextConfig,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
    LlamaForCausalLM,
    T5EncoderModel,
)

from diffusers import (
    AutoencoderKL,
    FlowMatchEulerDiscreteScheduler,
    HiDreamImagePipeline,
    HiDreamImageTransformer2DModel,
)
from diffusers.utils.testing_utils import enable_full_determinism

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin


enable_full_determinism()


class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = HiDreamImagePipeline
    params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"}
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
    image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS

    required_optional_params = PipelineTesterMixin.required_optional_params
    test_layerwise_casting = True

    def get_dummy_components(self):
        # Reseed before each component so the randomly initialized weights
        # are deterministic across runs.
        torch.manual_seed(0)
        transformer = HiDreamImageTransformer2DModel(
            patch_size=2,
            in_channels=4,
            out_channels=4,
            num_layers=1,
            num_single_layers=1,
            attention_head_dim=8,
            num_attention_heads=4,
            caption_channels=[32, 16],
            text_emb_dim=64,
            num_routed_experts=4,
            num_activated_experts=2,
            axes_dims_rope=(4, 2, 2),
            max_resolution=(32, 32),
            llama_layers=(0, 1),
        ).eval()

        torch.manual_seed(0)
        vae = AutoencoderKL(scaling_factor=0.3611, shift_factor=0.1159)

        clip_text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
            hidden_act="gelu",
            projection_dim=32,
            max_position_embeddings=128,
        )

        torch.manual_seed(0)
        text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config)

        torch.manual_seed(0)
        text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)

        # Tiny checkpoints from hf-internal-testing keep the fast tests lightweight.
        torch.manual_seed(0)
        text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")

        torch.manual_seed(0)
        text_encoder_4 = LlamaForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
        text_encoder_4.generation_config.pad_token_id = 1

        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
        tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
        tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
        tokenizer_4 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")

        scheduler = FlowMatchEulerDiscreteScheduler()

        components = {
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "text_encoder_2": text_encoder_2,
            "tokenizer_2": tokenizer_2,
            "text_encoder_3": text_encoder_3,
            "tokenizer_3": tokenizer_3,
            "text_encoder_4": text_encoder_4,
            "tokenizer_4": tokenizer_4,
            "transformer": transformer,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        # MPS does not support device-local generators, so fall back to the
        # global generator there.
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 5.0,
            "output_type": "np",
        }
        return inputs

    def test_inference(self):
        device = "cpu"

        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        self.assertEqual(image.shape, (1, 128, 128, 3))
        expected_slice = np.array(
            [0.6253079, 0.6115351, 0.5223988, 0.5683453, 0.44545278, 0.53524655, 0.3968956, 0.5558849, 0.5917772]
        )
        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
        self.assertLessEqual(max_diff, 1e-3, f"Got {image_slice.flatten()=}")

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=3e-4)
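
To try the new suite locally, something like the sketch below should work. Note that the commit view above does not show the new test file's name, only that it lives under tests/pipelines/hidream/, so the module path here is an assumption.

# Minimal sketch for running the fast tests with unittest's loader.
# ASSUMPTION: "test_pipeline_hidream" is a placeholder for the new file's
# name, which the commit view does not show; adjust to the actual module.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    "tests.pipelines.hidream.test_pipeline_hidream.HiDreamImagePipelineFastTests"
)
unittest.TextTestRunner(verbosity=2).run(suite)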
