Commit b02e211

ariG23498 and sayakpaul authored
[Tests] reduce the model size in the amused fast test (#7804)
* chore: reducing model sizes
* chore: shrinks further
* chore: shrinks further
* chore: shrinking model for img2img pipeline
* chore: reducing size of model for inpaint pipeline

Co-authored-by: Sayak Paul <[email protected]>
1 parent 21f023e commit b02e211
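
This commit only shrinks the dummy models that the aMUSEd fast tests build in get_dummy_components. As a rough illustration (not part of the repository's test code, and assuming diffusers is installed and exports UVit2DModel at its top level), instantiating the transformer with the reduced values from the first diff below shows how small the dummy model becomes:

import torch
from diffusers import UVit2DModel  # assumption: exported at the diffusers top level

torch.manual_seed(0)
transformer = UVit2DModel(
    hidden_size=8,
    use_bias=False,
    hidden_dropout=0.0,
    cond_embed_dim=8,
    micro_cond_encode_dim=2,
    micro_cond_embed_dim=10,
    encoder_hidden_size=8,
    vocab_size=32,
    codebook_size=8,
    in_channels=8,
    block_out_channels=8,
    num_res_blocks=1,
    downsample=True,
    upsample=True,
    block_num_heads=1,
    num_hidden_layers=1,
    num_attention_heads=1,
    attention_dropout=0.0,
    intermediate_size=8,
    layer_norm_eps=1e-06,
    ln_elementwise_affine=True,
)

# Count trainable parameters to see how light the dummy transformer now is.
print(sum(p.numel() for p in transformer.parameters()))

The affected fast tests can still be run the usual way, for example with pytest tests/pipelines/amused/.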

File tree

3 files changed: +54 -54 lines changed

tests/pipelines/amused/test_amused.py
tests/pipelines/amused/test_amused_img2img.py
tests/pipelines/amused/test_amused_inpaint.py

tests/pipelines/amused/test_amused.py

Lines changed: 18 additions & 18 deletions

@@ -38,43 +38,43 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
             block_num_heads=1,
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
         scheduler = AmusedScheduler(mask_token_id=31)
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=8,
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -85,14 +85,14 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

tests/pipelines/amused/test_amused_img2img.py

Lines changed: 18 additions & 18 deletions

@@ -42,43 +42,43 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
             block_num_heads=1,
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
         scheduler = AmusedScheduler(mask_token_id=31)
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
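
The inline comment above (repeated in the inpaint diff below) keeps num_vq_embeddings at 32 because shrinking it to 16 or 8 hit RuntimeError: "cdist_cuda" not implemented for 'Half'. A minimal sketch of the underlying limitation, assuming the error comes from torch.cdist being invoked on float16 CUDA tensors during the VQ nearest-codebook lookup (the call site itself is not shown in this diff):

import torch

if torch.cuda.is_available():
    # Hypothetical shapes: 64 flattened latent vectors against a 32-entry codebook.
    latents = torch.randn(64, 8, device="cuda", dtype=torch.float16)
    codebook = torch.randn(32, 8, device="cuda", dtype=torch.float16)
    try:
        torch.cdist(latents, codebook)  # pairwise distances for the nearest-code lookup
    except RuntimeError as err:
        # On affected torch builds this prints: "cdist_cuda" not implemented for 'Half'
        print(err)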

tests/pipelines/amused/test_amused_inpaint.py

Lines changed: 18 additions & 18 deletions

@@ -42,43 +42,43 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=32,  # codebook size needs to be consistent with num_vq_embeddings for inpaint tests
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
             block_num_heads=1,
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
         scheduler = AmusedScheduler(mask_token_id=31)
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
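
The other inline comment keeps the transformer's codebook_size at 32 so it stays consistent with the vqvae's num_vq_embeddings in the inpaint test. A hedged illustration of why a mismatch would break, assuming the transformer's predicted token ids are looked up in the VQ codebook, so every id below codebook_size must exist in that table (the sizes and names here are hypothetical, not taken from the pipeline code):

import torch

num_vq_embeddings = 8                # hypothetical codebook smaller than codebook_size
codebook = torch.nn.Embedding(num_vq_embeddings, 4)
token_ids = torch.tensor([[31]])     # an id that is valid when codebook_size=32
try:
    codebook(token_ids)              # lookup fails because id 31 >= num_vq_embeddings
except IndexError as err:
    print(err)                       # index out of range in self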
