@@ -115,9 +115,12 @@ def get_dummy_components(self, scheduler_cls=None):
 
         torch.manual_seed(0)
         unet = UNet2DConditionModel(**self.unet_kwargs)
+
         scheduler = scheduler_cls(**self.scheduler_kwargs)
+
        torch.manual_seed(0)
         vae = AutoencoderKL(**self.vae_kwargs)
+
         text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
         tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
 
@@ -1402,6 +1405,35 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
 @slow
 @require_torch_gpu
 class LoraIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    pipeline_class = StableDiffusionPipeline
+    scheduler_cls = DDIMScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "clip_sample": False,
+        "set_alpha_to_one": False,
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "cross_attention_dim": 32,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+    }
+
     def tearDown(self):
         import gc
 
@@ -1655,6 +1687,42 @@ def test_load_unload_load_kohya_lora(self):
 @slow
 @require_torch_gpu
 class LoraSDXLIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
+    has_two_text_encoders = True
+    pipeline_class = StableDiffusionXLPipeline
+    scheduler_cls = EulerDiscreteScheduler
+    scheduler_kwargs = {
+        "beta_start": 0.00085,
+        "beta_end": 0.012,
+        "beta_schedule": "scaled_linear",
+        "timestep_spacing": "leading",
+        "steps_offset": 1,
+    }
+    unet_kwargs = {
+        "block_out_channels": (32, 64),
+        "layers_per_block": 2,
+        "sample_size": 32,
+        "in_channels": 4,
+        "out_channels": 4,
+        "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
+        "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
+        "attention_head_dim": (2, 4),
+        "use_linear_projection": True,
+        "addition_embed_type": "text_time",
+        "addition_time_embed_dim": 8,
+        "transformer_layers_per_block": (1, 2),
+        "projection_class_embeddings_input_dim": 80,  # 6 * 8 + 32
+        "cross_attention_dim": 64,
+    }
+    vae_kwargs = {
+        "block_out_channels": [32, 64],
+        "in_channels": 3,
+        "out_channels": 3,
+        "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
+        "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
+        "latent_channels": 4,
+        "sample_size": 128,
+    }
+
     def tearDown(self):
         import gc
 
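For orientation, the class attributes added in the two integration classes are what the mixin's `get_dummy_components` (first hunk) reads to assemble a tiny test pipeline. Below is a minimal, self-contained sketch of that flow; the `build_dummy_components` helper name and its return shape are illustrative assumptions, while the construction steps and the tiny CLIP checkpoint mirror the first hunk.

```python
# Illustrative sketch only: a condensed stand-in for get_dummy_components(),
# showing how the scheduler_cls / scheduler_kwargs / unet_kwargs / vae_kwargs
# class attributes added in this diff are consumed. Not part of the diff itself.
import torch
from diffusers import AutoencoderKL, UNet2DConditionModel
from transformers import CLIPTextModel, CLIPTokenizer


def build_dummy_components(scheduler_cls, scheduler_kwargs, unet_kwargs, vae_kwargs):
    """Build the small, deterministic components a LoRA test case works with.

    The four arguments correspond to the class attributes added in this commit,
    e.g. LoraIntegrationTests.scheduler_cls and its *_kwargs dicts.
    """
    torch.manual_seed(0)
    unet = UNet2DConditionModel(**unet_kwargs)

    scheduler = scheduler_cls(**scheduler_kwargs)

    torch.manual_seed(0)
    vae = AutoencoderKL(**vae_kwargs)

    # Tiny CLIP checkpoint used throughout the PEFT LoRA tests.
    text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
    tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")

    return {
        "unet": unet,
        "scheduler": scheduler,
        "vae": vae,
        "text_encoder": text_encoder,
        "tokenizer": tokenizer,
    }
```

Called with, say, `DDIMScheduler` and the `LoraIntegrationTests` kwargs dicts shown above, this would produce the same kind of component set the SD 1.x integration tests hand to their `pipeline_class`.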