diff --git a/.github/workflows/pr_tests_gpu.yml b/.github/workflows/pr_tests_gpu.yml
index a06689b5fad7..307c7d7e1f7f 100644
--- a/.github/workflows/pr_tests_gpu.yml
+++ b/.github/workflows/pr_tests_gpu.yml
@@ -11,6 +11,8 @@ on:
       - "src/diffusers/loaders/lora_base.py"
       - "src/diffusers/loaders/lora_pipeline.py"
       - "src/diffusers/loaders/peft.py"
+      - "tests/pipelines/test_pipelines_common.py"
+      - "tests/models/test_modeling_common.py"
   workflow_dispatch:
 
 concurrency:
diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py
index c473c63a42d2..b917efe0850f 100644
--- a/tests/models/test_modeling_common.py
+++ b/tests/models/test_modeling_common.py
@@ -1169,17 +1169,16 @@ def test_disk_offload_without_safetensors(self):
         base_output = model(**inputs_dict)
 
         model_size = compute_module_sizes(model)[""]
+        max_size = int(self.model_split_percents[0] * model_size)
+        # Force disk offload by setting very small CPU memory
+        max_memory = {0: max_size, "cpu": int(0.1 * max_size)}
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir, safe_serialization=False)
-
             with self.assertRaises(ValueError):
-                max_size = int(self.model_split_percents[0] * model_size)
-                max_memory = {0: max_size, "cpu": max_size}
                 # This errors out because it's missing an offload folder
                 new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
 
-            max_size = int(self.model_split_percents[0] * model_size)
-            max_memory = {0: max_size, "cpu": max_size}
             new_model = self.model_class.from_pretrained(
                 tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
             )
diff --git a/tests/models/transformers/test_models_transformer_omnigen.py b/tests/models/transformers/test_models_transformer_omnigen.py
index a7653f1f9d6d..1bdcc68b0378 100644
--- a/tests/models/transformers/test_models_transformer_omnigen.py
+++ b/tests/models/transformers/test_models_transformer_omnigen.py
@@ -30,6 +30,7 @@ class OmniGenTransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = OmniGenTransformer2DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
+    model_split_percents = [0.1, 0.1, 0.1]
 
     @property
     def dummy_input(self):
@@ -73,9 +74,9 @@ def prepare_init_args_and_inputs_for_common(self):
             "num_attention_heads": 4,
             "num_key_value_heads": 4,
             "intermediate_size": 32,
-            "num_layers": 1,
+            "num_layers": 20,
             "pad_token_id": 0,
-            "vocab_size": 100,
+            "vocab_size": 1000,
             "in_channels": 4,
             "time_step_dim": 4,
             "rope_scaling": {"long_factor": list(range(1, 3)), "short_factor": list(range(1, 3))},
diff --git a/tests/models/transformers/test_models_transformer_sd3.py b/tests/models/transformers/test_models_transformer_sd3.py
index 2531381dc7c8..659d9a82fd76 100644
--- a/tests/models/transformers/test_models_transformer_sd3.py
+++ b/tests/models/transformers/test_models_transformer_sd3.py
@@ -33,6 +33,7 @@
 class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = SD3Transformer2DModel
     main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]
 
     @property
     def dummy_input(self):
@@ -67,7 +68,7 @@ def prepare_init_args_and_inputs_for_common(self):
             "sample_size": 32,
             "patch_size": 1,
             "in_channels": 4,
-            "num_layers": 1,
+            "num_layers": 4,
             "attention_head_dim": 8,
             "num_attention_heads": 4,
             "caption_projection_dim": 32,
@@ -107,6 +108,7 @@ def test_gradient_checkpointing_is_applied(self):
 class SD35TransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = SD3Transformer2DModel
     main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]
 
     @property
     def dummy_input(self):
@@ -141,7 +143,7 @@ def prepare_init_args_and_inputs_for_common(self):
             "sample_size": 32,
             "patch_size": 1,
             "in_channels": 4,
-            "num_layers": 2,
+            "num_layers": 4,
             "attention_head_dim": 8,
             "num_attention_heads": 4,
             "caption_projection_dim": 32,