huggingface · DN6 · Feb 27, 2025 · Feb 25, 2025 · Feb 26, 2025 · Feb 26, 2025
diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py
@@ -1169,17 +1169,16 @@ def test_disk_offload_without_safetensors(self):
         base_output = model(**inputs_dict)
 
         model_size = compute_module_sizes(model)[""]
+        max_size = int(self.model_split_percents[0] * model_size)
+        # Force disk offload by capping CPU memory at 10% of the device 0 budget
+        max_memory = {0: max_size, "cpu": int(0.1 * max_size)}
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir, safe_serialization=False)
-
             with self.assertRaises(ValueError):
-                max_size = int(self.model_split_percents[0] * model_size)
-                max_memory = {0: max_size, "cpu": max_size}
                 # This errors out because it's missing an offload folder
                 new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
 
-            max_size = int(self.model_split_percents[0] * model_size)
-            max_memory = {0: max_size, "cpu": max_size}
             new_model = self.model_class.from_pretrained(
                 tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
             )

diff --git a/tests/models/transformers/test_models_transformer_omnigen.py b/tests/models/transformers/test_models_transformer_omnigen.py
@@ -30,6 +30,7 @@ class OmniGenTransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = OmniGenTransformer2DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
+    model_split_percents = [0.1, 0.1, 0.1]
 
     @property
     def dummy_input(self):
@@ -73,9 +74,9 @@ def prepare_init_args_and_inputs_for_common(self):
             "num_attention_heads": 4,
             "num_key_value_heads": 4,
             "intermediate_size": 32,
-            "num_layers": 1,
+            "num_layers": 20,
             "pad_token_id": 0,
-            "vocab_size": 100,
+            "vocab_size": 1000,
             "in_channels": 4,
             "time_step_dim": 4,
             "rope_scaling": {"long_factor": list(range(1, 3)), "short_factor": list(range(1, 3))},

diff --git a/tests/models/transformers/test_models_transformer_sd3.py b/tests/models/transformers/test_models_transformer_sd3.py
@@ -33,6 +33,7 @@
 class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = SD3Transformer2DModel
     main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]
 
     @property
     def dummy_input(self):
@@ -67,7 +68,7 @@ def prepare_init_args_and_inputs_for_common(self):
             "sample_size": 32,
             "patch_size": 1,
             "in_channels": 4,
-            "num_layers": 1,
+            "num_layers": 4,
             "attention_head_dim": 8,
             "num_attention_heads": 4,
             "caption_projection_dim": 32,
@@ -107,6 +108,7 @@ def test_gradient_checkpointing_is_applied(self):
 class SD35TransformerTests(ModelTesterMixin, unittest.TestCase):
     model_class = SD3Transformer2DModel
     main_input_name = "hidden_states"
+    model_split_percents = [0.8, 0.8, 0.9]
 
     @property
     def dummy_input(self):
@@ -141,7 +143,7 @@ def prepare_init_args_and_inputs_for_common(self):
             "sample_size": 32,
             "patch_size": 1,
             "in_channels": 4,
-            "num_layers": 2,
+            "num_layers": 4,
             "attention_head_dim": 8,
             "num_attention_heads": 4,
             "caption_projection_dim": 32,