@@ -13,15 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
 import pytest
 import torch
+import torch.multiprocessing as mp
 
+from diffusers.models._modeling_parallel import ContextParallelConfig
 from diffusers.models.attention import AttentionModuleMixin
 from diffusers.models.attention_processor import (
     AttnProcessor,
 )
 
-from ...testing_utils import is_attention, torch_device
+from ...testing_utils import is_attention, is_context_parallel, require_torch_multi_accelerator, torch_device
 
 
 @is_attention
@@ -85,9 +89,9 @@ def test_fuse_unfuse_qkv_projections(self):
         output_after_fusion = output_after_fusion.to_tuple()[0]
 
         # Verify outputs match
-        assert torch.allclose(
-            output_before_fusion, output_after_fusion, atol=self.base_precision
-        ), "Output should not change after fusing projections"
+        assert torch.allclose(output_before_fusion, output_after_fusion, atol=self.base_precision), (
+            "Output should not change after fusing projections"
+        )
 
         # Unfuse projections
         model.unfuse_qkv_projections()
@@ -106,9 +110,9 @@ def test_fuse_unfuse_qkv_projections(self):
         output_after_unfusion = output_after_unfusion.to_tuple()[0]
 
         # Verify outputs still match
-        assert torch.allclose(
-            output_before_fusion, output_after_unfusion, atol=self.base_precision
-        ), "Output should match original after unfusing projections"
+        assert torch.allclose(output_before_fusion, output_after_unfusion, atol=self.base_precision), (
+            "Output should match original after unfusing projections"
+        )
 
     def test_get_set_processor(self):
         init_dict = self.get_init_dict()
@@ -177,3 +181,83 @@ def test_attention_processor_count_mismatch_raises_error(self):
             model.set_attn_processor(wrong_processors)
 
         assert "number of processors" in str(exc_info.value).lower(), "Error should mention processor count mismatch"
+
+
+def _context_parallel_worker(rank, world_size, model_class, init_dict, cp_dict, inputs_dict, result_queue):
+    try:
+        # Set up the distributed environment for this rank
+        os.environ["MASTER_ADDR"] = "localhost"
+        os.environ["MASTER_PORT"] = "12355"
+
+        torch.distributed.init_process_group(
+            backend="nccl",
+            init_method="env://",
+            world_size=world_size,
+            rank=rank,
+        )
+        torch.cuda.set_device(rank)
+        device = torch.device(f"cuda:{rank}")
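+
+        # Instantiate the model on this rank and put it in eval mode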
+        model = model_class(**init_dict)
+        model.to(device)
+        model.eval()
+
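+        # Move tensor inputs to this rank's device; pass everything else through unchanged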
+        inputs_on_device = {}
+        for key, value in inputs_dict.items():
+            if isinstance(value, torch.Tensor):
+                inputs_on_device[key] = value.to(device)
+            else:
+                inputs_on_device[key] = value
+
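+        # Enable context parallelism; sharding follows the model's _cp_plan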
+        cp_config = ContextParallelConfig(**cp_dict)
+        model.enable_parallelism(config=cp_config)
+
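+        # Run a forward pass under no_grad; unwrap ModelOutput-style dicts to the first tensor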
+        with torch.no_grad():
+            output = model(**inputs_on_device)
+            if isinstance(output, dict):
+                output = output.to_tuple()[0]
+
+        if rank == 0:
+            result_queue.put(("success", output.shape))
+
+    except Exception as e:
+        if rank == 0:
+            result_queue.put(("error", str(e)))
+    finally:
+        if torch.distributed.is_initialized():
+            torch.distributed.destroy_process_group()
+
+
+@is_context_parallel
+@require_torch_multi_accelerator
+class ContextParallelTesterMixin:
+    base_precision = 1e-3
+
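+    # Exercise both Ulysses and ring attention variants of context parallelism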
+    @pytest.mark.parametrize("cp_type", ["ulysses_degree", "ring_degree"], ids=["ulysses", "ring"])
+    def test_context_parallel_inference(self, cp_type):
+        if not torch.distributed.is_available():
+            pytest.skip("torch.distributed is not available.")
+
+        if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
+            pytest.skip("Context parallel requires at least 2 CUDA devices.")
+
+        if not hasattr(self.model_class, "_cp_plan") or self.model_class._cp_plan is None:
+            pytest.skip("Model does not have a _cp_plan defined for context parallel inference.")
+
+        world_size = 2
+        init_dict = self.get_init_dict()
+        inputs_dict = self.get_dummy_inputs()
+        cp_dict = {cp_type: world_size}
+
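+        # Use a spawn context: CUDA cannot be safely re-initialized in forked workers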
+        ctx = mp.get_context("spawn")
+        result_queue = ctx.Queue()
+
+        mp.spawn(
+            _context_parallel_worker,
+            args=(world_size, self.model_class, init_dict, cp_dict, inputs_dict, result_queue),
+            nprocs=world_size,
+            join=True,
+        )
+
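+        # Rank 0 posts either ("success", output_shape) or ("error", message)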
+        status, result = result_queue.get(timeout=60)
+        assert status == "success", f"Context parallel inference failed: {result}"