xfail

sayakpaul · sayakpaul · commit d3010dd3859f · 2025-06-07T10:55:21.000+05:30
diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
@@ -878,9 +878,10 @@ def test_torch_compile(self):
         super()._test_torch_compile(quantization_config=self.quantization_config)
 
     def test_torch_compile_with_cpu_offload(self):
-        torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile_with_cpu_offload(quantization_config=self.quantization_config)
 
+    @pytest.mark.xfail(
+        reason="Test fails because of an illegal memory access.",
+    )
     def test_torch_compile_with_group_offload(self):
-        torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile_with_group_offload(quantization_config=self.quantization_config)
diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
@@ -792,13 +792,12 @@ def test_torch_compile(self):
         super()._test_torch_compile(quantization_config=self.quantization_config, torch_dtype=torch.float16)
 
     def test_torch_compile_with_cpu_offload(self):
-        torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile_with_cpu_offload(
             quantization_config=self.quantization_config, torch_dtype=torch.float16
         )
 
+    @pytest.mark.xfail(reason="Test fails because of an offloading problem from Accelerate with confusion in hooks.")
     def test_torch_compile_with_group_offload(self):
-        torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super()._test_torch_compile_with_group_offload(
             quantization_config=self.quantization_config, torch_dtype=torch.float16
         )
diff --git a/tests/quantization/test_torch_compile_utils.py b/tests/quantization/test_torch_compile_utils.py
@@ -65,17 +65,23 @@ def _test_torch_compile_with_cpu_offload(self, quantization_config, torch_dtype=
             pipe("a dog", num_inference_steps=3, max_sequence_length=16, height=256, width=256)
 
     def _test_torch_compile_with_group_offload(self, quantization_config, torch_dtype=torch.bfloat16):
+        torch._dynamo.config.cache_size_limit = 10000
+
         pipe = self._init_pipeline(quantization_config, torch_dtype)
         group_offload_kwargs = {
-            "onload_device": "cuda",
-            "offload_device": "cpu",
+            "onload_device": torch.device("cuda"),
+            "offload_device": torch.device("cpu"),
             "offload_type": "block_level",
             "num_blocks_per_group": 1,
             "use_stream": True,
             "non_blocking": True,
         }
-        pipe.enable_group_offload(**group_offload_kwargs)
+        pipe.transformer.enable_group_offload(**group_offload_kwargs)
         pipe.transformer.compile()
+        for name, component in pipe.components.items():
+            if name != "transformer" and isinstance(component, torch.nn.Module):
+                if torch.device(component.device).type == "cpu":
+                    component.to("cuda")
 
         for _ in range(2):
             # small resolutions to ensure speedy execution.

Original file line number	Diff line number	Diff line change
`@@ -792,13 +792,12 @@ def test_torch_compile(self):`
`792`	`792`	`super()._test_torch_compile(quantization_config=self.quantization_config, torch_dtype=torch.float16)`
`793`	`793`
`794`	`794`	`def test_torch_compile_with_cpu_offload(self):`
`795`		`- torch._dynamo.config.capture_dynamic_output_shape_ops = True`
`796`	`795`	`super()._test_torch_compile_with_cpu_offload(`
`797`	`796`	`quantization_config=self.quantization_config, torch_dtype=torch.float16`
`798`	`797`	`)`
`799`	`798`
	`799`	`+ @pytest.mark.xfail(reason="Test fails because of an offloading problem from Accelerate with confusion in hooks.")`
`800`	`800`	`def test_torch_compile_with_group_offload(self):`
`801`		`- torch._dynamo.config.capture_dynamic_output_shape_ops = True`
`802`	`801`	`super()._test_torch_compile_with_group_offload(`
`803`	`802`	`quantization_config=self.quantization_config, torch_dtype=torch.float16`
`804`	`803`	`)`