
Commit e9fee7c
better manage compilation cache.
1 parent a17d537

File tree (5 files changed: +24 −101 lines changed):
  tests/models/test_modeling_common.py
  tests/models/transformers/test_models_transformer_hunyuan_video.py
  tests/models/transformers/test_models_transformer_wan.py
  tests/pipelines/test_pipelines.py
  tests/pipelines/test_pipelines_common.py
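Two changes recur across the files below: the private torch._dynamo.reset() is replaced by the public torch.compiler.reset(), and compile-sensitive tests are scoped to a fresh inductor cache. A minimal sketch of the resulting pattern, assuming PyTorch 2.x with inductor available (fresh_inductor_cache is a private utility, so its location may change between releases):

import torch
from torch._inductor.utils import fresh_inductor_cache

# torch.compiler.reset() is the public replacement for torch._dynamo.reset():
# it clears all torch.compile state accumulated in the current process.
torch.compiler.reset()

model = torch.compile(torch.nn.Linear(4, 4), fullgraph=True)

# fresh_inductor_cache() redirects inductor's cache to a temporary directory,
# so the compile below starts cold instead of reusing earlier artifacts;
# error_on_recompile=True turns any unexpected recompilation into a hard error.
with (
    fresh_inductor_cache(),
    torch._dynamo.config.patch(error_on_recompile=True),
    torch.no_grad(),
):
    x = torch.randn(2, 4)
    _ = model(x)  # first call: compiles
    _ = model(x)  # second call: must reuse the compiled graph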

tests/models/test_modeling_common.py
Lines changed: 12 additions & 8 deletions

@@ -1721,14 +1721,14 @@ class TorchCompileTesterMixin:
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
 
@@ -1737,13 +1737,17 @@ def tearDown(self):
     @is_torch_compile
     @slow
     def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
+        torch.compiler.reset()
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 
         model = self.model_class(**init_dict).to(torch_device)
         model = torch.compile(model, fullgraph=True)
 
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
+        with (
+            torch._inductor.utils.fresh_inductor_cache(),
+            torch._dynamo.config.patch(error_on_recompile=True),
+            torch.no_grad(),
+        ):
             _ = model(**inputs_dict)
             _ = model(**inputs_dict)
 
@@ -1771,7 +1775,7 @@ def tearDown(self):
         # It is critical that the dynamo cache is reset for each test. Otherwise, if the test re-uses the same model,
         # there will be recompilation errors, as torch caches the model when run in the same process.
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
 
@@ -1905,21 +1909,21 @@ def test_hotswapping_model(self, rank0, rank1):
     def test_hotswapping_compiled_model_linear(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "to_k", "to_v", "to_out.0"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)
 
     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
     def test_hotswapping_compiled_model_conv2d(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["conv", "conv1", "conv2"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)
 
     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
     def test_hotswapping_compiled_model_both_linear_and_conv2d(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "conv"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)
 
     def test_enable_lora_hotswap_called_after_adapter_added_raises(self):
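The error_on_recompile guard is what gives the fullgraph test its teeth: any second compilation, for instance after an input-shape change, raises instead of silently recompiling. A small sketch (not from this commit) of the failure mode the guard surfaces:

import torch

torch.compiler.reset()
model = torch.compile(torch.nn.Linear(4, 4), fullgraph=True)

with torch._dynamo.config.patch(error_on_recompile=True):
    _ = model(torch.randn(2, 4))  # compiles for batch size 2
    try:
        _ = model(torch.randn(8, 4))  # new batch size forces a recompile
    except Exception as exc:  # torch._dynamo raises a RecompileError here
        print(f"recompilation detected: {exc}")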

tests/models/transformers/test_models_transformer_hunyuan_video.py
Lines changed: 7 additions & 69 deletions

@@ -19,20 +19,16 @@
 from diffusers import HunyuanVideoTransformer3DModel
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    is_torch_compile,
-    require_torch_2,
-    require_torch_gpu,
-    slow,
     torch_device,
 )
 
-from ..test_modeling_common import ModelTesterMixin
+from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin
 
 
 enable_full_determinism()
 
 
-class HunyuanVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -96,23 +92,8 @@ def test_gradient_checkpointing_is_applied(self):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
 
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-
-
-class HunyuanSkyreelsImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanSkyreelsImageToVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -179,23 +160,8 @@ def test_gradient_checkpointing_is_applied(self):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
 
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-
 
-class HunyuanVideoImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoImageToVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -260,23 +226,10 @@ def test_gradient_checkpointing_is_applied(self):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
 
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-
-
-class HunyuanVideoTokenReplaceImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoTokenReplaceImageToVideoTransformer3DTests(
+    ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase
+):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -342,18 +295,3 @@ def test_output(self):
     def test_gradient_checkpointing_is_applied(self):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
-
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
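Note the mixin's position in each base-class list: because Python resolves setUp/tearDown left to right along the MRO and the mixins call super() cooperatively, TorchCompileTesterMixin must sit before unittest.TestCase for its reset logic to run. A toy illustration with hypothetical class names:

import unittest

import torch


class CompileResetMixin:
    def setUp(self):
        super().setUp()  # cooperative: forwards along the MRO to TestCase
        torch.compiler.reset()  # runs before every test in the subclass


class ToyModelTests(CompileResetMixin, unittest.TestCase):
    # MRO: ToyModelTests -> CompileResetMixin -> unittest.TestCase -> object
    def test_something(self):
        self.assertTrue(True)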

tests/models/transformers/test_models_transformer_wan.py
Lines changed: 2 additions & 21 deletions

@@ -19,20 +19,16 @@
 from diffusers import WanTransformer3DModel
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    is_torch_compile,
-    require_torch_2,
-    require_torch_gpu,
-    slow,
     torch_device,
 )
 
-from ..test_modeling_common import ModelTesterMixin
+from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin
 
 
 enable_full_determinism()
 
 
-class WanTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class WanTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = WanTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -86,18 +82,3 @@ def prepare_init_args_and_inputs_for_common(self):
     def test_gradient_checkpointing_is_applied(self):
         expected_set = {"WanTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
-
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
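The inherited test only needs each class to provide model_class and prepare_init_args_and_inputs_for_common(). A self-contained toy showing that contract (everything below is illustrative; the real classes live in the diffusers test suite):

import unittest

import torch


class ToyModel(torch.nn.Module):
    def __init__(self, hidden_dim: int):
        super().__init__()
        self.proj = torch.nn.Linear(hidden_dim, hidden_dim)

    def forward(self, hidden_states):
        return self.proj(hidden_states)


class ToyCompileTests(unittest.TestCase):
    model_class = ToyModel

    def prepare_init_args_and_inputs_for_common(self):
        # The mixin does: model = self.model_class(**init_dict); model(**inputs_dict)
        init_dict = {"hidden_dim": 8}
        inputs_dict = {"hidden_states": torch.randn(2, 8)}
        return init_dict, inputs_dict

    def test_compiles_without_recompilation(self):
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
        model = torch.compile(self.model_class(**init_dict), fullgraph=True)
        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
            _ = model(**inputs_dict)
            _ = model(**inputs_dict)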

tests/pipelines/test_pipelines.py
Lines changed: 1 addition & 1 deletion

@@ -2209,7 +2209,7 @@ def tearDown(self):
         # It is critical that the dynamo cache is reset for each test. Otherwise, if the test re-uses the same model,
         # there will be recompilation errors, as torch caches the model when run in the same process.
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)

tests/pipelines/test_pipelines_common.py
Lines changed: 2 additions & 2 deletions

@@ -1111,14 +1111,14 @@ def callback_cfg_params(self) -> frozenset:
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
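The same setUp/tearDown hygiene now appears in every touched suite. A condensed sketch of the shared pattern, with torch.cuda.empty_cache() standing in for the repo's device-agnostic backend_empty_cache(torch_device) helper:

import gc
import unittest

import torch


class CompileCacheHygiene(unittest.TestCase):
    def setUp(self):
        super().setUp()
        torch.compiler.reset()  # drop torch.compile state left over from other tests
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def tearDown(self):
        super().tearDown()
        torch.compiler.reset()  # in case the test itself compiled anything
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()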
