Commit dfd39fe

atalman, clee2000, and malfet authored
[cherry-pick] [CI] Disable some tests that are failing in periodic pytorch#150059 (pytorch#150327)
* [CI] Disable some tests that are failing in periodic (pytorch#150059)

  Disabling some tests to restore periodic.

  nogpu avx512 timeout:
  https://hud.pytorch.org/pytorch/pytorch/commit/59f14d19aea4091c65cca2417c509e3dbf60c0ed#38492953496-box

  profiler failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/7ae0ce6360b6e4f944906502d20da24c04debee5#38461255009-box

  test_accelerator failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/87bfd66c3c7061db6d36d8daa62f08f507f90e39#39476723746-box
  origin: 146098

  test_overrides failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/bf752c36da08871d76a66fd52ad09f87e66fc770#39484562957-box
  origin: 146098

  inductor cpu repro:
  https://hud.pytorch.org/pytorch/pytorch/commit/bb9c4260249ea0c57e87395eff5271fb479efb6a#38447525659-box

  functorch eager transforms:
  https://hud.pytorch.org/pytorch/pytorch/commit/8f858e226ba81fde41d39aa34f1fd4cb4a4ecc51#39488068620-box
  https://hud.pytorch.org/pytorch/pytorch/commit/f2cea01f7195e59abd154b5551213ee3e38fa40d#39555064878
  https://hud.pytorch.org/pytorch/pytorch/commit/b5281a4a1806c978e34c5cfa0befd298e469b7fd#39599355600
  either 148288 or 148261?
  https://hud.pytorch.org/hud/pytorch/pytorch/2ec9aceaeb77176c4bdeb2d008a34cba0cd57e3c/1?per_page=100&name_filter=periodic&mergeLF=true

  Pull Request resolved: pytorch#150059
  Approved by: https://github.com/ZainRizvi, https://github.com/atalman, https://github.com/malfet

* disable_CompiledOptimizerParityTests

* Update test/inductor/test_compiled_optimizers.py

---------

Co-authored-by: Catherine Lee <[email protected]>
Co-authored-by: Nikita Shulga <[email protected]>
1 parent b766c02 commit dfd39fe
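
Every file below applies the same basic technique: gate a known-bad test behind a condition that identifies the failing CI configuration, and put the PR URL in the skip reason so the trail back to this commit survives. A minimal self-contained sketch of that pattern, with a hypothetical probe standing in for the commit's real conditions (TEST_CUDA_MEM_LEAK_CHECK, ATEN_CPU_CAPABILITY checks, and a substring search in torch.__config__.show()):

    import os
    import unittest

    # Hypothetical probe for illustration only; the commit's real conditions
    # are listed in the lead-in above.
    RUNNING_FAILING_CONFIG = os.getenv("ATEN_CPU_CAPABILITY") == "default"

    class ExampleTest(unittest.TestCase):
        @unittest.skipIf(
            RUNNING_FAILING_CONFIG,
            "Failing in periodic, see https://github.com/pytorch/pytorch/pull/150059 for example",
        )
        def test_known_bad_on_this_config(self):
            self.assertEqual(2 + 2, 4)

    if __name__ == "__main__":
        unittest.main()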

File tree: 7 files changed, 45 additions & 3 deletions


.github/workflows/periodic.yml

Lines changed: 3 additions & 2 deletions

@@ -59,8 +59,9 @@ jobs:
     docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
     test-matrix: |
       { include: [
-        { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-        { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

test/functorch/test_eager_transforms.py

Lines changed: 21 additions & 0 deletions

@@ -74,6 +74,7 @@
     skipIfRocm,
     skipIfTorchDynamo,
     subtest,
+    TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_TORCHDYNAMO,
     TestCase,
     xfailIfTorchDynamo,
@@ -2865,6 +2866,10 @@ def fn(x):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_return(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -2879,6 +2884,10 @@ def fn(x):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_vmap(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
@@ -2897,6 +2906,10 @@ def jvp_fn(x_t):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_grad(self, device, dtype):
         x_p = make_tensor((3,), device=device, dtype=dtype)
         x_t = make_tensor((3,), device=device, dtype=dtype)
@@ -2916,6 +2929,10 @@ def jvp_fn(x_t):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_nested_input_nested_output(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
     # torch.compile is not supported on Windows CUDA.
     # Triton only supports GPU with SM70 or later.
     @expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_compile_vmap_hessian(self, device):
         # The model and inputs are a smaller version
         # of code at benchmark repo:
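
TEST_CUDA_MEM_LEAK_CHECK is exported by torch.testing._internal.common_utils and reflects whether the suite runs with CUDA memory-leak checking enabled, the mode these linearize tests trip. A hedged sketch of how such a flag is typically derived from the environment; the authoritative definition lives in common_utils.py and may differ in detail:

    import os

    # Assumed shape of the flag: common_utils keys it off an env var that CI
    # sets for the mem-leak-check jobs (commonly PYTORCH_TEST_CUDA_MEM_LEAK_CHECK).
    TEST_CUDA_MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"
    print("leak check active:", TEST_CUDA_MEM_LEAK_CHECK)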

test/inductor/test_compiled_optimizers.py

Lines changed: 1 addition & 1 deletion

@@ -568,7 +568,7 @@ def test_fn(self):


 class CompiledOptimizerParityTests(TestCase):
-    @skipCUDAIf(not has_triton(), "torch.compile with cuda requires triton")
+    @skipCUDAIf(True, "failing Adam and RMSprop")
     @skipXPUIf(not has_triton(), "torch.compile with xpu requires triton")
     @optims(optim_db, dtypes=[torch.float32])
     @parametrize("use_closure", [True, False])

test/inductor/test_cpu_repro.py

Lines changed: 4 additions & 0 deletions

@@ -4131,6 +4131,10 @@ def forward(self, x):
             "__at_align__ std::array", 0, exactly=True
         ).run(code)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_input(self):
         class M(torch.nn.Module):
             def __init__(self) -> None:
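
ATEN_CPU_CAPABILITY caps which SIMD kernel variants ATen dispatches to, and the nogpu_NO_AVX2 job pins it to "default" (scalar kernels only), the configuration where test_group_norm_large_input fails. To reproduce that environment locally, the variable has to be set before torch loads; a sketch assuming a recent PyTorch that provides torch.backends.cpu.get_cpu_capability():

    import os

    # Must be set before importing torch: the CPU dispatcher reads it at init.
    os.environ["ATEN_CPU_CAPABILITY"] = "default"

    import torch

    # Expected to report "DEFAULT" under this cap (API availability assumed).
    print(torch.backends.cpu.get_cpu_capability())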

test/profiler/test_profiler.py

Lines changed: 4 additions & 0 deletions

@@ -2169,6 +2169,10 @@ def validate_json(prof, disable_external_correlation):
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(
+        "RelWithAssert" in torch.__config__.show(),
+        "failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_profile_all_threads(self):
         profiling_started = threading.Event()
         profiling_ended = threading.Event()
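
torch.__config__.show() returns a multi-line build summary string, so a substring test for "RelWithAssert" is a cheap way to detect the assert-enabled builds where test_profile_all_threads fails. Checking what a local build reports:

    import torch

    build_info = torch.__config__.show()
    # True on PyTorch's assert-enabled CI builds; release wheels normally
    # report a plain Release build type instead.
    print("RelWithAssert" in build_info)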

test/test_accelerator.py

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,8 @@
 if not torch.accelerator.is_available():
     print("No available accelerator detected, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811
+    # Skip because failing when run on cuda build with no GPU, see #150059 for example
+    sys.exit()

 TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1

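Unlike the per-test decorators elsewhere in the commit, this guard (and the similar one added to test_overrides.py below) abandons the whole file at import time: on a CUDA build running on a machine with no GPU, torch.accelerator.is_available() is false and nothing in the file can run meaningfully, so the module exits with status 0 before any test is collected. The same whole-file pattern in isolation, with a hypothetical availability probe:

    import sys
    import unittest

    # Hypothetical stand-in for a probe like torch.accelerator.is_available().
    ACCELERATOR_AVAILABLE = False

    if not ACCELERATOR_AVAILABLE:
        print("No available accelerator detected, skipping tests", file=sys.stderr)
        sys.exit()  # exits with status 0, so CI records the file as skipped, not failed

    class ExampleTest(unittest.TestCase):
        def test_requires_accelerator(self):
            self.assertTrue(ACCELERATOR_AVAILABLE)

    if __name__ == "__main__":
        unittest.main()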
test/test_overrides.py

Lines changed: 10 additions & 0 deletions

@@ -1,5 +1,6 @@
 # Owner(s): ["module: __torch_function__"]

+import sys
 import torch
 import numpy as np
 import inspect
@@ -9,6 +10,7 @@
 import collections
 import unittest
 import contextlib
+import os

 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
 from torch.overrides import (
@@ -29,6 +31,14 @@

 Tensor = torch.Tensor

+if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
+    # This test is not supported on ARM
+    print(
+        "Skipping due to failing when cuda build runs on non cuda machine, "
+        + "see https://github.com/pytorch/pytorch/pull/150059 for example"
+    )
+    sys.exit()
+
 # The functions below simulate the pure-python torch functions in the
 # torch.functional namespace. We use examples local to this file rather
 # than any of the real examples implemented in Python since in the
