Commit dfd39fe

atalman, clee2000, and malfet authored
[cherry-pick] [CI] Disable some tests that are failing in periodic pytorch#150059 (pytorch#150327)
* [CI] Disable some tests that are failing in periodic (pytorch#150059)

  Disabling some tests to restore periodic.

  nogpu avx512 timeout:
  https://hud.pytorch.org/pytorch/pytorch/commit/59f14d19aea4091c65cca2417c509e3dbf60c0ed#38492953496-box

  profiler failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/7ae0ce6360b6e4f944906502d20da24c04debee5#38461255009-box

  test_accelerator failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/87bfd66c3c7061db6d36d8daa62f08f507f90e39#39476723746-box
  origin: 146098

  test_overrides failure:
  https://hud.pytorch.org/pytorch/pytorch/commit/bf752c36da08871d76a66fd52ad09f87e66fc770#39484562957-box
  origin: 146098

  inductor cpu repro:
  https://hud.pytorch.org/pytorch/pytorch/commit/bb9c4260249ea0c57e87395eff5271fb479efb6a#38447525659-box

  functorch eager transforms:
  https://hud.pytorch.org/pytorch/pytorch/commit/8f858e226ba81fde41d39aa34f1fd4cb4a4ecc51#39488068620-box
  https://hud.pytorch.org/pytorch/pytorch/commit/f2cea01f7195e59abd154b5551213ee3e38fa40d#39555064878
  https://hud.pytorch.org/pytorch/pytorch/commit/b5281a4a1806c978e34c5cfa0befd298e469b7fd#39599355600
  either 148288 or 148261?
  https://hud.pytorch.org/hud/pytorch/pytorch/2ec9aceaeb77176c4bdeb2d008a34cba0cd57e3c/1?per_page=100&name_filter=periodic&mergeLF=true

  Pull Request resolved: pytorch#150059
  Approved by: https://github.com/ZainRizvi, https://github.com/atalman, https://github.com/malfet

* disable_CompiledOptimizerParityTests

* Update test/inductor/test_compiled_optimizers.py

---------

Co-authored-by: Catherine Lee <[email protected]>
Co-authored-by: Nikita Shulga <[email protected]>
1 parent b766c02 commit dfd39fe
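
Every file below applies the same basic technique: gate a known-bad test behind a condition that identifies the failing CI configuration, and put the PR URL in the skip reason so the trail back to this commit survives. A minimal self-contained sketch of that pattern, with a hypothetical probe standing in for the commit's real conditions (TEST_CUDA_MEM_LEAK_CHECK, ATEN_CPU_CAPABILITY checks, and a substring search in torch.__config__.show()):

    import os
    import unittest

    # Hypothetical probe for illustration only; the commit's real conditions
    # are listed in the lead-in above.
    RUNNING_FAILING_CONFIG = os.getenv("ATEN_CPU_CAPABILITY") == "default"

    class ExampleTest(unittest.TestCase):
        @unittest.skipIf(
            RUNNING_FAILING_CONFIG,
            "Failing in periodic, see https://github.com/pytorch/pytorch/pull/150059 for example",
        )
        def test_known_bad_on_this_config(self):
            self.assertEqual(2 + 2, 4)

    if __name__ == "__main__":
        unittest.main()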

File tree: 7 files changed, 45 additions & 3 deletions


.github/workflows/periodic.yml

Lines changed: 3 additions & 2 deletions

@@ -59,8 +59,9 @@ jobs:
     docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
     test-matrix: |
       { include: [
-        { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-        { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+        { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
         { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

test/functorch/test_eager_transforms.py

Lines changed: 21 additions & 0 deletions

@@ -74,6 +74,7 @@
     skipIfRocm,
     skipIfTorchDynamo,
     subtest,
+    TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_TORCHDYNAMO,
     TestCase,
     xfailIfTorchDynamo,
@@ -2865,6 +2866,10 @@ def fn(x):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_return(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -2879,6 +2884,10 @@ def fn(x):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_vmap(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
@@ -2897,6 +2906,10 @@ def jvp_fn(x_t):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_grad(self, device, dtype):
         x_p = make_tensor((3,), device=device, dtype=dtype)
         x_t = make_tensor((3,), device=device, dtype=dtype)
@@ -2916,6 +2929,10 @@ def jvp_fn(x_t):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_nested_input_nested_output(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
     # torch.compile is not supported on Windows CUDA.
     # Triton only supports GPU with SM70 or later.
     @expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_compile_vmap_hessian(self, device):
         # The model and inputs are a smaller version
         # of code at benchmark repo:
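
TEST_CUDA_MEM_LEAK_CHECK is exported by torch.testing._internal.common_utils and reflects whether the suite runs with CUDA memory-leak checking enabled, the mode these linearize tests trip. A hedged sketch of how such a flag is typically derived from the environment; the authoritative definition lives in common_utils.py and may differ in detail:

    import os

    # Assumed shape of the flag: common_utils keys it off an env var that CI
    # sets for the mem-leak-check jobs (commonly PYTORCH_TEST_CUDA_MEM_LEAK_CHECK).
    TEST_CUDA_MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"
    print("leak check active:", TEST_CUDA_MEM_LEAK_CHECK)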

test/inductor/test_compiled_optimizers.py

Lines changed: 1 addition & 1 deletion

@@ -568,7 +568,7 @@ def test_fn(self):


 class CompiledOptimizerParityTests(TestCase):
-    @skipCUDAIf(not has_triton(), "torch.compile with cuda requires triton")
+    @skipCUDAIf(True, "failing Adam and RMSprop")
     @skipXPUIf(not has_triton(), "torch.compile with xpu requires triton")
     @optims(optim_db, dtypes=[torch.float32])
     @parametrize("use_closure", [True, False])

test/inductor/test_cpu_repro.py

Lines changed: 4 additions & 0 deletions

@@ -4131,6 +4131,10 @@ def forward(self, x):
             "__at_align__ std::array", 0, exactly=True
         ).run(code)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_input(self):
         class M(torch.nn.Module):
             def __init__(self) -> None:
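
ATEN_CPU_CAPABILITY caps which SIMD kernel variants ATen dispatches to, and the nogpu_NO_AVX2 job pins it to "default" (scalar kernels only), the configuration where test_group_norm_large_input fails. To reproduce that environment locally, the variable has to be set before torch loads; a sketch assuming a recent PyTorch that provides torch.backends.cpu.get_cpu_capability():

    import os

    # Must be set before importing torch: the CPU dispatcher reads it at init.
    os.environ["ATEN_CPU_CAPABILITY"] = "default"

    import torch

    # Expected to report "DEFAULT" under this cap (API availability assumed).
    print(torch.backends.cpu.get_cpu_capability())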

test/profiler/test_profiler.py

Lines changed: 4 additions & 0 deletions

@@ -2169,6 +2169,10 @@ def validate_json(prof, disable_external_correlation):
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(
+        "RelWithAssert" in torch.__config__.show(),
+        "failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_profile_all_threads(self):
         profiling_started = threading.Event()
         profiling_ended = threading.Event()
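
torch.__config__.show() returns a multi-line build summary string, so a substring test for "RelWithAssert" is a cheap way to detect the assert-enabled builds where test_profile_all_threads fails. Checking what a local build reports:

    import torch

    build_info = torch.__config__.show()
    # True on PyTorch's assert-enabled CI builds; release wheels normally
    # report a plain Release build type instead.
    print("RelWithAssert" in build_info)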

test/test_accelerator.py

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,8 @@
 if not torch.accelerator.is_available():
     print("No available accelerator detected, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811
+    # Skip because failing when run on cuda build with no GPU, see #150059 for example
+    sys.exit()

 TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1

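Unlike the per-test decorators elsewhere in the commit, this guard (and the similar one added to test_overrides.py below) abandons the whole file at import time: on a CUDA build running on a machine with no GPU, torch.accelerator.is_available() is false and nothing in the file can run meaningfully, so the module exits with status 0 before any test is collected. The same whole-file pattern in isolation, with a hypothetical availability probe:

    import sys
    import unittest

    # Hypothetical stand-in for a probe like torch.accelerator.is_available().
    ACCELERATOR_AVAILABLE = False

    if not ACCELERATOR_AVAILABLE:
        print("No available accelerator detected, skipping tests", file=sys.stderr)
        sys.exit()  # exits with status 0, so CI records the file as skipped, not failed

    class ExampleTest(unittest.TestCase):
        def test_requires_accelerator(self):
            self.assertTrue(ACCELERATOR_AVAILABLE)

    if __name__ == "__main__":
        unittest.main()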
test/test_overrides.py

Lines changed: 10 additions & 0 deletions

@@ -1,5 +1,6 @@
 # Owner(s): ["module: __torch_function__"]

+import sys
 import torch
 import numpy as np
 import inspect
@@ -9,6 +10,7 @@
 import collections
 import unittest
 import contextlib
+import os

 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
 from torch.overrides import (
@@ -29,6 +31,14 @@

 Tensor = torch.Tensor

+if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
+    # This test is not supported on ARM
+    print(
+        "Skipping due to failing when cuda build runs on non cuda machine, "
+        + "see https://github.com/pytorch/pytorch/pull/150059 for example"
+    )
+    sys.exit()
+
 # The functions below simulate the pure-python torch functions in the
 # torch.functional namespace. We use examples local to this file rather
 # than any of the real examples implemented in Python since in the
