Skip to content

Commit ae142ab

Browse files
s390x: fix periodic tests build (pytorch#168001)
It looks like building python_call.cpp with -O3 triggers a bug in gcc-14. As a workaround, ignore the offending warning on s390x in the code. Build failure link: https://github.com/pytorch/pytorch/actions/runs/19423391774/job/55584553077 GCC bug reference: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115016 In addition, fix the docker image names for the s390x test workflows, similar to the build workflows, and remove the fail marks from a couple of tests. Pull Request resolved: pytorch#168001 Approved by: https://github.com/seemethere
1 parent a6b6383 commit ae142ab

File tree

4 files changed

+15
-4
lines changed

4 files changed

+15
-4
lines changed

.github/workflows/_linux-test.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ jobs:
327327
SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
328328
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
329329
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
330+
DOCKER_IMAGE_S390X: ${{ inputs.docker-image }}
330331
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
331332
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
332333
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
@@ -360,10 +361,12 @@ jobs:
360361
# if for some reason cleanup action doesn't stop container
361362
# when job is cancelled
362363
DOCKER_SHELL_CMD="sleep 12h"
364+
USED_IMAGE="${DOCKER_IMAGE_S390X}"
363365
else
364366
SHM_OPTS="--shm-size=${SHM_SIZE}"
365367
JENKINS_USER="--user jenkins"
366368
DOCKER_SHELL_CMD=
369+
USED_IMAGE="${DOCKER_IMAGE}"
367370
fi
368371
369372
# detached container should get cleaned up by teardown_ec2_linux
@@ -426,7 +429,7 @@ jobs:
426429
${JENKINS_USER} \
427430
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
428431
-w /var/lib/jenkins/workspace \
429-
"${DOCKER_IMAGE}" \
432+
"${USED_IMAGE}" \
430433
${DOCKER_SHELL_CMD}
431434
)
432435
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"

test/dynamo/test_structured_trace.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from torch._inductor.test_case import TestCase
2222
from torch._logging._internal import TorchLogsFormatter
2323
from torch.nn.parallel import DistributedDataParallel as DDP
24-
from torch.testing._internal.common_utils import find_free_port, xfailIfS390X
24+
from torch.testing._internal.common_utils import find_free_port
2525
from torch.testing._internal.triton_utils import requires_cuda_and_triton
2626

2727

@@ -1017,7 +1017,6 @@ def fn(a):
10171017
logs = self.buffer.getvalue()
10181018
self.assertTrue(all(event in logs for event in chromium_events))
10191019

1020-
@xfailIfS390X
10211020
@requires_tlparse
10221021
@torch._dynamo.config.patch("compiled_autograd", True)
10231022
def test_compiled_autograd_attribution(self):

test/inductor/test_torchinductor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2172,7 +2172,6 @@ def fn(a):
21722172

21732173
@skipCPUIf(IS_MACOS, "fails on macos")
21742174
@skip_if_halide # accuracy 4.7% off
2175-
@xfailIfS390X # accuracy failure
21762175
def test_multilayer_var_lowp(self):
21772176
def fn(a):
21782177
return torch.var(a)

torch/csrc/distributed/rpc/python_call.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ PythonCall::PythonCall(SerializedPyObj&& serializedPyObj, bool isAsyncExecution)
66
: serializedPyObj_(std::move(serializedPyObj)),
77
isAsyncExecution_(isAsyncExecution) {}
88

9+
#if defined(__GNUC__) && __GNUC__ == 14
10+
/* This warning is falsely triggered by gcc-14 in the following function. */
11+
#pragma GCC diagnostic push
12+
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
13+
#endif
14+
915
c10::intrusive_ptr<Message> PythonCall::toMessageImpl() && {
1016
std::vector<char> payload;
1117
payload.reserve(serializedPyObj_.payload_.length() + 1);
@@ -21,6 +27,10 @@ c10::intrusive_ptr<Message> PythonCall::toMessageImpl() && {
2127
MessageType::PYTHON_CALL);
2228
}
2329

30+
#if defined(__GNUC__) && __GNUC__ == 14
31+
#pragma GCC diagnostic pop
32+
#endif
33+
2434
std::unique_ptr<PythonCall> PythonCall::fromMessage(const Message& message) {
2535
TORCH_INTERNAL_ASSERT(
2636
!message.payload().empty(),

0 commit comments

Comments
 (0)