Skip to content

Commit 061033b

Browse files
authored
Start using rbe workers (#401)
* Start using rbe workers
* Fix sanitizer ignore file paths
* Use local test runner for multigpu
* Trigger CI/CD pipeline
* Avoid copying data files over if non hermetic build is used
* Use remote builders
* Use different pool for tests
* Run tests only remotely since we don't have 2 queues yet
* Address review comments
* Address review comments
* Build remotely, test locally
* Switch to build locally, we have too few remote nodes
* Exclude failing tests
* Add tsan ignore
* Ignore collective_ops_e2e_tests
* Ignore collective_ops_e2e_tests
* Remove not needed opts
* Ignore tsan errors
* Ignore failing testcases affected by tsan
* Add additional rbe flags
* Address review comments
1 parent 74b4b8d commit 061033b

File tree

9 files changed

+185
-92
lines changed

9 files changed

+185
-92
lines changed

build_tools/ci/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,5 @@ diff_test(
4545
sh_binary(
4646
name = "parallel_gpu_execute",
4747
srcs = ["parallel_gpu_execute.sh"],
48-
deps = ["//build_tools/rocm:sanitizer_ignore_lists"],
48+
data = ["//build_tools/rocm:sanitizer_ignore_lists"],
4949
)

build_tools/ci/parallel_gpu_execute.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# Required environment variables:
2222
# TF_GPU_COUNT = Number of GPUs available.
2323

24-
TF_GPU_COUNT=${TF_GPU_COUNT:-4}
24+
TF_GPU_COUNT=$(lspci | grep -e 'controller' -e 'accelerators' | grep 'AMD/ATI' | wc -l)
2525
TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-8}
2626

2727
# This function is used below in rlocation to check that a path is absolute

build_tools/rocm/platform/BUILD

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright 2025 The OpenXLA Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
16+
package(default_visibility = ["//visibility:public"])

build_tools/rocm/platform/linux_x64/BUILD

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,22 @@ platform(
1111
"@bazel_tools//tools/cpp:clang",
1212
],
1313
exec_properties = {
14-
"container-image": "docker://rocm/tensorflow-build:2.19-jammy-python3.9-rocm6.4.0@sha256:7b42dafbe5401fd84cf951a63f16dd44a84c64a6ba2eeb7675fd0919f03b2bdf",
14+
"container-image": "rocm/tensorflow-build@sha256:7cd444ac48657fee2f5087fbda7766266704d3f8fb2299f681952ae4eabed060",
1515
"OSFamily": "Linux",
16+
"Pool": "linux_x64_large",
17+
},
18+
)
19+
20+
platform(
21+
name = "linux_x64_gpu",
22+
constraint_values = [
23+
"@platforms//os:linux",
24+
"@platforms//cpu:x86_64",
25+
"@bazel_tools//tools/cpp:clang",
26+
],
27+
exec_properties = {
28+
"container-image": "rocm/tensorflow-build@sha256:7cd444ac48657fee2f5087fbda7766266704d3f8fb2299f681952ae4eabed060",
29+
"OSFamily": "Linux",
30+
"Pool": "amd_gpu",
1631
},
1732
)

build_tools/rocm/rocm_xla.bazelrc

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,21 @@ build:rocm_dev --remote_cache="https://wardite.cluster.engflow.com"
55

66
build:rocm_rbe --bes_backend="grpcs://wardite.cluster.engflow.com"
77
build:rocm_rbe --bes_results_url="https://wardite.cluster.engflow.com/invocation/"
8-
build:rocm_rbe --host_platform="//build_tools/rocm/platform/linux_x64"
9-
build:rocm_rbe --extra_execution_platforms="//build_tools/rocm/platform/linux_x64"
10-
build:rocm_rbe --platforms="//build_tools/rocm/platform/linux_x64"
8+
build:rocm_rbe --host_platform="//build_tools/rocm/platform/linux_x64:linux_x64_gpu"
9+
build:rocm_rbe --extra_execution_platforms="//build_tools/rocm/platform/linux_x64:linux_x64_gpu"
10+
build:rocm_rbe --platforms="//build_tools/rocm/platform/linux_x64:linux_x64_gpu"
1111
build:rocm_rbe --bes_timeout=600s
1212
build:rocm_rbe --tls_client_certificate="/tf/certificates/ci-cert.crt"
1313
build:rocm_rbe --tls_client_key="/tf/certificates/ci-cert.key"
14+
build:rocm_rbe --remote_executor="grpcs://wardite.cluster.engflow.com"
15+
build:rocm_rbe --remote_cache="grpcs://wardite.cluster.engflow.com"
1416
build:rocm_rbe --spawn_strategy=local
1517
build:rocm_rbe --jobs=200
16-
build:rocm_rbe --remote_executor=grpcs://wardite.cluster.engflow.com
1718
build:rocm_rbe --remote_timeout=3600
19+
build:rocm_rbe --remote_download_minimal
20+
build:rocm_rbe --remote_upload_local_results
1821

1922
test:rocm_rbe --strategy=TestRunner=local
20-
test:rocm_rbe --worker_sandboxing=true
2123

2224
build:asan --strip=never
2325
build:asan --copt -fsanitize=address
@@ -28,6 +30,8 @@ build:asan --linkopt -g
2830
build:asan --copt -fno-omit-frame-pointer
2931
build:asan --linkopt -fsanitize=address
3032
build:asan --//build_tools/rocm:sanitizer=asan
33+
build:asan --test_env=ASAN_OPTIONS=suppressions=build_tools/rocm/asan_ignore_list.txt:use_sigaltstack=0
34+
build:asan --test_env=LSAN_OPTIONS=suppressions=build_tools/rocm/lsan_ignore_list.txt:use_sigaltstack=0
3135

3236
build:tsan --strip=never
3337
build:tsan --copt -fsanitize=thread
@@ -36,8 +40,9 @@ build:tsan --copt -fno-omit-frame-pointer
3640
build:tsan --linkopt -fsanitize=thread
3741
build:tsan --linkopt -g
3842
build:tsan --//build_tools/rocm:sanitizer=tsan
43+
build:tsan --test_env=TSAN_OPTIONS=suppressions=build_tools/rocm/tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1
3944

40-
test:xla_sgpu -- \
45+
build:xla_sgpu -- \
4146
//xla/... \
4247
-//xla/backends/gpu/collectives:gpu_clique_key_test \
4348
-//xla/backends/gpu/collectives:nccl_communicator_test \

build_tools/rocm/run_xla.sh

Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -41,61 +41,58 @@ export PYTHON_BIN_PATH=`which python3`
4141
export TF_NEED_ROCM=1
4242
export ROCM_PATH="/opt/rocm"
4343

44-
GPU_NAME=(`rocminfo | grep -m 1 gfx`)
45-
GPU_NAME=${GPU_NAME[1]}
46-
4744
EXCLUDED_TESTS=(
48-
# //xla/service/gpu/tests:gpu_kernel_tiling_test_gpu_amd_any
49-
GpuKernelTilingTest.ColumnReductionWithLayoutChangeTiled
50-
GpuKernelTilingTest.ReductionInputTooLarge
51-
# //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
52-
PjrtCAPIGpuExtensionTest.TritonCompile
53-
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
54-
TritonEmitterTest.CheckRocmWarpSize
55-
TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
56-
TritonEmitterTest.FP8ToFP8EndToEnd
57-
TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
58-
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
59-
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
60-
TritonGemmTest.BroadcastOfVectorConstantIsFused
61-
TritonGemmTest.FailIfTooMuchShmem
62-
TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
63-
# //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
64-
TritonTest.NonstandardLayoutWithManyNonContractingDims
65-
TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
66-
# //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
67-
RandomEighTestInstantiation/RandomEighTest.Random/*
68-
# temp excludes for 0.7.1
69-
CompareTest.SplitK
70-
TritonEmitterTest.RocmWarpSizeIsSetCorrectly
71-
MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
72-
TestRadixSort/CubSortKeysTest.SortKeys/*
73-
GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
74-
CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
75-
CommandBufferConversionPassTest.ConvertWhileThunk
76-
TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
77-
TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
78-
TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
79-
TritonGemmTest.FailForTooComplexTiling
80-
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
81-
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
82-
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
83-
TestRadixSort/CubSortPairsTest.SortPairs/*
84-
GpuKernelTilingTest.ReductionInputTooLarge
85-
DeterminismTest.Conv
86-
TopKTests/TopKKernelTest*
87-
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
88-
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
89-
TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
90-
ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
91-
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
92-
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
93-
TritonTest.FuseSubchannelDequantizationWithTranspose
94-
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
95-
CommandBufferTests/CommandBufferTest.IndexConditional/*
96-
CommandBufferTests/CommandBufferTest.WhileLoop/*
97-
CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
98-
BufferComparatorTest.VeryLargeArray_Device_U8_Aligned
45+
# //xla/service/gpu/tests:gpu_kernel_tiling_test_gpu_amd_any
46+
GpuKernelTilingTest.ColumnReductionWithLayoutChangeTiled
47+
GpuKernelTilingTest.ReductionInputTooLarge
48+
# //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
49+
PjrtCAPIGpuExtensionTest.TritonCompile
50+
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
51+
TritonEmitterTest.CheckRocmWarpSize
52+
TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
53+
TritonEmitterTest.FP8ToFP8EndToEnd
54+
TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
55+
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
56+
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
57+
TritonGemmTest.BroadcastOfVectorConstantIsFused
58+
TritonGemmTest.FailIfTooMuchShmem
59+
TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
60+
# //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
61+
TritonTest.NonstandardLayoutWithManyNonContractingDims
62+
TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
63+
# //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
64+
RandomEighTestInstantiation/RandomEighTest.Random/*
65+
# temp excludes for 0.7.1
66+
CompareTest.SplitK
67+
TritonEmitterTest.RocmWarpSizeIsSetCorrectly
68+
MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
69+
TestRadixSort/CubSortKeysTest.SortKeys/*
70+
GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
71+
CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
72+
CommandBufferConversionPassTest.ConvertWhileThunk
73+
TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
74+
TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
75+
TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
76+
TritonGemmTest.FailForTooComplexTiling
77+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
78+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
79+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
80+
TestRadixSort/CubSortPairsTest.SortPairs/*
81+
GpuKernelTilingTest.ReductionInputTooLarge
82+
DeterminismTest.Conv
83+
TopKTests/TopKKernelTest*
84+
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
85+
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
86+
TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
87+
ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
88+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
89+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
90+
TritonTest.FuseSubchannelDequantizationWithTranspose
91+
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
92+
CommandBufferTests/CommandBufferTest.IndexConditional/*
93+
CommandBufferTests/CommandBufferTest.WhileLoop/*
94+
CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
95+
BufferComparatorTest.VeryLargeArray_Device_U8_Aligned
9996
)
10097

10198
BAZEL_DISK_CACHE_SIZE=100G
@@ -110,15 +107,20 @@ TAG_FILTERS=$($SCRIPT_DIR/rocm_tag_filters.sh),-multigpu,-multi_gpu_h100,require
110107

111108
SANITIZER_ARGS=()
112109
if [[ $1 == "asan" ]]; then
113-
SANITIZER_ARGS+=("--test_env=ASAN_OPTIONS=suppressions=${SCRIPT_DIR}/asan_ignore_list.txt:use_sigaltstack=0")
114-
SANITIZER_ARGS+=("--test_env=LSAN_OPTIONS=suppressions=${SCRIPT_DIR}/lsan_ignore_list.txt:use_sigaltstack=0")
115110
SANITIZER_ARGS+=("--config=asan")
116111
TAG_FILTERS=$TAG_FILTERS,-noasan
117112
shift
118113
elif [[ $1 == "tsan" ]]; then
119-
SANITIZER_ARGS+=("--test_env=TSAN_OPTIONS=suppressions=${SCRIPT_DIR}/tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1")
120114
SANITIZER_ARGS+=("--config=tsan")
121115
TAG_FILTERS=$TAG_FILTERS,-notsan
116+
# excluded from tsan
117+
EXCLUDED_TESTS+=(
118+
# //xla/tests:collective_ops_e2e_test_amdgpu_any
119+
CollectiveOpsTestE2E*
120+
# //xla/backends/gpu/runtime:host_execute_thunk_test_amdgpu_any
121+
HostExecuteStartThunkTest*
122+
HostExecuteDoneThunkTest*
123+
)
122124
shift
123125
fi
124126

@@ -138,8 +140,6 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
138140
--keep_going \
139141
--local_test_jobs=${N_TEST_JOBS} \
140142
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
141-
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
142-
--action_env=TF_ROCM_AMDGPU_TARGETS=${GPU_NAME} \
143143
--action_env=XLA_FLAGS="--xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
144144
--run_under=//build_tools/ci:parallel_gpu_execute \
145145
--test_env=MIOPEN_FIND_ENFORCE=5 \
@@ -151,4 +151,4 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
151151
# clean up bazel disk_cache
152152
bazel shutdown \
153153
--disk_cache=${BAZEL_DISK_CACHE_DIR} \
154-
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
154+
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}

build_tools/rocm/run_xla_multi_gpu.sh

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,6 @@ export PYTHON_BIN_PATH=`which python3`
5252
export TF_NEED_ROCM=1
5353
export ROCM_PATH="/opt/rocm"
5454

55-
GPU_NAME=(`rocminfo | grep -m 1 gfx`)
56-
GPU_NAME=${GPU_NAME[1]}
57-
5855
BAZEL_DISK_CACHE_SIZE=100G
5956
BAZEL_DISK_CACHE_DIR="/tf/disk_cache/rocm-jaxlib-v0.7.1"
6057
mkdir -p ${BAZEL_DISK_CACHE_DIR}
@@ -66,24 +63,43 @@ EXCLUDED_TESTS=(
6663
CollectiveOpsTestE2E.MemcpyP2pLargeMessage
6764
RaggedAllToAllTest/RaggedAllToAllTest.RaggedAllToAll_8GPUs_2ReplicasPerGroups/sync_decomposer
6865
RaggedAllToAllTest/RaggedAllToAllTest.RaggedAllToAll_8GPUs_2ReplicasPerGroups/async_decomposer
66+
# //xla/backends/gpu/codegen/triton:fusion_emitter_parametrized_legacy_test_amdgpu_any
67+
ElementwiseTestSuiteF32/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f32_atan2
68+
# //xla/tests:collective_ops_e2e_test_amdgpu_any
69+
CollectiveOpsTestE2EPipelinedNonPipelined.CollectivePipelinerBackward
70+
CollectiveOpsTestE2EPipelinedNonPipelined.CollectivePipelinerBackwardStartFromOne
71+
# //xla/tools/multihost_hlo_runner:functional_hlo_runner_test
72+
FunctionalHloRunnerTest.Sharded2DevicesHloUnoptimizedSnapshot
73+
FunctionalHloRunnerTest.ShardedComputationUnderStreamCapture
74+
6975
)
7076

7177
SCRIPT_DIR=$(realpath $(dirname $0))
7278
TAG_FILTERS="$($SCRIPT_DIR/rocm_tag_filters.sh)"
7379

7480
SANITIZER_ARGS=()
7581
if [[ $1 == "asan" ]]; then
76-
SANITIZER_ARGS+=("--test_env=ASAN_OPTIONS=suppressions=${SCRIPT_DIR}/asan_ignore_list.txt:use_sigaltstack=0")
77-
SANITIZER_ARGS+=("--test_env=LSAN_OPTIONS=suppressions=${SCRIPT_DIR}/lsan_ignore_list.txt:use_sigaltstack=0")
7882
SANITIZER_ARGS+=("--run_under=//build_tools/rocm:sanitizer_wrapper")
7983
SANITIZER_ARGS+=("--config=asan")
8084
TAG_FILTERS="$TAG_FILTERS,-noasan"
8185
shift
8286
elif [[ $1 == "tsan" ]]; then
83-
SANITIZER_ARGS+=("--test_env=TSAN_OPTIONS=suppressions=${SCRIPT_DIR}/tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1")
8487
SANITIZER_ARGS+=("--run_under=//build_tools/rocm:sanitizer_wrapper")
8588
SANITIZER_ARGS+=("--config=tsan")
8689
TAG_FILTERS="$TAG_FILTERS,-notsan"
90+
# excluded from tsan
91+
EXCLUDED_TESTS+=(
92+
CollectiveOpsTest*
93+
Fp8CollectiveOpsTest.AllGather_8BitFloat
94+
Fp8CollectiveOpsTest.CollectivePermute_8BitFloat
95+
Fp8CollectiveOpsTest.AllToAll_8BitFloat
96+
AsyncCollectiveOps*
97+
AllReduceTest*
98+
RaggedAllToAllTest*
99+
AsyncCollectiveOps*
100+
AsyncMemcpyCollectiveOps*
101+
RaggedAllToAllTest*
102+
)
87103
shift
88104
fi
89105

@@ -102,15 +118,15 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
102118
--flaky_test_attempts=3 \
103119
--keep_going \
104120
--test_strategy=exclusive \
105-
--action_env=TF_ROCM_AMDGPU_TARGETS=${GPU_NAME} \
106121
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
107122
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
108123
--action_env=NCCL_MAX_NCHANNELS=1 \
109124
--test_filter=-$(IFS=: ; echo "${EXCLUDED_TESTS[*]}") \
110125
"${SANITIZER_ARGS[@]}" \
111-
"$@"
126+
"$@" \
127+
--strategy=TestRunner=local # execute multigpu tests locally as there is no gpu exclusive protection on rbe
112128

113129
# clean up bazel disk_cache
114130
bazel shutdown \
115131
--disk_cache=${BAZEL_DISK_CACHE_DIR} \
116-
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
132+
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}

build_tools/rocm/tsan_ignore_list.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ race:libamdhip64.so
33
race:hipStreamSynchronize
44
race:libhipblaslt.so
55
race:libamd_comgr.so
6+
race:librccl.so
67

78
# Abseil reference counting (DropRef / RefCount init)
89
race:tsl::ReferenceCounted
@@ -15,9 +16,11 @@ race:xla::gpu::AllocateDestinationBuffer
1516
race:xla::LocalDeviceState::ThenRelease
1617

1718
# To be fixed
19+
race:xla::GpuAsyncHostToDeviceTransferManager::TransferRawDataToSubBuffer
1820
race:xla::LiteralBase::Piece::DeallocateBuffers
1921
race:xla::PjRtStreamExecutorLoadedExecutable::ExecuteHelper
2022
race:xla::PjRtStreamExecutorClient::BufferFromHostBufferInternal
23+
race:xla::PjRtStreamExecutorClient::AllocateAndRecordEvent
2124
race:xla::HloRunnerPjRt::TransferLiteralsFromDevice
2225
race:xla::MutableLiteralBase::~MutableLiteralBase
2326
race:xla::MutableLiteralBase::PopulateR1<int>

0 commit comments

Comments
 (0)