@@ -41,61 +41,58 @@ export PYTHON_BIN_PATH=`which python3`
4141export TF_NEED_ROCM=1
4242export ROCM_PATH=" /opt/rocm"
4343
44- GPU_NAME=(` rocminfo | grep -m 1 gfx` )
45- GPU_NAME=${GPU_NAME[1]}
46-
4744EXCLUDED_TESTS=(
48- # //xla/service/gpu/tests:gpu_kernel_tiling_test_gpu_amd_any
49- GpuKernelTilingTest.ColumnReductionWithLayoutChangeTiled
50- GpuKernelTilingTest.ReductionInputTooLarge
51- # //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
52- PjrtCAPIGpuExtensionTest.TritonCompile
53- # //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
54- TritonEmitterTest.CheckRocmWarpSize
55- TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
56- TritonEmitterTest.FP8ToFP8EndToEnd
57- TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
58- BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
59- # //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
60- TritonGemmTest.BroadcastOfVectorConstantIsFused
61- TritonGemmTest.FailIfTooMuchShmem
62- TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
63- # //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
64- TritonTest.NonstandardLayoutWithManyNonContractingDims
65- TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
66- # //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
67- RandomEighTestInstantiation/RandomEighTest.Random/*
68- # temp excludes for 0.7.1
69- CompareTest.SplitK
70- TritonEmitterTest.RocmWarpSizeIsSetCorrectly
71- MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
72- TestRadixSort/CubSortKeysTest.SortKeys/*
73- GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
74- CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
75- CommandBufferConversionPassTest.ConvertWhileThunk
76- TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
77- TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
78- TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
79- TritonGemmTest.FailForTooComplexTiling
80- TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
81- TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
82- TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
83- TestRadixSort/CubSortPairsTest.SortPairs/*
84- GpuKernelTilingTest.ReductionInputTooLarge
85- DeterminismTest.Conv
86- TopKTests/TopKKernelTest*
87- DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
88- DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
89- TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
90- ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
91- ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
92- ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
93- TritonTest.FuseSubchannelDequantizationWithTranspose
94- BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
95- CommandBufferTests/CommandBufferTest.IndexConditional/*
96- CommandBufferTests/CommandBufferTest.WhileLoop/*
97- CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
98- BufferComparatorTest.VeryLargeArray_Device_U8_Aligned
45+ # //xla/service/gpu/tests:gpu_kernel_tiling_test_gpu_amd_any
46+ GpuKernelTilingTest.ColumnReductionWithLayoutChangeTiled
47+ GpuKernelTilingTest.ReductionInputTooLarge
48+ # //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
49+ PjrtCAPIGpuExtensionTest.TritonCompile
50+ # //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
51+ TritonEmitterTest.CheckRocmWarpSize
52+ TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
53+ TritonEmitterTest.FP8ToFP8EndToEnd
54+ TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
55+ BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
56+ # //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
57+ TritonGemmTest.BroadcastOfVectorConstantIsFused
58+ TritonGemmTest.FailIfTooMuchShmem
59+ TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
60+ # //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
61+ TritonTest.NonstandardLayoutWithManyNonContractingDims
62+ TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
63+ # //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
64+ RandomEighTestInstantiation/RandomEighTest.Random/*
65+ # temp excludes for 0.7.1
66+ CompareTest.SplitK
67+ TritonEmitterTest.RocmWarpSizeIsSetCorrectly
68+ MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
69+ TestRadixSort/CubSortKeysTest.SortKeys/*
70+ GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
71+ CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
72+ CommandBufferConversionPassTest.ConvertWhileThunk
73+ TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
74+ TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
75+ TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
76+ TritonGemmTest.FailForTooComplexTiling
77+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
78+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
79+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
80+ TestRadixSort/CubSortPairsTest.SortPairs/*
81+ GpuKernelTilingTest.ReductionInputTooLarge
82+ DeterminismTest.Conv
83+ TopKTests/TopKKernelTest*
84+ DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
85+ DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
86+ TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
87+ ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
88+ ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
89+ ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
90+ TritonTest.FuseSubchannelDequantizationWithTranspose
91+ BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
92+ CommandBufferTests/CommandBufferTest.IndexConditional/*
93+ CommandBufferTests/CommandBufferTest.WhileLoop/*
94+ CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
95+ BufferComparatorTest.VeryLargeArray_Device_U8_Aligned
9996)
10097
10198BAZEL_DISK_CACHE_SIZE=100G
@@ -110,15 +107,20 @@ TAG_FILTERS=$($SCRIPT_DIR/rocm_tag_filters.sh),-multigpu,-multi_gpu_h100,require
110107
111108SANITIZER_ARGS=()
112109if [[ $1 == " asan" ]]; then
113- SANITIZER_ARGS+=(" --test_env=ASAN_OPTIONS=suppressions=${SCRIPT_DIR} /asan_ignore_list.txt:use_sigaltstack=0" )
114- SANITIZER_ARGS+=(" --test_env=LSAN_OPTIONS=suppressions=${SCRIPT_DIR} /lsan_ignore_list.txt:use_sigaltstack=0" )
115110 SANITIZER_ARGS+=(" --config=asan" )
116111 TAG_FILTERS=$TAG_FILTERS ,-noasan
117112 shift
118113elif [[ $1 == " tsan" ]]; then
119- SANITIZER_ARGS+=(" --test_env=TSAN_OPTIONS=suppressions=${SCRIPT_DIR} /tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1" )
120114 SANITIZER_ARGS+=(" --config=tsan" )
121115 TAG_FILTERS=$TAG_FILTERS ,-notsan
116+ # excluded from tsan
117+ EXCLUDED_TESTS+=(
118+ # //xla/tests:collective_ops_e2e_test_amdgpu_any
119+ CollectiveOpsTestE2E*
120+ # //xla/backends/gpu/runtime:host_execute_thunk_test_amdgpu_any
121+ HostExecuteStartThunkTest*
122+ HostExecuteDoneThunkTest*
123+ )
122124 shift
123125fi
124126
@@ -138,8 +140,6 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
138140 --keep_going \
139141 --local_test_jobs=${N_TEST_JOBS} \
140142 --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
141- --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
142- --action_env=TF_ROCM_AMDGPU_TARGETS=${GPU_NAME} \
143143 --action_env=XLA_FLAGS=" --xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
144144 --run_under=//build_tools/ci:parallel_gpu_execute \
145145 --test_env=MIOPEN_FIND_ENFORCE=5 \
@@ -151,4 +151,4 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
151151# clean up bazel disk_cache
152152bazel shutdown \
153153 --disk_cache=${BAZEL_DISK_CACHE_DIR} \
154- --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
154+ --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
0 commit comments