Skip to content

Commit 7c68944

Browse files
authored
[AMD] Add an environment variable to disable buffer atomics (#7311)
Adds an environment variable, `AMDGCN_USE_BUFFER_ATOMICS`, that is enabled by default and can be turned off to disable buffer atomics on the MI300 (it only has an effect when `AMDGCN_USE_BUFFER_OPS` is also enabled). We have seen significant wins from BufferOps in general, but we haven't benchmarked the impact of atomics at Meta, and we have seen some issues internally. We are working to resolve these issues and will open reproducers upstream, but with BufferOps being the default, we would be much happier if a production workload could opt in to BufferOps without also accepting buffer atomics.
1 parent fd5fb0c commit 7c68944

File tree

6 files changed

+10
-4
lines changed

6 files changed

+10
-4
lines changed

include/triton/Tools/Sys/GetEnv.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace mlir::triton {
1414
inline const std::set<std::string> CACHE_INVALIDATING_ENV_VARS = {
1515
// clang-format off
1616
"AMDGCN_ENABLE_DUMP",
17+
"AMDGCN_USE_BUFFER_ATOMICS",
1718
"AMDGCN_USE_BUFFER_OPS",
1819
"DISABLE_LLVM_OPT",
1920
"DISABLE_MMA_V3",

python/triton/knobs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,8 @@ class nvidia_knobs(base_knobs):
436436

437437
class amd_knobs(base_knobs):
438438
use_buffer_ops: env_bool = env_bool("AMDGCN_USE_BUFFER_OPS", True)
439+
# Note: This requires use_buffer_ops be true to have any effect
440+
use_buffer_atomics: env_bool = env_bool("AMDGCN_USE_BUFFER_ATOMICS", True)
439441
dump_amdgcn: env_bool = env_bool("AMDGCN_ENABLE_DUMP")
440442
libhip_path: env_opt_str = env_opt_str("TRITON_LIBHIP_PATH")
441443
lld_path: env_opt_str = env_opt_str("TRITON_HIP_LLD_PATH")

third_party/amd/backend/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def make_ttgir(mod, metadata, options):
261261
if knobs.amd.use_buffer_ops:
262262
amd.passes.ttgpuir.add_canonicalize_pointers(pm)
263263
passes.common.add_canonicalizer(pm)
264-
amd.passes.ttgpuir.add_convert_to_buffer_ops(pm, options.arch)
264+
amd.passes.ttgpuir.add_convert_to_buffer_ops(pm, options.arch, knobs.amd.use_buffer_atomics)
265265

266266
amd.passes.ttgpuir.add_fold_true_cmpi(pm)
267267
passes.common.add_canonicalizer(pm)

third_party/amd/include/TritonAMDGPUTransforms/Passes.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ def TritonAMDGPUConvertToBufferOps : Pass<"tritonamdgpu-convert-buffer-ops", "ml
137137
Option<"archGenerationName", "arch-generation-name",
138138
"std::string", /*default=*/"std::string{}",
139139
"GFX generation name of target device.">,
140+
Option<"allowBufferAtomics", "allow-buffer-atomics",
141+
"bool", /*default*/"true",
142+
"Allow buffer atomic operations when the hardware supports it.">,
140143
];
141144
}
142145

third_party/amd/lib/TritonAMDGPUTransforms/ConvertToBufferOps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ class TritonAMDGPUConvertToBufferOpsPass
557557
// lowering to LLVM
558558
triton::AMD::ISAFamily isaFamily =
559559
triton::AMD::deduceISAFamily(archGenerationName);
560-
if (ISAFamily::CDNA3 == isaFamily)
560+
if (this->allowBufferAtomics && ISAFamily::CDNA3 == isaFamily)
561561
patterns.add<ConvertTritonAtomicRMWOpToBufferAtomicRMW>(
562562
context, assumptions, axisInfoAnalysis, solver, isaFamily);
563563

third_party/amd/python/triton_amd.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ void init_triton_amd_passes_ttgpuir(py::module &&m) {
6969
pm.addNestedPass<mlir::triton::FuncOp>(
7070
mlir::createTritonAMDGPUCanonicalizePointers());
7171
});
72-
ADD_PASS_OPTION_WRAPPER_1("add_convert_to_buffer_ops",
72+
ADD_PASS_OPTION_WRAPPER_2("add_convert_to_buffer_ops",
7373
mlir::createTritonAMDGPUConvertToBufferOps,
74-
const std::string &);
74+
const std::string &, bool);
7575
ADD_PASS_WRAPPER_0("add_reorder_instructions",
7676
mlir::createTritonAMDGPUReorderInstructions);
7777
ADD_PASS_WRAPPER_0("add_fold_true_cmpi", mlir::createTritonAMDFoldTrueCmpI);

0 commit comments

Comments
 (0)