Skip to content

Commit 14a7cf4

Browse files
Merge commit '2c498ee497837a2a8e8a0f285306d37528077ef4'
2 parents 65c7f47 + 2c498ee commit 14a7cf4

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

third_party/amd/backend/compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ def make_llir(src, metadata, options):
265265
denormal_mode = "preserve-sign" if options.allow_flush_denorm else "ieee"
266266
fns[0].add_fn_attr("denormal-fp-math-f32", denormal_mode)
267267

268+
# Hint the compiler that we'd like the firmware to preload the kernel arguments
269+
# into user SGPRs so that the kernel does not need to s_load its arguments
270+
# from memory.
271+
amd.set_all_fn_arg_inreg(fns[0])
272+
268273
if options.extern_libs:
269274
paths = [path for (name, path) in options.extern_libs if amd.need_extern_lib(llvm_mod, name)]
270275
llvm.link_extern_libs(llvm_mod, paths)

third_party/amd/python/triton_amd.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,4 +257,13 @@ void init_triton_amd(py::module &&m) {
257257
return false;
258258
}
259259
});
260+
261+
m.def("set_all_fn_arg_inreg", [](llvm::Function *fn) {
262+
for (llvm::Argument &arg : fn->args()) {
263+
// Skip arguments that carry attributes incompatible with inreg (byref, nest).
264+
if (arg.hasByRefAttr() || arg.hasNestAttr())
265+
continue;
266+
arg.addAttr(llvm::Attribute::InReg);
267+
}
268+
});
260269
}

0 commit comments

Comments
 (0)