Skip to content

Commit 5c13f9a

Browse files
authored
[libc] Add single threaded kernel attributes to AMDGPU startup utility (#104651)
Summary: I recently fixed the errors here, so these attributes can now actually be used. This shouldn't change much; it should just make the generated code slightly better.
1 parent caa1070 commit 5c13f9a

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

libc/startup/gpu/amdgpu/start.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,9 @@ static void call_fini_array_callbacks() {
4141

4242
} // namespace LIBC_NAMESPACE_DECL
4343

44-
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
44+
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
45+
clang::amdgpu_flat_work_group_size(1, 1),
46+
clang::amdgpu_max_num_work_groups(1)]] void
4547
_begin(int argc, char **argv, char **env) {
4648
__atomic_store_n(&LIBC_NAMESPACE::app.env_ptr,
4749
reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED);
@@ -60,7 +62,9 @@ _start(int argc, char **argv, char **envp, int *ret) {
6062
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
6163
}
6264

63-
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
65+
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
66+
clang::amdgpu_flat_work_group_size(1, 1),
67+
clang::amdgpu_max_num_work_groups(1)]] void
6468
_end(int retval) {
6569
// Only a single thread should call `exit` here, the rest should gracefully
6670
// return from the kernel. This is so only one thread calls the destructors

0 commit comments

Comments
 (0)