Skip to content

Conversation

@jmmartinez
Copy link
Contributor

The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.

…er_atomic_{{add|fadd|fmin|fmax}}

The "t" flag is used to disable typechecking. This is done on several
builtins taking pointers since otherwise HIP code would not compile
during compilation for the host (even if the builtin is only used in
device code).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag. Remove it and enable the default
type-checks on them.
@jmmartinez jmmartinez self-assigned this Oct 23, 2025
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Oct 23, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 23, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-clang

Author: Juan Manuel Martinez Caamaño (jmmartinez)

Changes

The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.


Full diff: https://github.com/llvm/llvm-project/pull/164824.diff

3 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+7-7)
  • (added) clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip (+18)
  • (added) clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip (+18)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 01d121b948b68..d149eec5ea990 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -177,15 +177,15 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
 
-BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t")
+BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "")
 
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "", "atomic-fadd-rtn-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "", "atomic-buffer-global-pk-add-f16-insts")
 
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
 
 TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
 TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip
new file mode 100644
index 0000000000000..e9e076fa312f2
--- /dev/null
+++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx90a -verify %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+
+typedef _Float16 __attribute__((ext_vector_type(2))) float16x2_t;
+
+#define __device__ __attribute__((device))
+
+__device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) {
+  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0);
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0);
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0);
+}
+
+__device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) {
+  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+}
diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip
new file mode 100644
index 0000000000000..a2dc02176099d
--- /dev/null
+++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx90a -verify %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+
+#define __device__ __attribute__((device))
+
+__device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, 0);
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, 0);
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0);
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0);
+}
+
+__device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+}

@shiltian
Copy link
Contributor

shiltian commented Oct 23, 2025

On a side note, one of the the reasons for some compile errors with type check enabling is, we have been using wrong type for FP16. We thought h should be fp16 but in fact we should use x. There is a PR somewhere else trying to fix it.

@jmmartinez
Copy link
Contributor Author

jmmartinez commented Oct 24, 2025

On a side note, one of the the reasons for some compile errors with type check enabling is, we have been using wrong type for FP16. We thought h should be fp16 but in fact we should use x. There is a PR somewhere else trying to fix it.

It could be the case for __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16. The rest do not take any floating-point 16 type.

…_atomic_fadd_v2f16, do that later, testing for the float
@jmmartinez
Copy link
Contributor Author

I've rolled back the change for __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16. I'll fix that in a separate PR.

@jmmartinez jmmartinez merged commit efcc613 into llvm:main Oct 27, 2025
10 checks passed
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)

The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)

The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)

The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).

The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants