-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[Clang][AMDGPU] Enable type-checking on __builtin_amdgcn_raw_ptr_buffer_atomic_{{add|fadd|fmin|fmax}} #164824
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…er_atomic_{{add|fadd|fmin|fmax}}
The "t" flag is used to disable typechecking. This is done on several
builtins taking pointers since otherwise HIP code would not compile
during compilation for the host (even if the builtin is only used in
device code).
The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag. Remove it and enable the default
type-checks on them.
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang Author: Juan Manuel Martinez Caamaño (jmmartinez) ChangesThe "t" flag is used to mark the builtin signature as meaningless. The builtins changed by this patch are not affected by this issue, so Full diff: https://github.com/llvm/llvm-project/pull/164824.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 01d121b948b68..d149eec5ea990 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -177,15 +177,15 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t")
+BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "", "atomic-fadd-rtn-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "", "atomic-buffer-global-pk-add-f16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip
new file mode 100644
index 0000000000000..e9e076fa312f2
--- /dev/null
+++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx90a -verify %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+
+typedef _Float16 __attribute__((ext_vector_type(2))) float16x2_t;
+
+#define __device__ __attribute__((device))
+
+__device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) {
+ i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0);
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0);
+ v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0);
+}
+
+__device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) {
+ i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+ v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+}
diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip
new file mode 100644
index 0000000000000..a2dc02176099d
--- /dev/null
+++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-fmin-max.hip
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx90a -verify %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+
+#define __device__ __attribute__((device))
+
+__device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, 0);
+ f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, 0);
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0);
+ f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0);
+}
+
+__device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+ f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(f64, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+ f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+ f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}}
+}
|
|
On a side note, one of the the reasons for some compile errors with type check enabling is, we have been using wrong type for FP16. We thought |
It could be the case for |
…_atomic_fadd_v2f16, do that later, testing for the float
|
I've rolled back the change for __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16. I'll fix that in a separate PR. |
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)
The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).
The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)
The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).
The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
…er_atomic_{{add|fadd|fmin|fmax}} (llvm#164824)
The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP
code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).
The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.
The "t" flag is used to mark the builtin signature as meaningless.
This is done on several builtins taking pointers since otherwise HIP code would not compile
during compilation for the host (even if the builtin is only used in
device code, compilation would fail).
The builtins changed by this patch are not affected by this issue, so
they do not need the "t" flag in the first place.