-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Clang][OpenCL] Add support for the cl_intel_subgroup_buffer_prefetch #170532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL extension. The extension introduces new built-in functions that allow prefetching data from global memory to caches as a subgroup-level operation. The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Victor Mustya (vmustya) Changes: The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL extension. The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html Full diff: https://github.com/llvm/llvm-project/pull/170532.diff 1 File Affected:
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index f65b4b314cffd..2ea48f2760c76 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17525,6 +17525,13 @@ void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint
#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
// defined(cl_intel_subgroups_long)
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups
#if defined(cl_intel_subgroups_short)
@@ -17660,6 +17667,14 @@ void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
void __ovld __conv intel_sub_group_block_write_us16( __global ushort* p, ushort16 data );
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us16(const __global ushort *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_short
#if defined(cl_intel_subgroups_char)
@@ -17795,6 +17810,14 @@ void __ovld __conv intel_sub_group_block_write_uc2( __global uchar* p, uc
void __ovld __conv intel_sub_group_block_write_uc4( __global uchar* p, uchar4 data );
void __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uchar8 data );
void __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_char
#if defined(cl_intel_subgroups_long)
@@ -17839,6 +17862,13 @@ void __ovld __conv intel_sub_group_block_write_ul( __global ulong* p, ul
void __ovld __conv intel_sub_group_block_write_ul2( __global ulong* p, ulong2 data );
void __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ulong4 data );
void __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_long
#if defined(cl_intel_subgroup_local_block_io)
|
You can test this locally with the following command: git-clang-format --diff origin/main HEAD --extensions h -- clang/lib/Headers/opencl-c.h --diff_from_common_commit
View the diff from clang-format here.
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index f9cdb2e70..b48851c36 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17527,10 +17527,10 @@ void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint
// defined(cl_intel_subgroups_long)
#if defined(cl_intel_subgroup_buffer_prefetch)
-void __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
-void __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
-void __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
-void __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups
@@ -17669,11 +17669,12 @@ void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us16( __global ushort* p, ushort16 data );
#if defined(cl_intel_subgroup_buffer_prefetch)
-void __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
-void __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
-void __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
-void __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
-void __ovld __conv intel_sub_group_block_prefetch_us16(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
+void __ovld __conv
+intel_sub_group_block_prefetch_us16(const __global ushort *p);
#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_short
@@ -17812,11 +17813,11 @@ void __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uc
void __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
#if defined(cl_intel_subgroup_buffer_prefetch)
-void __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
-void __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
-void __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
-void __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
-void __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_char
@@ -17864,10 +17865,10 @@ void __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ul
void __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
#if defined(cl_intel_subgroup_buffer_prefetch)
-void __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
-void __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
-void __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
-void __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
#endif // defined(cl_intel_subgroup_buffer_prefetch)
#endif // cl_intel_subgroups_long
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with minor formatting nits.
Is SPIR-V backend and / or LLVM-SPIRV translator support in progress for the SPV_INTEL_subgroup_buffer_prefetch extension?
Right now it seems like only IGC implements the OCL extension, and does so not entirely conforming to the spec because the SPIR-V it produces should contain OpCall instructions instead of OpSubgroupBlockPrefetchINTEL AFAICT looking at the code.
Co-authored-by: Mészáros Gergely <[email protected]>
The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL
extension. The extension introduces new built-in functions that allow
prefetching data from global memory to caches as a subgroup-level
operation.
The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html