Skip to content

Conversation

@vmustya
Copy link
Contributor

@vmustya vmustya commented Dec 3, 2025

The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL
extension. The extension introduces new built-in functions that allow
prefetching data from global memory to caches as a subgroup-level
operation.

The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html

The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL
extension. The extension introduces new built-in functions that allow
prefetching data from global memory to caches as a subgroup-level
operation.

The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Dec 3, 2025
@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Victor Mustya (vmustya)

Changes

The commit adds support for the cl_intel_subgroup_buffer_prefetch OpenCL
extension. The extension introduces new built-in functions that allow
prefetching data from global memory to caches as a subgroup-level
operation.

The extension is defined here: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html


Full diff: https://github.com/llvm/llvm-project/pull/170532.diff

1 Files Affected:

  • (modified) clang/lib/Headers/opencl-c.h (+30)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index f65b4b314cffd..2ea48f2760c76 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17525,6 +17525,13 @@ void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint
 
 #endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
        // defined(cl_intel_subgroups_long)
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups
 
 #if defined(cl_intel_subgroups_short)
@@ -17660,6 +17667,14 @@ void        __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, u
 void        __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
 void        __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
 void        __ovld __conv intel_sub_group_block_write_us16( __global ushort* p, ushort16 data );
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us16(const __global ushort *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_short
 
 #if defined(cl_intel_subgroups_char)
@@ -17795,6 +17810,14 @@ void        __ovld __conv intel_sub_group_block_write_uc2( __global uchar* p, uc
 void        __ovld __conv intel_sub_group_block_write_uc4( __global uchar* p, uchar4 data );
 void        __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uchar8 data );
 void        __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_char
 
 #if defined(cl_intel_subgroups_long)
@@ -17839,6 +17862,13 @@ void        __ovld __conv intel_sub_group_block_write_ul(  __global ulong* p, ul
 void        __ovld __conv intel_sub_group_block_write_ul2( __global ulong* p, ulong2 data );
 void        __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ulong4 data );
 void        __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
+
+#if defined(cl_intel_subgroup_buffer_prefetch)
+void __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
+#endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_long
 
 #if defined(cl_intel_subgroup_local_block_io)

@github-actions
Copy link

github-actions bot commented Dec 3, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions h -- clang/lib/Headers/opencl-c.h --diff_from_common_commit

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index f9cdb2e70..b48851c36 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17527,10 +17527,10 @@ void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint
        // defined(cl_intel_subgroups_long)
 
 #if defined(cl_intel_subgroup_buffer_prefetch)
-void       __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
-void       __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
-void       __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
-void       __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui2(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui4(const __global uint *p);
+void __ovld __conv intel_sub_group_block_prefetch_ui8(const __global uint *p);
 #endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups
 
@@ -17669,11 +17669,12 @@ void        __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, u
 void        __ovld __conv intel_sub_group_block_write_us16( __global ushort* p, ushort16 data );
 
 #if defined(cl_intel_subgroup_buffer_prefetch)
-void        __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
-void        __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
-void        __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
-void        __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
-void        __ovld __conv intel_sub_group_block_prefetch_us16(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us2(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us4(const __global ushort *p);
+void __ovld __conv intel_sub_group_block_prefetch_us8(const __global ushort *p);
+void __ovld __conv
+intel_sub_group_block_prefetch_us16(const __global ushort *p);
 #endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_short
 
@@ -17812,11 +17813,11 @@ void        __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uc
 void        __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
 
 #if defined(cl_intel_subgroup_buffer_prefetch)
-void        __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
-void        __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
-void        __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
-void        __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
-void        __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc2(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc4(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc8(const __global uchar *p);
+void __ovld __conv intel_sub_group_block_prefetch_uc16(const __global uchar *p);
 #endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_char
 
@@ -17864,10 +17865,10 @@ void        __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ul
 void        __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
 
 #if defined(cl_intel_subgroup_buffer_prefetch)
-void        __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
-void        __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
-void        __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
-void        __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul2(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul4(const __global ulong *p);
+void __ovld __conv intel_sub_group_block_prefetch_ul8(const __global ulong *p);
 #endif // defined(cl_intel_subgroup_buffer_prefetch)
 #endif // cl_intel_subgroups_long
 

@vmustya vmustya changed the title Add support for the cl_intel_subgroup_buffer_prefetch [Clang][OpenCL] Add support for the cl_intel_subgroup_buffer_prefetch Dec 3, 2025
@bader bader requested review from Maetveis and svenvh December 3, 2025 19:47
Copy link
Contributor

@Maetveis Maetveis left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with minor formatting nits.

Is support for the SPV_INTEL_subgroup_buffer_prefetch extension in progress in the SPIR-V backend and/or the LLVM-SPIRV translator?

Right now it seems like only IGC implements the OCL extension, and does so not entirely conforming to the spec because the SPIR-V it produces should contain OpCall instructions instead of OpSubgroupBlockPrefetchINTEL AFAICT looking at the code.

Co-authored-by: Mészáros Gergely <[email protected]>
@michalpaszkowski michalpaszkowski merged commit 5c289da into llvm:main Dec 5, 2025
7 of 9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants