-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[OpenCL] Add decls for cl_intel_subgroup_local_block_io #146656
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenCL] Add decls for cl_intel_subgroup_local_block_io #146656
Conversation
This extension extends the subgroup block read and write functions defined by `cl_intel_subgroups` (and, when supported, `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, and `cl_intel_subgroups_long`) to support reading from and writing to pointers to the `__local` memory address space in addition to pointers to the `__global` memory address space. It is already supported by the Intel OpenCL compiler. Co-authored-by: Victor Mustya <[email protected]>
|
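@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Mészáros Gergely (Maetveis) Changes: This extension extends the subgroup block read and write functions defined by `cl_intel_subgroups` (and, when supported, `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, and `cl_intel_subgroups_long`) to support reading from and writing to pointers to the `__local` memory address space in addition to pointers to the `__global` memory address space. It is already supported by the Intel OpenCL compiler. Full diff: https://github.com/llvm/llvm-project/pull/146656.diff 1 file affected: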
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 8d8ef497cec49..3bdd72a7eaf94 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17651,6 +17651,72 @@ void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short
+#if defined(cl_intel_subgroup_local_block_io)
+uint __ovld __conv intel_sub_group_block_read( const __local uint* p );
+uint2 __ovld __conv intel_sub_group_block_read2( const __local uint* p );
+uint4 __ovld __conv intel_sub_group_block_read4( const __local uint* p );
+uint8 __ovld __conv intel_sub_group_block_read8( const __local uint* p );
+
+void __ovld __conv intel_sub_group_block_write( __local uint* p, uint data );
+void __ovld __conv intel_sub_group_block_write2( __local uint* p, uint2 data );
+void __ovld __conv intel_sub_group_block_write4( __local uint* p, uint4 data );
+void __ovld __conv intel_sub_group_block_write8( __local uint* p, uint8 data );
+
+#if defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) || \
+ defined(cl_intel_subgroups_long)
+uint __ovld __conv intel_sub_group_block_read_ui( const __local uint* p );
+uint2 __ovld __conv intel_sub_group_block_read_ui2( const __local uint* p );
+uint4 __ovld __conv intel_sub_group_block_read_ui4( const __local uint* p );
+uint8 __ovld __conv intel_sub_group_block_read_ui8( const __local uint* p );
+
+void __ovld __conv intel_sub_group_block_write_ui( __local uint* p, uint data );
+void __ovld __conv intel_sub_group_block_write_ui2( __local uint* p, uint2 data );
+void __ovld __conv intel_sub_group_block_write_ui4( __local uint* p, uint4 data );
+void __ovld __conv intel_sub_group_block_write_ui8( __local uint* p, uint8 data );
+#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
+ // defined(cl_intel_subgroups_long)
+
+#if defined(cl_intel_subgroups_char)
+uchar __ovld __conv intel_sub_group_block_read_uc( const __local uchar* p );
+uchar2 __ovld __conv intel_sub_group_block_read_uc2( const __local uchar* p );
+uchar4 __ovld __conv intel_sub_group_block_read_uc4( const __local uchar* p );
+uchar8 __ovld __conv intel_sub_group_block_read_uc8( const __local uchar* p );
+uchar16 __ovld __conv intel_sub_group_block_read_uc16( const __local uchar* p );
+
+void __ovld __conv intel_sub_group_block_write_uc( __local uchar* p, uchar data );
+void __ovld __conv intel_sub_group_block_write_uc2( __local uchar* p, uchar2 data );
+void __ovld __conv intel_sub_group_block_write_uc4( __local uchar* p, uchar4 data );
+void __ovld __conv intel_sub_group_block_write_uc8( __local uchar* p, uchar8 data );
+void __ovld __conv intel_sub_group_block_write_uc16( __local uchar* p, uchar16 data );
+#endif // defined(cl_intel_subgroups_char)
+
+#if defined(cl_intel_subgroups_short)
+ushort __ovld __conv intel_sub_group_block_read_us( const __local ushort* p );
+ushort2 __ovld __conv intel_sub_group_block_read_us2( const __local ushort* p );
+ushort4 __ovld __conv intel_sub_group_block_read_us4( const __local ushort* p );
+ushort8 __ovld __conv intel_sub_group_block_read_us8( const __local ushort* p );
+ushort16 __ovld __conv intel_sub_group_block_read_us16( const __local ushort* p );
+
+void __ovld __conv intel_sub_group_block_write_us( __local ushort* p, ushort data );
+void __ovld __conv intel_sub_group_block_write_us2( __local ushort* p, ushort2 data );
+void __ovld __conv intel_sub_group_block_write_us4( __local ushort* p, ushort4 data );
+void __ovld __conv intel_sub_group_block_write_us8( __local ushort* p, ushort8 data );
+void __ovld __conv intel_sub_group_block_write_us16( __local ushort* p, ushort16 data );
+#endif // defined(cl_intel_subgroups_short)
+
+#if defined(cl_intel_subgroups_long)
+ulong __ovld __conv intel_sub_group_block_read_ul( const __local ulong* p );
+ulong2 __ovld __conv intel_sub_group_block_read_ul2( const __local ulong* p );
+ulong4 __ovld __conv intel_sub_group_block_read_ul4( const __local ulong* p );
+ulong8 __ovld __conv intel_sub_group_block_read_ul8( const __local ulong* p );
+
+void __ovld __conv intel_sub_group_block_write_ul( __local ulong* p, ulong data );
+void __ovld __conv intel_sub_group_block_write_ul2( __local ulong* p, ulong2 data );
+void __ovld __conv intel_sub_group_block_write_ul4( __local ulong* p, ulong4 data );
+void __ovld __conv intel_sub_group_block_write_ul8( __local ulong* p, ulong8 data );
+#endif // defined(cl_intel_subgroups_long)
+#endif // cl_intel_subgroup_local_block_io
+
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
|
|
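@llvm/pr-subscribers-clang Author: Mészáros Gergely (Maetveis) Changes: This extension extends the subgroup block read and write functions defined by `cl_intel_subgroups` (and, when supported, `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, and `cl_intel_subgroups_long`) to support reading from and writing to pointers to the `__local` memory address space in addition to pointers to the `__global` memory address space. It is already supported by the Intel OpenCL compiler. Full diff: https://github.com/llvm/llvm-project/pull/146656.diff 1 file affected: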
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 8d8ef497cec49..3bdd72a7eaf94 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17651,6 +17651,72 @@ void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short
+#if defined(cl_intel_subgroup_local_block_io)
+uint __ovld __conv intel_sub_group_block_read( const __local uint* p );
+uint2 __ovld __conv intel_sub_group_block_read2( const __local uint* p );
+uint4 __ovld __conv intel_sub_group_block_read4( const __local uint* p );
+uint8 __ovld __conv intel_sub_group_block_read8( const __local uint* p );
+
+void __ovld __conv intel_sub_group_block_write( __local uint* p, uint data );
+void __ovld __conv intel_sub_group_block_write2( __local uint* p, uint2 data );
+void __ovld __conv intel_sub_group_block_write4( __local uint* p, uint4 data );
+void __ovld __conv intel_sub_group_block_write8( __local uint* p, uint8 data );
+
+#if defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) || \
+ defined(cl_intel_subgroups_long)
+uint __ovld __conv intel_sub_group_block_read_ui( const __local uint* p );
+uint2 __ovld __conv intel_sub_group_block_read_ui2( const __local uint* p );
+uint4 __ovld __conv intel_sub_group_block_read_ui4( const __local uint* p );
+uint8 __ovld __conv intel_sub_group_block_read_ui8( const __local uint* p );
+
+void __ovld __conv intel_sub_group_block_write_ui( __local uint* p, uint data );
+void __ovld __conv intel_sub_group_block_write_ui2( __local uint* p, uint2 data );
+void __ovld __conv intel_sub_group_block_write_ui4( __local uint* p, uint4 data );
+void __ovld __conv intel_sub_group_block_write_ui8( __local uint* p, uint8 data );
+#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
+ // defined(cl_intel_subgroups_long)
+
+#if defined(cl_intel_subgroups_char)
+uchar __ovld __conv intel_sub_group_block_read_uc( const __local uchar* p );
+uchar2 __ovld __conv intel_sub_group_block_read_uc2( const __local uchar* p );
+uchar4 __ovld __conv intel_sub_group_block_read_uc4( const __local uchar* p );
+uchar8 __ovld __conv intel_sub_group_block_read_uc8( const __local uchar* p );
+uchar16 __ovld __conv intel_sub_group_block_read_uc16( const __local uchar* p );
+
+void __ovld __conv intel_sub_group_block_write_uc( __local uchar* p, uchar data );
+void __ovld __conv intel_sub_group_block_write_uc2( __local uchar* p, uchar2 data );
+void __ovld __conv intel_sub_group_block_write_uc4( __local uchar* p, uchar4 data );
+void __ovld __conv intel_sub_group_block_write_uc8( __local uchar* p, uchar8 data );
+void __ovld __conv intel_sub_group_block_write_uc16( __local uchar* p, uchar16 data );
+#endif // defined(cl_intel_subgroups_char)
+
+#if defined(cl_intel_subgroups_short)
+ushort __ovld __conv intel_sub_group_block_read_us( const __local ushort* p );
+ushort2 __ovld __conv intel_sub_group_block_read_us2( const __local ushort* p );
+ushort4 __ovld __conv intel_sub_group_block_read_us4( const __local ushort* p );
+ushort8 __ovld __conv intel_sub_group_block_read_us8( const __local ushort* p );
+ushort16 __ovld __conv intel_sub_group_block_read_us16( const __local ushort* p );
+
+void __ovld __conv intel_sub_group_block_write_us( __local ushort* p, ushort data );
+void __ovld __conv intel_sub_group_block_write_us2( __local ushort* p, ushort2 data );
+void __ovld __conv intel_sub_group_block_write_us4( __local ushort* p, ushort4 data );
+void __ovld __conv intel_sub_group_block_write_us8( __local ushort* p, ushort8 data );
+void __ovld __conv intel_sub_group_block_write_us16( __local ushort* p, ushort16 data );
+#endif // defined(cl_intel_subgroups_short)
+
+#if defined(cl_intel_subgroups_long)
+ulong __ovld __conv intel_sub_group_block_read_ul( const __local ulong* p );
+ulong2 __ovld __conv intel_sub_group_block_read_ul2( const __local ulong* p );
+ulong4 __ovld __conv intel_sub_group_block_read_ul4( const __local ulong* p );
+ulong8 __ovld __conv intel_sub_group_block_read_ul8( const __local ulong* p );
+
+void __ovld __conv intel_sub_group_block_write_ul( __local ulong* p, ulong data );
+void __ovld __conv intel_sub_group_block_write_ul2( __local ulong* p, ulong2 data );
+void __ovld __conv intel_sub_group_block_write_ul4( __local ulong* p, ulong4 data );
+void __ovld __conv intel_sub_group_block_write_ul8( __local ulong* p, ulong8 data );
+#endif // defined(cl_intel_subgroups_long)
+#endif // cl_intel_subgroup_local_block_io
+
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h -- clang/lib/Headers/opencl-c.h
View the diff from clang-format here.
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index e1e0fdead..a6baa9c07 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17842,68 +17842,80 @@ void __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ul
#endif // cl_intel_subgroups_long
#if defined(cl_intel_subgroup_local_block_io)
-uint __ovld __conv intel_sub_group_block_read( const __local uint* p );
-uint2 __ovld __conv intel_sub_group_block_read2( const __local uint* p );
-uint4 __ovld __conv intel_sub_group_block_read4( const __local uint* p );
-uint8 __ovld __conv intel_sub_group_block_read8( const __local uint* p );
+uint __ovld __conv intel_sub_group_block_read(const __local uint *p);
+uint2 __ovld __conv intel_sub_group_block_read2(const __local uint *p);
+uint4 __ovld __conv intel_sub_group_block_read4(const __local uint *p);
+uint8 __ovld __conv intel_sub_group_block_read8(const __local uint *p);
-void __ovld __conv intel_sub_group_block_write( __local uint* p, uint data );
-void __ovld __conv intel_sub_group_block_write2( __local uint* p, uint2 data );
-void __ovld __conv intel_sub_group_block_write4( __local uint* p, uint4 data );
-void __ovld __conv intel_sub_group_block_write8( __local uint* p, uint8 data );
+void __ovld __conv intel_sub_group_block_write(__local uint *p, uint data);
+void __ovld __conv intel_sub_group_block_write2(__local uint *p, uint2 data);
+void __ovld __conv intel_sub_group_block_write4(__local uint *p, uint4 data);
+void __ovld __conv intel_sub_group_block_write8(__local uint *p, uint8 data);
#if defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) || \
defined(cl_intel_subgroups_long)
-uint __ovld __conv intel_sub_group_block_read_ui( const __local uint* p );
-uint2 __ovld __conv intel_sub_group_block_read_ui2( const __local uint* p );
-uint4 __ovld __conv intel_sub_group_block_read_ui4( const __local uint* p );
-uint8 __ovld __conv intel_sub_group_block_read_ui8( const __local uint* p );
-
-void __ovld __conv intel_sub_group_block_write_ui( __local uint* p, uint data );
-void __ovld __conv intel_sub_group_block_write_ui2( __local uint* p, uint2 data );
-void __ovld __conv intel_sub_group_block_write_ui4( __local uint* p, uint4 data );
-void __ovld __conv intel_sub_group_block_write_ui8( __local uint* p, uint8 data );
-#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
- // defined(cl_intel_subgroups_long)
+uint __ovld __conv intel_sub_group_block_read_ui(const __local uint *p);
+uint2 __ovld __conv intel_sub_group_block_read_ui2(const __local uint *p);
+uint4 __ovld __conv intel_sub_group_block_read_ui4(const __local uint *p);
+uint8 __ovld __conv intel_sub_group_block_read_ui8(const __local uint *p);
+
+void __ovld __conv intel_sub_group_block_write_ui(__local uint *p, uint data);
+void __ovld __conv intel_sub_group_block_write_ui2(__local uint *p, uint2 data);
+void __ovld __conv intel_sub_group_block_write_ui4(__local uint *p, uint4 data);
+void __ovld __conv intel_sub_group_block_write_ui8(__local uint *p, uint8 data);
+#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short)
+ // || defined(cl_intel_subgroups_long)
#if defined(cl_intel_subgroups_char)
-uchar __ovld __conv intel_sub_group_block_read_uc( const __local uchar* p );
-uchar2 __ovld __conv intel_sub_group_block_read_uc2( const __local uchar* p );
-uchar4 __ovld __conv intel_sub_group_block_read_uc4( const __local uchar* p );
-uchar8 __ovld __conv intel_sub_group_block_read_uc8( const __local uchar* p );
-uchar16 __ovld __conv intel_sub_group_block_read_uc16( const __local uchar* p );
-
-void __ovld __conv intel_sub_group_block_write_uc( __local uchar* p, uchar data );
-void __ovld __conv intel_sub_group_block_write_uc2( __local uchar* p, uchar2 data );
-void __ovld __conv intel_sub_group_block_write_uc4( __local uchar* p, uchar4 data );
-void __ovld __conv intel_sub_group_block_write_uc8( __local uchar* p, uchar8 data );
-void __ovld __conv intel_sub_group_block_write_uc16( __local uchar* p, uchar16 data );
+uchar __ovld __conv intel_sub_group_block_read_uc(const __local uchar *p);
+uchar2 __ovld __conv intel_sub_group_block_read_uc2(const __local uchar *p);
+uchar4 __ovld __conv intel_sub_group_block_read_uc4(const __local uchar *p);
+uchar8 __ovld __conv intel_sub_group_block_read_uc8(const __local uchar *p);
+uchar16 __ovld __conv intel_sub_group_block_read_uc16(const __local uchar *p);
+
+void __ovld __conv intel_sub_group_block_write_uc(__local uchar *p, uchar data);
+void __ovld __conv intel_sub_group_block_write_uc2(__local uchar *p,
+ uchar2 data);
+void __ovld __conv intel_sub_group_block_write_uc4(__local uchar *p,
+ uchar4 data);
+void __ovld __conv intel_sub_group_block_write_uc8(__local uchar *p,
+ uchar8 data);
+void __ovld __conv intel_sub_group_block_write_uc16(__local uchar *p,
+ uchar16 data);
#endif // defined(cl_intel_subgroups_char)
#if defined(cl_intel_subgroups_short)
-ushort __ovld __conv intel_sub_group_block_read_us( const __local ushort* p );
-ushort2 __ovld __conv intel_sub_group_block_read_us2( const __local ushort* p );
-ushort4 __ovld __conv intel_sub_group_block_read_us4( const __local ushort* p );
-ushort8 __ovld __conv intel_sub_group_block_read_us8( const __local ushort* p );
-ushort16 __ovld __conv intel_sub_group_block_read_us16( const __local ushort* p );
-
-void __ovld __conv intel_sub_group_block_write_us( __local ushort* p, ushort data );
-void __ovld __conv intel_sub_group_block_write_us2( __local ushort* p, ushort2 data );
-void __ovld __conv intel_sub_group_block_write_us4( __local ushort* p, ushort4 data );
-void __ovld __conv intel_sub_group_block_write_us8( __local ushort* p, ushort8 data );
-void __ovld __conv intel_sub_group_block_write_us16( __local ushort* p, ushort16 data );
+ushort __ovld __conv intel_sub_group_block_read_us(const __local ushort *p);
+ushort2 __ovld __conv intel_sub_group_block_read_us2(const __local ushort *p);
+ushort4 __ovld __conv intel_sub_group_block_read_us4(const __local ushort *p);
+ushort8 __ovld __conv intel_sub_group_block_read_us8(const __local ushort *p);
+ushort16 __ovld __conv intel_sub_group_block_read_us16(const __local ushort *p);
+
+void __ovld __conv intel_sub_group_block_write_us(__local ushort *p,
+ ushort data);
+void __ovld __conv intel_sub_group_block_write_us2(__local ushort *p,
+ ushort2 data);
+void __ovld __conv intel_sub_group_block_write_us4(__local ushort *p,
+ ushort4 data);
+void __ovld __conv intel_sub_group_block_write_us8(__local ushort *p,
+ ushort8 data);
+void __ovld __conv intel_sub_group_block_write_us16(__local ushort *p,
+ ushort16 data);
#endif // defined(cl_intel_subgroups_short)
#if defined(cl_intel_subgroups_long)
-ulong __ovld __conv intel_sub_group_block_read_ul( const __local ulong* p );
-ulong2 __ovld __conv intel_sub_group_block_read_ul2( const __local ulong* p );
-ulong4 __ovld __conv intel_sub_group_block_read_ul4( const __local ulong* p );
-ulong8 __ovld __conv intel_sub_group_block_read_ul8( const __local ulong* p );
-
-void __ovld __conv intel_sub_group_block_write_ul( __local ulong* p, ulong data );
-void __ovld __conv intel_sub_group_block_write_ul2( __local ulong* p, ulong2 data );
-void __ovld __conv intel_sub_group_block_write_ul4( __local ulong* p, ulong4 data );
-void __ovld __conv intel_sub_group_block_write_ul8( __local ulong* p, ulong8 data );
+ulong __ovld __conv intel_sub_group_block_read_ul(const __local ulong *p);
+ulong2 __ovld __conv intel_sub_group_block_read_ul2(const __local ulong *p);
+ulong4 __ovld __conv intel_sub_group_block_read_ul4(const __local ulong *p);
+ulong8 __ovld __conv intel_sub_group_block_read_ul8(const __local ulong *p);
+
+void __ovld __conv intel_sub_group_block_write_ul(__local ulong *p, ulong data);
+void __ovld __conv intel_sub_group_block_write_ul2(__local ulong *p,
+ ulong2 data);
+void __ovld __conv intel_sub_group_block_write_ul4(__local ulong *p,
+ ulong4 data);
+void __ovld __conv intel_sub_group_block_write_ul8(__local ulong *p,
+ ulong8 data);
#endif // defined(cl_intel_subgroups_long)
#endif // cl_intel_subgroup_local_block_io
|
This file is not currently clang-formatted; the changes should match its existing formatting style. |
|
@michalpaszkowski can you review this, or do you know who might be comfortable reviewing it? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
…ent-local-block-io
This extension extends the subgroup block read and write functions defined by `cl_intel_subgroups` (and, when supported, `cl_intel_subgroups_char`, `cl_intel_subgroups_short`, and `cl_intel_subgroups_long`) to support reading from and writing to pointers to the `__local` memory address space in addition to pointers to the `__global` memory address space.
It is already supported by the Intel OpenCL compiler.