Skip to content

Commit e3d1c0d

Browse files
[SPIRV] GPU intrinsics
1 parent 8aa835c commit e3d1c0d

File tree

14 files changed

+2021
-0
lines changed

14 files changed

+2021
-0
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4770,6 +4770,35 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
47704770
let Prototype = "char const*(...)";
47714771
}
47724772

4773+
// GPU intrinsics
4774+
class GPUBuiltin<string prototype> : Builtin {
4775+
let Spellings = ["__builtin_" # NAME];
4776+
let Prototype = prototype;
4777+
let Attributes = [NoThrow];
4778+
}
4779+
4780+
multiclass GPUGridBuiltin<string prototype> {
4781+
def _x : GPUBuiltin<prototype>;
4782+
def _y : GPUBuiltin<prototype>;
4783+
def _z : GPUBuiltin<prototype>;
4784+
}
4785+
4786+
defm gpu_num_blocks : GPUGridBuiltin<"uint32_t()">;
4787+
defm gpu_block_id : GPUGridBuiltin<"uint32_t()">;
4788+
defm gpu_num_threads : GPUGridBuiltin<"uint32_t()">;
4789+
defm gpu_thread_id : GPUGridBuiltin<"uint32_t()">;
4790+
4791+
def gpu_ballot : GPUBuiltin<"uint64_t(uint64_t, bool)">;
4792+
def gpu_exit : GPUBuiltin<"void()">;
4793+
def gpu_lane_id : GPUBuiltin<"uint32_t()">;
4794+
def gpu_lane_mask : GPUBuiltin<"uint64_t()">;
4795+
def gpu_num_lanes : GPUBuiltin<"uint32_t()">;
4796+
def gpu_read_first_lane_u32 : GPUBuiltin<"uint32_t(uint64_t, uint32_t)">;
4797+
def gpu_shuffle_idx_u32 : GPUBuiltin<"uint32_t(uint64_t, uint32_t, uint32_t, uint32_t)">;
4798+
def gpu_sync_lane : GPUBuiltin<"void(uint64_t)">;
4799+
def gpu_sync_threads : GPUBuiltin<"void()">;
4800+
def gpu_thread_suspend : GPUBuiltin<"void()">;
4801+
47734802
// HLSL
47744803
def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
47754804
let Spellings = ["__builtin_hlsl_adduint64"];
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s -o - | FileCheck %s
4+
5+
#include <stdint.h>
6+
7+
// CHECK-LABEL: define dso_local noundef i32 @workgroup_id_x(
8+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x()
11+
// CHECK-NEXT: ret i32 [[TMP0]]
12+
//
13+
uint32_t workgroup_id_x(void)
14+
{
15+
return __builtin_amdgcn_workgroup_id_x();
16+
}
17+
18+
// CHECK-LABEL: define dso_local noundef i32 @workgroup_id_y(
19+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
20+
// CHECK-NEXT: [[ENTRY:.*:]]
21+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.y()
22+
// CHECK-NEXT: ret i32 [[TMP0]]
23+
//
24+
uint32_t workgroup_id_y(void)
25+
{
26+
return __builtin_amdgcn_workgroup_id_y();
27+
}
28+
29+
// CHECK-LABEL: define dso_local noundef i32 @workgroup_id_z(
30+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] {
31+
// CHECK-NEXT: [[ENTRY:.*:]]
32+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.z()
33+
// CHECK-NEXT: ret i32 [[TMP0]]
34+
//
35+
uint32_t workgroup_id_z(void)
36+
{
37+
return __builtin_amdgcn_workgroup_id_z();
38+
}
39+
40+
// CHECK-LABEL: define dso_local noundef range(i32 0, 1024) i32 @workitem_id_x(
41+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR4:[0-9]+]] {
42+
// CHECK-NEXT: [[ENTRY:.*:]]
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
44+
// CHECK-NEXT: ret i32 [[TMP0]]
45+
//
46+
uint32_t workitem_id_x(void)
47+
{
48+
return __builtin_amdgcn_workitem_id_x();
49+
}
50+
51+
// CHECK-LABEL: define dso_local noundef range(i32 0, 1024) i32 @workitem_id_y(
52+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR5:[0-9]+]] {
53+
// CHECK-NEXT: [[ENTRY:.*:]]
54+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
55+
// CHECK-NEXT: ret i32 [[TMP0]]
56+
//
57+
uint32_t workitem_id_y(void)
58+
{
59+
return __builtin_amdgcn_workitem_id_y();
60+
}
61+
62+
// CHECK-LABEL: define dso_local noundef range(i32 0, 1024) i32 @workitem_id_z(
63+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] {
64+
// CHECK-NEXT: [[ENTRY:.*:]]
65+
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
66+
// CHECK-NEXT: ret i32 [[TMP0]]
67+
//
68+
uint32_t workitem_id_z(void)
69+
{
70+
return __builtin_amdgcn_workitem_id_z();
71+
}
72+
73+
// CHECK-LABEL: define dso_local range(i32 1, 1025) i32 @workgroup_size_x(
74+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR7:[0-9]+]] {
75+
// CHECK-NEXT: [[ENTRY:.*:]]
76+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
77+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 12
78+
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG3:![0-9]+]], !invariant.load [[META4:![0-9]+]], !noundef [[META4]]
79+
// CHECK-NEXT: [[CONV:%.*]] = zext nneg i16 [[TMP2]] to i32
80+
// CHECK-NEXT: ret i32 [[CONV]]
81+
//
82+
uint32_t workgroup_size_x(void)
83+
{
84+
return __builtin_amdgcn_workgroup_size_x();
85+
}
86+
87+
// CHECK-LABEL: define dso_local range(i32 1, 1025) i32 @workgroup_size_y(
88+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR7]] {
89+
// CHECK-NEXT: [[ENTRY:.*:]]
90+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
91+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 14
92+
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
93+
// CHECK-NEXT: [[CONV:%.*]] = zext nneg i16 [[TMP2]] to i32
94+
// CHECK-NEXT: ret i32 [[CONV]]
95+
//
96+
uint32_t workgroup_size_y(void)
97+
{
98+
return __builtin_amdgcn_workgroup_size_y();
99+
}
100+
101+
// CHECK-LABEL: define dso_local range(i32 1, 1025) i32 @workgroup_size_z(
102+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR7]] {
103+
// CHECK-NEXT: [[ENTRY:.*:]]
104+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
105+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 16
106+
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 8, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
107+
// CHECK-NEXT: [[CONV:%.*]] = zext nneg i16 [[TMP2]] to i32
108+
// CHECK-NEXT: ret i32 [[CONV]]
109+
//
110+
uint32_t workgroup_size_z(void)
111+
{
112+
return __builtin_amdgcn_workgroup_size_z();
113+
}
114+
115+
// CHECK-LABEL: define dso_local range(i32 1, 0) i32 @grid_size_x(
116+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] {
117+
// CHECK-NEXT: [[ENTRY:.*:]]
118+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
119+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 12
120+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG5:![0-9]+]], !invariant.load [[META4]]
121+
// CHECK-NEXT: ret i32 [[TMP2]]
122+
//
123+
uint32_t grid_size_x(void)
124+
{
125+
return __builtin_amdgcn_grid_size_x();
126+
}
127+
128+
// CHECK-LABEL: define dso_local range(i32 1, 0) i32 @grid_size_y(
129+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR8]] {
130+
// CHECK-NEXT: [[ENTRY:.*:]]
131+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
132+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 16
133+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG5]], !invariant.load [[META4]]
134+
// CHECK-NEXT: ret i32 [[TMP2]]
135+
//
136+
uint32_t grid_size_y(void)
137+
{
138+
return __builtin_amdgcn_grid_size_y();
139+
}
140+
141+
// CHECK-LABEL: define dso_local range(i32 1, 0) i32 @grid_size_z(
142+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR8]] {
143+
// CHECK-NEXT: [[ENTRY:.*:]]
144+
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
145+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 20
146+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG5]], !invariant.load [[META4]]
147+
// CHECK-NEXT: ret i32 [[TMP2]]
148+
//
149+
uint32_t grid_size_z(void)
150+
{
151+
return __builtin_amdgcn_grid_size_z();
152+
}
153+
154+
//.
155+
// CHECK: [[RNG3]] = !{i16 1, i16 1025}
156+
// CHECK: [[META4]] = !{}
157+
// CHECK: [[RNG5]] = !{i32 1, i32 0}
158+
//.

0 commit comments

Comments
 (0)