Skip to content

Commit 0b8f555

Browse files
[Headers] Create stub spirv64intrin.h
1 parent c9d7f70 commit 0b8f555

File tree

4 files changed

+357
-1
lines changed

4 files changed

+357
-1
lines changed

clang/lib/Headers/amdgpuintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===-- amdgpuintrin.h - AMDPGU intrinsic functions -----------------------===//
1+
//===-- amdgpuintrin.h - AMDGPU intrinsic functions -----------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

clang/lib/Headers/gpuintrin.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
6060
#include <nvptxintrin.h>
6161
#elif defined(__AMDGPU__)
6262
#include <amdgpuintrin.h>
63+
#elif defined(__SPIRV64__)
64+
#include <spirv64intrin.h>
6365
#elif !defined(_OPENMP)
6466
#error "This header is only meant to be used on GPU architectures."
6567
#endif

clang/lib/Headers/spirv64intrin.h

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//===-- spirv64intrin.h - SPIRV64 intrinsic functions --------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __SPIRV64INTRIN_H
10+
#define __SPIRV64INTRIN_H
11+
12+
#ifndef __SPIRV64__
13+
#error "This file is intended for SPIRV64 targets or offloading to SPIRV64"
14+
#endif
15+
16+
#ifndef __GPUINTRIN_H
17+
#error "Never use <spirv64intrin.h> directly; include <gpuintrin.h> instead"
18+
#endif
19+
20+
// This is the skeleton of the spirv64 implementation for gpuintrin
21+
// Address spaces and kernel attribute are not yet implemented
22+
// The target-specific functions are declarations waiting for clang support
23+
24+
#if defined(_OPENMP)
25+
#error "OpenMP is not yet available on spirv64 through the gpuintrin header"
26+
#endif
27+
28+
// Type aliases to the address spaces used by the SPIRV backend.
29+
#define __gpu_private
30+
#define __gpu_constant
31+
#define __gpu_local
32+
#define __gpu_global
33+
#define __gpu_generic
34+
35+
// Attribute to declare a function as a kernel.
36+
#define __gpu_kernel
37+
38+
// Returns the number of workgroups in the 'x' dimension of the grid.
39+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_x(void);
40+
41+
// Returns the number of workgroups in the 'y' dimension of the grid.
42+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_y(void);
43+
44+
// Returns the number of workgroups in the 'z' dimension of the grid.
45+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_z(void);
46+
47+
// Returns the 'x' dimension of the current workgroup's id.
48+
_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_x(void);
49+
50+
// Returns the 'y' dimension of the current workgroup's id.
51+
_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_y(void);
52+
53+
// Returns the 'z' dimension of the current workgroup's id.
54+
_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_z(void);
55+
56+
// Returns the number of workitems in the 'x' dimension.
57+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_x(void);
58+
59+
// Returns the number of workitems in the 'y' dimension.
60+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_y(void);
61+
62+
// Returns the number of workitems in the 'z' dimension.
63+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_z(void);
64+
65+
// Returns the 'x' dimension id of the workitem in the current workgroup.
66+
_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_x(void);
67+
68+
// Returns the 'y' dimension id of the workitem in the current workgroup.
69+
_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_y(void);
70+
71+
// Returns the 'z' dimension id of the workitem in the current workgroup.
72+
_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_z(void);
73+
74+
// Returns the size of the wave.
75+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_lanes(void);
76+
77+
// Returns the id of the thread inside of a wave executing together.
78+
_DEFAULT_FN_ATTRS uint32_t __gpu_lane_id(void);
79+
80+
// Returns the bit-mask of active threads in the current wave.
81+
_DEFAULT_FN_ATTRS uint64_t __gpu_lane_mask(void);
82+
83+
// Copies the value from the first active thread in the wave to the rest.
84+
_DEFAULT_FN_ATTRS uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask,
85+
uint32_t __x);
86+
87+
// Returns a bitmask of threads in the current lane for which \p x is true.
88+
_DEFAULT_FN_ATTRS uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x);
89+
90+
// Waits for all the threads in the block to converge and issues a fence.
91+
_DEFAULT_FN_ATTRS void __gpu_sync_threads(void);
92+
93+
// Wait for all threads in the wave to converge
94+
_DEFAULT_FN_ATTRS void __gpu_sync_lane(uint64_t __lane_mask);
95+
96+
// Shuffles the lanes inside the wave according to the given index.
97+
_DEFAULT_FN_ATTRS uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask,
98+
uint32_t __idx, uint32_t __x,
99+
uint32_t __width);
100+
101+
// Returns a bitmask marking all lanes that have the same value of __x.
102+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
103+
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
104+
return __gpu_match_any_u32_impl(__lane_mask, __x);
105+
}
106+
107+
// Returns a bitmask marking all lanes that have the same value of __x.
108+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
109+
__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
110+
return __gpu_match_any_u64_impl(__lane_mask, __x);
111+
}
112+
113+
// Returns the current lane mask if every lane contains __x.
114+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
115+
__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
116+
return __gpu_match_all_u32_impl(__lane_mask, __x);
117+
}
118+
119+
// Returns the current lane mask if every lane contains __x.
120+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
121+
__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
122+
return __gpu_match_all_u64_impl(__lane_mask, __x);
123+
}
124+
125+
// Terminates execution of the associated wave.
126+
_DEFAULT_FN_ATTRS [[noreturn]] void __gpu_exit(void);
127+
128+
// Suspend the thread briefly to assist the scheduler during busy loops.
129+
_DEFAULT_FN_ATTRS void __gpu_thread_suspend(void);
130+
131+
#endif // __SPIRV64INTRIN_H

clang/test/Headers/gpuintrin.c

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
// RUN: -target-feature +ptx62 \
1010
// RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
1111
// RUN: | FileCheck %s --check-prefix=NVPTX
12+
//
13+
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
14+
// RUN: -internal-isystem %S/../../lib/Headers/ \
15+
// RUN: -triple spirv64-- -emit-llvm %s -o - \
16+
// RUN: | FileCheck %s --check-prefix=SPIRV64
1217

1318
#include <gpuintrin.h>
1419

@@ -978,6 +983,224 @@ __gpu_kernel void foo() {
978983
// NVPTX-NEXT: call void @llvm.nvvm.exit()
979984
// NVPTX-NEXT: ret void
980985
//
986+
//
987+
// SPIRV64-LABEL: define spir_func void @foo(
988+
// SPIRV64-SAME: ) #[[ATTR0:[0-9]+]] {
989+
// SPIRV64-NEXT: [[ENTRY:.*:]]
990+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
991+
// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
992+
// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
993+
// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_num_blocks(i32 noundef 0)
994+
// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_x()
995+
// SPIRV64-NEXT: [[CALL5:%.*]] = call spir_func i32 @__gpu_block_id_y()
996+
// SPIRV64-NEXT: [[CALL6:%.*]] = call spir_func i32 @__gpu_block_id_z()
997+
// SPIRV64-NEXT: [[CALL7:%.*]] = call spir_func i32 @__gpu_block_id(i32 noundef 0)
998+
// SPIRV64-NEXT: [[CALL8:%.*]] = call spir_func i32 @__gpu_num_threads_x()
999+
// SPIRV64-NEXT: [[CALL9:%.*]] = call spir_func i32 @__gpu_num_threads_y()
1000+
// SPIRV64-NEXT: [[CALL10:%.*]] = call spir_func i32 @__gpu_num_threads_z()
1001+
// SPIRV64-NEXT: [[CALL11:%.*]] = call spir_func i32 @__gpu_num_threads(i32 noundef 0)
1002+
// SPIRV64-NEXT: [[CALL12:%.*]] = call spir_func i32 @__gpu_thread_id_x()
1003+
// SPIRV64-NEXT: [[CALL13:%.*]] = call spir_func i32 @__gpu_thread_id_y()
1004+
// SPIRV64-NEXT: [[CALL14:%.*]] = call spir_func i32 @__gpu_thread_id_z()
1005+
// SPIRV64-NEXT: [[CALL15:%.*]] = call spir_func i32 @__gpu_thread_id(i32 noundef 0)
1006+
// SPIRV64-NEXT: [[CALL16:%.*]] = call spir_func i32 @__gpu_num_lanes()
1007+
// SPIRV64-NEXT: [[CALL17:%.*]] = call spir_func i32 @__gpu_lane_id()
1008+
// SPIRV64-NEXT: [[CALL18:%.*]] = call spir_func i64 @__gpu_lane_mask()
1009+
// SPIRV64-NEXT: [[CALL19:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1)
1010+
// SPIRV64-NEXT: [[CALL20:%.*]] = call spir_func i64 @__gpu_read_first_lane_u64(i64 noundef -1, i64 noundef -1)
1011+
// SPIRV64-NEXT: [[CALL21:%.*]] = call spir_func i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true)
1012+
// SPIRV64-NEXT: call spir_func void @__gpu_sync_threads()
1013+
// SPIRV64-NEXT: call spir_func void @__gpu_sync_lane(i64 noundef -1)
1014+
// SPIRV64-NEXT: [[CALL22:%.*]] = call spir_func i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0)
1015+
// SPIRV64-NEXT: [[CALL23:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef -1)
1016+
// SPIRV64-NEXT: [[CALL24:%.*]] = call spir_func zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1)
1017+
// SPIRV64-NEXT: call spir_func void @__gpu_exit() #[[ATTR4:[0-9]+]]
1018+
// SPIRV64-NEXT: unreachable
1019+
//
1020+
//
1021+
// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_blocks(
1022+
// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
1023+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1024+
// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
1025+
// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
1026+
// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
1027+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
1028+
// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
1029+
// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
1030+
// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
1031+
// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
1032+
// SPIRV64-NEXT: ]
1033+
// SPIRV64: [[SW_BB]]:
1034+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
1035+
// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
1036+
// SPIRV64-NEXT: br label %[[RETURN:.*]]
1037+
// SPIRV64: [[SW_BB1]]:
1038+
// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
1039+
// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
1040+
// SPIRV64-NEXT: br label %[[RETURN]]
1041+
// SPIRV64: [[SW_BB3]]:
1042+
// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
1043+
// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
1044+
// SPIRV64-NEXT: br label %[[RETURN]]
1045+
// SPIRV64: [[SW_DEFAULT]]:
1046+
// SPIRV64-NEXT: unreachable
1047+
// SPIRV64: [[RETURN]]:
1048+
// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
1049+
// SPIRV64-NEXT: ret i32 [[TMP1]]
1050+
//
1051+
//
1052+
// SPIRV64-LABEL: define internal spir_func i32 @__gpu_block_id(
1053+
// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
1054+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1055+
// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
1056+
// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
1057+
// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
1058+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
1059+
// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
1060+
// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
1061+
// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
1062+
// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
1063+
// SPIRV64-NEXT: ]
1064+
// SPIRV64: [[SW_BB]]:
1065+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_block_id_x()
1066+
// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
1067+
// SPIRV64-NEXT: br label %[[RETURN:.*]]
1068+
// SPIRV64: [[SW_BB1]]:
1069+
// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_block_id_y()
1070+
// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
1071+
// SPIRV64-NEXT: br label %[[RETURN]]
1072+
// SPIRV64: [[SW_BB3]]:
1073+
// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_z()
1074+
// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
1075+
// SPIRV64-NEXT: br label %[[RETURN]]
1076+
// SPIRV64: [[SW_DEFAULT]]:
1077+
// SPIRV64-NEXT: unreachable
1078+
// SPIRV64: [[RETURN]]:
1079+
// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
1080+
// SPIRV64-NEXT: ret i32 [[TMP1]]
1081+
//
1082+
//
1083+
// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_threads(
1084+
// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
1085+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1086+
// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
1087+
// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
1088+
// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
1089+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
1090+
// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
1091+
// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
1092+
// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
1093+
// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
1094+
// SPIRV64-NEXT: ]
1095+
// SPIRV64: [[SW_BB]]:
1096+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_threads_x()
1097+
// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
1098+
// SPIRV64-NEXT: br label %[[RETURN:.*]]
1099+
// SPIRV64: [[SW_BB1]]:
1100+
// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_threads_y()
1101+
// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
1102+
// SPIRV64-NEXT: br label %[[RETURN]]
1103+
// SPIRV64: [[SW_BB3]]:
1104+
// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_threads_z()
1105+
// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
1106+
// SPIRV64-NEXT: br label %[[RETURN]]
1107+
// SPIRV64: [[SW_DEFAULT]]:
1108+
// SPIRV64-NEXT: unreachable
1109+
// SPIRV64: [[RETURN]]:
1110+
// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
1111+
// SPIRV64-NEXT: ret i32 [[TMP1]]
1112+
//
1113+
//
1114+
// SPIRV64-LABEL: define internal spir_func i32 @__gpu_thread_id(
1115+
// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
1116+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1117+
// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
1118+
// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
1119+
// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
1120+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
1121+
// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
1122+
// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
1123+
// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
1124+
// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
1125+
// SPIRV64-NEXT: ]
1126+
// SPIRV64: [[SW_BB]]:
1127+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_thread_id_x()
1128+
// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
1129+
// SPIRV64-NEXT: br label %[[RETURN:.*]]
1130+
// SPIRV64: [[SW_BB1]]:
1131+
// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_thread_id_y()
1132+
// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
1133+
// SPIRV64-NEXT: br label %[[RETURN]]
1134+
// SPIRV64: [[SW_BB3]]:
1135+
// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_thread_id_z()
1136+
// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
1137+
// SPIRV64-NEXT: br label %[[RETURN]]
1138+
// SPIRV64: [[SW_DEFAULT]]:
1139+
// SPIRV64-NEXT: unreachable
1140+
// SPIRV64: [[RETURN]]:
1141+
// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
1142+
// SPIRV64-NEXT: ret i32 [[TMP1]]
1143+
//
1144+
//
1145+
// SPIRV64-LABEL: define internal spir_func i64 @__gpu_read_first_lane_u64(
1146+
// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] {
1147+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1148+
// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
1149+
// SPIRV64-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8
1150+
// SPIRV64-NEXT: [[__HI:%.*]] = alloca i32, align 4
1151+
// SPIRV64-NEXT: [[__LO:%.*]] = alloca i32, align 4
1152+
// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
1153+
// SPIRV64-NEXT: store i64 [[__X]], ptr [[__X_ADDR]], align 8
1154+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__X_ADDR]], align 8
1155+
// SPIRV64-NEXT: [[SHR:%.*]] = lshr i64 [[TMP0]], 32
1156+
// SPIRV64-NEXT: [[CONV:%.*]] = trunc i64 [[SHR]] to i32
1157+
// SPIRV64-NEXT: store i32 [[CONV]], ptr [[__HI]], align 4
1158+
// SPIRV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR]], align 8
1159+
// SPIRV64-NEXT: [[AND:%.*]] = and i64 [[TMP1]], 4294967295
1160+
// SPIRV64-NEXT: [[CONV1:%.*]] = trunc i64 [[AND]] to i32
1161+
// SPIRV64-NEXT: store i32 [[CONV1]], ptr [[__LO]], align 4
1162+
// SPIRV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
1163+
// SPIRV64-NEXT: [[TMP3:%.*]] = load i32, ptr [[__HI]], align 4
1164+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP2]], i32 noundef [[TMP3]])
1165+
// SPIRV64-NEXT: [[CONV2:%.*]] = zext i32 [[CALL]] to i64
1166+
// SPIRV64-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 32
1167+
// SPIRV64-NEXT: [[TMP4:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
1168+
// SPIRV64-NEXT: [[TMP5:%.*]] = load i32, ptr [[__LO]], align 4
1169+
// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP4]], i32 noundef [[TMP5]])
1170+
// SPIRV64-NEXT: [[CONV4:%.*]] = zext i32 [[CALL3]] to i64
1171+
// SPIRV64-NEXT: [[AND5:%.*]] = and i64 [[CONV4]], 4294967295
1172+
// SPIRV64-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[AND5]]
1173+
// SPIRV64-NEXT: ret i64 [[OR]]
1174+
//
1175+
//
1176+
// SPIRV64-LABEL: define internal spir_func i64 @__gpu_first_lane_id(
1177+
// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
1178+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1179+
// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
1180+
// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
1181+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
1182+
// SPIRV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true)
1183+
// SPIRV64-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
1184+
// SPIRV64-NEXT: [[ISZERO:%.*]] = icmp eq i64 [[TMP0]], 0
1185+
// SPIRV64-NEXT: [[FFS:%.*]] = select i1 [[ISZERO]], i64 0, i64 [[TMP2]]
1186+
// SPIRV64-NEXT: [[CAST:%.*]] = trunc i64 [[FFS]] to i32
1187+
// SPIRV64-NEXT: [[SUB:%.*]] = sub nsw i32 [[CAST]], 1
1188+
// SPIRV64-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64
1189+
// SPIRV64-NEXT: ret i64 [[CONV]]
1190+
//
1191+
//
1192+
// SPIRV64-LABEL: define internal spir_func zeroext i1 @__gpu_is_first_in_lane(
1193+
// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
1194+
// SPIRV64-NEXT: [[ENTRY:.*:]]
1195+
// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
1196+
// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
1197+
// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id()
1198+
// SPIRV64-NEXT: [[CONV:%.*]] = zext i32 [[CALL]] to i64
1199+
// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
1200+
// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef [[TMP0]])
1201+
// SPIRV64-NEXT: [[CMP:%.*]] = icmp eq i64 [[CONV]], [[CALL1]]
1202+
// SPIRV64-NEXT: ret i1 [[CMP]]
1203+
//
9811204
//.
9821205
// AMDGPU: [[RNG3]] = !{i32 1, i32 0}
9831206
// AMDGPU: [[META4]] = !{}

0 commit comments

Comments
 (0)