|
9 | 9 | // RUN: -target-feature +ptx62 \ |
10 | 10 | // RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \ |
11 | 11 | // RUN: | FileCheck %s --check-prefix=NVPTX |
| 12 | +// |
| 13 | +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ |
| 14 | +// RUN: -internal-isystem %S/../../lib/Headers/ \ |
| 15 | +// RUN: -triple spirv64-- -emit-llvm %s -o - \ |
| 16 | +// RUN: | FileCheck %s --check-prefix=SPIRV64 |
12 | 17 |
|
13 | 18 | #include <gpuintrin.h> |
14 | 19 |
|
@@ -978,6 +983,224 @@ __gpu_kernel void foo() { |
978 | 983 | // NVPTX-NEXT: call void @llvm.nvvm.exit() |
979 | 984 | // NVPTX-NEXT: ret void |
980 | 985 | // |
| 986 | +// |
| 987 | +// SPIRV64-LABEL: define spir_func void @foo( |
| 988 | +// SPIRV64-SAME: ) #[[ATTR0:[0-9]+]] { |
| 989 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 990 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x() |
| 991 | +// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i32 @__gpu_num_blocks_y() |
| 992 | +// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_z() |
| 993 | +// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_num_blocks(i32 noundef 0) |
| 994 | +// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_x() |
| 995 | +// SPIRV64-NEXT: [[CALL5:%.*]] = call spir_func i32 @__gpu_block_id_y() |
| 996 | +// SPIRV64-NEXT: [[CALL6:%.*]] = call spir_func i32 @__gpu_block_id_z() |
| 997 | +// SPIRV64-NEXT: [[CALL7:%.*]] = call spir_func i32 @__gpu_block_id(i32 noundef 0) |
| 998 | +// SPIRV64-NEXT: [[CALL8:%.*]] = call spir_func i32 @__gpu_num_threads_x() |
| 999 | +// SPIRV64-NEXT: [[CALL9:%.*]] = call spir_func i32 @__gpu_num_threads_y() |
| 1000 | +// SPIRV64-NEXT: [[CALL10:%.*]] = call spir_func i32 @__gpu_num_threads_z() |
| 1001 | +// SPIRV64-NEXT: [[CALL11:%.*]] = call spir_func i32 @__gpu_num_threads(i32 noundef 0) |
| 1002 | +// SPIRV64-NEXT: [[CALL12:%.*]] = call spir_func i32 @__gpu_thread_id_x() |
| 1003 | +// SPIRV64-NEXT: [[CALL13:%.*]] = call spir_func i32 @__gpu_thread_id_y() |
| 1004 | +// SPIRV64-NEXT: [[CALL14:%.*]] = call spir_func i32 @__gpu_thread_id_z() |
| 1005 | +// SPIRV64-NEXT: [[CALL15:%.*]] = call spir_func i32 @__gpu_thread_id(i32 noundef 0) |
| 1006 | +// SPIRV64-NEXT: [[CALL16:%.*]] = call spir_func i32 @__gpu_num_lanes() |
| 1007 | +// SPIRV64-NEXT: [[CALL17:%.*]] = call spir_func i32 @__gpu_lane_id() |
| 1008 | +// SPIRV64-NEXT: [[CALL18:%.*]] = call spir_func i64 @__gpu_lane_mask() |
| 1009 | +// SPIRV64-NEXT: [[CALL19:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) |
| 1010 | +// SPIRV64-NEXT: [[CALL20:%.*]] = call spir_func i64 @__gpu_read_first_lane_u64(i64 noundef -1, i64 noundef -1) |
| 1011 | +// SPIRV64-NEXT: [[CALL21:%.*]] = call spir_func i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) |
| 1012 | +// SPIRV64-NEXT: call spir_func void @__gpu_sync_threads() |
| 1013 | +// SPIRV64-NEXT: call spir_func void @__gpu_sync_lane(i64 noundef -1) |
| 1014 | +// SPIRV64-NEXT: [[CALL22:%.*]] = call spir_func i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) |
| 1015 | +// SPIRV64-NEXT: [[CALL23:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef -1) |
| 1016 | +// SPIRV64-NEXT: [[CALL24:%.*]] = call spir_func zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) |
| 1017 | +// SPIRV64-NEXT: call spir_func void @__gpu_exit() #[[ATTR4:[0-9]+]] |
| 1018 | +// SPIRV64-NEXT: unreachable |
| 1019 | +// |
| 1020 | +// |
| 1021 | +// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_blocks( |
| 1022 | +// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { |
| 1023 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1024 | +// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 |
| 1025 | +// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 |
| 1026 | +// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 |
| 1027 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 |
| 1028 | +// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ |
| 1029 | +// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]] |
| 1030 | +// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]] |
| 1031 | +// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]] |
| 1032 | +// SPIRV64-NEXT: ] |
| 1033 | +// SPIRV64: [[SW_BB]]: |
| 1034 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x() |
| 1035 | +// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 |
| 1036 | +// SPIRV64-NEXT: br label %[[RETURN:.*]] |
| 1037 | +// SPIRV64: [[SW_BB1]]: |
| 1038 | +// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_y() |
| 1039 | +// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 |
| 1040 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1041 | +// SPIRV64: [[SW_BB3]]: |
| 1042 | +// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_blocks_z() |
| 1043 | +// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 |
| 1044 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1045 | +// SPIRV64: [[SW_DEFAULT]]: |
| 1046 | +// SPIRV64-NEXT: unreachable |
| 1047 | +// SPIRV64: [[RETURN]]: |
| 1048 | +// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 |
| 1049 | +// SPIRV64-NEXT: ret i32 [[TMP1]] |
| 1050 | +// |
| 1051 | +// |
| 1052 | +// SPIRV64-LABEL: define internal spir_func i32 @__gpu_block_id( |
| 1053 | +// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { |
| 1054 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1055 | +// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 |
| 1056 | +// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 |
| 1057 | +// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 |
| 1058 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 |
| 1059 | +// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ |
| 1060 | +// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]] |
| 1061 | +// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]] |
| 1062 | +// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]] |
| 1063 | +// SPIRV64-NEXT: ] |
| 1064 | +// SPIRV64: [[SW_BB]]: |
| 1065 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_block_id_x() |
| 1066 | +// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 |
| 1067 | +// SPIRV64-NEXT: br label %[[RETURN:.*]] |
| 1068 | +// SPIRV64: [[SW_BB1]]: |
| 1069 | +// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_block_id_y() |
| 1070 | +// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 |
| 1071 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1072 | +// SPIRV64: [[SW_BB3]]: |
| 1073 | +// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_z() |
| 1074 | +// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 |
| 1075 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1076 | +// SPIRV64: [[SW_DEFAULT]]: |
| 1077 | +// SPIRV64-NEXT: unreachable |
| 1078 | +// SPIRV64: [[RETURN]]: |
| 1079 | +// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 |
| 1080 | +// SPIRV64-NEXT: ret i32 [[TMP1]] |
| 1081 | +// |
| 1082 | +// |
| 1083 | +// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_threads( |
| 1084 | +// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { |
| 1085 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1086 | +// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 |
| 1087 | +// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 |
| 1088 | +// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 |
| 1089 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 |
| 1090 | +// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ |
| 1091 | +// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]] |
| 1092 | +// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]] |
| 1093 | +// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]] |
| 1094 | +// SPIRV64-NEXT: ] |
| 1095 | +// SPIRV64: [[SW_BB]]: |
| 1096 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_threads_x() |
| 1097 | +// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 |
| 1098 | +// SPIRV64-NEXT: br label %[[RETURN:.*]] |
| 1099 | +// SPIRV64: [[SW_BB1]]: |
| 1100 | +// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_threads_y() |
| 1101 | +// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 |
| 1102 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1103 | +// SPIRV64: [[SW_BB3]]: |
| 1104 | +// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_threads_z() |
| 1105 | +// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 |
| 1106 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1107 | +// SPIRV64: [[SW_DEFAULT]]: |
| 1108 | +// SPIRV64-NEXT: unreachable |
| 1109 | +// SPIRV64: [[RETURN]]: |
| 1110 | +// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 |
| 1111 | +// SPIRV64-NEXT: ret i32 [[TMP1]] |
| 1112 | +// |
| 1113 | +// |
| 1114 | +// SPIRV64-LABEL: define internal spir_func i32 @__gpu_thread_id( |
| 1115 | +// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { |
| 1116 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1117 | +// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 |
| 1118 | +// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 |
| 1119 | +// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 |
| 1120 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 |
| 1121 | +// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ |
| 1122 | +// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]] |
| 1123 | +// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]] |
| 1124 | +// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]] |
| 1125 | +// SPIRV64-NEXT: ] |
| 1126 | +// SPIRV64: [[SW_BB]]: |
| 1127 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_thread_id_x() |
| 1128 | +// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 |
| 1129 | +// SPIRV64-NEXT: br label %[[RETURN:.*]] |
| 1130 | +// SPIRV64: [[SW_BB1]]: |
| 1131 | +// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_thread_id_y() |
| 1132 | +// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 |
| 1133 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1134 | +// SPIRV64: [[SW_BB3]]: |
| 1135 | +// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_thread_id_z() |
| 1136 | +// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 |
| 1137 | +// SPIRV64-NEXT: br label %[[RETURN]] |
| 1138 | +// SPIRV64: [[SW_DEFAULT]]: |
| 1139 | +// SPIRV64-NEXT: unreachable |
| 1140 | +// SPIRV64: [[RETURN]]: |
| 1141 | +// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 |
| 1142 | +// SPIRV64-NEXT: ret i32 [[TMP1]] |
| 1143 | +// |
| 1144 | +// |
| 1145 | +// SPIRV64-LABEL: define internal spir_func i64 @__gpu_read_first_lane_u64( |
| 1146 | +// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] { |
| 1147 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1148 | +// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 |
| 1149 | +// SPIRV64-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8 |
| 1150 | +// SPIRV64-NEXT: [[__HI:%.*]] = alloca i32, align 4 |
| 1151 | +// SPIRV64-NEXT: [[__LO:%.*]] = alloca i32, align 4 |
| 1152 | +// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 |
| 1153 | +// SPIRV64-NEXT: store i64 [[__X]], ptr [[__X_ADDR]], align 8 |
| 1154 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__X_ADDR]], align 8 |
| 1155 | +// SPIRV64-NEXT: [[SHR:%.*]] = lshr i64 [[TMP0]], 32 |
| 1156 | +// SPIRV64-NEXT: [[CONV:%.*]] = trunc i64 [[SHR]] to i32 |
| 1157 | +// SPIRV64-NEXT: store i32 [[CONV]], ptr [[__HI]], align 4 |
| 1158 | +// SPIRV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR]], align 8 |
| 1159 | +// SPIRV64-NEXT: [[AND:%.*]] = and i64 [[TMP1]], 4294967295 |
| 1160 | +// SPIRV64-NEXT: [[CONV1:%.*]] = trunc i64 [[AND]] to i32 |
| 1161 | +// SPIRV64-NEXT: store i32 [[CONV1]], ptr [[__LO]], align 4 |
| 1162 | +// SPIRV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 |
| 1163 | +// SPIRV64-NEXT: [[TMP3:%.*]] = load i32, ptr [[__HI]], align 4 |
| 1164 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP2]], i32 noundef [[TMP3]]) |
| 1165 | +// SPIRV64-NEXT: [[CONV2:%.*]] = zext i32 [[CALL]] to i64 |
| 1166 | +// SPIRV64-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 32 |
| 1167 | +// SPIRV64-NEXT: [[TMP4:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 |
| 1168 | +// SPIRV64-NEXT: [[TMP5:%.*]] = load i32, ptr [[__LO]], align 4 |
| 1169 | +// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP4]], i32 noundef [[TMP5]]) |
| 1170 | +// SPIRV64-NEXT: [[CONV4:%.*]] = zext i32 [[CALL3]] to i64 |
| 1171 | +// SPIRV64-NEXT: [[AND5:%.*]] = and i64 [[CONV4]], 4294967295 |
| 1172 | +// SPIRV64-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[AND5]] |
| 1173 | +// SPIRV64-NEXT: ret i64 [[OR]] |
| 1174 | +// |
| 1175 | +// |
| 1176 | +// SPIRV64-LABEL: define internal spir_func i64 @__gpu_first_lane_id( |
| 1177 | +// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] { |
| 1178 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1179 | +// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 |
| 1180 | +// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 |
| 1181 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 |
| 1182 | +// SPIRV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true) |
| 1183 | +// SPIRV64-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1 |
| 1184 | +// SPIRV64-NEXT: [[ISZERO:%.*]] = icmp eq i64 [[TMP0]], 0 |
| 1185 | +// SPIRV64-NEXT: [[FFS:%.*]] = select i1 [[ISZERO]], i64 0, i64 [[TMP2]] |
| 1186 | +// SPIRV64-NEXT: [[CAST:%.*]] = trunc i64 [[FFS]] to i32 |
| 1187 | +// SPIRV64-NEXT: [[SUB:%.*]] = sub nsw i32 [[CAST]], 1 |
| 1188 | +// SPIRV64-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64 |
| 1189 | +// SPIRV64-NEXT: ret i64 [[CONV]] |
| 1190 | +// |
| 1191 | +// |
| 1192 | +// SPIRV64-LABEL: define internal spir_func zeroext i1 @__gpu_is_first_in_lane( |
| 1193 | +// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] { |
| 1194 | +// SPIRV64-NEXT: [[ENTRY:.*:]] |
| 1195 | +// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 |
| 1196 | +// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 |
| 1197 | +// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id() |
| 1198 | +// SPIRV64-NEXT: [[CONV:%.*]] = zext i32 [[CALL]] to i64 |
| 1199 | +// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 |
| 1200 | +// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef [[TMP0]]) |
| 1201 | +// SPIRV64-NEXT: [[CMP:%.*]] = icmp eq i64 [[CONV]], [[CALL1]] |
| 1202 | +// SPIRV64-NEXT: ret i1 [[CMP]] |
| 1203 | +// |
981 | 1204 | //. |
982 | 1205 | // AMDGPU: [[RNG3]] = !{i32 1, i32 0} |
983 | 1206 | // AMDGPU: [[META4]] = !{} |
|
0 commit comments