@@ -1064,6 +1064,174 @@ void test_sat_pk4_i4_i8(ushort *out, uint src)
1064
1064
* out = __builtin_amdgcn_sat_pk4_u4_u8 (src );
1065
1065
}
1066
1066
1067
+ // CHECK-LABEL: @test_get_cluster_id(
1068
+ // CHECK-NEXT: entry:
1069
+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1070
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1071
+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1072
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1073
+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1074
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1075
+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1076
+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1077
+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1078
+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1079
+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1080
+ // CHECK-NEXT: ]
1081
+ // CHECK: sw.bb:
1082
+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
1083
+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1084
+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1085
+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1086
+ // CHECK: sw.bb1:
1087
+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
1088
+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1089
+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1090
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1091
+ // CHECK: sw.bb2:
1092
+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
1093
+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1094
+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1095
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1096
+ // CHECK: sw.default:
1097
+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1098
+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1099
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1100
+ // CHECK: sw.epilog:
1101
+ // CHECK-NEXT: ret void
1102
+ //
1103
+ void test_get_cluster_id (int d , global int * out ) // Codegen test: writes one of the cluster_id_{x,y,z} builtin results to *out, selected by d.
1104
+ {
1105
+ switch (d ) { // The FileCheck lines above expect this to lower to an IR switch with cases 0, 1, 2 plus a default.
1106
+ case 0 : * out = __builtin_amdgcn_cluster_id_x (); break ; // expected IR: call to @llvm.amdgcn.cluster.id.x
1107
+ case 1 : * out = __builtin_amdgcn_cluster_id_y (); break ; // expected IR: call to @llvm.amdgcn.cluster.id.y
1108
+ case 2 : * out = __builtin_amdgcn_cluster_id_z (); break ; // expected IR: call to @llvm.amdgcn.cluster.id.z
1109
+ default : * out = 0 ; // any other d stores 0; last label, so control simply leaves the switch
1110
+ }
1111
+ }
1112
+
1113
+ // CHECK-LABEL: @test_get_cluster_group_id(
1114
+ // CHECK-NEXT: entry:
1115
+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1116
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1117
+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1118
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1119
+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1120
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1121
+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1122
+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1123
+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1124
+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1125
+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1126
+ // CHECK-NEXT: ]
1127
+ // CHECK: sw.bb:
1128
+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
1129
+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1130
+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1131
+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1132
+ // CHECK: sw.bb1:
1133
+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
1134
+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1135
+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1136
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1137
+ // CHECK: sw.bb2:
1138
+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
1139
+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1140
+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1141
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1142
+ // CHECK: sw.default:
1143
+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1144
+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1145
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1146
+ // CHECK: sw.epilog:
1147
+ // CHECK-NEXT: ret void
1148
+ //
1149
+ void test_get_cluster_group_id (int d , global int * out ) // Codegen test: writes one of the cluster_workgroup_id_{x,y,z} builtin results to *out, selected by d. NOTE(review): the name says "cluster_group_id" while the builtins are "cluster_workgroup_id" - confirm whether the name is intentional.
1150
+ {
1151
+ switch (d ) { // The FileCheck lines above expect an IR switch with cases 0, 1, 2 plus a default.
1152
+ case 0 : * out = __builtin_amdgcn_cluster_workgroup_id_x (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.id.x
1153
+ case 1 : * out = __builtin_amdgcn_cluster_workgroup_id_y (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.id.y
1154
+ case 2 : * out = __builtin_amdgcn_cluster_workgroup_id_z (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.id.z
1155
+ default : * out = 0 ; // any other d stores 0; last label, so control simply leaves the switch
1156
+ }
1157
+ }
1158
+
1159
+ // CHECK-LABEL: @test_cluster_workgroup_flat_id(
1160
+ // CHECK-NEXT: entry:
1161
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1162
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1163
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1164
+ // CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.flat.id()
1165
+ // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1166
+ // CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4
1167
+ // CHECK-NEXT: ret void
1168
+ //
1169
+ void test_cluster_workgroup_flat_id (global uint * out ) // Codegen test: stores the cluster_workgroup_flat_id builtin result to *out. NOTE(review): takes global uint * whereas the sibling tests in this group take global int * - confirm which matches the builtin's declared return type.
1170
+ {
1171
+ * out = __builtin_amdgcn_cluster_workgroup_flat_id (); // expected IR per the FileCheck lines above: call i32 @llvm.amdgcn.cluster.workgroup.flat.id, then an i32 store
1172
+ }
1173
+
1174
+ // CHECK-LABEL: @test_get_cluster_workgroups_max_id(
1175
+ // CHECK-NEXT: entry:
1176
+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1177
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1178
+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1179
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1180
+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1181
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1182
+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1183
+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1184
+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1185
+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1186
+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1187
+ // CHECK-NEXT: ]
1188
+ // CHECK: sw.bb:
1189
+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
1190
+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1191
+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1192
+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1193
+ // CHECK: sw.bb1:
1194
+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y()
1195
+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1196
+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1197
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1198
+ // CHECK: sw.bb2:
1199
+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
1200
+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1201
+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1202
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1203
+ // CHECK: sw.default:
1204
+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1205
+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1206
+ // CHECK-NEXT: br label [[SW_EPILOG]]
1207
+ // CHECK: sw.epilog:
1208
+ // CHECK-NEXT: ret void
1209
+ //
1210
+ void test_get_cluster_workgroups_max_id (int d , global int * out ) // Codegen test: writes one of the cluster_workgroup_max_id_{x,y,z} builtin results to *out, selected by d. NOTE(review): "workgroups" is plural here but singular in the builtin names and in the max_flat_id test - confirm whether the name is intentional.
1211
+ {
1212
+ switch (d ) { // The FileCheck lines above expect an IR switch with cases 0, 1, 2 plus a default.
1213
+ case 0 : * out = __builtin_amdgcn_cluster_workgroup_max_id_x (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.max.id.x
1214
+ case 1 : * out = __builtin_amdgcn_cluster_workgroup_max_id_y (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.max.id.y
1215
+ case 2 : * out = __builtin_amdgcn_cluster_workgroup_max_id_z (); break ; // expected IR: call to @llvm.amdgcn.cluster.workgroup.max.id.z
1216
+ default : * out = 0 ; // any other d stores 0; last label, so control simply leaves the switch
1217
+ }
1218
+ }
1219
+
1220
+ // CHECK-LABEL: @test_get_cluster_workgroup_max_flat_id(
1221
+ // CHECK-NEXT: entry:
1222
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1223
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1224
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1225
+ // CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id()
1226
+ // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1227
+ // CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4
1228
+ // CHECK-NEXT: ret void
1229
+ //
1230
+ void test_get_cluster_workgroup_max_flat_id (global int * out ) // Codegen test: stores the cluster_workgroup_max_flat_id builtin result to *out.
1231
+ {
1232
+ * out = __builtin_amdgcn_cluster_workgroup_max_flat_id (); // expected IR per the FileCheck lines above: call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id, then an i32 store
1233
+ }
1234
+
1067
1235
// CHECK-LABEL: @test_permlane16_swap(
1068
1236
// CHECK-NEXT: entry:
1069
1237
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
0 commit comments