@@ -1064,6 +1064,174 @@ void test_sat_pk4_i4_i8(ushort *out, uint src)
10641064 * out = __builtin_amdgcn_sat_pk4_u4_u8 (src );
10651065}
10661066
1067+ // CHECK-LABEL: @test_get_cluster_id(
1068+ // CHECK-NEXT: entry:
1069+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1070+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1071+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1072+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1073+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1074+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1075+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1076+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1077+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1078+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1079+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1080+ // CHECK-NEXT: ]
1081+ // CHECK: sw.bb:
1082+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
1083+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1084+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1085+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1086+ // CHECK: sw.bb1:
1087+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
1088+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1089+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1090+ // CHECK-NEXT: br label [[SW_EPILOG]]
1091+ // CHECK: sw.bb2:
1092+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
1093+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1094+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1095+ // CHECK-NEXT: br label [[SW_EPILOG]]
1096+ // CHECK: sw.default:
1097+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1098+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1099+ // CHECK-NEXT: br label [[SW_EPILOG]]
1100+ // CHECK: sw.epilog:
1101+ // CHECK-NEXT: ret void
1102+ //
1103+ void test_get_cluster_id (int d , global int * out ) // Codegen test: picks one of the cluster_id_{x,y,z} builtins based on d and stores its result through out.
1104+ {
1105+ switch (d ) { // one case per x/y/z variant so that all three builtins are emitted in this function
1106+ case 0 : * out = __builtin_amdgcn_cluster_id_x (); break ; // expectations above want a call to llvm.amdgcn.cluster.id.x here
1107+ case 1 : * out = __builtin_amdgcn_cluster_id_y (); break ; // expectations above want a call to llvm.amdgcn.cluster.id.y here
1108+ case 2 : * out = __builtin_amdgcn_cluster_id_z (); break ; // expectations above want a call to llvm.amdgcn.cluster.id.z here
1109+ default : * out = 0 ; // any other d stores the constant 0; last label, so no break is needed
1110+ }
1111+ }
1112+
1113+ // CHECK-LABEL: @test_get_cluster_group_id(
1114+ // CHECK-NEXT: entry:
1115+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1116+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1117+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1118+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1119+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1120+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1121+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1122+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1123+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1124+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1125+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1126+ // CHECK-NEXT: ]
1127+ // CHECK: sw.bb:
1128+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
1129+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1130+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1131+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1132+ // CHECK: sw.bb1:
1133+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
1134+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1135+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1136+ // CHECK-NEXT: br label [[SW_EPILOG]]
1137+ // CHECK: sw.bb2:
1138+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
1139+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1140+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1141+ // CHECK-NEXT: br label [[SW_EPILOG]]
1142+ // CHECK: sw.default:
1143+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1144+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1145+ // CHECK-NEXT: br label [[SW_EPILOG]]
1146+ // CHECK: sw.epilog:
1147+ // CHECK-NEXT: ret void
1148+ //
1149+ void test_get_cluster_group_id (int d , global int * out ) // Codegen test: picks one of the cluster_workgroup_id_{x,y,z} builtins based on d and stores its result through out. NOTE(review): the function name says "cluster_group" while the builtins exercised are cluster_workgroup_id_* - confirm the naming is intended.
1150+ {
1151+ switch (d ) { // one case per x/y/z variant so that all three builtins are emitted in this function
1152+ case 0 : * out = __builtin_amdgcn_cluster_workgroup_id_x (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.id.x here
1153+ case 1 : * out = __builtin_amdgcn_cluster_workgroup_id_y (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.id.y here
1154+ case 2 : * out = __builtin_amdgcn_cluster_workgroup_id_z (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.id.z here
1155+ default : * out = 0 ; // any other d stores the constant 0; last label, so no break is needed
1156+ }
1157+ }
1158+
1159+ // CHECK-LABEL: @test_cluster_workgroup_flat_id(
1160+ // CHECK-NEXT: entry:
1161+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1162+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1163+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1164+ // CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.flat.id()
1165+ // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1166+ // CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4
1167+ // CHECK-NEXT: ret void
1168+ //
1169+ void test_cluster_workgroup_flat_id (global uint * out ) // Codegen test: stores the result of the cluster_workgroup_flat_id builtin through out.
1170+ {
1171+ * out = __builtin_amdgcn_cluster_workgroup_flat_id (); // expectations above want a single call to llvm.amdgcn.cluster.workgroup.flat.id followed by an i32 store
1172+ }
1173+
1174+ // CHECK-LABEL: @test_get_cluster_workgroups_max_id(
1175+ // CHECK-NEXT: entry:
1176+ // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1177+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1178+ // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
1179+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1180+ // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
1181+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1182+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1183+ // CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
1184+ // CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1185+ // CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
1186+ // CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
1187+ // CHECK-NEXT: ]
1188+ // CHECK: sw.bb:
1189+ // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
1190+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1191+ // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
1192+ // CHECK-NEXT: br label [[SW_EPILOG:%.*]]
1193+ // CHECK: sw.bb1:
1194+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y()
1195+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1196+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1197+ // CHECK-NEXT: br label [[SW_EPILOG]]
1198+ // CHECK: sw.bb2:
1199+ // CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
1200+ // CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1201+ // CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
1202+ // CHECK-NEXT: br label [[SW_EPILOG]]
1203+ // CHECK: sw.default:
1204+ // CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1205+ // CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP7]], align 4
1206+ // CHECK-NEXT: br label [[SW_EPILOG]]
1207+ // CHECK: sw.epilog:
1208+ // CHECK-NEXT: ret void
1209+ //
1210+ void test_get_cluster_workgroups_max_id (int d , global int * out ) // Codegen test: picks one of the cluster_workgroup_max_id_{x,y,z} builtins based on d and stores its result through out. NOTE(review): the name uses "workgroups" (plural) while the builtins use "workgroup" - confirm the naming is intended.
1211+ {
1212+ switch (d ) { // one case per x/y/z variant so that all three builtins are emitted in this function
1213+ case 0 : * out = __builtin_amdgcn_cluster_workgroup_max_id_x (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.max.id.x here
1214+ case 1 : * out = __builtin_amdgcn_cluster_workgroup_max_id_y (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.max.id.y here
1215+ case 2 : * out = __builtin_amdgcn_cluster_workgroup_max_id_z (); break ; // expectations above want a call to llvm.amdgcn.cluster.workgroup.max.id.z here
1216+ default : * out = 0 ; // any other d stores the constant 0; last label, so no break is needed
1217+ }
1218+ }
1219+
1220+ // CHECK-LABEL: @test_get_cluster_workgroup_max_flat_id(
1221+ // CHECK-NEXT: entry:
1222+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1223+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1224+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1225+ // CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id()
1226+ // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1227+ // CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4
1228+ // CHECK-NEXT: ret void
1229+ //
1230+ void test_get_cluster_workgroup_max_flat_id (global int * out ) // Codegen test: stores the result of the cluster_workgroup_max_flat_id builtin through out.
1231+ {
1232+ * out = __builtin_amdgcn_cluster_workgroup_max_flat_id (); // expectations above want a single call to llvm.amdgcn.cluster.workgroup.max.flat.id followed by an i32 store
1233+ }
1234+
10671235// CHECK-LABEL: @test_permlane16_swap(
10681236// CHECK-NEXT: entry:
10691237// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
0 commit comments