@@ -1083,58 +1083,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1(ptr addrspace(1)
10831083; GFX90A-VGPR: ; %bb.0: ; %bb
10841084; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
10851085; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1086- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1087- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1088- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
10891086; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1090- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1091- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1092- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1093- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1094- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1095- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1096- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1097- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1098- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1099- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1100- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1087+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1088+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
11011089; GFX90A-VGPR-NEXT: s_nop 1
1102- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1103- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1090+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 1.0
1091+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1092+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
11041093; GFX90A-VGPR-NEXT: s_nop 7
11051094; GFX90A-VGPR-NEXT: s_nop 7
1106- ; GFX90A-VGPR-NEXT: s_nop 1
1107- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0 , v[6:9 ], s[0:1] offset:16
1108- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0 , v[2:5 ], s[0:1]
1095+ ; GFX90A-VGPR-NEXT: s_nop 0
1096+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1097+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
11091098; GFX90A-VGPR-NEXT: s_endpgm
11101099;
11111100; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_1:
11121101; GFX942-VGPR: ; %bb.0: ; %bb
11131102; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
11141103; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1115- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1116- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1117- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
11181104; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1119- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1120- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1121- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1122- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1123- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1124- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1125- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1126- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1127- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1128- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1129- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1105+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1106+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
11301107; GFX942-VGPR-NEXT: s_nop 1
1131- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1132- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1108+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 1.0
1109+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1110+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
11331111; GFX942-VGPR-NEXT: s_nop 7
11341112; GFX942-VGPR-NEXT: s_nop 7
1135- ; GFX942-VGPR-NEXT: s_nop 1
1136- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0 , v[6:9 ], s[0:1] offset:16
1137- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0 , v[2:5 ], s[0:1]
1113+ ; GFX942-VGPR-NEXT: s_nop 0
1114+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1115+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
11381116; GFX942-VGPR-NEXT: s_endpgm
11391117bb:
11401118 %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double 1 .0 ), i32 0 , i32 0 , i32 0 )
@@ -1184,58 +1162,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1(ptr addrspace
11841162; GFX90A-VGPR: ; %bb.0: ; %bb
11851163; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
11861164; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1187- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1188- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1189- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
11901165; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1191- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1192- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1193- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1194- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1195- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1196- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1197- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1198- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1199- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1200- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1201- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1166+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1167+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
12021168; GFX90A-VGPR-NEXT: s_nop 1
1203- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1204- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1169+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], -1.0
1170+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1171+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
12051172; GFX90A-VGPR-NEXT: s_nop 7
12061173; GFX90A-VGPR-NEXT: s_nop 7
1207- ; GFX90A-VGPR-NEXT: s_nop 1
1208- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0 , v[6:9 ], s[0:1] offset:16
1209- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0 , v[2:5 ], s[0:1]
1174+ ; GFX90A-VGPR-NEXT: s_nop 0
1175+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1176+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
12101177; GFX90A-VGPR-NEXT: s_endpgm
12111178;
12121179; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_neg1:
12131180; GFX942-VGPR: ; %bb.0: ; %bb
12141181; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
12151182; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1216- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1217- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1218- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
12191183; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1220- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1221- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1222- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1223- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1224- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1225- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1226- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1227- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1228- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1229- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1230- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1184+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1185+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
12311186; GFX942-VGPR-NEXT: s_nop 1
1232- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1233- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1187+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], -1.0
1188+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1189+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
12341190; GFX942-VGPR-NEXT: s_nop 7
12351191; GFX942-VGPR-NEXT: s_nop 7
1236- ; GFX942-VGPR-NEXT: s_nop 1
1237- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0 , v[6:9 ], s[0:1] offset:16
1238- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0 , v[2:5 ], s[0:1]
1192+ ; GFX942-VGPR-NEXT: s_nop 0
1193+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1194+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
12391195; GFX942-VGPR-NEXT: s_endpgm
12401196bb:
12411197 %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double -1 .0 ), i32 0 , i32 0 , i32 0 )
@@ -1285,58 +1241,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64(ptr addrspa
12851241; GFX90A-VGPR: ; %bb.0: ; %bb
12861242; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
12871243; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1288- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0
1289- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 64
1290- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
12911244; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1292- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1293- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1294- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1295- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1296- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1297- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1298- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1299- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1300- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1301- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1302- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1245+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1246+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
13031247; GFX90A-VGPR-NEXT: s_nop 1
1304- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1305- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1248+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 64
1249+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1250+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
13061251; GFX90A-VGPR-NEXT: s_nop 7
13071252; GFX90A-VGPR-NEXT: s_nop 7
1308- ; GFX90A-VGPR-NEXT: s_nop 1
1309- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v1 , v[6:9 ], s[0:1] offset:16
1310- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v1 , v[2:5 ], s[0:1]
1253+ ; GFX90A-VGPR-NEXT: s_nop 0
1254+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1255+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
13111256; GFX90A-VGPR-NEXT: s_endpgm
13121257;
13131258; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64:
13141259; GFX942-VGPR: ; %bb.0: ; %bb
13151260; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
13161261; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1317- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0
1318- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64
1319- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
13201262; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1321- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1322- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1323- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1324- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1325- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1326- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1327- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1328- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1329- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1330- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1331- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1263+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1264+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
13321265; GFX942-VGPR-NEXT: s_nop 1
1333- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1334- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1266+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 64
1267+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1268+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
13351269; GFX942-VGPR-NEXT: s_nop 7
13361270; GFX942-VGPR-NEXT: s_nop 7
1337- ; GFX942-VGPR-NEXT: s_nop 1
1338- ; GFX942-VGPR-NEXT: global_store_dwordx4 v1 , v[6:9 ], s[0:1] offset:16
1339- ; GFX942-VGPR-NEXT: global_store_dwordx4 v1 , v[2:5 ], s[0:1]
1271+ ; GFX942-VGPR-NEXT: s_nop 0
1272+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[4:7 ], s[0:1] offset:16
1273+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8 , v[0:3 ], s[0:1]
13401274; GFX942-VGPR-NEXT: s_endpgm
13411275bb:
13421276 %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 64 to double )), i32 0 , i32 0 , i32 0 )
0 commit comments