Skip to content

Commit a36482f

Browse files
arsenm authored and mahesh-attarde committed
AMDGPU: Fix not folding splat immediate into VGPR MFMA src2 (llvm#150628)
1 parent 37cc20c commit a36482f

File tree

3 files changed

+70
-185
lines changed

3 files changed

+70
-185
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1062,9 +1062,13 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
10621062
switch (OpTy) {
10631063
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
10641064
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1065+
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1066+
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
10651067
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
10661068
break;
10671069
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1070+
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1071+
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
10681072
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);
10691073
break;
10701074
default:

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 48 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -1083,58 +1083,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1(ptr addrspace(1)
10831083
; GFX90A-VGPR: ; %bb.0: ; %bb
10841084
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
10851085
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1086-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1087-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1088-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
10891086
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1090-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1091-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1092-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1093-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1094-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1095-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1096-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1097-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1098-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1099-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1100-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1087+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1088+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
11011089
; GFX90A-VGPR-NEXT: s_nop 1
1102-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1103-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1090+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 1.0
1091+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1092+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
11041093
; GFX90A-VGPR-NEXT: s_nop 7
11051094
; GFX90A-VGPR-NEXT: s_nop 7
1106-
; GFX90A-VGPR-NEXT: s_nop 1
1107-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1108-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1095+
; GFX90A-VGPR-NEXT: s_nop 0
1096+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1097+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
11091098
; GFX90A-VGPR-NEXT: s_endpgm
11101099
;
11111100
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_1:
11121101
; GFX942-VGPR: ; %bb.0: ; %bb
11131102
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
11141103
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1115-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1116-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1117-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
11181104
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1119-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1120-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1121-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1122-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1123-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1124-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1125-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1126-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1127-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1128-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1129-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1105+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1106+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
11301107
; GFX942-VGPR-NEXT: s_nop 1
1131-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1132-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1108+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 1.0
1109+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1110+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
11331111
; GFX942-VGPR-NEXT: s_nop 7
11341112
; GFX942-VGPR-NEXT: s_nop 7
1135-
; GFX942-VGPR-NEXT: s_nop 1
1136-
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1137-
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1113+
; GFX942-VGPR-NEXT: s_nop 0
1114+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1115+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
11381116
; GFX942-VGPR-NEXT: s_endpgm
11391117
bb:
11401118
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double 1.0), i32 0, i32 0, i32 0)
@@ -1184,58 +1162,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1(ptr addrspace
11841162
; GFX90A-VGPR: ; %bb.0: ; %bb
11851163
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
11861164
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1187-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1188-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1189-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
11901165
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1191-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1192-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1193-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1194-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1195-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1196-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1197-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1198-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1199-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1200-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1201-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1166+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1167+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
12021168
; GFX90A-VGPR-NEXT: s_nop 1
1203-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1204-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1169+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], -1.0
1170+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1171+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
12051172
; GFX90A-VGPR-NEXT: s_nop 7
12061173
; GFX90A-VGPR-NEXT: s_nop 7
1207-
; GFX90A-VGPR-NEXT: s_nop 1
1208-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1209-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1174+
; GFX90A-VGPR-NEXT: s_nop 0
1175+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1176+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
12101177
; GFX90A-VGPR-NEXT: s_endpgm
12111178
;
12121179
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_neg1:
12131180
; GFX942-VGPR: ; %bb.0: ; %bb
12141181
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
12151182
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1216-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1217-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1218-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
12191183
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1220-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1221-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1222-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1223-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1224-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1225-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1226-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1227-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1228-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1229-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1230-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1184+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1185+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
12311186
; GFX942-VGPR-NEXT: s_nop 1
1232-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1233-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1187+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], -1.0
1188+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1189+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
12341190
; GFX942-VGPR-NEXT: s_nop 7
12351191
; GFX942-VGPR-NEXT: s_nop 7
1236-
; GFX942-VGPR-NEXT: s_nop 1
1237-
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1238-
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1192+
; GFX942-VGPR-NEXT: s_nop 0
1193+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1194+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
12391195
; GFX942-VGPR-NEXT: s_endpgm
12401196
bb:
12411197
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double -1.0), i32 0, i32 0, i32 0)
@@ -1285,58 +1241,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64(ptr addrspa
12851241
; GFX90A-VGPR: ; %bb.0: ; %bb
12861242
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
12871243
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1288-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0
1289-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 64
1290-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
12911244
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1292-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1293-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1294-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1295-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1296-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1297-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1298-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1299-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1300-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1301-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1302-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1245+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1246+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
13031247
; GFX90A-VGPR-NEXT: s_nop 1
1304-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1305-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1248+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 64
1249+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1250+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
13061251
; GFX90A-VGPR-NEXT: s_nop 7
13071252
; GFX90A-VGPR-NEXT: s_nop 7
1308-
; GFX90A-VGPR-NEXT: s_nop 1
1309-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16
1310-
; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1]
1253+
; GFX90A-VGPR-NEXT: s_nop 0
1254+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1255+
; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
13111256
; GFX90A-VGPR-NEXT: s_endpgm
13121257
;
13131258
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64:
13141259
; GFX942-VGPR: ; %bb.0: ; %bb
13151260
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
13161261
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1317-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0
1318-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64
1319-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
13201262
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1321-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1322-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1323-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1324-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1325-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1326-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1327-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1328-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1329-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1330-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1331-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1263+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1264+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
13321265
; GFX942-VGPR-NEXT: s_nop 1
1333-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1334-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1266+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 64
1267+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1268+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
13351269
; GFX942-VGPR-NEXT: s_nop 7
13361270
; GFX942-VGPR-NEXT: s_nop 7
1337-
; GFX942-VGPR-NEXT: s_nop 1
1338-
; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16
1339-
; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1]
1271+
; GFX942-VGPR-NEXT: s_nop 0
1272+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1273+
; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
13401274
; GFX942-VGPR-NEXT: s_endpgm
13411275
bb:
13421276
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double bitcast (i64 64 to double)), i32 0, i32 0, i32 0)

0 commit comments

Comments
 (0)