@@ -1083,58 +1083,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1(ptr addrspace(1)
1083
1083
; GFX90A-VGPR: ; %bb.0: ; %bb
1084
1084
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1085
1085
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1086
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1087
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1088
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
1089
1086
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1090
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1091
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1092
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1093
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1094
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1095
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1096
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1097
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1098
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1099
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1100
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1087
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1088
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1101
1089
; GFX90A-VGPR-NEXT: s_nop 1
1102
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1103
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1090
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 1.0
1091
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1092
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
1104
1093
; GFX90A-VGPR-NEXT: s_nop 7
1105
1094
; GFX90A-VGPR-NEXT: s_nop 7
1106
- ; GFX90A-VGPR-NEXT: s_nop 1
1107
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1108
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1095
+ ; GFX90A-VGPR-NEXT: s_nop 0
1096
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1097
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1109
1098
; GFX90A-VGPR-NEXT: s_endpgm
1110
1099
;
1111
1100
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_1:
1112
1101
; GFX942-VGPR: ; %bb.0: ; %bb
1113
1102
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1114
1103
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1115
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1116
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1117
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
1118
1104
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1119
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1120
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1121
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1122
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1123
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1124
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1125
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1126
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1127
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1128
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1129
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1105
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1106
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1130
1107
; GFX942-VGPR-NEXT: s_nop 1
1131
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1132
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1108
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 1.0
1109
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1110
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
1133
1111
; GFX942-VGPR-NEXT: s_nop 7
1134
1112
; GFX942-VGPR-NEXT: s_nop 7
1135
- ; GFX942-VGPR-NEXT: s_nop 1
1136
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1137
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1113
+ ; GFX942-VGPR-NEXT: s_nop 0
1114
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1115
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1138
1116
; GFX942-VGPR-NEXT: s_endpgm
1139
1117
bb:
1140
1118
%mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double 1 .0 ), i32 0 , i32 0 , i32 0 )
@@ -1184,58 +1162,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1(ptr addrspace
1184
1162
; GFX90A-VGPR: ; %bb.0: ; %bb
1185
1163
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1186
1164
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1187
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0
1188
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1189
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
1190
1165
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1191
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1192
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1193
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1194
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1195
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1196
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1197
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1198
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1199
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1200
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1201
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1166
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1167
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1202
1168
; GFX90A-VGPR-NEXT: s_nop 1
1203
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1204
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1169
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], -1.0
1170
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1171
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
1205
1172
; GFX90A-VGPR-NEXT: s_nop 7
1206
1173
; GFX90A-VGPR-NEXT: s_nop 7
1207
- ; GFX90A-VGPR-NEXT: s_nop 1
1208
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1209
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1174
+ ; GFX90A-VGPR-NEXT: s_nop 0
1175
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1176
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1210
1177
; GFX90A-VGPR-NEXT: s_endpgm
1211
1178
;
1212
1179
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_neg1:
1213
1180
; GFX942-VGPR: ; %bb.0: ; %bb
1214
1181
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1215
1182
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1216
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
1217
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000
1218
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
1219
1183
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1220
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1221
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1222
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1223
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1224
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1225
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1226
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1227
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1228
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1229
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1230
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1184
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1185
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1231
1186
; GFX942-VGPR-NEXT: s_nop 1
1232
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1233
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1187
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], -1.0
1188
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1189
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
1234
1190
; GFX942-VGPR-NEXT: s_nop 7
1235
1191
; GFX942-VGPR-NEXT: s_nop 7
1236
- ; GFX942-VGPR-NEXT: s_nop 1
1237
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
1238
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
1192
+ ; GFX942-VGPR-NEXT: s_nop 0
1193
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1194
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1239
1195
; GFX942-VGPR-NEXT: s_endpgm
1240
1196
bb:
1241
1197
%mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double -1 .0 ), i32 0 , i32 0 , i32 0 )
@@ -1285,58 +1241,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64(ptr addrspa
1285
1241
; GFX90A-VGPR: ; %bb.0: ; %bb
1286
1242
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1287
1243
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1288
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0
1289
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 64
1290
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
1291
1244
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1292
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] op_sel:[0,1]
1293
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1294
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1295
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1296
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1297
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1298
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1299
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
1300
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1301
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1302
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1245
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1]
1246
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1303
1247
; GFX90A-VGPR-NEXT: s_nop 1
1304
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
1305
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3
1248
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 64
1249
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
1250
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
1306
1251
; GFX90A-VGPR-NEXT: s_nop 7
1307
1252
; GFX90A-VGPR-NEXT: s_nop 7
1308
- ; GFX90A-VGPR-NEXT: s_nop 1
1309
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16
1310
- ; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1]
1253
+ ; GFX90A-VGPR-NEXT: s_nop 0
1254
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1255
+ ; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1311
1256
; GFX90A-VGPR-NEXT: s_endpgm
1312
1257
;
1313
1258
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64:
1314
1259
; GFX942-VGPR: ; %bb.0: ; %bb
1315
1260
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1316
1261
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1317
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0
1318
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64
1319
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
1320
1262
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1321
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
1322
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1323
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1324
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1325
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1326
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1327
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1328
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
1329
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1330
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1331
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1263
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
1264
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1332
1265
; GFX942-VGPR-NEXT: s_nop 1
1333
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
1334
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
1266
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 64
1267
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
1268
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
1335
1269
; GFX942-VGPR-NEXT: s_nop 7
1336
1270
; GFX942-VGPR-NEXT: s_nop 7
1337
- ; GFX942-VGPR-NEXT: s_nop 1
1338
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16
1339
- ; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1]
1271
+ ; GFX942-VGPR-NEXT: s_nop 0
1272
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
1273
+ ; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
1340
1274
; GFX942-VGPR-NEXT: s_endpgm
1341
1275
bb:
1342
1276
%mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 64 to double )), i32 0 , i32 0 , i32 0 )
0 commit comments