Skip to content

Commit 7040bb2

Browse files
committed
Different strategy for emitting legal REG_SEQUENCE operands
1 parent 3517b8e commit 7040bb2

11 files changed

+132
-118
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,15 +1363,6 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
13631363
return Where;
13641364
}
13651365

1366-
static bool isCompatibleAlignSubReg(const TargetRegisterClass *RC, unsigned Idx,
1367-
const GCNSubtarget *STM,
1368-
const SIRegisterInfo *TRI) {
1369-
if (!STM->needsAlignedVGPRs() || !TRI->isVGPRClass(RC) ||
1370-
!TRI->isProperlyAlignedRC(*RC) || AMDGPU::getRegBitWidth(*RC) == 32)
1371-
return true;
1372-
return !(TRI->getChannelFromSubReg(Idx) & 0x1);
1373-
}
1374-
13751366
// Copy the merged load result from DestReg to the original dest regs of CI and
13761367
// Paired.
13771368
void SILoadStoreOptimizer::copyToDestRegs(
@@ -1421,23 +1412,34 @@ SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
14211412
const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName);
14221413

14231414
// Make sure the generated REG_SEQUENCE has sensibly aligned registers.
1424-
if (isCompatibleAlignSubReg(Paired.DataRC, SubRegIdx1, STM, TRI)) {
1425-
BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
1426-
.add(*Src0)
1427-
.addImm(SubRegIdx0)
1428-
.add(*Src1)
1429-
.addImm(SubRegIdx1);
1430-
} else {
1431-
auto BMI =
1432-
BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
1433-
.add(*Src0)
1434-
.addImm(SubRegIdx0);
1435-
for (unsigned i = 0; i < Paired.Width; ++i) {
1436-
unsigned PreviousChannel = TRI->getChannelFromSubReg(SubRegIdx0);
1437-
BMI.addReg(Src1->getReg(), /*flags=*/0, TRI->getSubRegFromChannel(i))
1438-
.addImm(TRI->getSubRegFromChannel(PreviousChannel + i + 1));
1439-
}
1440-
}
1415+
const TargetRegisterClass *CompatRC0 =
1416+
TRI->getSubRegisterClass(SuperRC, SubRegIdx0);
1417+
const TargetRegisterClass *CompatRC1 =
1418+
TRI->getSubRegisterClass(SuperRC, SubRegIdx1);
1419+
assert(CompatRC0 && CompatRC1 &&
1420+
"Cannot find compatible TargetRegisterClass");
1421+
1422+
Register Src0Reg = CompatRC0 == CI.DataRC
1423+
? Src0->getReg()
1424+
: MRI->createVirtualRegister(CompatRC0);
1425+
Register Src1Reg = CompatRC1 == Paired.DataRC
1426+
? Src1->getReg()
1427+
: MRI->createVirtualRegister(CompatRC1);
1428+
1429+
if (CompatRC0 != CI.DataRC)
1430+
BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src0Reg)
1431+
.add(*Src0);
1432+
1433+
if (CompatRC1 != Paired.DataRC)
1434+
BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src1Reg)
1435+
.add(*Src1);
1436+
1437+
BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
1438+
.addReg(Src0Reg)
1439+
.addImm(SubRegIdx0)
1440+
.addReg(Src1Reg)
1441+
.addImm(SubRegIdx1);
1442+
14411443
return SrcReg;
14421444
}
14431445

llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,11 +409,11 @@ body: |
409409
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
410410
; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4)
411411
; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
412-
; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2
413-
; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3
412+
; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2
413+
; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3
414414
; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
415415
; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
416-
; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2
416+
; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2
417417
; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4)
418418
%12:vgpr_32 = COPY $vgpr8
419419
%11:vgpr_32 = COPY $vgpr7

llvm/test/CodeGen/AMDGPU/merge-buffer.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -411,11 +411,11 @@ body: |
411411
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
412412
; GCN-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4)
413413
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
414-
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2
415-
; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3
414+
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2
415+
; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3
416416
; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
417417
; GCN-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
418-
; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2
418+
; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2
419419
; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4)
420420
%12:vgpr_32 = COPY $vgpr8
421421
%11:vgpr_32 = COPY $vgpr7

llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ body: |
239239
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
240240
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
241241
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
242-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
242+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
243243
; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4)
244244
%0:vreg_64_align2 = IMPLICIT_DEF
245245
%1:vgpr_32 = IMPLICIT_DEF
@@ -258,8 +258,8 @@ body: |
258258
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
259259
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
260260
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
261-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
262-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2
261+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
262+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
263263
; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4)
264264
%0:vreg_64_align2 = IMPLICIT_DEF
265265
%1:vgpr_32 = IMPLICIT_DEF
@@ -278,9 +278,13 @@ body: |
278278
; GCN-LABEL: name: merge_flat_store_dword_4
279279
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
280280
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
281-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0
282-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1
283-
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2
281+
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub1
282+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0
283+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub1, [[COPY1]], %subreg.sub0
284+
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub2
285+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub2, [[REG_SEQUENCE]], %subreg.sub0_sub1
286+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub3
287+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub3, [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2
284288
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4)
285289
%0:vreg_64_align2 = IMPLICIT_DEF
286290
%1:vreg_128 = IMPLICIT_DEF
@@ -303,8 +307,8 @@ body: |
303307
; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
304308
; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
305309
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
306-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
307-
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
310+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
311+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
308312
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4)
309313
; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`)
310314
%0:vreg_64_align2 = IMPLICIT_DEF
@@ -334,8 +338,8 @@ body: |
334338
; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
335339
; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
336340
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
337-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
338-
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
341+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
342+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
339343
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 8)
340344
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1
341345
; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4)
@@ -363,7 +367,7 @@ body: |
363367
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
364368
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
365369
; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
366-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3
370+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1, [[DEF2]], %subreg.sub2_sub3
367371
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4)
368372
%0:vreg_64_align2 = IMPLICIT_DEF
369373
%1:vreg_64_align2 = IMPLICIT_DEF
@@ -381,7 +385,7 @@ body: |
381385
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
382386
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
383387
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
384-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3
388+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1_sub2, [[DEF2]], %subreg.sub3
385389
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`)
386390
%0:vreg_64_align2 = IMPLICIT_DEF
387391
%1:vreg_96_align2 = IMPLICIT_DEF
@@ -399,7 +403,8 @@ body: |
399403
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
400404
; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
401405
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
402-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
406+
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[DEF2]]
407+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1
403408
; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4)
404409
%0:vreg_64_align2 = IMPLICIT_DEF
405410
%1:agpr_32 = IMPLICIT_DEF

llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ body: |
201201
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
202202
; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
203203
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
204-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
204+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
205205
; GCN-NEXT: FLAT_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1)
206206
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
207207
%1:vgpr_32 = IMPLICIT_DEF
@@ -226,8 +226,8 @@ body: |
226226
; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
227227
; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
228228
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
229-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
230-
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
229+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
230+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
231231
; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
232232
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
233233
%1:vgpr_32 = IMPLICIT_DEF
@@ -256,8 +256,8 @@ body: |
256256
; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
257257
; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
258258
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
259-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
260-
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
259+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
260+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
261261
; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
262262
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1
263263
; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)

llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ body: |
179179
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
180180
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
181181
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
182-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
182+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
183183
; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4)
184184
%0:vreg_64_align2 = IMPLICIT_DEF
185185
%1:vgpr_32 = IMPLICIT_DEF
@@ -196,7 +196,7 @@ body: |
196196
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
197197
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
198198
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
199-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
199+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
200200
; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) poison`, align 4)
201201
%0:vreg_64_align2 = IMPLICIT_DEF
202202
%1:vgpr_32 = IMPLICIT_DEF
@@ -213,7 +213,8 @@ body: |
213213
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
214214
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
215215
; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
216-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2
216+
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
217+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2
217218
; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4)
218219
%0:vreg_64_align2 = IMPLICIT_DEF
219220
%1:vgpr_32 = IMPLICIT_DEF
@@ -230,7 +231,8 @@ body: |
230231
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
231232
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
232233
; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
233-
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2, [[DEF2]].sub2, %subreg.sub3
234+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_96_with_sub1_sub2_in_vreg_64_align2 = COPY [[DEF2]]
235+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2_sub3
234236
; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4)
235237
%0:vreg_64_align2 = IMPLICIT_DEF
236238
%1:vgpr_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)