From 3517b8e3a05aa38c11872f670095d92667099973 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Mon, 6 Oct 2025 14:01:29 +0100 Subject: [PATCH 1/3] [AMDGPU] siloadstoreopt generate REG_SEQUENCE with correct operand register alignment --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 33 +++++++++++++++---- .../merge-flat-with-global-load-store.mir | 4 +-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117664983..1185fbf325932 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1363,6 +1363,15 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, return Where; } +static bool isCompatibleAlignSubReg(const TargetRegisterClass *RC, unsigned Idx, + const GCNSubtarget *STM, + const SIRegisterInfo *TRI) { + if (!STM->needsAlignedVGPRs() || !TRI->isVGPRClass(RC) || + !TRI->isProperlyAlignedRC(*RC) || AMDGPU::getRegBitWidth(*RC) == 32) + return true; + return !(TRI->getChannelFromSubReg(Idx) & 0x1); +} + // Copy the merged load result from DestReg to the original dest regs of CI and // Paired. void SILoadStoreOptimizer::copyToDestRegs( @@ -1411,12 +1420,24 @@ SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, const auto *Src0 = TII->getNamedOperand(*CI.I, OpName); const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName); - BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) - .add(*Src0) - .addImm(SubRegIdx0) - .add(*Src1) - .addImm(SubRegIdx1); - + // Make sure the generated REG_SEQUENCE has sensibly aligned registers. + if (isCompatibleAlignSubReg(Paired.DataRC, SubRegIdx1, STM, TRI)) { + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0) + .add(*Src1) + .addImm(SubRegIdx1); + } else { + auto BMI = + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0); + for (unsigned i = 0; i < Paired.Width; ++i) { + unsigned PreviousChannel = TRI->getChannelFromSubReg(SubRegIdx0); + BMI.addReg(Src1->getReg(), /*flags=*/0, TRI->getSubRegFromChannel(i)) + .addImm(TRI->getSubRegFromChannel(PreviousChannel + i + 1)); + } + } return SrcReg; } diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir index a67cf22bdd1ce..b45b69010141e 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir @@ -213,7 +213,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1_sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -230,7 +230,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1_sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2, [[DEF2]].sub2, %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF From 7040bb2b674101e885fc8e4aa05ab372b8c1c1e6 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Tue, 7 Oct 2025 18:45:57 +0100 Subject: [PATCH 2/3] Different strategy for emitting legal REG_SEQUENCE operands --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 54 ++++++++++--------- .../CodeGen/AMDGPU/merge-buffer-gfx12.mir | 6 +-- llvm/test/CodeGen/AMDGPU/merge-buffer.mir | 6 +-- .../CodeGen/AMDGPU/merge-flat-load-store.mir | 31 ++++++----- .../AMDGPU/merge-flat-saddr-load-store.mir | 10 ++-- .../merge-flat-with-global-load-store.mir | 10 ++-- .../AMDGPU/merge-global-load-store.mir | 41 +++++++------- .../CodeGen/AMDGPU/merge-tbuffer-gfx10.mir | 18 +++---- .../CodeGen/AMDGPU/merge-tbuffer-gfx11.mir | 38 ++++++------- .../CodeGen/AMDGPU/merge-tbuffer-gfx12.mir | 18 +++---- .../CodeGen/AMDGPU/merge-tbuffer-gfx9.mir | 18 +++---- 11 files changed, 132 insertions(+), 118 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 1185fbf325932..da3068c398cba 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1363,15 +1363,6 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, return Where; } -static bool isCompatibleAlignSubReg(const TargetRegisterClass *RC, unsigned Idx, - const GCNSubtarget *STM, - const SIRegisterInfo *TRI) { - if (!STM->needsAlignedVGPRs() || !TRI->isVGPRClass(RC) || - !TRI->isProperlyAlignedRC(*RC) || AMDGPU::getRegBitWidth(*RC) == 32) - return true; - return !(TRI->getChannelFromSubReg(Idx) & 0x1); -} - // Copy the merged load result from DestReg to the original dest regs of CI and // Paired. void SILoadStoreOptimizer::copyToDestRegs( @@ -1421,23 +1412,34 @@ SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName); // Make sure the generated REG_SEQUENCE has sensibly aligned registers. - if (isCompatibleAlignSubReg(Paired.DataRC, SubRegIdx1, STM, TRI)) { - BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) - .add(*Src0) - .addImm(SubRegIdx0) - .add(*Src1) - .addImm(SubRegIdx1); - } else { - auto BMI = - BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) - .add(*Src0) - .addImm(SubRegIdx0); - for (unsigned i = 0; i < Paired.Width; ++i) { - unsigned PreviousChannel = TRI->getChannelFromSubReg(SubRegIdx0); - BMI.addReg(Src1->getReg(), /*flags=*/0, TRI->getSubRegFromChannel(i)) - .addImm(TRI->getSubRegFromChannel(PreviousChannel + i + 1)); - } - } + const TargetRegisterClass *CompatRC0 = + TRI->getSubRegisterClass(SuperRC, SubRegIdx0); + const TargetRegisterClass *CompatRC1 = + TRI->getSubRegisterClass(SuperRC, SubRegIdx1); + assert(CompatRC0 && CompatRC1 && + "Cannot find compatible TargetRegisterClass"); + + Register Src0Reg = CompatRC0 == CI.DataRC + ? Src0->getReg() + : MRI->createVirtualRegister(CompatRC0); + Register Src1Reg = CompatRC1 == Paired.DataRC + ? Src1->getReg() + : MRI->createVirtualRegister(CompatRC1); + + if (CompatRC0 != CI.DataRC) + BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src0Reg) + .add(*Src0); + + if (CompatRC1 != Paired.DataRC) + BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src1Reg) + .add(*Src1); + + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .addReg(Src0Reg) + .addImm(SubRegIdx0) + .addReg(Src1Reg) + .addImm(SubRegIdx1); + return SrcReg; } diff --git a/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir index d7f5d1a237895..7416ceb7aacb8 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir @@ -409,11 +409,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-buffer.mir b/llvm/test/CodeGen/AMDGPU/merge-buffer.mir index 1c6d429d20eac..ad4aaf3c512e5 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-buffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-buffer.mir @@ -411,11 +411,11 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GCN-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir index 09aae9152c4ee..1bd50537f161f 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir @@ -239,7 +239,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -258,8 +258,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -278,9 +278,13 @@ body: | ; GCN-LABEL: name: merge_flat_store_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub1, [[COPY1]], %subreg.sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub2, [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub3 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub3, [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_128 = IMPLICIT_DEF @@ -303,8 +307,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`) %0:vreg_64_align2 = IMPLICIT_DEF @@ -334,8 +338,8 @@ body: | ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 8) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) @@ -363,7 +367,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1, [[DEF2]], %subreg.sub2_sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = IMPLICIT_DEF @@ -381,7 +385,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1_sub2, [[DEF2]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = IMPLICIT_DEF @@ -399,7 +403,8 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[DEF2]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:agpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir index 1c133c6114ec2..750cd1b5d6393 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir @@ -201,7 +201,7 @@ body: | ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -226,8 +226,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -256,8 +256,8 @@ body: | ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir index b45b69010141e..718d6f1523f89 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir @@ -179,7 +179,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -196,7 +196,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -213,7 +213,8 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -230,7 +231,8 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2, [[DEF2]].sub2, %subreg.sub3 + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_96_with_sub1_sub2_in_vreg_64_align2 = COPY [[DEF2]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2_sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir index 0817694295f86..b7768045af2c5 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir @@ -445,7 +445,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -464,8 +464,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -484,9 +484,13 @@ body: | ; GCN-LABEL: name: merge_global_store_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub1, [[COPY1]], %subreg.sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub2, [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub3 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub3, [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_128 = IMPLICIT_DEF @@ -509,8 +513,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF @@ -540,8 +544,8 @@ body: | ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 8, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) @@ -569,7 +573,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1, [[DEF2]], %subreg.sub2_sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = IMPLICIT_DEF @@ -587,7 +591,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1_sub2, [[DEF2]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = IMPLICIT_DEF @@ -605,7 +609,8 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[DEF2]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:agpr_32 = IMPLICIT_DEF @@ -718,7 +723,7 @@ body: | ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -743,8 +748,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -773,8 +778,8 @@ body: | ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir index 402c00298c8da..5f19bb2a80ce1 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir @@ -610,11 +610,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 77, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 74, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 76, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 73, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 62, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 75, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 72, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir index f5407a5223166..e19b9857e1223 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir @@ -610,11 +610,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 63, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 60, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 49, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 62, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 59, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 48, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 61, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 58, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -1679,7 +1679,7 @@ body: | ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s48), align 2, addrspace 4) %4:vgpr_32 = COPY $vgpr0 %5:vgpr_32 = COPY $vgpr1 @@ -1717,12 +1717,12 @@ body: | ; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 8, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4) ; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 16, 13, 0, 0, implicit $exec :: (store (s16), addrspace 4) %12:vgpr_32 = COPY $vgpr8 @@ -1973,7 +1973,7 @@ body: | ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s24), align 1, addrspace 4) %6:vgpr_32 = COPY $vgpr2 %5:vgpr_32 = COPY $vgpr1 @@ -2011,12 +2011,12 @@ body: | ; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 4, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4) ; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 8, 5, 0, 0, implicit $exec :: (store (s8), addrspace 4) %12:vgpr_32 = COPY $vgpr8 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir index 1ee4f9e008197..55ec6ffed32f7 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir @@ -610,11 +610,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 50, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 63, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 60, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 49, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 62, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 59, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 48, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 61, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 58, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir index 3a43e743de493..293ae5e60ef75 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir @@ -611,11 +611,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 126, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 125, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -667,11 +667,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 91, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 93, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -723,11 +723,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 75, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 78, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 77, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 From 6dbfe1fe776dd62714c073bae1c9693a21aacbba Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Tue, 14 Oct 2025 15:08:07 +0100 Subject: [PATCH 3/3] Scalarize if the regclasses are incompatible, add test --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 68 +++++++++++-------- .../CodeGen/AMDGPU/merge-buffer-gfx12.mir | 6 +- llvm/test/CodeGen/AMDGPU/merge-buffer.mir | 6 +- .../CodeGen/AMDGPU/merge-flat-load-store.mir | 31 ++++----- .../AMDGPU/merge-flat-saddr-load-store.mir | 10 +-- .../merge-flat-with-global-load-store.mir | 10 ++- .../AMDGPU/merge-global-load-store.mir | 41 +++++------ .../CodeGen/AMDGPU/merge-tbuffer-gfx10.mir | 18 ++--- .../CodeGen/AMDGPU/merge-tbuffer-gfx11.mir | 38 +++++------ .../CodeGen/AMDGPU/merge-tbuffer-gfx12.mir | 18 ++--- .../CodeGen/AMDGPU/merge-tbuffer-gfx9.mir | 18 ++--- .../AMDGPU/siloadstoreopt-misalign.mir | 29 ++++++++ 12 files changed, 162 insertions(+), 131 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/siloadstoreopt-misalign.mir diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index da3068c398cba..36b22e457da10 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1412,33 +1412,47 @@ SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired, const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName); // Make sure the generated REG_SEQUENCE has sensibly aligned registers. - const TargetRegisterClass *CompatRC0 = - TRI->getSubRegisterClass(SuperRC, SubRegIdx0); - const TargetRegisterClass *CompatRC1 = - TRI->getSubRegisterClass(SuperRC, SubRegIdx1); - assert(CompatRC0 && CompatRC1 && - "Cannot find compatible TargetRegisterClass"); - - Register Src0Reg = CompatRC0 == CI.DataRC - ? Src0->getReg() - : MRI->createVirtualRegister(CompatRC0); - Register Src1Reg = CompatRC1 == Paired.DataRC - ? Src1->getReg() - : MRI->createVirtualRegister(CompatRC1); - - if (CompatRC0 != CI.DataRC) - BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src0Reg) - .add(*Src0); - - if (CompatRC1 != Paired.DataRC) - BuildMI(*MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Src1Reg) - .add(*Src1); - - BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) - .addReg(Src0Reg) - .addImm(SubRegIdx0) - .addReg(Src1Reg) - .addImm(SubRegIdx1); + const TargetRegisterClass *Src0RC = TRI->findCommonRegClass( + MRI->getRegClass(Src0->getReg()), Src0->getSubReg(), SuperRC, SubRegIdx0); + const TargetRegisterClass *Src1RC = TRI->findCommonRegClass( + MRI->getRegClass(Src1->getReg()), Src1->getSubReg(), SuperRC, SubRegIdx1); + if (!Src0RC || !Src1RC) { + unsigned SuperRCWSize = TRI->getRegSizeInBits(*SuperRC) / 32; + unsigned Src1WSizeOffset = CI.Width; + + auto BMI = + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg); + + unsigned Src0SubReg = Src0->getSubReg(); + unsigned Src1SubReg = Src1->getSubReg(); + unsigned It = 0; + for (; It < Src1WSizeOffset; ++It) { + unsigned ChOffset = + Src0SubReg ? TRI->getChannelFromSubReg(Src0SubReg) : 0; + unsigned NewSubReg = Src0SubReg ? TRI->getSubRegFromChannel(ChOffset + It) + : CI.Width == 1 ? 0 + : TRI->getSubRegFromChannel(It); + BMI.addUse(Src0->getReg(), /*Flags=*/0U, NewSubReg) + .addImm(TRI->getSubRegFromChannel(It)); + } + for (; It < SuperRCWSize; ++It) { + unsigned ChOffset = + Src1SubReg ? TRI->getChannelFromSubReg(Src1SubReg) : 0; + unsigned NewSubReg = Src1SubReg ? TRI->getSubRegFromChannel(ChOffset + It) + : Paired.Width == 1 + ? 0 + : TRI->getSubRegFromChannel(It - CI.Width); + BMI.addUse(Src1->getReg(), /*Flags=*/0U, NewSubReg) + .addImm(TRI->getSubRegFromChannel(It)); + } + + } else { + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0) + .add(*Src1) + .addImm(SubRegIdx1); + } return SrcReg; } diff --git a/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir index 7416ceb7aacb8..d7f5d1a237895 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir @@ -409,11 +409,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-buffer.mir b/llvm/test/CodeGen/AMDGPU/merge-buffer.mir index ad4aaf3c512e5..1c6d429d20eac 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-buffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-buffer.mir @@ -411,11 +411,11 @@ body: | ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GCN-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir index 1bd50537f161f..4add3cb294132 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir @@ -239,7 +239,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -258,8 +258,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -278,13 +278,9 @@ body: | ; GCN-LABEL: name: merge_flat_store_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub1, [[COPY1]], %subreg.sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub2 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub2, [[REG_SEQUENCE]], %subreg.sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub3 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub3, [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_128 = IMPLICIT_DEF @@ -307,8 +303,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`) %0:vreg_64_align2 = IMPLICIT_DEF @@ -338,8 +334,8 @@ body: | ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 8) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) @@ -367,7 +363,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1, [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = IMPLICIT_DEF @@ -385,7 +381,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1_sub2, [[DEF2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = IMPLICIT_DEF @@ -403,8 +399,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[DEF2]] - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:agpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir index 750cd1b5d6393..1c133c6114ec2 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir @@ -201,7 +201,7 @@ body: | ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -226,8 +226,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -256,8 +256,8 @@ body: | ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir index 718d6f1523f89..b45b69010141e 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir @@ -179,7 +179,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -196,7 +196,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -213,8 +213,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2 ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -231,8 +230,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:av_96_with_sub1_sub2_in_vreg_64_align2 = COPY [[DEF2]] - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1_sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]].sub0, %subreg.sub1, [[DEF2]].sub1, %subreg.sub2, [[DEF2]].sub2, %subreg.sub3 ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr poison`, align 4) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir index b7768045af2c5..a3de9f05d889b 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir @@ -445,7 +445,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -464,8 +464,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -484,13 +484,9 @@ body: | ; GCN-LABEL: name: merge_global_store_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub1, [[COPY1]], %subreg.sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub2 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub2, [[REG_SEQUENCE]], %subreg.sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub3 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub3, [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_128 = IMPLICIT_DEF @@ -513,8 +509,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF @@ -544,8 +540,8 @@ body: | ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 8, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) @@ -573,7 +569,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1, [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = IMPLICIT_DEF @@ -591,7 +587,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0_sub1_sub2, [[DEF2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = IMPLICIT_DEF @@ -609,8 +605,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[DEF2]] - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:vreg_64_align2 = IMPLICIT_DEF %1:agpr_32 = IMPLICIT_DEF @@ -723,7 +718,7 @@ body: | ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 ; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -748,8 +743,8 @@ body: | ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) %0:sreg_64_xexec_xnull = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF @@ -778,8 +773,8 @@ body: | ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir index 5f19bb2a80ce1..402c00298c8da 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir @@ -610,11 +610,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 77, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 74, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 76, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 73, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 62, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 75, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX10-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX10-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX10-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 72, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir index e19b9857e1223..f5407a5223166 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir @@ -610,11 +610,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 63, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 60, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 49, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 62, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 59, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 48, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 61, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 58, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -1679,7 +1679,7 @@ body: | ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s48), align 2, addrspace 4) %4:vgpr_32 = COPY $vgpr0 %5:vgpr_32 = COPY $vgpr1 @@ -1717,12 +1717,12 @@ body: | ; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 8, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4) ; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 16, 13, 0, 0, implicit $exec :: (store (s16), addrspace 4) %12:vgpr_32 = COPY $vgpr8 @@ -1973,7 +1973,7 @@ body: | ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s24), align 1, addrspace 4) %6:vgpr_32 = COPY $vgpr2 %5:vgpr_32 = COPY $vgpr1 @@ -2011,12 +2011,12 @@ body: | ; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4) ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 - ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2 + ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3 ; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 4, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4) ; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 8, 5, 0, 0, implicit $exec :: (store (s8), addrspace 4) %12:vgpr_32 = COPY $vgpr8 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir index 55ec6ffed32f7..1ee4f9e008197 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir @@ -610,11 +610,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 50, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 63, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 60, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -666,11 +666,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 49, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 62, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 59, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -722,11 +722,11 @@ body: | ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], $sgpr_null, 4, 48, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], $sgpr_null, 16, 61, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX12-NEXT: TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], $sgpr_null, 36, 58, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir index 293ae5e60ef75..3a43e743de493 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir @@ -611,11 +611,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 126, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 125, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -667,11 +667,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 91, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 93, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 @@ -723,11 +723,11 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 4, 75, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 - ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 + ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE2]], %subreg.sub0_sub1, [[COPY4]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1_sub2, [[COPY3]], %subreg.sub3 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]], 0, 16, 78, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GFX9-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 + ; GFX9-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE5]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2 ; GFX9-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE6]], [[REG_SEQUENCE]], 0, 36, 77, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) %12:vgpr_32 = COPY $vgpr8 %11:vgpr_32 = COPY $vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misalign.mir b/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misalign.mir new file mode 100644 index 0000000000000..4fb0320f1e253 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misalign.mir @@ -0,0 +1,29 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -start-before=si-load-store-opt %s -o - | FileCheck %s + +# CHECK-LABEL: misaligned_vgpr: +# CHECK: ; %bb.0: +# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +# CHECK: v_mov_b64_e32 v[0:1], 0 +# CHECK: v_mov_b32_e32 v2, 0 +# CHECK: v_mov_b32_e32 v3, v0 +# CHECK: v_mov_b32_e32 v4, v1 +# CHECK: flat_store_dwordx3 v[0:1], v[2:4] +# CHECK: s_endpgm + +--- | + define void @misaligned_vgpr() { ret void } +... + +--- +name: misaligned_vgpr +tracksRegLiveness: true +body: | + bb.0: + %10:vreg_64_align2 = IMPLICIT_DEF + %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + FLAT_STORE_DWORD %10:vreg_64_align2, %11:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`) + %14:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + FLAT_STORE_DWORDX2 %10:vreg_64_align2, killed %14:vreg_64_align2, 4, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4) + S_ENDPGM 0 + +---