Skip to content

Commit 7a2b0b5

Browse files
committed
VirtRegRewriter: Add super register defs for live out undef lanes
If an undef subregister def is live into another block, we need to maintain a physreg def to track the liveness of those lanes. This would manifest as a verifier error after branch folding, when the cloned tail block use no longer had a def. There is a missing verifier check for this situation. Added an xfailed test that demonstrates this. We may also be able to revert the changes in 47d3cbc. It might be better to insert an IMPLICIT_DEF before the instruction rather than using the implicit-def operand. Fixes #98474
1 parent cf4442e commit 7a2b0b5

10 files changed

+396
-46
lines changed

llvm/lib/CodeGen/VirtRegMap.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ class VirtRegRewriter : public MachineFunctionPass {
199199
void handleIdentityCopy(MachineInstr &MI);
200200
void expandCopyBundle(MachineInstr &MI) const;
201201
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
202+
bool needLiveOutUndefSubregDef(const LiveInterval &LI,
203+
const MachineBasicBlock &MBB, unsigned SubReg,
204+
MCPhysReg PhysReg) const;
202205

203206
public:
204207
static char ID;
@@ -532,6 +535,26 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
532535
return false;
533536
}
534537

538+
/// Check if we need to maintain liveness for undef subregister lanes that are
539+
/// live out of a block.
///
/// Scans the subranges of \p LI for lanes that lie outside the lane mask of
/// \p SubReg. Returns true as soon as one such subrange is not reported live
/// out of \p MBB by LiveIntervals yet is live into at least one successor of
/// \p MBB; returns false if no subrange matches.
///
/// \p PhysReg is accepted but not consulted by this check.
540+
bool VirtRegRewriter::needLiveOutUndefSubregDef(const LiveInterval &LI,
541+
const MachineBasicBlock &MBB,
542+
unsigned SubReg,
543+
MCPhysReg PhysReg) const {
544+
// Every lane that is not covered by SubReg.
LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
545+
for (const LiveInterval::SubRange &SR : LI.subranges()) {
546+
// Lanes of this subrange that fall outside SubReg.
LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
547+
if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
548+
for (const MachineBasicBlock *Succ : MBB.successors()) {
549+
if (LIS->isLiveInToMBB(SR, Succ))
550+
return true;
551+
}
552+
}
553+
}
554+
555+
return false;
556+
}
557+
535558
void VirtRegRewriter::rewrite() {
536559
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
537560
SmallVector<Register, 8> SuperDeads;
@@ -586,6 +609,11 @@ void VirtRegRewriter::rewrite() {
586609
MO.setIsUndef(true);
587610
} else if (!MO.isDead()) {
588611
assert(MO.isDef());
612+
if (MO.isUndef()) {
613+
const LiveInterval &LI = LIS->getInterval(VirtReg);
614+
if (needLiveOutUndefSubregDef(LI, *MBBI, SubReg, PhysReg))
615+
SuperDefs.push_back(PhysReg);
616+
}
589617
}
590618
}
591619

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
3838
; GFX90A-NEXT: {{ $}}
3939
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
4040
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
41-
; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
42-
; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF
43-
; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
44-
; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF
45-
; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
4641
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
4742
; GFX90A-NEXT: {{ $}}
4843
; GFX90A-NEXT: bb.2:
4944
; GFX90A-NEXT: successors: %bb.3(0x80000000)
50-
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
45+
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
5146
; GFX90A-NEXT: {{ $}}
5247
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
5348
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
54-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
55-
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
56-
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF
57-
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
58-
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
49+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
50+
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
51+
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
52+
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
53+
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
5954
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
6055
; GFX90A-NEXT: {{ $}}
6156
; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
111106
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
112107
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
113108
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
114-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
115-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
109+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
110+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
116111
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
117112
; GFX90A-NEXT: {{ $}}
118113
; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
395390
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
396391
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
397392
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
398-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
399-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
393+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
394+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
400395
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
401396
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
402397
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
434429
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
435430
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
436431
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
437-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
438-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
432+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
433+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
439434
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
440435
; GFX90A-NEXT: $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
441436
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
484479
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
485480
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
486481
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
487-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
488-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
482+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
483+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
489484
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
490485
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
491486
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
535530
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
536531
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
537532
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
538-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
539-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
533+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
534+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
540535
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
541536
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
542537
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
589584
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
590585
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
591586
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
592-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
593-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
587+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
588+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
594589
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
595590
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
596591
; GFX90A-NEXT: {{ $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
643638
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
644639
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
645640
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
646-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
647-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
641+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
642+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
648643
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
649644
; GFX90A-NEXT: $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
650645
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
689684
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
690685
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
691686
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
692-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
693-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
687+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
688+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
694689
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
695690
; GFX90A-NEXT: S_BRANCH %bb.45
696691
; GFX90A-NEXT: {{ $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
719714
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
720715
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
721716
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
722-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
723-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
717+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
718+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
724719
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
725720
; GFX90A-NEXT: S_BRANCH %bb.46
726721
; GFX90A-NEXT: {{ $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
748743
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
749744
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
750745
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
751-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
752-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
746+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
747+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
753748
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
754749
; GFX90A-NEXT: S_BRANCH %bb.62
755750
; GFX90A-NEXT: {{ $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
773768
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
774769
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
775770
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
776-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
777-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
771+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
772+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
778773
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
779774
; GFX90A-NEXT: $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
780775
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
880875
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
881876
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
882877
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
883-
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
884-
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
878+
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
879+
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
885880
; GFX90A-NEXT: $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
886881
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec
887882
; GFX90A-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/indirect-call.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,6 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
603603
; GISEL-NEXT: s_mov_b32 s14, s43
604604
; GISEL-NEXT: s_mov_b32 s15, s42
605605
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
606-
; GISEL-NEXT: v_mov_b32_e32 v1, v0
607606
; GISEL-NEXT: ; implicit-def: $vgpr0
608607
; GISEL-NEXT: ; implicit-def: $vgpr31
609608
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
@@ -1384,7 +1383,6 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
13841383
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
13851384
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
13861385
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
1387-
; GISEL-NEXT: v_mov_b32_e32 v2, v0
13881386
; GISEL-NEXT: ; implicit-def: $vgpr1
13891387
; GISEL-NEXT: ; implicit-def: $vgpr0
13901388
; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]

llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ body: |
3030
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
3131
; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
3232
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
33-
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF
33+
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
3434
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
3535
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
3636
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -83,7 +83,7 @@ body: |
8383
; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
8484
; CHECK-NEXT: {{ $}}
8585
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
86-
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24
86+
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
8787
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
8888
; CHECK-NEXT: S_BRANCH %bb.6
8989
; CHECK-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ body: |
3030
; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
3131
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
3232
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
33-
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF
33+
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
3434
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
3535
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
3636
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -80,7 +80,7 @@ body: |
8080
; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
8181
; CHECK-NEXT: {{ $}}
8282
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
83-
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24
83+
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
8484
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
8585
; CHECK-NEXT: S_BRANCH %bb.5
8686
; CHECK-NEXT: {{ $}}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
; Check for verifier error after tail duplication. An implicit_def of
5+
; a subregister is needed to maintain liveness after assignment.
6+
7+
define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
8+
; CHECK-LABEL: test:
9+
; CHECK: ; %bb.0: ; %entry
10+
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
11+
; CHECK-NEXT: s_mov_b32 s0, 0
12+
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
13+
; CHECK-NEXT: ; %bb.1: ; %load
14+
; CHECK-NEXT: s_mov_b32 s1, s0
15+
; CHECK-NEXT: s_mov_b32 s2, s0
16+
; CHECK-NEXT: s_mov_b32 s3, s0
17+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
18+
; CHECK-NEXT: buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
19+
; CHECK-NEXT: s_waitcnt vmcnt(0)
20+
; CHECK-NEXT: exp mrt0 v0, v1, v2, v0
21+
; CHECK-NEXT: s_endpgm
22+
; CHECK-NEXT: .LBB0_2:
23+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
24+
; CHECK-NEXT: exp mrt0 v0, v1, v2, v0
25+
; CHECK-NEXT: s_endpgm
26+
entry:
27+
%cond = icmp eq i32 %cmp, 0
28+
br i1 %cond, label %end, label %load
29+
30+
load:
31+
%data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
32+
%e1 = extractelement <2 x i32> %data1, i32 0
33+
%e2 = extractelement <2 x i32> %data1, i32 1
34+
br label %end
35+
36+
end:
37+
%out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
38+
%out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
39+
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
40+
ret void
41+
}
42+

0 commit comments

Comments
 (0)