
Commit 95b0187

[RISCV] Remove deleted AVL register defs from LiveInterval instr maps (#97011)
When coalescing vsetvlis we might remove a use of a register AVL, which in turn might leave the AVL's def dead. When it is dead (currently limited to ADDIs) we delete the def, but we were forgetting to remove it from LiveIntervals' instruction map. Fixes #95865.
1 parent e6a961d · commit 95b0187

File tree: 2 files changed, +255 −1 lines

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 3 additions & 1 deletion
@@ -1705,8 +1705,10 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
         if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
             MRI->use_nodbg_empty(OldVLReg)) {
           VLOpDef->eraseFromParent();
-          if (LIS)
+          if (LIS) {
             LIS->removeInterval(OldVLReg);
+            LIS->RemoveMachineInstrFromMaps(*VLOpDef);
+          }
         }
       }
       MI.setDesc(NextMI->getDesc());
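
For context, here is a minimal sketch of the bookkeeping pattern the hunk above adds. The free-standing helper name eraseDeadAVLDef and its parameter list are illustrative only (they are not upstream API), and the sketch chooses to update the LiveIntervals maps before erasing the instruction so that the MachineInstr is still valid while the analysis is queried; only the calls themselves (getUniqueVRegDef, isAddImmediate, use_nodbg_empty, removeInterval, RemoveMachineInstrFromMaps, eraseFromParent) are taken from the existing LLVM APIs used in the diff.

// Minimal sketch, not the upstream code: delete the instruction defining
// OldVLReg if it is an ADDI whose result is no longer used, while keeping
// LiveIntervals (when present) consistent.
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

static void eraseDeadAVLDef(Register OldVLReg, MachineRegisterInfo *MRI,
                            const TargetInstrInfo *TII, LiveIntervals *LIS) {
  MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
  // Currently limited to ADDIs, matching the commit message.
  if (!VLOpDef || !TII->isAddImmediate(*VLOpDef, OldVLReg) ||
      !MRI->use_nodbg_empty(OldVLReg))
    return;
  if (LIS) {
    // Drop the register's live interval ...
    LIS->removeInterval(OldVLReg);
    // ... and also drop the instruction from the slot index maps, so no stale
    // MachineInstr entry is left behind (the omission this commit fixes).
    LIS->RemoveMachineInstrFromMaps(*VLOpDef);
  }
  // Erase last, once the analysis no longer references the instruction.
  VLOpDef->eraseFromParent();
}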
Lines changed: 252 additions & 0 deletions
@@ -0,0 +1,252 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mattr=+v,+c < %s | FileCheck %s

; This previously crashed when spilling a GPR because when we removed a dead
; ADDI we weren't removing it from the LIS instruction map. Needs +c to trigger.

define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
; CHECK-NEXT: .cfi_offset s2, -32
; CHECK-NEXT: .cfi_offset s3, -40
; CHECK-NEXT: .cfi_offset s4, -48
; CHECK-NEXT: .cfi_offset s5, -56
; CHECK-NEXT: .cfi_offset s6, -64
; CHECK-NEXT: .cfi_offset s7, -72
; CHECK-NEXT: .cfi_offset s8, -80
; CHECK-NEXT: .cfi_offset s9, -88
; CHECK-NEXT: .cfi_offset s10, -96
; CHECK-NEXT: .cfi_offset s11, -104
; CHECK-NEXT: li s2, 0
; CHECK-NEXT: li a7, 8
; CHECK-NEXT: li t0, 12
; CHECK-NEXT: li s0, 4
; CHECK-NEXT: li t1, 20
; CHECK-NEXT: ld a1, 112(sp)
; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: andi t3, a4, 1
; CHECK-NEXT: li t2, 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv t4, t1
; CHECK-NEXT: mv t5, t2
; CHECK-NEXT: mv t6, t0
; CHECK-NEXT: mv s3, a7
; CHECK-NEXT: mv a6, s2
; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Loop Header: Depth=2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv s5, t4
; CHECK-NEXT: mv s6, t5
; CHECK-NEXT: mv s7, t6
; CHECK-NEXT: mv s8, s3
; CHECK-NEXT: mv s4, a6
; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # => This Loop Header: Depth=3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv s11, s5
; CHECK-NEXT: mv a3, s6
; CHECK-NEXT: mv ra, s7
; CHECK-NEXT: mv a4, s8
; CHECK-NEXT: mv s9, s4
; CHECK-NEXT: .LBB0_4: # %vector.ph.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # => This Loop Header: Depth=4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: li a5, 0
; CHECK-NEXT: .LBB0_5: # %vector.body.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # Parent Loop BB0_4 Depth=4
; CHECK-NEXT: # => This Inner Loop Header: Depth=5
; CHECK-NEXT: addi s1, a5, 4
; CHECK-NEXT: add a1, a4, a5
; CHECK-NEXT: vse32.v v8, (a1), v0.t
; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: vse32.v v8, (a5), v0.t
; CHECK-NEXT: mv a5, s1
; CHECK-NEXT: bne s1, s0, .LBB0_5
; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i
; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4
; CHECK-NEXT: addi s9, s9, 4
; CHECK-NEXT: addi a4, a4, 4
; CHECK-NEXT: addi ra, ra, 4
; CHECK-NEXT: addi a3, a3, 4
; CHECK-NEXT: andi s10, a0, 1
; CHECK-NEXT: addi s11, s11, 4
; CHECK-NEXT: beqz s10, .LBB0_4
; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3
; CHECK-NEXT: addi s4, s4, 4
; CHECK-NEXT: addi s8, s8, 4
; CHECK-NEXT: addi s7, s7, 4
; CHECK-NEXT: addi s6, s6, 4
; CHECK-NEXT: andi a1, a2, 1
; CHECK-NEXT: addi s5, s5, 4
; CHECK-NEXT: beqz a1, .LBB0_3
; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
; CHECK-NEXT: addi a6, a6, 4
; CHECK-NEXT: addi s3, s3, 4
; CHECK-NEXT: addi t6, t6, 4
; CHECK-NEXT: addi t5, t5, 4
; CHECK-NEXT: addi t4, t4, 4
; CHECK-NEXT: beqz t3, .LBB0_2
; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: addi s2, s2, 4
; CHECK-NEXT: addi a7, a7, 4
; CHECK-NEXT: addi t0, t0, 4
; CHECK-NEXT: addi t2, t2, 4
; CHECK-NEXT: addi t1, t1, 4
; CHECK-NEXT: beqz a1, .LBB0_1
; CHECK-NEXT: # %bb.10: # %l.exit
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: jalr a0
; CHECK-NEXT: beqz s10, .LBB0_12
; CHECK-NEXT: .LBB0_11: # %for.body7.us.14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: j .LBB0_11
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v16, v8, 1
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
entry:
  %0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
  br label %for.cond1.preheader.i

for.cond1.preheader.i: ; preds = %for.cond.cleanup3.i, %entry
  %arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ]
  br label %for.cond5.preheader.i

for.cond5.preheader.i: ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i
  %arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ]
  %1 = add i64 %arg.42, %arg.21
  br label %for.cond9.preheader.i

for.cond.cleanup3.i: ; preds = %for.cond.cleanup7.i
  %indvars.iv.next74.i = add i64 %arg.21, 1
  br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i

for.cond9.preheader.i: ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i
  %arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ]
  %2 = add i64 %1, %arg.74
  br label %vector.ph.i

for.cond.cleanup7.i: ; preds = %for.cond.cleanup11.i
  %indvars.iv.next70.i = add i64 %arg.42, 1
  br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i

vector.ph.i: ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i
  %arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ]
  %3 = add i64 %2, %arg.96
  %broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0
  %broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
  br label %vector.body.i

vector.body.i: ; preds = %vector.body.i, %vector.ph.i
  %index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ]
  %vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ]
  %4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i
  %5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer)
  %6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer)
  %arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6)
  %arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer)
  %arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6)
  %arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106
  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer)
  %index.next.i = add i64 %index.i, 1
  %arg.108 = icmp eq i64 %index.i, 0
  br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i

for.cond.cleanup11.i: ; preds = %for.cond.cleanup15.i
  %indvars.iv.next66.i = add i64 %arg.74, 1
  br i1 %arg.3, label %for.cond.cleanup7.i, label %for.cond9.preheader.i

for.cond.cleanup15.i: ; preds = %vector.body.i
  %indvars.iv.next62.i = add i64 %arg.96, 1
  br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i

l.exit: ; preds = %for.cond.cleanup3.i
  tail call void null()
  br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19

for.body7.us.14: ; preds = %for.body7.us.14, %l.exit
  br label %for.body7.us.14

for.body7.us.19: ; preds = %l.exit
  %arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1
  %8 = icmp ne <32 x i32> %arg.109, zeroinitializer
  %9 = bitcast <32 x i1> %8 to i32
  %op.rdx13 = icmp ne i32 %9, 0
  %op.rdx = zext i1 %op.rdx13 to i8
  store i8 %op.rdx, ptr null, align 1
  ret i32 0
}
