Commit 91010a3

[RISCV] Reduce number of GPRs needed by lowerSegmentSpillReload. (llvm#165337)
Previously, we kept VLENB unaltered in a register and used a second temporary register to hold the shifted value. Now we store the shifted value in the VLENB register itself and keep track of how much it has been shifted; if we need a smaller multiple of VLENB, we can shift right. Fixes llvm#165232.
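As a rough illustration of the bookkeeping, here is a minimal standalone sketch (not the LLVM code itself; emitSLLI, emitSRLI, and adjustVLENBShift are hypothetical stand-ins for the SLLI/SRLI instructions the pass emits via BuildMI):

// Minimal sketch of the relative-shift bookkeeping described above.
#include <cstdio>

// Hypothetical emitters standing in for BuildMI-generated shifts.
static void emitSLLI(unsigned Amt) { std::printf("slli t0, t0, %u\n", Amt); }
static void emitSRLI(unsigned Amt) { std::printf("srli t0, t0, %u\n", Amt); }

// The single scratch register currently holds VLENB << CurShift. Adjust it
// in place so it holds VLENB << WantShift, emitting at most one relative
// shift and using no second GPR.
static void adjustVLENBShift(unsigned &CurShift, unsigned WantShift) {
  if (CurShift > WantShift)
    emitSRLI(CurShift - WantShift); // down to a smaller multiple of VLENB
  else if (CurShift < WantShift)
    emitSLLI(WantShift - CurShift); // up to a larger multiple of VLENB
  CurShift = WantShift;
}

int main() {
  unsigned Shift = 0;         // register holds raw VLENB
  adjustVLENBShift(Shift, 2); // need 4*VLENB: slli t0, t0, 2
  adjustVLENBShift(Shift, 1); // need 2*VLENB: srli t0, t0, 1
  return 0;
}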
Parent: 3c4fece

File tree: 3 files changed, +269 −18 lines

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 19 additions & 12 deletions
@@ -438,34 +438,41 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
   TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
 
   Register VLENB = 0;
-  unsigned PreHandledNum = 0;
+  unsigned VLENBShift = 0;
+  unsigned PrevHandledNum = 0;
   unsigned I = 0;
   while (I != NumRegs) {
     auto [LMulHandled, RegClass, Opcode] =
         getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
     auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
     bool IsLast = I + RegNumHandled == NumRegs;
-    if (PreHandledNum) {
+    if (PrevHandledNum) {
       Register Step;
       // Optimize for constant VLEN.
       if (auto VLEN = STI.getRealVLen()) {
-        int64_t Offset = *VLEN / 8 * PreHandledNum;
+        int64_t Offset = *VLEN / 8 * PrevHandledNum;
         Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
         STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
       } else {
         if (!VLENB) {
           VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
           BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
         }
-        uint32_t ShiftAmount = Log2_32(PreHandledNum);
-        if (ShiftAmount == 0)
-          Step = VLENB;
-        else {
-          Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-          BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
-              .addReg(VLENB, getKillRegState(IsLast))
-              .addImm(ShiftAmount);
+        uint32_t ShiftAmount = Log2_32(PrevHandledNum);
+        // To avoid using an extra register, we shift the VLENB register and
+        // remember how much it has been shifted. We can then use relative
+        // shifts to adjust to the desired shift amount.
+        if (VLENBShift > ShiftAmount) {
+          BuildMI(MBB, II, DL, TII->get(RISCV::SRLI), VLENB)
+              .addReg(VLENB, RegState::Kill)
+              .addImm(VLENBShift - ShiftAmount);
+        } else if (VLENBShift < ShiftAmount) {
+          BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VLENB)
+              .addReg(VLENB, RegState::Kill)
+              .addImm(ShiftAmount - VLENBShift);
         }
+        VLENBShift = ShiftAmount;
+        Step = VLENB;
       }
 
       BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
@@ -489,7 +496,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
     if (IsSpill)
       MIB.addReg(Reg, RegState::Implicit);
 
-    PreHandledNum = RegNumHandled;
+    PrevHandledNum = RegNumHandled;
     RegEncoding += RegNumHandled;
    I += RegNumHandled;
   }
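The effect on generated code shows up in the zvlsseg-spill.mir diff at the bottom of this page: the spill sequence reads VLENB into $x12, shifts it left by 2 in place for the 4*VLENB step past a 4-register store, then shifts the same register right by 1 for the following 2*VLENB step, where the old sequence materialized the first step into a second scratch register $x13.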
Lines changed: 244 additions & 0 deletions

@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element3047, ptr %arrayinit.element3049, ptr %arrayinit.element3051, ptr %arrayinit.element3053, ptr %arrayinit.element3055, ptr %arrayinit.element3057, ptr %arrayinit.element3059, ptr %arrayinit.element3061, ptr %arrayinit.element3063, ptr %arrayinit.element3065, ptr %arrayinit.element3067, i64 %var_94_i.07698, target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1) {
+; CHECK-LABEL: main:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr t0, vlenb
+; CHECK-NEXT:    slli t0, t0, 3
+; CHECK-NEXT:    mv t1, t0
+; CHECK-NEXT:    slli t0, t0, 1
+; CHECK-NEXT:    add t0, t0, t1
+; CHECK-NEXT:    sub sp, sp, t0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 2
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    vs4r.v v16, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t0, 56(a1)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t1, 48(a1)
+; CHECK-NEXT:    vsetvli t2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t2, 40(a1)
+; CHECK-NEXT:    # kill: def $v10 killed $v9 killed $vtype
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t3, 32(a1)
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t4, 16(a1)
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    ld t5, 24(a1)
+; CHECK-NEXT:    vmv.v.i v13, 0
+; CHECK-NEXT:    vsetvli t6, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v22, 0
+; CHECK-NEXT:    vmv1r.v v14, v9
+; CHECK-NEXT:    sd zero, 0(a0)
+; CHECK-NEXT:    vmv.v.i v24, 0
+; CHECK-NEXT:    vmv1r.v v15, v9
+; CHECK-NEXT:    vmv1r.v v18, v9
+; CHECK-NEXT:    li t6, 1023
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vmv1r.v v19, v9
+; CHECK-NEXT:    slli t6, t6, 52
+; CHECK-NEXT:    vmv.v.i v28, 0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs2r.v v22, (a1) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    vs4r.v v24, (a1) # vscale x 32-byte Folded Spill
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    vs2r.v v28, (a1) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    vmv1r.v v20, v9
+; CHECK-NEXT:    sd t6, 0(t5)
+; CHECK-NEXT:    vmv2r.v v16, v14
+; CHECK-NEXT:    vmv2r.v v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v10
+; CHECK-NEXT:    vmv1r.v v11, v9
+; CHECK-NEXT:    vmv1r.v v21, v9
+; CHECK-NEXT:    csrr t5, vlenb
+; CHECK-NEXT:    slli t5, t5, 3
+; CHECK-NEXT:    add t5, sp, t5
+; CHECK-NEXT:    addi t5, t5, 16
+; CHECK-NEXT:    vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    csrr t6, vlenb
+; CHECK-NEXT:    slli t6, t6, 1
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vs2r.v v20, (t5) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v19, 0
+; CHECK-NEXT:    vmclr.m v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v6, 0
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v20, v19
+; CHECK-NEXT:    vmv1r.v v3, v19
+; CHECK-NEXT:    vmv1r.v v5, v19
+; CHECK-NEXT:    vmv1r.v v2, v19
+; CHECK-NEXT:    vmv1r.v v31, v19
+; CHECK-NEXT:    vmv1r.v v30, v19
+; CHECK-NEXT:    vmv1r.v v4, v19
+; CHECK-NEXT:    vmv2r.v v22, v10
+; CHECK-NEXT:    vmv4r.v v24, v12
+; CHECK-NEXT:    vmv2r.v v28, v16
+; CHECK-NEXT:    vmv2r.v v8, v6
+; CHECK-NEXT:    vmv1r.v v18, v19
+; CHECK-NEXT:    vmv1r.v v21, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vle32.v v20, (t4)
+; CHECK-NEXT:    vle32.v v3, (t1)
+; CHECK-NEXT:    vle32.v v30, (a7)
+; CHECK-NEXT:    vle64.v v8, (a4)
+; CHECK-NEXT:    vle32.v v5, (t2)
+; CHECK-NEXT:    vle32.v v2, (t3)
+; CHECK-NEXT:    vle32.v v31, (a6)
+; CHECK-NEXT:    vmv1r.v v24, v30
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmflt.vv v21, v8, v6, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v19
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vle32.v v18, (a2)
+; CHECK-NEXT:    vle32.v v8, (a3)
+; CHECK-NEXT:    vle32.v v4, (a5)
+; CHECK-NEXT:    vmv1r.v v22, v20
+; CHECK-NEXT:    csrr t5, vlenb
+; CHECK-NEXT:    slli t5, t5, 3
+; CHECK-NEXT:    add t5, sp, t5
+; CHECK-NEXT:    addi t5, t5, 16
+; CHECK-NEXT:    vl1r.v v1, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    csrr t6, vlenb
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vl2r.v v2, (t5) # vscale x 16-byte Folded Reload
+; CHECK-NEXT:    slli t6, t6, 1
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vl1r.v v4, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vsseg4e32.v v1, (zero)
+; CHECK-NEXT:    vsseg8e32.v v22, (a1)
+; CHECK-NEXT:    vmv1r.v v0, v21
+; CHECK-NEXT:    vssub.vv v8, v19, v18, v0.t
+; CHECK-NEXT:    csrr t5, vlenb
+; CHECK-NEXT:    slli t5, t5, 2
+; CHECK-NEXT:    mv t6, t5
+; CHECK-NEXT:    slli t5, t5, 1
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    add t5, sp, t5
+; CHECK-NEXT:    addi t5, t5, 16
+; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, t0, e64, m2, ta, ma
+; CHECK-NEXT:    vsseg2e64.v v20, (zero)
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    addi t5, sp, 16
+; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    csrr t6, vlenb
+; CHECK-NEXT:    slli t6, t6, 2
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vsetivli zero, 0, e64, m2, ta, ma
+; CHECK-NEXT:    vsseg4e64.v v20, (zero), v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg8e32.v v8, (a0)
+; CHECK-NEXT:    csrr t5, vlenb
+; CHECK-NEXT:    slli t5, t5, 4
+; CHECK-NEXT:    add t5, sp, t5
+; CHECK-NEXT:    addi t5, t5, 16
+; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    csrr t6, vlenb
+; CHECK-NEXT:    slli t6, t6, 2
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsseg4e64.v v20, (zero)
+; CHECK-NEXT:    j .LBB0_1
+entry:
+  store double 0.000000e+00, ptr %var_117, align 8
+  store double 1.000000e+00, ptr %arrayinit.element3061, align 8
+  br label %for.body
+
+for.body: ; preds = %for.body, %entry
+  %2 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3059, i64 0)
+  %3 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3067, i64 0)
+  %4 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3065, i64 0)
+  %5 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3063, i64 0)
+  %6 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3055, i64 0)
+  %7 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3057, i64 0)
+  %8 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3053, i64 0)
+  %9 = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.p0.i64(<vscale x 2 x double> zeroinitializer, ptr %arrayinit.element3051, i64 0)
+  %10 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3047, i64 0)
+  %11 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3049, i64 0)
+  call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) zeroinitializer, ptr null, i64 0, i64 5)
+  %12 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) zeroinitializer, <vscale x 2 x float> %8, i32 0)
+  %13 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %12, <vscale x 2 x float> %7, i32 2)
+  %14 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %13, <vscale x 2 x float> %6, i32 0)
+  %15 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %14, <vscale x 2 x float> %5, i32 0)
+  %16 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %15, <vscale x 2 x float> %4, i32 0)
+  %17 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %16, <vscale x 2 x float> %3, i32 0)
+  %18 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %17, <vscale x 2 x float> %2, i32 0)
+  call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %18, ptr %arrayinit.element3045, i64 0, i64 5)
+  %19 = tail call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.nxv2f64.i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %9, <vscale x 2 x i1> zeroinitializer, i64 0)
+  %20 = tail call <vscale x 2 x i32> @llvm.riscv.vssub.mask.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> %11, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %10, <vscale x 2 x i1> %19, i64 0, i64 0)
+  call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv16i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, ptr null, i64 %var_94_i.07698, i64 6)
+  call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv2i1.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) zeroinitializer, ptr null, <vscale x 2 x i1> zeroinitializer, i64 0, i64 6)
+  %21 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) poison, <vscale x 2 x i32> %20, i32 0)
+  call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %21, ptr %var_117, i64 0, i64 5)
+  call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1, ptr null, i64 0, i64 6)
+  br label %for.body
+}

llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir

Lines changed: 6 additions & 6 deletions
@@ -32,10 +32,10 @@ body: |
     ; CHECK-NEXT: $x11 = ADDI $x2, 16
     ; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
     ; CHECK-NEXT: $x12 = PseudoReadVLENB
-    ; CHECK-NEXT: $x13 = SLLI $x12, 2
-    ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+    ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
+    ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
     ; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
-    ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
+    ; CHECK-NEXT: $x12 = SRLI killed $x12, 1
     ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
     ; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
     ; CHECK-NEXT: $x11 = ADDI $x2, 16
@@ -93,10 +93,10 @@ body: |
     ; CHECK-NEXT: $x11 = ADDI $x2, 16
     ; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
     ; CHECK-NEXT: $x12 = PseudoReadVLENB
-    ; CHECK-NEXT: $x13 = SLLI $x12, 1
-    ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+    ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
+    ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
     ; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
-    ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
+    ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
     ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
     ; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
     ; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10
