Skip to content

Commit ea3716e

Browse files
committed
[RISCV] Fix coalescing vsetvlis when AVL and vl register are the same
With EVL tail folding we can end up with vsetvlis where the output vl and the input AVL are the same register. When we tried to coalesce them we crashed because we tried to move the def's live interval before the kill's live interval, e.g. in this example:

    (vn0 def)
    dead $x0 = PseudoVSETIVLI 1, 192, implicit-def $vl, implicit-def $vtype
    renamable $v9 = COPY killed renamable $v8
    (vn1 def) %23:gprnox0 = PseudoVSETVLI killed (vn0) %23:gprnox0, 197, implicit-def $vl, implicit-def $vtype

We would try to move the vn1 def VNInfo up to the previous VSETVLI, in the middle of vn0's segment.

Separately, we were also assuming that the vl would only have one definition and thus were just taking the VNInfo from beginIndex(), so we ended up with a backwards segment and got the error "Cannot create empty or backwards segment".

This fixes these two issues: the first by moving the AVL operand + live interval up first, and the second by taking the VNInfo from NextMI's slot index.

Fixes #141907
1 parent c842705 commit ea3716e

File tree

3 files changed

+98
-10
lines changed

3 files changed

+98
-10
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,13 +1699,24 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
16991699
MI.getOperand(0).setReg(DefReg);
17001700
MI.getOperand(0).setIsDead(false);
17011701

1702+
// Move the AVL from NextMI to MI
1703+
dropAVLUse(MI.getOperand(1));
1704+
if (NextMI->getOperand(1).isImm())
1705+
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1706+
else
1707+
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1708+
false);
1709+
dropAVLUse(NextMI->getOperand(1));
1710+
17021711
// The def of DefReg moved to MI, so extend the LiveInterval up to
17031712
// it.
17041713
if (DefReg.isVirtual() && LIS) {
17051714
LiveInterval &DefLI = LIS->getInterval(DefReg);
17061715
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1707-
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1708-
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1716+
SlotIndex NextMISlot =
1717+
LIS->getInstructionIndex(*NextMI).getRegSlot();
1718+
VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
1719+
LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
17091720
DefLI.addSegment(S);
17101721
DefVNI->def = MISlot;
17111722
// Mark DefLI as spillable if it was previously unspillable
@@ -1716,13 +1727,6 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
17161727
LIS->shrinkToUses(&DefLI);
17171728
}
17181729

1719-
dropAVLUse(MI.getOperand(1));
1720-
if (NextMI->getOperand(1).isImm())
1721-
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1722-
else
1723-
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1724-
false);
1725-
17261730
MI.setDesc(NextMI->getDesc());
17271731
}
17281732
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -verify-machineinstrs | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
5+
target triple = "riscv64-unknown-linux-gnu"
6+
7+
define void @pr141907(ptr %0) #0 {
8+
; CHECK-LABEL: pr141907:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: addi sp, sp, -16
11+
; CHECK-NEXT: .cfi_def_cfa_offset 16
12+
; CHECK-NEXT: csrr a1, vlenb
13+
; CHECK-NEXT: slli a1, a1, 2
14+
; CHECK-NEXT: sub sp, sp, a1
15+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
16+
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
17+
; CHECK-NEXT: vmv.v.i v9, 0
18+
; CHECK-NEXT: vmclr.m v0
19+
; CHECK-NEXT: li a1, 0
20+
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
21+
; CHECK-NEXT: vmv.v.i v12, 0
22+
; CHECK-NEXT: addi a2, sp, 16
23+
; CHECK-NEXT: .LBB0_1: # %vector.body
24+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
25+
; CHECK-NEXT: vs4r.v v8, (a2)
26+
; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma
27+
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
28+
; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t
29+
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
30+
; CHECK-NEXT: vlseg3e32.v v8, (a2)
31+
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
32+
; CHECK-NEXT: vsseg2e16.v v11, (zero)
33+
; CHECK-NEXT: bnez a1, .LBB0_1
34+
; CHECK-NEXT: .LBB0_2: # %while.body5
35+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
36+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
37+
; CHECK-NEXT: vse16.v v9, (a0)
38+
; CHECK-NEXT: j .LBB0_2
39+
entry:
40+
br label %vector.body
41+
42+
vector.body: ; preds = %vector.body, %entry
43+
%evl.based.iv = phi i64 [ 0, %entry ], [ %2, %vector.body ]
44+
%vector.recur = phi <vscale x 2 x i32> [ zeroinitializer, %entry ], [ %3, %vector.body ]
45+
%1 = call i32 @llvm.experimental.get.vector.length.i64(i64 %evl.based.iv, i32 1, i1 true)
46+
%2 = zext i32 %1 to i64
47+
%wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load.nxv6i32.p0(ptr null, <vscale x 6 x i1> zeroinitializer, i32 0)
48+
%deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> %wide.masked.load)
49+
%3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
50+
%vp.cast65 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %vector.recur, <vscale x 2 x i1> zeroinitializer, i32 0)
51+
%interleaved.vec = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %vp.cast65, <vscale x 2 x i16> zeroinitializer)
52+
call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %interleaved.vec, ptr null, <vscale x 4 x i1> splat (i1 true), i32 0)
53+
%4 = icmp eq i32 %1, 0
54+
br i1 %4, label %while.body5, label %vector.body
55+
56+
while.body5: ; preds = %while.body5, %vector.body
57+
%5 = bitcast <vscale x 2 x i32> %3 to <vscale x 4 x i16>
58+
%cond52 = extractelement <vscale x 4 x i16> %5, i64 0
59+
store i16 %cond52, ptr %0, align 2
60+
br label %while.body5
61+
}
62+
63+
attributes #0 = { "target-cpu"="sifive-p670" "target-features"="+64bit,+a,+c,+d,+experimental,+f,+m,+relax,+unaligned-scalar-mem,+unaligned-vector-mem,+v,+xsifivecdiscarddlone,+xsifivecflushdlone,+za64rs,+zaamo,+zalrsc,+zba,+zbb,+zbs,+zca,+zcd,+zfhmin,+zic64b,+zicbom,+zicbop,+zicboz,+ziccamoa,+ziccif,+zicclsm,+ziccrse,+zicsr,+zifencei,+zihintntl,+zihintpause,+zihpm,+zmmul,+zvbb,+zvbc,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvkb,+zvkg,+zvkn,+zvknc,+zvkned,+zvkng,+zvknhb,+zvks,+zvksc,+zvksed,+zvksg,+zvksh,+zvkt,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-p,-experimental-smctr" }

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@
9696
ret void
9797
}
9898

99+
define void @coalesce_vl_avl_same_reg() {
100+
ret void
101+
}
102+
99103
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
100104

101105
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4
@@ -408,7 +412,7 @@ body: |
408412
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
409413
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
410414
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 undef $noreg, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
411-
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
415+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit-def $vl, implicit-def $vtype
412416
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
413417
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 undef $noreg, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
414418
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]], implicit $vtype
@@ -601,3 +605,20 @@ body: |
601605
%x:gpr = COPY $x10
602606
renamable $v8 = PseudoVMV_S_X undef renamable $v8, killed renamable %x, 1, 5
603607
PseudoRET implicit $v8
608+
...
609+
---
610+
name: coalesce_vl_avl_same_reg
611+
tracksRegLiveness: true
612+
body: |
613+
bb.0:
614+
liveins: $x8, $v8
615+
; CHECK-LABEL: name: coalesce_vl_avl_same_reg
616+
; CHECK: liveins: $x8, $v8
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: %x:gprnox0 = COPY $x8
619+
; CHECK-NEXT: dead %x:gprnox0 = PseudoVSETVLI %x, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
620+
; CHECK-NEXT: dead %v:vr = COPY $v8, implicit $vtype
621+
%x:gprnox0 = COPY $x8
622+
dead $x0 = PseudoVSETIVLI 1, 208, implicit-def $vl, implicit-def $vtype
623+
%v:vr = COPY $v8, implicit $vtype
624+
%x = PseudoVSETVLI %x, 208, implicit-def $vl, implicit-def $vtype

0 commit comments

Comments
 (0)