Commit 6ce315a

topperc authored and Debadri Basak committed
[RISCV] Fix misuse of EVT::changeVectorElementType() in legalizeScatterGatherIndexType. (llvm#165829)
EVT::changeVectorElementType() doesn't work well when the original type is a SimpleVT but the changed type isn't: creating a non-SimpleVT requires an LLVMContext, and there is nowhere for that function to get one from. Fix this by using EVT::getVectorVT instead, which takes the context explicitly.

In the added test, v7i8 is a SimpleVT, but v7i64 is not.
1 parent 5f3bd37 commit 6ce315a
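
As a rough illustration of the failure mode described in the message, here is a minimal C++ sketch of the pattern the fix switches to (not code from this commit; the helper name widenIndexVT and the standalone framing are assumptions):

// Sketch only: shows why the widened index type must be built with
// EVT::getVectorVT and the DAG's LLVMContext.  widenIndexVT is a
// hypothetical helper, not an LLVM API.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Widen each index element to XLenVT while keeping the element count.
// IndexVT.changeVectorElementType(XLenVT) is not usable here: when IndexVT
// is a SimpleVT (e.g. v7i8) but the widened type (v7i64) is not, creating
// the result requires an LLVMContext, which that method cannot reach.
static EVT widenIndexVT(SelectionDAG &DAG, EVT IndexVT, MVT XLenVT) {
  return EVT::getVectorVT(*DAG.getContext(), XLenVT,
                          IndexVT.getVectorElementCount());
}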

2 files changed: 256 additions & 1 deletion

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 1 deletion
@@ -19794,7 +19794,9 @@ legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
     // LLVM's legalization take care of the splitting.
     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
     Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
-                        IndexVT.changeVectorElementType(XLenVT), Index);
+                        EVT::getVectorVT(*DAG.getContext(), XLenVT,
+                                         IndexVT.getVectorElementCount()),
+                        Index);
   }
   IndexType = ISD::UNSIGNED_SCALED;
   return true;
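
For context, a small standalone sketch (illustrative only, not part of the commit) of the SimpleVT distinction the commit message refers to: v7i8 has an MVT enum entry, while v7i64 does not and can only be represented as an extended EVT tied to an LLVMContext.

// Standalone sketch; assumes it is compiled against the LLVM libraries.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // v7i8 maps to an MVT enum value, so the resulting EVT is "simple".
  EVT V7I8 = EVT::getVectorVT(Ctx, MVT::i8, 7);
  // v7i64 has no MVT enum value; getVectorVT falls back to an extended
  // EVT, which is only possible because an LLVMContext is supplied.
  EVT V7I64 = EVT::getVectorVT(Ctx, MVT::i64, 7);
  assert(V7I8.isSimple() && !V7I64.isSimple());
  return 0;
}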

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 253 additions & 0 deletions
@@ -15265,6 +15265,259 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
   ret <4 x i32> %x
 }
 
+define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7 x i8> %passthru) {
+; RV32-LABEL: mgather_baseidx_v7i8:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 127
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vmand.mm v0, v0, v10
+; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_v7i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: li a1, 127
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vmv.s.x v10, a1
+; RV64V-NEXT: vmand.mm v0, v0, v10
+; RV64V-NEXT: vsext.vf8 v12, v8
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_v7i8:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: addi sp, sp, -16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV64ZVE32F-NEXT: .cfi_remember_state
+; RV64ZVE32F-NEXT: li a1, 64
+; RV64ZVE32F-NEXT: addi a2, sp, 8
+; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64ZVE32F-NEXT: vsm.v v0, (a2)
+; RV64ZVE32F-NEXT: ld a1, 8(sp)
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_2
+; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_2: # %else
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_4
+; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.v.x v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_4: # %else2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 3
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 4
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v11, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_6: # %else5
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_8: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_13
+; RV64ZVE32F-NEXT: # %bb.9: # %else11
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_14
+; RV64ZVE32F-NEXT: .LBB132_10: # %else14
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: beqz a1, .LBB132_12
+; RV64ZVE32F-NEXT: .LBB132_11: # %cond.load16
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: add a0, a0, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: lbu a0, 0(a0)
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: .LBB132_12: # %else17
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-NEXT: addi sp, sp, 16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB132_13: # %cond.load10
+; RV64ZVE32F-NEXT: .cfi_restore_state
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.v.x v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_10
+; RV64ZVE32F-NEXT: .LBB132_14: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: bnez a1, .LBB132_11
+; RV64ZVE32F-NEXT: j .LBB132_12
+  %ptrs = getelementptr inbounds i8, ptr %base, <7 x i8> %idxs
+  %v = call <7 x i8> @llvm.masked.gather.v7i8.v7p0(<7 x ptr> %ptrs, i32 1, <7 x i1> %m, <7 x i8> %passthru)
+  ret <7 x i8> %v
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32V-ZVFH: {{.*}}
 ; RV32V-ZVFHMIN: {{.*}}
