30 changes: 28 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19710,20 +19710,46 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return V;
break;
case RISCVISD::VRGATHER_VX_VL: {
// Drop a redundant vrgather_vx.
// Note this assumes that out of bounds indices produce poison
// and can thus be replaced without having to prove them inbounds.
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDValue Passthru = N->getOperand(2);
SDValue VL = N->getOperand(4);

// Warning: Unlike most cases we strip an insert_subvector, this one
// does not require the first operand to be undef.
if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
isNullConstant(Src.getOperand(2)))
Src = Src.getOperand(1);

switch (Src.getOpcode()) {
default:
break;
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL:
if (Passthru.isUndef() && VL == Src.getOperand(2))
// Drop a redundant vrgather_vx.
// TODO: Remove the type restriction if we find a motivating
// test case?
if (Passthru.isUndef() && VL == Src.getOperand(2) &&
Src.getValueType() == VT)
return Src;
break;
case RISCVISD::VMV_S_X_VL:
case RISCVISD::VFMV_S_F_VL:
// If this use only demands lane zero from the source vmv.s.x, and
// doesn't have a passthru, then this vrgather.vi/vx is equivalent to
// a vmv.v.x. Note that there can be other uses of the original
// vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
if (isNullConstant(Idx) && Passthru.isUndef() &&
VL == Src.getOperand(2)) {
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
VL);
}
break;
}
break;
}
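To make the new VMV_S_X_VL/VFMV_S_F_VL case concrete, here is a minimal IR sketch of the pattern it targets. It is essentially the vmerge_vxm test from fixed-vectors-shuffle-int.ll below, reproduced under a hypothetical function name; exactly which surrounding instructions appear depends on the rest of lowering.

```llvm
; Hypothetical reduced example (mirrors the vmerge_vxm tests in this patch).
; %ins writes the scalar into lane 0, which lowers to a vmv.s.x; the shuffle
; then reads lane 0 of %ins at result positions 0, 3 and 4.  That lane-0 read
; used to be lowered as a masked vrgather.vi of the vmv.s.x result; with this
; combine it is treated as a broadcast of the scalar (vmv.v.x), which in turn
; lets the sequence become the vmerge.vxm seen in the updated CHECK lines.
define <8 x i8> @broadcast_lane0_masked(<8 x i8> %v, i8 %s) {
  %ins = insertelement <8 x i8> %v, i8 %s, i32 0
  %shuf = shufflevector <8 x i8> %ins, <8 x i8> poison,
                        <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}
```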
19 changes: 6 additions & 13 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
@@ -96,13 +96,11 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) {
; CHECK-LABEL: vmerge_vxm:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 25
; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vsetivli zero, 1, e32, m4, tu, ma
Collaborator Author: The choice of m4 here is very odd, but not really related to this change. It happens in InsertVSETVLI: the LMUL isn't really demanded but starts at m1, the original SEW of the vmv.s.x is e8, and we decide to adjust the input to preserve the SEW/LMUL ratio. I don't believe this actually matters; it just creates an odd-looking diff.
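For reference, the SEW/LMUL-ratio arithmetic behind that m4, assuming the vmv.s.x really does start out at e8, m1 as described above:

$$\frac{\mathrm{SEW}}{\mathrm{LMUL}} = \frac{8}{1} = 8 \quad\Longrightarrow\quad \text{at } \mathrm{SEW}=32:\ \mathrm{LMUL} = \frac{32}{8} = 4\ (\text{m4})$$

so the vsetivli above is just the e32 encoding of the same ratio.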

; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vrgather.vi v10, v8, 0, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: vfmv.s.f v8, fa0
Collaborator Author: Note that this vfmv.s.f could be eliminated if we rewrote the mask on the vmerge. I don't plan to do this, just noting that it's vaguely possible. I'm mildly of the opinion that this approach (the post-lowering DAG) has been pushed about as far as it should be, and that if we want to improve further, we should instead start canonicalizing shuffles before lowering. I may change my mind based on what future cases I stumble into. :)
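For reference, the mask constant and the shuffle in this test line up as follows (not something the patch changes):

$$25 = 2^4 + 2^3 + 2^0 = \texttt{0b00011001}$$

i.e. the merge takes the scalar in lanes 0, 3 and 4, which are exactly the result positions in <0, 1, 2, 0, 0, 5, 6, 7> that read lane 0 of %ins.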

; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
%ins = insertelement <8 x float> %v, float %s, i32 0
%shuf = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -112,15 +110,10 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) {
define <8 x float> @vmerge_vxm2(<8 x float> %v, float %s) {
; CHECK-LABEL: vmerge_vxm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m4, tu, ma
; CHECK-NEXT: vmv1r.v v12, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: li a0, 25
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmv1r.v v10, v12
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vrgather.vi v8, v10, 0, v0.t
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
%ins = insertelement <8 x float> %v, float %s, i32 0
%shuf = shufflevector <8 x float> %v, <8 x float> %ins, <8 x i32> <i32 8, i32 1, i32 2, i32 8, i32 8, i32 5, i32 6, i32 7>
17 changes: 6 additions & 11 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -1448,13 +1448,11 @@ define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 25
; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetivli zero, 1, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
%ins = insertelement <8 x i8> %v, i8 %s, i32 0
%shuf = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -1465,12 +1463,9 @@ define <8 x i8> @vmerge_vxm2(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm2:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 25
; CHECK-NEXT: vsetivli zero, 1, e8, m1, tu, ma
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vi v8, v9, 0, v0.t
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
%ins = insertelement <8 x i8> %v, i8 %s, i32 0
%shuf = shufflevector <8 x i8> %v, <8 x i8> %ins, <8 x i32> <i32 8, i32 1, i32 2, i32 8, i32 8, i32 5, i32 6, i32 7>