Skip to content

Commit 9569161

Browse files
committed
[RISCV] Sink vp.splat operands of VP intrinsic.
1 parent b1159a9 commit 9569161

File tree

2 files changed

+46
-42
lines changed

2 files changed

+46
-42
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2838,41 +2838,46 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
28382838
if (!ST->sinkSplatOperands())
28392839
return false;
28402840

2841-
for (auto OpIdx : enumerate(I->operands())) {
2842-
if (!canSplatOperand(I, OpIdx.index()))
2843-
continue;
2844-
2845-
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2841+
for (auto &U : I->operands()) {
2842+
auto *Op = dyn_cast<Instruction>(U.get());
28462843
// Make sure we are not already sinking this operand
28472844
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
28482845
continue;
28492846

2850-
// We are looking for a splat that can be sunk.
2851-
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2847+
// We are looking for a splat/vp.splat that can be sunk.
2848+
bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
2849+
m_Value(), m_Value(), m_Value()));
2850+
if (!IsVPSplat &&
2851+
!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
28522852
m_Undef(), m_ZeroMask())))
28532853
continue;
28542854

28552855
// Don't sink i1 splats.
28562856
if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
28572857
continue;
28582858

2859-
// All uses of the shuffle should be sunk to avoid duplicating it across gpr
2860-
// and vector registers
2861-
for (Use &U : Op->uses()) {
2862-
Instruction *Insn = cast<Instruction>(U.getUser());
2863-
if (!canSplatOperand(Insn, U.getOperandNo()))
2864-
return false;
2865-
}
2859+
// All uses of the splat/vp.splat should be sunk to avoid duplicating it
2860+
// across gpr and vector registers
2861+
if (any_of(Op->uses(), [this](Use &U) {
2862+
return !canSplatOperand(cast<Instruction>(U.getUser()),
2863+
U.getOperandNo());
2864+
}))
2865+
continue;
28662866

2867-
Use *InsertEltUse = &Op->getOperandUse(0);
28682867
// Sink any fpexts since they might be used in a widening fp pattern.
2869-
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2870-
if (isa<FPExtInst>(InsertElt->getOperand(1)))
2871-
Ops.push_back(&InsertElt->getOperandUse(1));
2872-
Ops.push_back(InsertEltUse);
2873-
Ops.push_back(&OpIdx.value());
2868+
if (IsVPSplat) {
2869+
if (isa<FPExtInst>(Op->getOperand(0)))
2870+
Ops.push_back(&Op->getOperandUse(0));
2871+
} else {
2872+
Use *InsertEltUse = &Op->getOperandUse(0);
2873+
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2874+
if (isa<FPExtInst>(InsertElt->getOperand(1)))
2875+
Ops.push_back(&InsertElt->getOperandUse(1));
2876+
Ops.push_back(InsertEltUse);
2877+
}
2878+
Ops.push_back(&U);
28742879
}
2875-
return true;
2880+
return !Ops.empty();
28762881
}
28772882

28782883
RISCVTTIImpl::TTI::MemCmpExpansionOptions

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5896,30 +5896,30 @@ define void @sink_vp_splat(ptr nocapture %out, ptr nocapture %in) {
58965896
; CHECK: # %bb.0: # %entry
58975897
; CHECK-NEXT: li a2, 0
58985898
; CHECK-NEXT: li a3, 1024
5899-
; CHECK-NEXT: lui a4, 1
5899+
; CHECK-NEXT: li a4, 3
5900+
; CHECK-NEXT: lui a5, 1
59005901
; CHECK-NEXT: .LBB129_1: # %vector.body
59015902
; CHECK-NEXT: # =>This Loop Header: Depth=1
59025903
; CHECK-NEXT: # Child Loop BB129_2 Depth 2
5903-
; CHECK-NEXT: vsetvli a5, a3, e32, m4, ta, ma
5904-
; CHECK-NEXT: slli a6, a2, 2
5904+
; CHECK-NEXT: vsetvli a6, a3, e32, m4, ta, ma
5905+
; CHECK-NEXT: slli a7, a2, 2
59055906
; CHECK-NEXT: vmv.v.i v8, 0
5906-
; CHECK-NEXT: vmv.v.i v12, 3
5907-
; CHECK-NEXT: add a7, a1, a6
5908-
; CHECK-NEXT: li t0, 1024
5907+
; CHECK-NEXT: add t0, a1, a7
5908+
; CHECK-NEXT: li t1, 1024
59095909
; CHECK-NEXT: .LBB129_2: # %for.body424
59105910
; CHECK-NEXT: # Parent Loop BB129_1 Depth=1
59115911
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
5912-
; CHECK-NEXT: vle32.v v16, (a7)
5913-
; CHECK-NEXT: addi t0, t0, -1
5914-
; CHECK-NEXT: vmacc.vv v8, v16, v12
5915-
; CHECK-NEXT: add a7, a7, a4
5916-
; CHECK-NEXT: bnez t0, .LBB129_2
5912+
; CHECK-NEXT: vle32.v v12, (t0)
5913+
; CHECK-NEXT: addi t1, t1, -1
5914+
; CHECK-NEXT: vmacc.vx v8, a4, v12
5915+
; CHECK-NEXT: add t0, t0, a5
5916+
; CHECK-NEXT: bnez t1, .LBB129_2
59175917
; CHECK-NEXT: # %bb.3: # %vector.latch
59185918
; CHECK-NEXT: # in Loop: Header=BB129_1 Depth=1
5919-
; CHECK-NEXT: add a6, a0, a6
5920-
; CHECK-NEXT: sub a3, a3, a5
5921-
; CHECK-NEXT: vse32.v v8, (a6)
5922-
; CHECK-NEXT: add a2, a2, a5
5919+
; CHECK-NEXT: add a7, a0, a7
5920+
; CHECK-NEXT: sub a3, a3, a6
5921+
; CHECK-NEXT: vse32.v v8, (a7)
5922+
; CHECK-NEXT: add a2, a2, a6
59235923
; CHECK-NEXT: bnez a3, .LBB129_1
59245924
; CHECK-NEXT: # %bb.4: # %for.cond.cleanup
59255925
; CHECK-NEXT: ret
@@ -5962,24 +5962,23 @@ define void @sink_vp_splat_vfwadd_wf(ptr nocapture %in, float %f) {
59625962
; CHECK-LABEL: sink_vp_splat_vfwadd_wf:
59635963
; CHECK: # %bb.0: # %entry
59645964
; CHECK-NEXT: li a1, 0
5965-
; CHECK-NEXT: fcvt.d.s fa5, fa0
59665965
; CHECK-NEXT: li a2, 1024
59675966
; CHECK-NEXT: lui a3, 2
59685967
; CHECK-NEXT: .LBB130_1: # %vector.body
59695968
; CHECK-NEXT: # =>This Loop Header: Depth=1
59705969
; CHECK-NEXT: # Child Loop BB130_2 Depth 2
5971-
; CHECK-NEXT: vsetvli a4, a2, e64, m8, ta, ma
5970+
; CHECK-NEXT: vsetvli a4, a2, e8, m1, ta, ma
59725971
; CHECK-NEXT: slli a5, a1, 3
5973-
; CHECK-NEXT: vfmv.v.f v8, fa5
59745972
; CHECK-NEXT: add a5, a0, a5
59755973
; CHECK-NEXT: li a6, 1024
59765974
; CHECK-NEXT: .LBB130_2: # %for.body419
59775975
; CHECK-NEXT: # Parent Loop BB130_1 Depth=1
59785976
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
5979-
; CHECK-NEXT: vle64.v v16, (a5)
5977+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5978+
; CHECK-NEXT: vle64.v v8, (a5)
59805979
; CHECK-NEXT: addi a6, a6, -1
5981-
; CHECK-NEXT: vfadd.vv v16, v16, v8
5982-
; CHECK-NEXT: vse64.v v16, (a5)
5980+
; CHECK-NEXT: vfwadd.wf v8, v8, fa0
5981+
; CHECK-NEXT: vse64.v v8, (a5)
59835982
; CHECK-NEXT: add a5, a5, a3
59845983
; CHECK-NEXT: bnez a6, .LBB130_2
59855984
; CHECK-NEXT: # %bb.3: # %vector.latch

0 commit comments

Comments
 (0)