Skip to content

Commit d6011d8

Browse files
committed
[RISCV] Sink vp.splat operands of VP intrinsic.
1 parent fe99074 commit d6011d8

File tree

2 files changed

+40
-36
lines changed

2 files changed

+40
-36
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 25 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2837,39 +2837,43 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
28372837
if (!ST->sinkSplatOperands())
28382838
return false;
28392839

2840-
for (auto OpIdx : enumerate(I->operands())) {
2841-
if (!canSplatOperand(I, OpIdx.index()))
2842-
continue;
2843-
2844-
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2840+
for (auto &U : I->operands()) {
2841+
auto *Op = dyn_cast<Instruction>(U.get());
28452842
// Make sure we are not already sinking this operand
28462843
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
28472844
continue;
28482845

2849-
// We are looking for a splat that can be sunk.
2850-
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2846+
// We are looking for a splat/vp.splat that can be sunk.
2847+
// FIXME: Should we care about the poison value of inactive elements in
2848+
// vp.splat?
2849+
bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
2850+
m_Value(), m_Value(), m_Value()));
2851+
if (!IsVPSplat &&
2852+
!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
28512853
m_Undef(), m_ZeroMask())))
28522854
continue;
28532855

28542856
// Don't sink i1 splats.
28552857
if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
28562858
continue;
28572859

2858-
// All uses of the shuffle should be sunk to avoid duplicating it across gpr
2859-
// and vector registers
2860-
for (Use &U : Op->uses()) {
2861-
Instruction *Insn = cast<Instruction>(U.getUser());
2862-
if (!canSplatOperand(Insn, U.getOperandNo()))
2863-
return false;
2864-
}
2860+
// All uses of the splat/vp.splat should be sunk to avoid duplicating it
2861+
// across gpr and vector registers
2862+
if (any_of(Op->uses(), [this](Use &U) {
2863+
return !canSplatOperand(cast<Instruction>(U.getUser()),
2864+
U.getOperandNo());
2865+
}))
2866+
continue;
28652867

2866-
Use *InsertEltUse = &Op->getOperandUse(0);
2867-
// Sink any fpexts since they might be used in a widening fp pattern.
2868-
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2869-
if (isa<FPExtInst>(InsertElt->getOperand(1)))
2870-
Ops.push_back(&InsertElt->getOperandUse(1));
2871-
Ops.push_back(InsertEltUse);
2872-
Ops.push_back(&OpIdx.value());
2868+
if (!IsVPSplat) {
2869+
Use *InsertEltUse = &Op->getOperandUse(0);
2870+
// Sink any fpexts since they might be used in a widening fp pattern.
2871+
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2872+
if (isa<FPExtInst>(InsertElt->getOperand(1)))
2873+
Ops.push_back(&InsertElt->getOperandUse(1));
2874+
Ops.push_back(InsertEltUse);
2875+
}
2876+
Ops.push_back(&U);
28732877
}
28742878
return true;
28752879
}

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5896,30 +5896,30 @@ define void @sink_vp_splat(ptr nocapture %out, ptr nocapture %in) {
58965896
; CHECK: # %bb.0: # %entry
58975897
; CHECK-NEXT: li a2, 0
58985898
; CHECK-NEXT: li a3, 1024
5899-
; CHECK-NEXT: lui a4, 1
5899+
; CHECK-NEXT: li a4, 3
5900+
; CHECK-NEXT: lui a5, 1
59005901
; CHECK-NEXT: .LBB129_1: # %vector.body
59015902
; CHECK-NEXT: # =>This Loop Header: Depth=1
59025903
; CHECK-NEXT: # Child Loop BB129_2 Depth 2
5903-
; CHECK-NEXT: vsetvli a5, a3, e32, m4, ta, ma
5904-
; CHECK-NEXT: slli a6, a2, 2
5904+
; CHECK-NEXT: vsetvli a6, a3, e32, m4, ta, ma
5905+
; CHECK-NEXT: slli a7, a2, 2
59055906
; CHECK-NEXT: vmv.v.i v8, 0
5906-
; CHECK-NEXT: vmv.v.i v12, 3
5907-
; CHECK-NEXT: add a7, a1, a6
5908-
; CHECK-NEXT: li t0, 1024
5907+
; CHECK-NEXT: add t0, a1, a7
5908+
; CHECK-NEXT: li t1, 1024
59095909
; CHECK-NEXT: .LBB129_2: # %for.body424
59105910
; CHECK-NEXT: # Parent Loop BB129_1 Depth=1
59115911
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
5912-
; CHECK-NEXT: vle32.v v16, (a7)
5913-
; CHECK-NEXT: addi t0, t0, -1
5914-
; CHECK-NEXT: vmacc.vv v8, v16, v12
5915-
; CHECK-NEXT: add a7, a7, a4
5916-
; CHECK-NEXT: bnez t0, .LBB129_2
5912+
; CHECK-NEXT: vle32.v v12, (t0)
5913+
; CHECK-NEXT: addi t1, t1, -1
5914+
; CHECK-NEXT: vmacc.vx v8, a4, v12
5915+
; CHECK-NEXT: add t0, t0, a5
5916+
; CHECK-NEXT: bnez t1, .LBB129_2
59175917
; CHECK-NEXT: # %bb.3: # %vector.latch
59185918
; CHECK-NEXT: # in Loop: Header=BB129_1 Depth=1
5919-
; CHECK-NEXT: add a6, a0, a6
5920-
; CHECK-NEXT: sub a3, a3, a5
5921-
; CHECK-NEXT: vse32.v v8, (a6)
5922-
; CHECK-NEXT: add a2, a2, a5
5919+
; CHECK-NEXT: add a7, a0, a7
5920+
; CHECK-NEXT: sub a3, a3, a6
5921+
; CHECK-NEXT: vse32.v v8, (a7)
5922+
; CHECK-NEXT: add a2, a2, a6
59235923
; CHECK-NEXT: bnez a3, .LBB129_1
59245924
; CHECK-NEXT: # %bb.4: # %for.cond.cleanup
59255925
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)