Skip to content

Commit 343168a

Browse files
committed
[AArch64] Improve operand sinking for mul instructions
- Sink splat operands to mul instructions for types where we can use the lane-indexed variants.
- When sinking operands for [su]mull, also sink the ext instruction.
1 parent f3a58f2 commit 343168a

File tree

5 files changed

+143
-134
lines changed

5 files changed

+143
-134
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5168,26 +5168,41 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
51685168
return false;
51695169
}
51705170
case Instruction::Mul: {
5171+
auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5172+
auto VT = MVT::getVT(V->getType(), /*HandleUnknown=*/true);
5173+
return (VT == MVT::v4i16 || VT == MVT::v8i16 || VT == MVT::v2i32 ||
5174+
VT == MVT::v4i32);
5175+
};
5176+
51715177
int NumZExts = 0, NumSExts = 0;
51725178
for (auto &Op : I->operands()) {
51735179
// Make sure we are not already sinking this operand
51745180
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
51755181
continue;
51765182

5177-
if (match(&Op, m_SExt(m_Value()))) {
5178-
NumSExts++;
5179-
continue;
5180-
} else if (match(&Op, m_ZExt(m_Value()))) {
5181-
NumZExts++;
5183+
if (match(&Op, m_ZExtOrSExt(m_Value()))) {
5184+
auto *Ext = cast<Instruction>(Op);
5185+
auto *ExtOp = Ext->getOperand(0);
5186+
if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
5187+
Ops.push_back(&Ext->getOperandUse(0));
5188+
Ops.push_back(&Op);
5189+
5190+
if (isa<SExtInst>(Ext))
5191+
NumSExts++;
5192+
else
5193+
NumZExts++;
5194+
51825195
continue;
51835196
}
51845197

51855198
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
5199+
if (!Shuffle)
5200+
continue;
51865201

51875202
// If the Shuffle is a splat and the operand is a zext/sext, sinking the
51885203
// operand and the s/zext can help create indexed s/umull. This is
51895204
// especially useful to prevent i64 mul being scalarized.
5190-
if (Shuffle && isSplatShuffle(Shuffle) &&
5205+
if (isSplatShuffle(Shuffle) &&
51915206
match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
51925207
Ops.push_back(&Shuffle->getOperandUse(0));
51935208
Ops.push_back(&Op);
@@ -5198,9 +5213,6 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
51985213
continue;
51995214
}
52005215

5201-
if (!Shuffle)
5202-
continue;
5203-
52045216
Value *ShuffleOperand = Shuffle->getOperand(0);
52055217
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
52065218
if (!Insert)
@@ -5232,12 +5244,26 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
52325244
NumZExts++;
52335245
}
52345246

5247+
Ops.push_back(&Insert->getOperandUse(1));
52355248
Ops.push_back(&Shuffle->getOperandUse(0));
52365249
Ops.push_back(&Op);
52375250
}
52385251

5239-
// Is it profitable to sink if we found two of the same type of extends.
5240-
return !Ops.empty() && (NumSExts == 2 || NumZExts == 2);
5252+
// It is profitable to sink if we found two of the same type of extends.
5253+
if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
5254+
return true;
5255+
5256+
// Otherwise, see if we should sink splats for indexed variants.
5257+
if (!ShouldSinkSplatForIndexedVariant(I))
5258+
return false;
5259+
5260+
Ops.clear();
5261+
if (isSplatShuffle(I->getOperand(0)))
5262+
Ops.push_back(&I->getOperandUse(0));
5263+
if (isSplatShuffle(I->getOperand(1)))
5264+
Ops.push_back(&I->getOperandUse(1));
5265+
5266+
return !Ops.empty();
52415267
}
52425268
default:
52435269
return false;

llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,18 @@ target triple = "aarch64-unknown-linux-gnu"
1010
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
1111
; CHECK-LABEL: dupext_crashtest:
1212
; CHECK: // %bb.0: // %for.body.lr.ph
13-
; CHECK-NEXT: mov w8, w0
14-
; CHECK-NEXT: dup v0.2s, w8
1513
; CHECK-NEXT: .LBB0_1: // %vector.body
1614
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
17-
; CHECK-NEXT: ldr d1, [x8]
18-
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
19-
; CHECK-NEXT: xtn v1.2s, v1.2d
20-
; CHECK-NEXT: str d1, [x8]
15+
; CHECK-NEXT: ldr d0, [x8]
16+
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
17+
; CHECK-NEXT: fmov x9, d0
18+
; CHECK-NEXT: mov x8, v0.d[1]
19+
; CHECK-NEXT: mul w9, w0, w9
20+
; CHECK-NEXT: mul w8, w0, w8
21+
; CHECK-NEXT: fmov d0, x9
22+
; CHECK-NEXT: mov v0.d[1], x8
23+
; CHECK-NEXT: xtn v0.2s, v0.2d
24+
; CHECK-NEXT: str d0, [x8]
2125
; CHECK-NEXT: b .LBB0_1
2226
for.body.lr.ph:
2327
%conv314 = zext i32 %e to i64

0 commit comments

Comments
 (0)