Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 64 additions & 4 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1458,16 +1458,76 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
if (LooksLikeAFreeShift())
return 0;

// When targets have both DSP and MVE, the compiler will attempt to vectorize
// as well as use scalar (S/U)MLAL operations. For the pattern
// ext(mul(ext(i16), ext(i16))) we find that the generated code performs
// better when only scalar (S/U)MLAL ops are used instead of trying to mix
// vector ops with (S/U)MLAL ops. We therefore check if a mul instruction is
// used in a (S/U)MLAL pattern.
//
// MulInDSPMLALPattern(I, Opcode, Ty) returns true only when all of the
// following hold:
//   * the subtarget has DSP and a context instruction I is available;
//   * Opcode is Instruction::Mul and Ty is not a vector type;
//   * I is a BinaryOperator of type i32 whose two operands are both sext or
//     both zext instructions, each extending from i16;
//   * every user of I is itself a sext or zext instruction.
// Otherwise it returns false.
auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
                               Type *Ty) -> bool {
  if (!ST->hasDSP())
    return false;

  if (!I)
    return false;

  if (Opcode != Instruction::Mul)
    return false;

  if (Ty->isVectorTy())
    return false;

  auto IsExtInst = [](const Value *V) -> bool {
    return isa<SExtInst>(V) || isa<ZExtInst>(V);
  };
  // True when V is a sext/zext whose source operand is an i16. The cast
  // result is bound and checked rather than dereferencing dyn_cast<> blindly.
  auto IsExtensionFromHalf = [](const Value *V) -> bool {
    if (const auto *SExt = dyn_cast<SExtInst>(V))
      return SExt->getOperand(0)->getType()->isIntegerTy(16);
    if (const auto *ZExt = dyn_cast<ZExtInst>(V))
      return ZExt->getOperand(0)->getType()->isIntegerTy(16);
    return false;
  };

  // We check the arguments of the instruction to see if they're extends
  auto *BinOp = dyn_cast<BinaryOperator>(I);
  if (!BinOp)
    return false;
  Value *Op0 = BinOp->getOperand(0);
  Value *Op1 = BinOp->getOperand(1);
  if (!IsExtInst(Op0) || !IsExtInst(Op1))
    return false;

  // Both operands must be extended the same way: a mixed sext/zext pair is
  // neither the signed nor the unsigned form of the pattern.
  if (isa<SExtInst>(Op0) != isa<SExtInst>(Op1))
    return false;

  // We're interested in an i32 mul of two extends from i16
  if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
      !IsExtensionFromHalf(Op1))
    return false;

  // The result must only feed further extends (sext or zext).
  // NOTE(review): neither the destination width of those extends nor their
  // signedness is checked here, and a mul with no users passes vacuously -
  // confirm that this is intended.
  for (auto *U : I->users())
    if (!IsExtInst(U))
      return false;
  return true;
};

if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
return 0;

// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;
if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
BaseCost = ST->getMVEVectorCostFactor(CostKind);

// The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
// without treating floats as more expensive than scalars or increasing the
// costs for custom operations. The result is also multiplied by the
// MVEVectorCostFactor where appropriate.
// The rest of this mostly follows what is done in
// BaseT::getArithmeticInstrCost, without treating floats as more expensive
// than scalars or increasing the costs for custom operations. The result is
// also multiplied by the MVEVectorCostFactor where appropriate.
if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
return LT.first * BaseCost;

Expand Down
80 changes: 80 additions & 0 deletions llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
; Minimal form of the pattern: an i32 mul of two values sign-extended from
; i16, whose only user is a sext to i64. The expectations below record the
; mul as cost 0 in the +dsp run and cost 1 in the run without DSP; every
; other instruction has the same cost in both runs.
define i64 @test(i16 %a, i16 %b) {
; CHECK-LABEL: 'test'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
;
; CHECK-NO-DSP-LABEL: 'test'
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
;
%as = sext i16 %a to i32
%bs = sext i16 %b to i32
%m = mul i32 %as, %bs
%ms = sext i32 %m to i64
ret i64 %ms
}

; Multiply-accumulate form: the sign-extended i16 x i16 product is extended
; to i64 and added to the i64 accumulator %c. As recorded below, the mul is
; cost 0 in the +dsp run and cost 1 without DSP, while the i64 add is cost 2
; in both runs.
define i64 @withadd(i16 %a, i16 %b, i64 %c) {
; CHECK-LABEL: 'withadd'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
; CHECK-NO-DSP-LABEL: 'withadd'
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
%as = sext i16 %a to i32
%bs = sext i16 %b to i32
%m = mul i32 %as, %bs
%ms = sext i32 %m to i64
%r = add i64 %c, %ms
ret i64 %r
}

; Same multiply-accumulate shape as above, but the i16 operands come from
; loads. As recorded below, the two sexts of the loaded values are cost 0 in
; both runs; the mul is cost 0 in the +dsp run and cost 1 without DSP.
define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
; CHECK-LABEL: 'withloads'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
; CHECK-NO-DSP-LABEL: 'withloads'
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
%a = load i16, ptr %pa
%b = load i16, ptr %pb
%as = sext i16 %a to i32
%bs = sext i16 %b to i32
%m = mul i32 %as, %bs
%ms = sext i32 %m to i64
%r = add i64 %c, %ms
ret i64 %r
}