llvm · nasherm · Mar 19, 2025 · Jan 9, 2025 · Jan 30, 2025 · Feb 5, 2025
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1458,16 +1458,76 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
   if (LooksLikeAFreeShift())
     return 0;
 
+  // On targets with both DSP and MVE the compiler may try to vectorize while
+  // also using scalar (S/U)MLAL operations. For the pattern
+  //   ext(mul(ext(i16), ext(i16)))
+  // codegen performs better when only scalar (S/U)MLAL ops are used instead
+  // of a mix of vector ops and (S/U)MLAL ops, so such a mul is costed as free.
+  //
+  // Returns true iff I is a scalar i32 mul whose operands are both sext or
+  // both zext from i16 and whose users are all extends. A mixed sext/zext
+  // multiply is not treated as an (S/U)MLAL candidate.
+  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
+                                 Type *Ty) -> bool {
+    if (!ST->hasDSP())
+      return false;
+
+    if (!I)
+      return false;
+
+    if (Opcode != Instruction::Mul)
+      return false;
+
+    if (Ty->isVectorTy())
+      return false;
+
+    auto IsExtInst = [](const Value *V) -> bool {
+      return isa<SExtInst>(V) || isa<ZExtInst>(V);
+    };
+    // Only called on values already known to be extends, hence cast<>.
+    auto ExtOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
+      return cast<Instruction>(LHS)->getOpcode() ==
+             cast<Instruction>(RHS)->getOpcode();
+    };
+    auto IsExtensionFromHalf = [](const Value *V) -> bool {
+      return cast<Instruction>(V)->getOperand(0)->getType()->isIntegerTy(16);
+    };
+
+    // We check the arguments of the instruction to see if they're extends
+    auto *BinOp = dyn_cast<BinaryOperator>(I);
+    if (!BinOp)
+      return false;
+    Value *Op0 = BinOp->getOperand(0);
+    Value *Op1 = BinOp->getOperand(1);
+    if (!IsExtInst(Op0) || !IsExtInst(Op1) || !ExtOpcodesEqual(Op0, Op1))
+      return false;
+
+    // We're interested in an i32 product of two values extended from i16.
+    if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
+        !IsExtensionFromHalf(Op1))
+      return false;
+
+    // The product must only be consumed by further extends, i.e. widened
+    // before it is accumulated.
+    for (const User *U : I->users())
+      if (!IsExtInst(U))
+        return false;
+    return true;
+  };
+
+  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
+    return 0;
+
   // Default to cheap (throughput/size of 1 instruction) but adjust throughput
   // for "multiple beats" potentially needed by MVE instructions.
   int BaseCost = 1;
   if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
     BaseCost = ST->getMVEVectorCostFactor(CostKind);
 
-  // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
-  // without treating floats as more expensive that scalars or increasing the
-  // costs for custom operations. The results is also multiplied by the
-  // MVEVectorCostFactor where appropriate.
+  // The rest of this mostly follows what is done in
+  // BaseT::getArithmeticInstrCost, without treating floats as more expensive
+  // than scalars or increasing the costs for custom operations. The result is
+  // also multiplied by the MVEVectorCostFactor where appropriate.
   if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
     return LT.first * BaseCost;
 

diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp  < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
+define i64 @test(i16 %a, i16 %b) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+;
+; CHECK-NO-DSP-LABEL: 'test'
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
+; Bare pattern sext(mul(sext(i16), sext(i16))) to i64: the i32 mul is costed 0 with +dsp and 1 without.
+    %as = sext i16 %a to i32
+    %bs = sext i16 %b to i32
+    %m = mul i32 %as, %bs
+    %ms = sext i32 %m to i64
+    ret i64 %ms
+}
+
+define i64 @withadd(i16 %a, i16 %b, i64 %c) {
+; CHECK-LABEL: 'withadd'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withadd'
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; Same pattern with the widened product added to an i64 accumulator; only the mul's cost differs between the two runs.
+    %as = sext i16 %a to i32
+    %bs = sext i16 %b to i32
+    %m = mul i32 %as, %bs
+    %ms = sext i32 %m to i64
+    %r = add i64 %c, %ms
+    ret i64 %r
+}
+
+define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
+; CHECK-LABEL: 'withloads'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
+; CHECK-NO-DSP-LABEL: 'withloads'
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
+; CHECK-NO-DSP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+; Operands are i16 loads; their sexts are costed 0 in both runs (presumably folded into the loads), the mul is 0 only with +dsp.
+    %a = load i16, ptr %pa
+    %b = load i16, ptr %pb
+    %as = sext i16 %a to i32
+    %bs = sext i16 %b to i32
+    %m = mul i32 %as, %bs
+    %ms = sext i32 %m to i64
+    %r = add i64 %c, %ms
+    ret i64 %r
+}