@@ -1497,6 +1497,65 @@ static bool isAllActivePredicate(Value *Pred) {
14971497 return (C && C->isAllOnesValue ());
14981498}
14991499
1500+ // Simplify `V` by only considering the operations that affect active lanes.
1501+ // This function should only return existing Values or newly created Constants.
1502+ static Value *stripInactiveLanes (Value *V, const Value *Pg) {
1503+ auto *Dup = dyn_cast<IntrinsicInst>(V);
1504+ if (Dup && Dup->getIntrinsicID () == Intrinsic::aarch64_sve_dup &&
1505+ Dup->getOperand (1 ) == Pg && isa<Constant>(Dup->getOperand (2 )))
1506+ return ConstantVector::getSplat (
1507+ cast<VectorType>(V->getType ())->getElementCount (),
1508+ cast<Constant>(Dup->getOperand (2 )));
1509+
1510+ return V;
1511+ }
1512+
1513+ static std::optional<Instruction *>
1514+ simplifySVEIntrinsicBinOp (InstCombiner &IC, IntrinsicInst &II,
1515+ const SVEIntrinsicInfo &IInfo) {
1516+ const unsigned Opc = IInfo.getMatchingIROpode ();
1517+ assert (Instruction::isBinaryOp (Opc) && " Expected a binary operation!" );
1518+
1519+ Value *Pg = II.getOperand (0 );
1520+ Value *Op1 = II.getOperand (1 );
1521+ Value *Op2 = II.getOperand (2 );
1522+ const DataLayout &DL = II.getDataLayout ();
1523+
1524+ // Canonicalise constants to the RHS.
1525+ if (Instruction::isCommutative (Opc) && IInfo.inactiveLanesAreNotDefined () &&
1526+ isa<Constant>(Op1) && !isa<Constant>(Op2)) {
1527+ IC.replaceOperand (II, 1 , Op2);
1528+ IC.replaceOperand (II, 2 , Op1);
1529+ return &II;
1530+ }
1531+
1532+ // Only active lanes matter when simplifying the operation.
1533+ Op1 = stripInactiveLanes (Op1, Pg);
1534+ Op2 = stripInactiveLanes (Op2, Pg);
1535+
1536+ Value *SimpleII;
1537+ if (auto FII = dyn_cast<FPMathOperator>(&II))
1538+ SimpleII = simplifyBinOp (Opc, Op1, Op2, FII->getFastMathFlags (), DL);
1539+ else
1540+ SimpleII = simplifyBinOp (Opc, Op1, Op2, DL);
1541+
1542+ if (!SimpleII)
1543+ return std::nullopt ;
1544+
1545+ if (IInfo.inactiveLanesAreNotDefined ())
1546+ return IC.replaceInstUsesWith (II, SimpleII);
1547+
1548+ Value *Inactive = II.getOperand (IInfo.getOperandIdxInactiveLanesTakenFrom ());
1549+
1550+ // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
1551+ if (SimpleII == Inactive)
1552+ return IC.replaceInstUsesWith (II, SimpleII);
1553+
1554+ // Inactive lanes must be preserved.
1555+ SimpleII = IC.Builder .CreateSelect (Pg, SimpleII, Inactive);
1556+ return IC.replaceInstUsesWith (II, SimpleII);
1557+ }
1558+
15001559// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
15011560// to operations with less strict inactive lane requirements.
15021561static std::optional<Instruction *>
@@ -1537,6 +1596,11 @@ simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II,
15371596 }
15381597 }
15391598
1599+ // Operation specific simplifications.
1600+ if (IInfo.hasMatchingIROpode () &&
1601+ Instruction::isBinaryOp (IInfo.getMatchingIROpode ()))
1602+ return simplifySVEIntrinsicBinOp (IC, II, IInfo);
1603+
15401604 return std::nullopt ;
15411605}
15421606
@@ -2220,68 +2284,6 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
22202284 return std::nullopt ;
22212285}
22222286
2223- // Simplify `V` by only considering the operations that affect active lanes.
2224- // This function should only return existing Values or newly created Constants.
2225- static Value *stripInactiveLanes (Value *V, const Value *Pg) {
2226- auto *Dup = dyn_cast<IntrinsicInst>(V);
2227- if (Dup && Dup->getIntrinsicID () == Intrinsic::aarch64_sve_dup &&
2228- Dup->getOperand (1 ) == Pg && isa<Constant>(Dup->getOperand (2 )))
2229- return ConstantVector::getSplat (
2230- cast<VectorType>(V->getType ())->getElementCount (),
2231- cast<Constant>(Dup->getOperand (2 )));
2232-
2233- return V;
2234- }
2235-
2236- static std::optional<Instruction *>
2237- instCombineSVEVectorMul (InstCombiner &IC, IntrinsicInst &II,
2238- const SVEIntrinsicInfo &IInfo) {
2239- const unsigned Opc = IInfo.getMatchingIROpode ();
2240- if (!Instruction::isBinaryOp (Opc))
2241- return std::nullopt ;
2242-
2243- Value *Pg = II.getOperand (0 );
2244- Value *Op1 = II.getOperand (1 );
2245- Value *Op2 = II.getOperand (2 );
2246- const DataLayout &DL = II.getDataLayout ();
2247-
2248- // Canonicalise constants to the RHS.
2249- if (Instruction::isCommutative (Opc) && IInfo.inactiveLanesAreNotDefined () &&
2250- isa<Constant>(Op1) && !isa<Constant>(Op2)) {
2251- IC.replaceOperand (II, 1 , Op2);
2252- IC.replaceOperand (II, 2 , Op1);
2253- return &II;
2254- }
2255-
2256- // Only active lanes matter when simplifying the operation.
2257- Op1 = stripInactiveLanes (Op1, Pg);
2258- Op2 = stripInactiveLanes (Op2, Pg);
2259-
2260- Value *SimpleII;
2261- if (auto FII = dyn_cast<FPMathOperator>(&II))
2262- SimpleII = simplifyBinOp (Opc, Op1, Op2, FII->getFastMathFlags (), DL);
2263- else
2264- SimpleII = simplifyBinOp (Opc, Op1, Op2, DL);
2265-
2266- if (SimpleII) {
2267- if (IInfo.inactiveLanesAreNotDefined ())
2268- return IC.replaceInstUsesWith (II, SimpleII);
2269-
2270- Value *Inactive =
2271- II.getOperand (IInfo.getOperandIdxInactiveLanesTakenFrom ());
2272-
2273- // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
2274- if (SimpleII == Inactive)
2275- return IC.replaceInstUsesWith (II, SimpleII);
2276-
2277- // Inactive lanes must be preserved.
2278- SimpleII = IC.Builder .CreateSelect (Pg, SimpleII, Inactive);
2279- return IC.replaceInstUsesWith (II, SimpleII);
2280- }
2281-
2282- return instCombineSVEVectorBinOp (IC, II);
2283- }
2284-
22852287static std::optional<Instruction *> instCombineSVEUnpack (InstCombiner &IC,
22862288 IntrinsicInst &II) {
22872289 Value *UnpackArg = II.getArgOperand (0 );
@@ -2689,10 +2691,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
26892691 return instCombineSVEVectorFAdd (IC, II);
26902692 case Intrinsic::aarch64_sve_fadd_u:
26912693 return instCombineSVEVectorFAddU (IC, II);
2692- case Intrinsic::aarch64_sve_fmul:
2693- return instCombineSVEVectorMul (IC, II, IInfo);
26942694 case Intrinsic::aarch64_sve_fmul_u:
2695- return instCombineSVEVectorMul (IC, II, IInfo );
2695+ return instCombineSVEVectorBinOp (IC, II);
26962696 case Intrinsic::aarch64_sve_fsub:
26972697 return instCombineSVEVectorFSub (IC, II);
26982698 case Intrinsic::aarch64_sve_fsub_u:
@@ -2703,10 +2703,6 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
27032703 return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
27042704 Intrinsic::aarch64_sve_mla_u>(
27052705 IC, II, true );
2706- case Intrinsic::aarch64_sve_mul:
2707- return instCombineSVEVectorMul (IC, II, IInfo);
2708- case Intrinsic::aarch64_sve_mul_u:
2709- return instCombineSVEVectorMul (IC, II, IInfo);
27102706 case Intrinsic::aarch64_sve_sub:
27112707 return instCombineSVEVectorSub (IC, II);
27122708 case Intrinsic::aarch64_sve_sub_u:
0 commit comments