diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 49467ce0a54cd..8c1e2fa6f57a8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -3261,13 +3261,16 @@ namespace ISD { template <typename ConstNodeType> bool matchUnaryPredicateImpl(SDValue Op, std::function<bool(ConstNodeType *)> Match, - bool AllowUndefs = false); + bool AllowUndefs = false, + bool AllowTruncation = false); /// Hook for matching ConstantSDNode predicate inline bool matchUnaryPredicate(SDValue Op, std::function<bool(ConstantSDNode *)> Match, - bool AllowUndefs = false) { - return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs); + bool AllowUndefs = false, + bool AllowTruncation = false) { + return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs, + AllowTruncation); } /// Hook for matching ConstantFPSDNode predicate diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b416c0efbbc4f..8f50a14da25a8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -363,7 +363,7 @@ bool ISD::isFreezeUndef(const SDNode *N) { template <typename ConstNodeType> bool ISD::matchUnaryPredicateImpl(SDValue Op, std::function<bool(ConstNodeType *)> Match, - bool AllowUndefs) { + bool AllowUndefs, bool AllowTruncation) { // FIXME: Add support for scalar UNDEF cases? if (auto *C = dyn_cast<ConstNodeType>(Op)) return Match(C); @@ -382,16 +382,17 @@ bool ISD::matchUnaryPredicateImpl(SDValue Op, } auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i)); - if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + if (!Cst || (!AllowTruncation && Cst->getValueType(0) != SVT) || + !Match(Cst)) return false; } return true; } // Build used template types.
template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>( - SDValue, std::function<bool(ConstantSDNode *)>, bool); + SDValue, std::function<bool(ConstantSDNode *)>, bool, bool); template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>( - SDValue, std::function<bool(ConstantFPSDNode *)>, bool); + SDValue, std::function<bool(ConstantFPSDNode *)>, bool, bool); bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 49ec47f4e8a70..98206b7484dc4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7971,7 +7971,7 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { return ISD::matchUnaryPredicate( Z, [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; }, - true); + /*AllowUndefs=*/true, /*AllowTruncation=*/true); } static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) { diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index b3ce00aeb36e5..14132928e876f 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -3909,9 +3909,8 @@ entry: define <8 x i8> @fshl_v8i8_c(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: fshl_v8i8_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #1 ; CHECK-SD-NEXT: shl v0.8b, v0.8b, #3 -; CHECK-SD-NEXT: usra v0.8b, v1.8b, #4 +; CHECK-SD-NEXT: usra v0.8b, v1.8b, #5 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fshl_v8i8_c: @@ -3928,8 +3927,7 @@ entry: define <8 x i8> @fshr_v8i8_c(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: fshr_v8i8_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b -; CHECK-SD-NEXT: shl v0.8b, v0.8b, #4 +; CHECK-SD-NEXT: shl v0.8b, v0.8b, #5 ; CHECK-SD-NEXT: usra v0.8b, v1.8b, #3 ; CHECK-SD-NEXT: ret ; @@ -3947,9 +3945,8 @@ entry: define <16 x i8> @fshl_v16i8_c(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-LABEL: fshl_v16i8_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushr v1.16b, v1.16b, #1 ; CHECK-SD-NEXT: shl
v0.16b, v0.16b, #3 -; CHECK-SD-NEXT: usra v0.16b, v1.16b, #4 +; CHECK-SD-NEXT: usra v0.16b, v1.16b, #5 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fshl_v16i8_c: @@ -3966,8 +3963,7 @@ entry: define <16 x i8> @fshr_v16i8_c(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-LABEL: fshr_v16i8_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-SD-NEXT: shl v0.16b, v0.16b, #4 +; CHECK-SD-NEXT: shl v0.16b, v0.16b, #5 ; CHECK-SD-NEXT: usra v0.16b, v1.16b, #3 ; CHECK-SD-NEXT: ret ; @@ -3985,9 +3981,8 @@ entry: define <4 x i16> @fshl_v4i16_c(<4 x i16> %a, <4 x i16> %b) { ; CHECK-SD-LABEL: fshl_v4i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushr v1.4h, v1.4h, #1 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #3 -; CHECK-SD-NEXT: usra v0.4h, v1.4h, #12 +; CHECK-SD-NEXT: usra v0.4h, v1.4h, #13 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fshl_v4i16_c: @@ -4004,8 +3999,7 @@ entry: define <4 x i16> @fshr_v4i16_c(<4 x i16> %a, <4 x i16> %b) { ; CHECK-SD-LABEL: fshr_v4i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h -; CHECK-SD-NEXT: shl v0.4h, v0.4h, #12 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #13 ; CHECK-SD-NEXT: usra v0.4h, v1.4h, #3 ; CHECK-SD-NEXT: ret ; @@ -4024,7 +4018,6 @@ define <7 x i16> @fshl_v7i16_c(<7 x i16> %a, <7 x i16> %b) { ; CHECK-SD-LABEL: fshl_v7i16_c: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: adrp x8, .LCPI124_0 -; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1 ; CHECK-SD-NEXT: adrp x9, .LCPI124_1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI124_0] ; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI124_1] @@ -4066,7 +4059,6 @@ define <7 x i16> @fshr_v7i16_c(<7 x i16> %a, <7 x i16> %b) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: adrp x8, .LCPI125_0 ; CHECK-SD-NEXT: adrp x9, .LCPI125_1 -; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI125_0] ; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI125_1] ; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h @@ -4105,9 +4097,8 @@ entry: define <8 x i16> @fshl_v8i16_c(<8 
x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: fshl_v8i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1 ; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3 -; CHECK-SD-NEXT: usra v0.8h, v1.8h, #12 +; CHECK-SD-NEXT: usra v0.8h, v1.8h, #13 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fshl_v8i16_c: @@ -4124,8 +4115,7 @@ entry: define <8 x i16> @fshr_v8i16_c(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: fshr_v8i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h -; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12 +; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13 ; CHECK-SD-NEXT: usra v0.8h, v1.8h, #3 ; CHECK-SD-NEXT: ret ; @@ -4143,12 +4133,10 @@ entry: define <16 x i16> @fshl_v16i16_c(<16 x i16> %a, <16 x i16> %b) { ; CHECK-SD-LABEL: fshl_v16i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushr v2.8h, v2.8h, #1 -; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3 -; CHECK-SD-NEXT: ushr v3.8h, v3.8h, #1 ; CHECK-SD-NEXT: shl v1.8h, v1.8h, #3 -; CHECK-SD-NEXT: usra v0.8h, v2.8h, #12 -; CHECK-SD-NEXT: usra v1.8h, v3.8h, #12 +; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3 +; CHECK-SD-NEXT: usra v1.8h, v3.8h, #13 +; CHECK-SD-NEXT: usra v0.8h, v2.8h, #13 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fshl_v16i16_c: @@ -4168,10 +4156,8 @@ entry: define <16 x i16> @fshr_v16i16_c(<16 x i16> %a, <16 x i16> %b) { ; CHECK-SD-LABEL: fshr_v16i16_c: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: add v1.8h, v1.8h, v1.8h -; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h -; CHECK-SD-NEXT: shl v1.8h, v1.8h, #12 -; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12 +; CHECK-SD-NEXT: shl v1.8h, v1.8h, #13 +; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13 ; CHECK-SD-NEXT: usra v1.8h, v3.8h, #3 ; CHECK-SD-NEXT: usra v0.8h, v2.8h, #3 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/smul_fix.ll b/llvm/test/CodeGen/AArch64/smul_fix.ll index 50a189df2be53..dacce720a7319 100644 --- a/llvm/test/CodeGen/AArch64/smul_fix.ll +++ b/llvm/test/CodeGen/AArch64/smul_fix.ll @@ -144,8 +144,7 @@ define <4 x i16> @widemul(<4 x 
i16> %x, <4 x i16> %y) nounwind { ; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v0.4s, #16 ; CHECK-NEXT: xtn v2.4h, v0.4s -; CHECK-NEXT: add v1.4h, v1.4h, v1.4h -; CHECK-NEXT: shl v0.4h, v1.4h, #13 +; CHECK-NEXT: shl v0.4h, v1.4h, #14 ; CHECK-NEXT: usra v0.4h, v2.4h, #2 ; CHECK-NEXT: ret %tmp = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 2) diff --git a/llvm/test/CodeGen/AArch64/umul_fix.ll b/llvm/test/CodeGen/AArch64/umul_fix.ll index 9f4da82dd74b6..823f4bdecbad6 100644 --- a/llvm/test/CodeGen/AArch64/umul_fix.ll +++ b/llvm/test/CodeGen/AArch64/umul_fix.ll @@ -152,8 +152,7 @@ define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind { ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v0.4s, #16 ; CHECK-NEXT: xtn v2.4h, v0.4s -; CHECK-NEXT: add v1.4h, v1.4h, v1.4h -; CHECK-NEXT: shl v0.4h, v1.4h, #11 +; CHECK-NEXT: shl v0.4h, v1.4h, #12 ; CHECK-NEXT: usra v0.4h, v2.4h, #4 ; CHECK-NEXT: ret %tmp = call <4 x i16> @llvm.umul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 4)