-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAGCombiner] Handle type-promoted constants in UDIV exact lowering #169949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
SavchenkoValeriy
wants to merge
4
commits into
llvm:main
Choose a base branch
from
SavchenkoValeriy:vector-udiv-exact-by-const-lowering
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
[DAGCombiner] Handle type-promoted constants in UDIV exact lowering #169949
SavchenkoValeriy
wants to merge
4
commits into
llvm:main
from
SavchenkoValeriy:vector-udiv-exact-by-const-lowering
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: Valeriy Savchenko (SavchenkoValeriy) ChangesBuilds up on the solution proposed for #169491 and applies it for UDIV exact as well. Full diff: https://github.com/llvm/llvm-project/pull/169949.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..33c6855349928 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+ bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
- if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
- (Const->isOpaque() && NoOpaques))
+ if (!Const || (Const->isOpaque() && NoOpaques))
return false;
+ // When AllowTruncation is true, allow constants that have been promoted
+ // during type legalization as long as the value fits in the target type.
+ if (AllowTruncation) {
+ if (Const->getAPIntValue().getActiveBits() > BitWidth)
+ return false;
+ } else {
+ if (Const->getAPIntValue().getBitWidth() != BitWidth)
+ return false;
+ }
}
return true;
}
@@ -5322,7 +5331,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5336,7 +5346,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5352,7 +5363,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..be02c451d452c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6403,7 +6403,6 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
@@ -6413,6 +6412,8 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
+
+ EVT CT = C->getValueType(0);
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countr_zero();
if (Shift) {
@@ -6422,14 +6423,15 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
// Calculate the multiplicative inverse modulo BW.
APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
- Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, CT));
return true;
};
SDValue Op1 = N->getOperand(1);
// Collect all magic values from the build vector.
- if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue Shift, Factor;
@@ -6738,7 +6740,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- const APInt& Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
@@ -6779,7 +6783,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};
// Collect the shifts/magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..ffaf045fa45c2 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -1433,35 +1433,13 @@ entry:
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #9362, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w13, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x12, w10, w8
-; CHECK-SD-NEXT: umull x14, w13, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x12, x12, #32
-; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w9, w9, w11
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w10, w12
-; CHECK-SD-NEXT: lsr x9, x14, #32
-; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: add w8, w15, w8
-; CHECK-SD-NEXT: add w9, w13, w9
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #7
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT: mov w14, #100 // =0x64
+; CHECK-SD-NEXT: mov w8, #656 // =0x290
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #655, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w12, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x13, w10, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: lsr x13, x13, #32
-; CHECK-SD-NEXT: msub w9, w11, w14, w9
-; CHECK-SD-NEXT: umull x11, w12, w8
-; CHECK-SD-NEXT: msub w10, w13, w14, w10
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x9, x11, #32
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: msub w8, w8, w14, w15
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #100
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..41f789eaebeed
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %div
+}
+
+define <8 x i16> @udiv_exact_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_exact_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvni v1.8h, #1, lsl #8
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %div = udiv exact <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_exact_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_exact_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvni v2.8h, #1, lsl #8
+; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
+ %div = udiv exact <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %div
+}
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Builds up on the solution proposed for #169491 and applies it for UDIV exact as well.