Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
// Do not permit build vector implicit truncation unless AllowTruncation is set.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
Expand All @@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
(Const->isOpaque() && NoOpaques))
if (!Const || (Const->isOpaque() && NoOpaques))
return false;
// When AllowTruncation is true, allow constants that have been promoted
// during type legalization as long as the value fits in the target type.
if (AllowTruncation) {
if (Const->getAPIntValue().getActiveBits() > BitWidth)
return false;
} else {
if (Const->getAPIntValue().getBitWidth() != BitWidth)
return false;
}
}
return true;
}
Expand Down Expand Up @@ -5322,7 +5331,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);

// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
/*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());

Expand All @@ -5336,7 +5346,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
/*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());

Expand All @@ -5352,7 +5363,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {

// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
/*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
const APInt& Divisor = C->getAPIntValue();
// Truncate the divisor to the target scalar type in case it was promoted
// during type legalization.
APInt Divisor = C->getAPIntValue().trunc(EltBits);

SDValue PreShift, MagicFactor, NPQFactor, PostShift;

Expand Down Expand Up @@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};

// Collect the shifts/magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
/*AllowTruncation=*/true))
return SDValue();

SDValue PreShift, PostShift, MagicFactor, NPQFactor;
Expand Down
65 changes: 12 additions & 53 deletions llvm/test/CodeGen/AArch64/rem-by-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1433,35 +1433,13 @@ entry:
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[1]
; CHECK-SD-NEXT: umov w13, v0.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[3]
; CHECK-SD-NEXT: umull x11, w9, w8
; CHECK-SD-NEXT: umull x12, w10, w8
; CHECK-SD-NEXT: umull x14, w13, w8
; CHECK-SD-NEXT: lsr x11, x11, #32
; CHECK-SD-NEXT: umull x8, w15, w8
; CHECK-SD-NEXT: lsr x12, x12, #32
; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
; CHECK-SD-NEXT: lsr x8, x8, #32
; CHECK-SD-NEXT: add w9, w9, w11
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: add w10, w10, w12
; CHECK-SD-NEXT: lsr x9, x14, #32
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: add w8, w15, w8
; CHECK-SD-NEXT: add w9, w13, w9
; CHECK-SD-NEXT: mov v0.h[2], w9
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: movi v2.4h, #7
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_7:
Expand Down Expand Up @@ -1508,32 +1486,13 @@ entry:
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
; CHECK-SD-NEXT: mov w14, #100 // =0x64
; CHECK-SD-NEXT: mov w8, #656 // =0x290
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: movk w8, #655, lsl #16
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[1]
; CHECK-SD-NEXT: umov w12, v0.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[3]
; CHECK-SD-NEXT: umull x11, w9, w8
; CHECK-SD-NEXT: umull x13, w10, w8
; CHECK-SD-NEXT: lsr x11, x11, #32
; CHECK-SD-NEXT: lsr x13, x13, #32
; CHECK-SD-NEXT: msub w9, w11, w14, w9
; CHECK-SD-NEXT: umull x11, w12, w8
; CHECK-SD-NEXT: msub w10, w13, w14, w10
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: umull x8, w15, w8
; CHECK-SD-NEXT: lsr x9, x11, #32
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: msub w9, w9, w14, w12
; CHECK-SD-NEXT: lsr x8, x8, #32
; CHECK-SD-NEXT: msub w8, w8, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w9
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: movi v2.4h, #100
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_100:
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s

; This test verifies that udiv by constant works correctly even when type
; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
; This is a regression test for a bug where v16i16 would be split into two
; v8i16 operations during legalization, the i16 constants would be promoted
; to i32, and then the second DAGCombine round would fail to recognize the
; promoted constants when trying to convert udiv into mul+shift.

define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
; CHECK-LABEL: udiv_v8i16_by_255:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32897 // =0x8081
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
; CHECK-NEXT: ret
%div = udiv <8 x i16> %x, splat (i16 255)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why this is an issue for an example this trivial. I would expect this combine would fire in the pre-type-legalization combiner before the vector elements are promoted?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example was working correctly. 8 x i16 is legal on AArch64 and gets lowered properly. For the 16 x i16 case, we don't do it because it's not legal, promote i16 constant and split it into two vector udivs in type legalization, and then go to the combiner.

ret <8 x i16> %div
}

define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
; CHECK-LABEL: udiv_v16i16_by_255:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32897 // =0x8081
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
; CHECK-NEXT: ushr v1.8h, v1.8h, #7
; CHECK-NEXT: ret
%div = udiv <16 x i16> %x, splat (i16 255)
ret <16 x i16> %div
}