[DAGCombiner] Handle type-promoted constants in SDIV exact lowering #169950

SavchenkoValeriy · 2025-11-28T18:19:20Z

Builds on the solution proposed for #169491 and #169924 and applies it to SDIV exact as well. This is almost a carbon copy of the UDIV exact solution from #169949.

llvmbot · 2025-11-28T18:19:52Z

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: Valeriy Savchenko (SavchenkoValeriy)

Changes

Builds on the solution proposed for #169491 and #169924 and applies it to SDIV exact as well. This is almost a carbon copy of the UDIV exact solution from #169949.

Full diff: https://github.com/llvm/llvm-project/pull/169950.diff

5 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+23-9)
(modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+15-8)
(modified) llvm/test/CodeGen/AArch64/rem-by-const.ll (+29-125)
(added) llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll (+58)
(added) llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll (+41)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..82a5a4c3744c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
 
 // Determines if it is a constant integer or a splat/build vector of constant
 // integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+                                       bool AllowTruncation = false) {
   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
     return !(Const->isOpaque() && NoOpaques);
   if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
     if (Op.isUndef())
       continue;
     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
-    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
-        (Const->isOpaque() && NoOpaques))
+    if (!Const || (Const->isOpaque() && NoOpaques))
       return false;
+    // When AllowTruncation is true, allow constants that have been promoted
+    // during type legalization as long as the value fits in the target type.
+    if (AllowTruncation) {
+      if (Const->getAPIntValue().getActiveBits() > BitWidth)
+        return false;
+    } else {
+      if (Const->getAPIntValue().getBitWidth() != BitWidth)
+        return false;
+    }
   }
   return true;
 }
@@ -5180,7 +5189,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
     return false;
   };
 
-  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
+                                  /*AllowTruncation=*/true);
 }
 
 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
@@ -5244,7 +5254,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // alternate sequence.  Targets may check function attributes for size/speed
   // trade-offs.
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1) &&
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+                                 /*AllowTruncation=*/true) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildSDIV(N))
       return Op;
@@ -5322,7 +5333,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (udiv x, (1 << c)) -> x >>u c
-  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+                                 /*AllowTruncation=*/true)) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       AddToWorklist(LogBase2.getNode());
 
@@ -5336,7 +5348,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   if (N1.getOpcode() == ISD::SHL) {
     SDValue N10 = N1.getOperand(0);
-    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+    if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+                                   /*AllowTruncation=*/true)) {
       if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
         AddToWorklist(LogBase2.getNode());
 
@@ -5352,7 +5365,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
 
   // fold (udiv x, c) -> alternate
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1) &&
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+                                 /*AllowTruncation=*/true) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildUDIV(N))
       return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..21b105db6b82c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6345,7 +6345,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
-  EVT SVT = VT.getScalarType();
   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
   EVT ShSVT = ShVT.getScalarType();
 
@@ -6355,6 +6354,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
+
+    EVT CT = C->getValueType(0);
     APInt Divisor = C->getAPIntValue();
     unsigned Shift = Divisor.countr_zero();
     if (Shift) {
@@ -6363,12 +6364,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
     }
     APInt Factor = Divisor.multiplicativeInverse();
     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
-    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+    Factors.push_back(DAG.getConstant(Factor, dl, CT));
     return true;
   };
 
   // Collect all magic values from the build vector.
-  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue Shift, Factor;
@@ -6562,8 +6564,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
-
-    const APInt &Divisor = C->getAPIntValue();
+    // Truncate the divisor to the target scalar type in case it was promoted
+    // during type legalization.
+    APInt Divisor = C->getAPIntValue().trunc(EltBits);
     SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
     int NumeratorFactor = 0;
     int ShiftMask = -1;
@@ -6593,7 +6596,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
 
   // Collect the shifts / magic values from each element.
-  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue MagicFactor, Factor, Shift, ShiftMask;
@@ -6738,7 +6742,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
-    const APInt& Divisor = C->getAPIntValue();
+    // Truncate the divisor to the target scalar type in case it was promoted
+    // during type legalization.
+    APInt Divisor = C->getAPIntValue().trunc(EltBits);
 
     SDValue PreShift, MagicFactor, NPQFactor, PostShift;
 
@@ -6779,7 +6785,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   };
 
   // Collect the shifts/magic values from each element.
-  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..c19ded18c94c9 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: sv4i8_7:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-SD-NEXT:    movk x8, #37449, lsl #16
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movi v2.4h, #7
+; CHECK-SD-NEXT:    dup v1.4h, w8
 ; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    smov x10, v0.h[0]
-; CHECK-SD-NEXT:    smov x9, v0.h[1]
-; CHECK-SD-NEXT:    smov w12, v0.h[0]
-; CHECK-SD-NEXT:    smov w11, v0.h[1]
-; CHECK-SD-NEXT:    smov x13, v0.h[2]
-; CHECK-SD-NEXT:    smov w14, v0.h[2]
-; CHECK-SD-NEXT:    smov x17, v0.h[3]
-; CHECK-SD-NEXT:    smull x10, w10, w8
-; CHECK-SD-NEXT:    smull x9, w9, w8
-; CHECK-SD-NEXT:    smull x13, w13, w8
-; CHECK-SD-NEXT:    add x10, x12, x10, lsr #32
-; CHECK-SD-NEXT:    smull x8, w17, w8
-; CHECK-SD-NEXT:    add x9, x11, x9, lsr #32
-; CHECK-SD-NEXT:    asr w16, w10, #2
-; CHECK-SD-NEXT:    add x13, x14, x13, lsr #32
-; CHECK-SD-NEXT:    asr w15, w9, #2
-; CHECK-SD-NEXT:    add w10, w16, w10, lsr #31
-; CHECK-SD-NEXT:    asr w16, w13, #2
-; CHECK-SD-NEXT:    add w9, w15, w9, lsr #31
-; CHECK-SD-NEXT:    smov w15, v0.h[3]
-; CHECK-SD-NEXT:    sub w10, w10, w10, lsl #3
-; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT:    add w10, w12, w10
-; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    add w9, w11, w9
-; CHECK-SD-NEXT:    add w10, w16, w13, lsr #31
-; CHECK-SD-NEXT:    add x8, x15, x8, lsr #32
-; CHECK-SD-NEXT:    mov v0.h[1], w9
-; CHECK-SD-NEXT:    sub w9, w10, w10, lsl #3
-; CHECK-SD-NEXT:    asr w10, w8, #2
-; CHECK-SD-NEXT:    add w9, w14, w9
-; CHECK-SD-NEXT:    add w8, w10, w8, lsr #31
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT:    add w8, w15, w8
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    sshr v1.4s, v1.4s, #17
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8_7:
@@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: sv4i8_100:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    mov w14, #100 // =0x64
-; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
-; CHECK-SD-NEXT:    sshr v1.4h, v0.4h, #8
-; CHECK-SD-NEXT:    smov x9, v1.h[0]
-; CHECK-SD-NEXT:    smov x10, v1.h[1]
-; CHECK-SD-NEXT:    smov x11, v1.h[2]
-; CHECK-SD-NEXT:    smov w12, v1.h[0]
-; CHECK-SD-NEXT:    smov x13, v1.h[3]
-; CHECK-SD-NEXT:    smov w15, v1.h[1]
-; CHECK-SD-NEXT:    smull x9, w9, w8
-; CHECK-SD-NEXT:    smull x10, w10, w8
-; CHECK-SD-NEXT:    smull x11, w11, w8
-; CHECK-SD-NEXT:    asr x9, x9, #37
-; CHECK-SD-NEXT:    smull x8, w13, w8
-; CHECK-SD-NEXT:    asr x10, x10, #37
-; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
-; CHECK-SD-NEXT:    asr x11, x11, #37
-; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    msub w9, w9, w14, w12
-; CHECK-SD-NEXT:    msub w10, w10, w14, w15
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    add w9, w11, w11, lsr #31
-; CHECK-SD-NEXT:    smov w11, v1.h[2]
-; CHECK-SD-NEXT:    msub w9, w9, w14, w11
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    smov w10, v1.h[3]
-; CHECK-SD-NEXT:    msub w8, w8, w14, w10
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT:    movi v2.4h, #100
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    sshr v1.4s, v1.4s, #19
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8_100:
@@ -1433,35 +1378,13 @@ entry:
 define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8_7:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
-; CHECK-SD-NEXT:    umov w9, v0.h[0]
-; CHECK-SD-NEXT:    umov w10, v0.h[1]
-; CHECK-SD-NEXT:    umov w13, v0.h[2]
-; CHECK-SD-NEXT:    umov w15, v0.h[3]
-; CHECK-SD-NEXT:    umull x11, w9, w8
-; CHECK-SD-NEXT:    umull x12, w10, w8
-; CHECK-SD-NEXT:    umull x14, w13, w8
-; CHECK-SD-NEXT:    lsr x11, x11, #32
-; CHECK-SD-NEXT:    umull x8, w15, w8
-; CHECK-SD-NEXT:    lsr x12, x12, #32
-; CHECK-SD-NEXT:    sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT:    sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT:    lsr x8, x8, #32
-; CHECK-SD-NEXT:    add w9, w9, w11
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    add w10, w10, w12
-; CHECK-SD-NEXT:    lsr x9, x14, #32
-; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    add w8, w15, w8
-; CHECK-SD-NEXT:    add w9, w13, w9
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    movi v2.4h, #7
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1431,13 @@ entry:
 define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8_100:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT:    mov w14, #100 // =0x64
+; CHECK-SD-NEXT:    mov w8, #656 // =0x290
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT:    movk w8, #655, lsl #16
-; CHECK-SD-NEXT:    umov w9, v0.h[0]
-; CHECK-SD-NEXT:    umov w10, v0.h[1]
-; CHECK-SD-NEXT:    umov w12, v0.h[2]
-; CHECK-SD-NEXT:    umov w15, v0.h[3]
-; CHECK-SD-NEXT:    umull x11, w9, w8
-; CHECK-SD-NEXT:    umull x13, w10, w8
-; CHECK-SD-NEXT:    lsr x11, x11, #32
-; CHECK-SD-NEXT:    lsr x13, x13, #32
-; CHECK-SD-NEXT:    msub w9, w11, w14, w9
-; CHECK-SD-NEXT:    umull x11, w12, w8
-; CHECK-SD-NEXT:    msub w10, w13, w14, w10
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    umull x8, w15, w8
-; CHECK-SD-NEXT:    lsr x9, x11, #32
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    msub w9, w9, w14, w12
-; CHECK-SD-NEXT:    lsr x8, x8, #32
-; CHECK-SD-NEXT:    msub w8, w8, w14, w15
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    movi v2.4h, #100
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..b686a855b5276
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
+; CHECK-LABEL: sdiv_v8i16_by_7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
+; CHECK-NEXT:    usra v0.8h, v0.8h, #15
+; CHECK-NEXT:    ret
+  %div = sdiv <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
+; CHECK-LABEL: sdiv_v16i16_by_7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    smull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT:    smull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    smull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT:    smull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
+; CHECK-NEXT:    sshr v1.8h, v1.8h, #1
+; CHECK-NEXT:    usra v0.8h, v0.8h, #15
+; CHECK-NEXT:    usra v1.8h, v1.8h, #15
+; CHECK-NEXT:    ret
+  %div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+}
+
+define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: sdiv_exact_v8i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v1.8h, #1, lsl #8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %div = sdiv exact <8 x i16> %x, splat (i16 255)
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: sdiv_exact_v16i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v2.8h, #1, lsl #8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    mul v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+  %div = sdiv exact <16 x i16> %x, splat (i16 255)
+  ret <16 x i16> %div
+}
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..efe3b84e7a0f0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32897 // =0x8081
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #7
+; CHECK-NEXT:    ret
+  %div = udiv <8 x i16> %x, splat (i16 255)
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32897 // =0x8081
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #7
+; CHECK-NEXT:    ushr v1.8h, v1.8h, #7
+; CHECK-NEXT:    ret
+  %div = udiv <16 x i16> %x, splat (i16 255)
+  ret <16 x i16> %div
+}

SavchenkoValeriy added 6 commits November 25, 2025 14:30

[AArch64][NFC] Add test for vector udiv scalarization

da78214

[DAGCombiner] Allow promoted constants when lowering vector UDIVs

3850f0f

[AArch64][NFC] Add test for vector sdiv scalarization

4663512

[DAGCombiner] Allow promoted constants when lowering vector SDIVs

27f623c

[AArch64][NFC] Add test for vector sdiv exact scalarization

0fb68f4

[DAGCombiner] Allow promoted constants when lowering vector SDIV exacts

feebe26

SavchenkoValeriy requested review from RKSimon, aemerson, arsenm, davemgreen and jyli0116 November 28, 2025 18:19

llvmbot added the backend:AArch64 and llvm:SelectionDAG (SelectionDAGISel as well) labels Nov 28, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DAGCombiner] Handle type-promoted constants in SDIV exact lowering #169950

[DAGCombiner] Handle type-promoted constants in SDIV exact lowering #169950

SavchenkoValeriy commented Nov 28, 2025

Uh oh!

llvmbot commented Nov 28, 2025 •

edited

Loading

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

[DAGCombiner] Handle type-promoted constants in SDIV exact lowering #169950

Are you sure you want to change the base?

[DAGCombiner] Handle type-promoted constants in SDIV exact lowering #169950

Conversation

SavchenkoValeriy commented Nov 28, 2025

Uh oh!

llvmbot commented Nov 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

llvmbot commented Nov 28, 2025 •

edited

Loading